vmm_sys_util/linux/epoll.rs
1// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: BSD-3-Clause
3
4//! Safe wrappers over the
5//! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html) API.
6
7use std::io;
8use std::ops::{Deref, Drop};
9use std::os::unix::io::{AsRawFd, RawFd};
10
11#[cfg(any(target_os = "linux", target_os = "android"))]
12use bitflags::bitflags;
13use libc::{
14 epoll_create1, epoll_ctl, epoll_event, epoll_wait, EPOLLERR, EPOLLET, EPOLLEXCLUSIVE, EPOLLHUP,
15 EPOLLIN, EPOLLONESHOT, EPOLLOUT, EPOLLPRI, EPOLLRDHUP, EPOLLWAKEUP, EPOLL_CLOEXEC,
16 EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD,
17};
18
19use crate::syscall::SyscallReturnCode;
20
21/// Wrapper over `EPOLL_CTL_*` operations that can be performed on a file descriptor.
22#[derive(Debug)]
23#[repr(i32)]
24pub enum ControlOperation {
25 /// Add a file descriptor to the interest list.
26 Add = EPOLL_CTL_ADD,
27 /// Change the settings associated with a file descriptor that is
28 /// already in the interest list.
29 Modify = EPOLL_CTL_MOD,
30 /// Remove a file descriptor from the interest list.
31 Delete = EPOLL_CTL_DEL,
32}
33
34bitflags! {
35 /// The type of events we can monitor a file descriptor for.
36 pub struct EventSet: u32 {
37 /// The associated file descriptor is available for read operations.
38 const IN = EPOLLIN as u32;
39 /// The associated file descriptor is available for write operations.
40 const OUT = EPOLLOUT as u32;
41 /// Error condition happened on the associated file descriptor.
42 const ERROR = EPOLLERR as u32;
43 /// This can be used to detect peer shutdown when using Edge Triggered monitoring.
44 const READ_HANG_UP = EPOLLRDHUP as u32;
45 /// Sets the Edge Triggered behavior for the associated file descriptor.
46 /// The default behavior is Level Triggered.
47 const EDGE_TRIGGERED = EPOLLET as u32;
48 /// Hang up happened on the associated file descriptor. Note that `epoll_wait`
49 /// will always wait for this event and it is not necessary to set it in events.
50 const HANG_UP = EPOLLHUP as u32;
51 /// There is an exceptional condition on that file descriptor. It is mostly used to
52 /// set high priority for some data.
53 const PRIORITY = EPOLLPRI as u32;
54 /// The event is considered as being "processed" from the time when it is returned
55 /// by a call to `epoll_wait` until the next call to `epoll_wait` on the same
56 /// epoll file descriptor, the closure of that file descriptor, the removal of the
57 /// event file descriptor via EPOLL_CTL_DEL, or the clearing of EPOLLWAKEUP
58 /// for the event file descriptor via EPOLL_CTL_MOD.
59 const WAKE_UP = EPOLLWAKEUP as u32;
60 /// Sets the one-shot behavior for the associated file descriptor.
61 const ONE_SHOT = EPOLLONESHOT as u32;
62 /// Sets an exclusive wake up mode for the epoll file descriptor that is being
63 /// attached to the associated file descriptor.
64 /// When a wake up event occurs and multiple epoll file descriptors are attached to
65 /// the same target file using this mode, one or more of the epoll file descriptors
66 /// will receive an event with `epoll_wait`. The default here is for all those file
67 /// descriptors to receive an event.
68 const EXCLUSIVE = EPOLLEXCLUSIVE as u32;
69 }
70}
71
72/// Wrapper over
73/// ['libc::epoll_event'](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html).
74// We are using `transparent` here to be super sure that this struct and its fields
75// have the same alignment as those from the `epoll_event` struct from C.
76#[repr(transparent)]
77#[derive(Clone, Copy)]
78pub struct EpollEvent(epoll_event);
79
80impl std::fmt::Debug for EpollEvent {
81 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
82 write!(f, "{{ events: {}, data: {} }}", self.events(), self.data())
83 }
84}
85
86impl Deref for EpollEvent {
87 type Target = epoll_event;
88 fn deref(&self) -> &Self::Target {
89 &self.0
90 }
91}
92
93impl Default for EpollEvent {
94 fn default() -> Self {
95 EpollEvent(epoll_event {
96 events: 0u32,
97 u64: 0u64,
98 })
99 }
100}
101
102impl EpollEvent {
103 /// Create a new epoll_event instance.
104 ///
105 /// # Arguments
106 ///
107 /// `events` - contains an event mask.
108 /// `data` - a user data variable. `data` field can be a fd on which
109 /// we want to monitor the events specified by `events`.
110 ///
111 /// # Examples
112 ///
113 /// ```
114 /// extern crate vmm_sys_util;
115 /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
116 ///
117 /// let event = EpollEvent::new(EventSet::IN, 2);
118 /// ```
119 pub fn new(events: EventSet, data: u64) -> Self {
120 EpollEvent(epoll_event {
121 events: events.bits(),
122 u64: data,
123 })
124 }
125
126 /// Returns the `events` from
127 /// ['libc::epoll_event'](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html).
128 ///
129 /// # Examples
130 ///
131 /// ```
132 /// extern crate vmm_sys_util;
133 /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
134 ///
135 /// let event = EpollEvent::new(EventSet::IN, 2);
136 /// assert_eq!(event.events(), 1);
137 /// ```
138 pub fn events(&self) -> u32 {
139 self.events
140 }
141
142 /// Returns the `EventSet` corresponding to `epoll_event.events`.
143 ///
144 /// # Panics
145 ///
146 /// Panics if `libc::epoll_event` contains invalid events.
147 ///
148 ///
149 /// # Examples
150 ///
151 /// ```
152 /// extern crate vmm_sys_util;
153 /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
154 ///
155 /// let event = EpollEvent::new(EventSet::IN, 2);
156 /// assert_eq!(event.event_set(), EventSet::IN);
157 /// ```
158 pub fn event_set(&self) -> EventSet {
159 // This unwrap is safe because `epoll_events` can only be user created or
160 // initialized by the kernel. We trust the kernel to only send us valid
161 // events. The user can only initialize `epoll_events` using valid events.
162 EventSet::from_bits(self.events()).unwrap()
163 }
164
165 /// Returns the `data` from the `libc::epoll_event`.
166 ///
167 /// # Examples
168 ///
169 /// ```
170 /// extern crate vmm_sys_util;
171 /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
172 ///
173 /// let event = EpollEvent::new(EventSet::IN, 2);
174 /// assert_eq!(event.data(), 2);
175 /// ```
176 pub fn data(&self) -> u64 {
177 self.u64
178 }
179
180 /// Converts the `libc::epoll_event` data to a RawFd.
181 ///
182 /// This conversion is lossy when the data does not correspond to a RawFd
183 /// (data does not fit in a i32).
184 ///
185 /// # Examples
186 ///
187 /// ```
188 /// extern crate vmm_sys_util;
189 /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
190 ///
191 /// let event = EpollEvent::new(EventSet::IN, 2);
192 /// assert_eq!(event.fd(), 2);
193 /// ```
194 pub fn fd(&self) -> RawFd {
195 self.u64 as i32
196 }
197}
198
/// Owner of an epoll file descriptor, exposing safe wrappers over the epoll calls.
///
/// The descriptor is closed when the value is dropped.
#[derive(Debug)]
pub struct Epoll {
    // Raw descriptor obtained from `epoll_create1`.
    epoll_fd: RawFd,
}
204
205impl Epoll {
206 /// Create a new epoll file descriptor.
207 pub fn new() -> io::Result<Self> {
208 let epoll_fd = SyscallReturnCode(
209 // SAFETY: Safe because the return code is transformed by `into_result` in a `Result`.
210 unsafe { epoll_create1(EPOLL_CLOEXEC) },
211 )
212 .into_result()?;
213 Ok(Epoll { epoll_fd })
214 }
215
216 /// Wrapper for `libc::epoll_ctl`.
217 ///
218 /// This can be used for adding, modifying or removing a file descriptor in the
219 /// interest list of the epoll instance.
220 ///
221 /// # Arguments
222 ///
223 /// * `operation` - refers to the action to be performed on the file descriptor.
224 /// * `fd` - the file descriptor on which we want to perform `operation`.
225 /// * `event` - refers to the `epoll_event` instance that is linked to `fd`.
226 ///
227 /// # Examples
228 ///
229 /// ```
230 /// extern crate vmm_sys_util;
231 ///
232 /// use std::os::unix::io::AsRawFd;
233 /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
234 /// use vmm_sys_util::eventfd::EventFd;
235 ///
236 /// let epoll = Epoll::new().unwrap();
237 /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
238 /// epoll
239 /// .ctl(
240 /// ControlOperation::Add,
241 /// event_fd.as_raw_fd() as i32,
242 /// EpollEvent::new(EventSet::OUT, event_fd.as_raw_fd() as u64),
243 /// )
244 /// .unwrap();
245 /// epoll
246 /// .ctl(
247 /// ControlOperation::Modify,
248 /// event_fd.as_raw_fd() as i32,
249 /// EpollEvent::new(EventSet::IN, 4),
250 /// )
251 /// .unwrap();
252 /// ```
253 pub fn ctl(&self, operation: ControlOperation, fd: RawFd, event: EpollEvent) -> io::Result<()> {
254 SyscallReturnCode(
255 // SAFETY: Safe because we give a valid epoll file descriptor, a valid file descriptor
256 // to watch, as well as a valid epoll_event structure. We also check the return value.
257 unsafe {
258 epoll_ctl(
259 self.epoll_fd,
260 operation as i32,
261 fd,
262 &event as *const EpollEvent as *mut epoll_event,
263 )
264 },
265 )
266 .into_empty_result()
267 }
268
269 /// Wrapper for `libc::epoll_wait`.
270 /// Returns the number of file descriptors in the interest list that became ready
271 /// for I/O or `errno` if an error occurred.
272 ///
273 /// # Arguments
274 ///
275 /// * `timeout` - specifies for how long the `epoll_wait` system call will block
276 /// (measured in milliseconds).
277 /// * `events` - points to a memory area that will be used for storing the events
278 /// returned by `epoll_wait()` call.
279 ///
280 /// # Examples
281 ///
282 /// ```
283 /// extern crate vmm_sys_util;
284 ///
285 /// use std::os::unix::io::AsRawFd;
286 /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
287 /// use vmm_sys_util::eventfd::EventFd;
288 ///
289 /// let epoll = Epoll::new().unwrap();
290 /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
291 ///
292 /// let mut ready_events = vec![EpollEvent::default(); 10];
293 /// epoll
294 /// .ctl(
295 /// ControlOperation::Add,
296 /// event_fd.as_raw_fd() as i32,
297 /// EpollEvent::new(EventSet::OUT, 4),
298 /// )
299 /// .unwrap();
300 /// let ev_count = epoll.wait(-1, &mut ready_events[..]).unwrap();
301 /// assert_eq!(ev_count, 1);
302 /// ```
303 pub fn wait(&self, timeout: i32, events: &mut [EpollEvent]) -> io::Result<usize> {
304 let events_count = SyscallReturnCode(
305 // SAFETY: Safe because we give a valid epoll file descriptor and an array of
306 // epoll_event structures that will be modified by the kernel to indicate information
307 // about the subset of file descriptors in the interest list.
308 // We also check the return value.
309 unsafe {
310 epoll_wait(
311 self.epoll_fd,
312 events.as_mut_ptr() as *mut epoll_event,
313 events.len() as i32,
314 timeout,
315 )
316 },
317 )
318 .into_result()? as usize;
319
320 Ok(events_count)
321 }
322}
323
324impl AsRawFd for Epoll {
325 fn as_raw_fd(&self) -> RawFd {
326 self.epoll_fd
327 }
328}
329
330impl Drop for Epoll {
331 fn drop(&mut self) {
332 // SAFETY: Safe because this fd is opened with `epoll_create` and we trust
333 // the kernel to give us a valid fd.
334 unsafe {
335 libc::close(self.epoll_fd);
336 }
337 }
338}
339
#[cfg(test)]
mod tests {
    use super::*;

    use crate::eventfd::EventFd;

    // Checks the accessors of `EpollEvent` on a default and on an explicitly built event.
    #[test]
    fn test_event_ops() {
        let blank = EpollEvent::default();
        assert_eq!(blank.events(), 0);
        assert_eq!(blank.data(), 0);

        let readable = EpollEvent::new(EventSet::IN, 2);
        assert_eq!(readable.events(), 1);
        assert_eq!(readable.event_set(), EventSet::IN);

        assert_eq!(readable.data(), 2);
        assert_eq!(readable.fd(), 2);
    }

    // Checks the exact text produced by the `Debug` implementation.
    #[test]
    fn test_events_debug() {
        let rendered = format!("{:?}", EpollEvent::new(EventSet::IN, 42));
        assert_eq!(rendered, "{ events: 1, data: 42 }");
    }

    // Walks through add / modify / delete on the interest list and checks what
    // `wait` reports after each step.
    #[test]
    fn test_epoll() {
        const WAIT_TIMEOUT_MS: i32 = 250;
        const READY_CAPACITY: usize = 128;

        let epoll = Epoll::new().unwrap();
        assert_eq!(epoll.epoll_fd, epoll.as_raw_fd());

        // First eventfd. Writing a value greater than 0 to its counter makes it available
        // for EPOLLIN; EPOLLOUT only requires that a value of at least 1 can be written
        // to the counter without blocking.
        let first = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        first.write(1).unwrap();
        let first_fd = first.as_raw_fd();
        let first_interest = EpollEvent::new(EventSet::IN | EventSet::OUT, first_fd as u64);

        // EPOLL_CTL_ADD succeeds the first time...
        let added = epoll.ctl(ControlOperation::Add, first_fd, first_interest);
        assert!(added.is_ok());

        // ...but the same fd can't be added twice to the interest list.
        let added_again = epoll.ctl(ControlOperation::Add, first_fd, first_interest);
        assert!(added_again.is_err());

        // Second eventfd: its `data` is deliberately not the fd value, and EPOLLIN is
        // not part of the monitored events.
        let second = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        second.write(1).unwrap();
        let second_fd = second.as_raw_fd();
        let second_interest = EpollEvent::new(EventSet::OUT, 10);
        assert!(epoll
            .ctl(ControlOperation::Add, second_fd, second_interest)
            .is_ok());

        // Third eventfd: nothing is written to its counter, so EPOLLIN is not expected to
        // be reported even though we ask to monitor it.
        let third = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let third_fd = third.as_raw_fd();
        let third_interest = EpollEvent::new(EventSet::OUT | EventSet::IN, third_fd as u64);
        assert!(epoll
            .ctl(ControlOperation::Add, third_fd, third_interest)
            .is_ok());

        // Now check what `epoll_wait()` reports for this epoll instance.
        let mut ready = vec![EpollEvent::default(); READY_CAPACITY];
        let ready_count = epoll.wait(WAIT_TIMEOUT_MS, &mut ready[..]).unwrap();

        // All 3 fds are expected in the ready list.
        assert_eq!(ready_count, 3);

        // `data` comes back exactly as registered: the fd value for the first and third
        // eventfd, the arbitrary value 10 for the second one.
        assert_eq!(ready[0].data(), first_fd as u64);
        assert_eq!(ready[1].data(), 10);
        assert_eq!(ready[2].data(), third_fd as u64);

        // First fd: both EPOLLIN and EPOLLOUT.
        assert_eq!(ready[0].events(), (EventSet::IN | EventSet::OUT).bits());
        // Second fd: only EPOLLOUT, since EPOLLIN was not monitored.
        assert_eq!(ready[1].events(), EventSet::OUT.bits());
        // Third fd: only EPOLLOUT, since its counter is still 0.
        assert_eq!(ready[2].events(), EventSet::OUT.bits());

        // EPOLL_CTL_MOD: replace the settings of the first fd with a different event mask
        // and a different `data` value.
        let first_interest = EpollEvent::new(EventSet::OUT, 20);
        let modified = epoll.ctl(ControlOperation::Modify, first_fd, first_interest);
        assert!(modified.is_ok());

        // A fd that was never added to the interest list can't be modified.
        let fourth = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let fourth_fd = fourth.as_raw_fd();
        assert!(epoll
            .ctl(ControlOperation::Modify, fourth_fd, EpollEvent::default())
            .is_err());

        let _ = epoll.wait(WAIT_TIMEOUT_MS, &mut ready[..]).unwrap();

        // The first fd now reports the new `data` and only EPOLLOUT.
        assert_eq!(ready[0].data(), 20);
        assert_eq!(ready[0].events(), EventSet::OUT.bits());

        // Modify the first fd once more so that no events are monitored for it.
        assert!(epoll
            .ctl(ControlOperation::Modify, first_fd, EpollEvent::default())
            .is_ok());

        // Only 2 fds should be left in the ready list.
        let ready_count = epoll.wait(WAIT_TIMEOUT_MS, &mut ready[..]).unwrap();
        assert_eq!(ready_count, 2);

        // EPOLL_CTL_DEL: remove the second fd from the interest list.
        assert!(epoll
            .ctl(ControlOperation::Delete, second_fd, EpollEvent::default())
            .is_ok());

        // Only the third fd should remain in the ready list.
        let ready_count = epoll.wait(WAIT_TIMEOUT_MS, &mut ready[..]).unwrap();

        assert_eq!(ready_count, 1);
        assert_eq!(ready[0].data(), third_fd as u64);
        assert_eq!(ready[0].events(), EventSet::OUT.bits());

        // Removing a fd that was never added to the interest list fails.
        assert!(epoll
            .ctl(ControlOperation::Delete, fourth_fd, EpollEvent::default())
            .is_err());
    }
}