// nix_compat/wire/bytes/reader/mod.rs

1use std::{
2    future::Future,
3    io,
4    num::NonZeroU64,
5    ops::RangeBounds,
6    pin::Pin,
7    task::{self, ready, Poll},
8};
9use tokio::io::{AsyncBufRead, AsyncRead, AsyncReadExt, ReadBuf};
10
11use trailer::{read_trailer, ReadTrailer, Trailer};
12
13#[doc(hidden)]
14pub use self::trailer::Pad;
15pub(crate) use self::trailer::Tag;
16mod trailer;
17
18/// Reads a "bytes wire packet" from the underlying reader.
19/// The format is the same as in [crate::wire::bytes::read_bytes],
20/// however this structure provides a [AsyncRead] interface,
21/// allowing to not having to pass around the entire payload in memory.
22///
23/// It is constructed by reading a size with [BytesReader::new],
24/// and yields payload data until the end of the packet is reached.
25///
26/// It will not return the final bytes before all padding has been successfully
27/// consumed as well, but the full length of the reader must be consumed.
28///
29/// If the data is not read all the way to the end, or an error is encountered,
30/// the underlying reader is no longer usable and might return garbage.
31#[derive(Debug)]
32#[allow(private_bounds)]
33pub struct BytesReader<R, T: Tag = Pad> {
34    state: State<R, T>,
35}
36
/// Divides `user_len` into `(body_len, tail_len)`.
///
/// `body_len` counts the user bytes stored in the non-terminal 8-byte blocks
/// and is therefore always a multiple of 8. `tail_len` (1..=8) is the number
/// of user bytes stored in the final block, which belongs to the trailer.
/// The two always add up to `user_len`.
#[inline(always)]
fn split_user_len(user_len: NonZeroU64) -> (u64, u8) {
    // `user_len` is non-zero, so this cannot underflow.
    let last_index = user_len.get() - 1;
    // Offset of the last user byte inside its 8-byte block.
    let offset_in_block = last_index % 8;

    (last_index - offset_in_block, offset_in_block as u8 + 1)
}
47
48#[derive(Debug)]
49enum State<R, T: Tag> {
50    /// Full 8-byte blocks are being read and released to the caller.
51    /// NOTE: The final 8-byte block is *always* part of the trailer.
52    Body {
53        reader: Option<R>,
54        consumed: u64,
55        /// The total length of all user data contained in both the body and trailer.
56        user_len: NonZeroU64,
57    },
58    /// The trailer is in the process of being read.
59    ReadTrailer(ReadTrailer<R, T>),
60    /// The trailer has been fully read and validated,
61    /// and data can now be released to the caller.
62    ReleaseTrailer { consumed: u8, data: Trailer },
63}
64
65impl<R> BytesReader<R>
66where
67    R: AsyncRead + Unpin,
68{
69    /// Constructs a new BytesReader, using the underlying passed reader.
70    pub async fn new<S: RangeBounds<u64>>(reader: R, allowed_size: S) -> io::Result<Self> {
71        BytesReader::new_internal(reader, allowed_size).await
72    }
73}
74
75#[allow(private_bounds)]
76impl<R, T: Tag> BytesReader<R, T>
77where
78    R: AsyncRead + Unpin,
79{
80    /// Constructs a new BytesReader, using the underlying passed reader.
81    pub(crate) async fn new_internal<S: RangeBounds<u64>>(
82        mut reader: R,
83        allowed_size: S,
84    ) -> io::Result<Self> {
85        let size = reader.read_u64_le().await?;
86
87        if !allowed_size.contains(&size) {
88            return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid size"));
89        }
90
91        Ok(Self {
92            state: match NonZeroU64::new(size) {
93                Some(size) => State::Body {
94                    reader: Some(reader),
95                    consumed: 0,
96                    user_len: size,
97                },
98                None => State::ReleaseTrailer {
99                    consumed: 0,
100                    data: read_trailer::<R, T>(reader, 0).await?,
101                },
102            },
103        })
104    }
105
106    /// Returns whether there is any remaining data to be read.
107    pub fn is_empty(&self) -> bool {
108        self.len() == 0
109    }
110
111    /// Remaining data length, ie not including data already read.
112    pub fn len(&self) -> u64 {
113        match self.state {
114            State::Body {
115                consumed, user_len, ..
116            } => user_len.get() - consumed,
117            State::ReadTrailer(ref fut) => fut.len() as u64,
118            State::ReleaseTrailer { consumed, ref data } => data.len() as u64 - consumed as u64,
119        }
120    }
121}
122
123#[allow(private_bounds)]
124impl<R: AsyncRead + Unpin, T: Tag> AsyncRead for BytesReader<R, T> {
125    fn poll_read(
126        mut self: Pin<&mut Self>,
127        cx: &mut task::Context,
128        buf: &mut ReadBuf,
129    ) -> Poll<io::Result<()>> {
130        let this = &mut self.state;
131
132        // reading nothing always succeeds
133        if buf.remaining() == 0 {
134            return Ok(()).into();
135        }
136
137        loop {
138            match this {
139                State::Body {
140                    reader,
141                    consumed,
142                    user_len,
143                } => {
144                    let (body_len, tail_len) = split_user_len(*user_len);
145                    let remaining = body_len - *consumed;
146
147                    let reader = if remaining == 0 {
148                        let reader = reader.take().unwrap();
149                        *this = State::ReadTrailer(read_trailer(reader, tail_len));
150                        continue;
151                    } else {
152                        Pin::new(reader.as_mut().unwrap())
153                    };
154
155                    let bytes_read = ready!(with_limited(buf, remaining, |buf| {
156                        reader.poll_read(cx, buf).map_ok(|()| buf.filled().len())
157                    }))?;
158
159                    *consumed += bytes_read as u64;
160
161                    return if bytes_read != 0 {
162                        Ok(())
163                    } else {
164                        Err(io::ErrorKind::UnexpectedEof.into())
165                    }
166                    .into();
167                }
168                State::ReadTrailer(fut) => {
169                    *this = State::ReleaseTrailer {
170                        consumed: 0,
171                        data: ready!(Pin::new(fut).poll(cx))?,
172                    };
173                }
174                State::ReleaseTrailer { consumed, data } => {
175                    let data = &data[*consumed as usize..];
176                    let data = &data[..usize::min(data.len(), buf.remaining())];
177
178                    buf.put_slice(data);
179                    *consumed += data.len() as u8;
180
181                    return Ok(()).into();
182                }
183            }
184        }
185    }
186}
187
188#[allow(private_bounds)]
189impl<R: AsyncBufRead + Unpin, T: Tag> AsyncBufRead for BytesReader<R, T> {
190    fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut task::Context) -> Poll<io::Result<&[u8]>> {
191        let this = &mut self.get_mut().state;
192
193        loop {
194            match this {
195                // This state comes *after* the following case,
196                // but we can't keep it in logical order because
197                // that would lengthen the borrow lifetime.
198                State::Body {
199                    reader,
200                    consumed,
201                    user_len,
202                } if {
203                    let (body_len, _) = split_user_len(*user_len);
204                    let remaining = body_len - *consumed;
205
206                    remaining == 0
207                } =>
208                {
209                    let reader = reader.take().unwrap();
210                    let (_, tail_len) = split_user_len(*user_len);
211
212                    *this = State::ReadTrailer(read_trailer(reader, tail_len));
213                }
214                State::Body {
215                    reader,
216                    consumed,
217                    user_len,
218                } => {
219                    let (body_len, _) = split_user_len(*user_len);
220                    let remaining = body_len - *consumed;
221
222                    let reader = Pin::new(reader.as_mut().unwrap());
223
224                    match ready!(reader.poll_fill_buf(cx))? {
225                        &[] => {
226                            return Err(io::ErrorKind::UnexpectedEof.into()).into();
227                        }
228                        mut buf => {
229                            if buf.len() as u64 > remaining {
230                                buf = &buf[..remaining as usize];
231                            }
232
233                            return Ok(buf).into();
234                        }
235                    }
236                }
237                State::ReadTrailer(fut) => {
238                    *this = State::ReleaseTrailer {
239                        consumed: 0,
240                        data: ready!(Pin::new(fut).poll(cx))?,
241                    };
242                }
243                State::ReleaseTrailer { consumed, data } => {
244                    return Ok(&data[*consumed as usize..]).into();
245                }
246            }
247        }
248    }
249
250    fn consume(mut self: Pin<&mut Self>, amt: usize) {
251        match &mut self.state {
252            State::Body {
253                reader,
254                consumed,
255                user_len,
256            } => {
257                let reader = Pin::new(reader.as_mut().unwrap());
258                let (body_len, _) = split_user_len(*user_len);
259
260                *consumed = consumed
261                    .checked_add(amt as u64)
262                    .filter(|&consumed| consumed <= body_len)
263                    .expect("consumed out of bounds");
264
265                reader.consume(amt);
266            }
267            State::ReadTrailer(_) => {
268                if amt != 0 {
269                    unreachable!();
270                }
271            }
272            State::ReleaseTrailer { consumed, data } => {
273                *consumed = amt
274                    .checked_add(*consumed as usize)
275                    .filter(|&consumed| consumed <= data.len())
276                    .expect("consumed out of bounds") as u8;
277            }
278        }
279    }
280}
281
282/// Make a limited version of `buf`, consisting only of up to `n` bytes of the unfilled section, and call `f` with it.
283/// After `f` returns, we propagate the filled cursor advancement back to `buf`.
284fn with_limited<R>(buf: &mut ReadBuf, n: u64, f: impl FnOnce(&mut ReadBuf) -> R) -> R {
285    let mut nbuf = buf.take(n.try_into().unwrap_or(usize::MAX));
286    let ptr = nbuf.initialized().as_ptr();
287    let ret = f(&mut nbuf);
288
289    // SAFETY: `ReadBuf::take` only returns the *unfilled* section of `buf`,
290    // so anything filled is new, initialized data.
291    //
292    // We verify that `nbuf` still points to the same buffer,
293    // so we're sure it hasn't been swapped out.
294    unsafe {
295        // ensure our buffer hasn't been swapped out
296        assert_eq!(nbuf.initialized().as_ptr(), ptr);
297
298        let n = nbuf.filled().len();
299        buf.assume_init(n);
300        buf.advance(n);
301    }
302
303    ret
304}
305
#[cfg(test)]
mod tests {
    use std::sync::LazyLock;
    use std::time::Duration;

    use crate::wire::bytes::{padding_len, write_bytes};
    use hex_literal::hex;
    use rstest::rstest;
    use tokio::io::{AsyncReadExt, BufReader};
    use tokio_test::io::Builder;

    use super::*;

    /// The maximum length of bytes packets we're willing to accept in the test
    /// cases.
    const MAX_LEN: u64 = 1024;

    /// A payload of roughly one megabyte, used to exercise long bodies.
    pub static LARGE_PAYLOAD: LazyLock<Vec<u8>> =
        LazyLock::new(|| (0..255).collect::<Vec<u8>>().repeat(4 * 1024));

    /// Helper function, calling the (simpler) write_bytes with the payload.
    /// We use this to create data we want to read from the wire.
    async fn produce_packet_bytes(payload: &[u8]) -> Vec<u8> {
        let mut exp = vec![];
        write_bytes(&mut exp, payload).await.unwrap();
        exp
    }

    /// Read bytes packets of various length, and ensure read_to_end returns the
    /// expected payload.
    #[rstest]
    #[case::empty(&[])] // empty bytes packet
    #[case::size_1b(&[0xff])] // 1 bytes payload
    #[case::size_8b(&hex!("0001020304050607"))] // 8 bytes payload (no padding)
    #[case::size_9b(&hex!("000102030405060708"))] // 9 bytes payload (7 bytes padding)
    #[case::size_1m(LARGE_PAYLOAD.as_slice())] // larger bytes packet
    #[tokio::test]
    async fn read_payload_correct(#[case] payload: &[u8]) {
        let mut mock = Builder::new()
            .read(&produce_packet_bytes(payload).await)
            .build();

        let mut r = BytesReader::new(&mut mock, ..=LARGE_PAYLOAD.len() as u64)
            .await
            .unwrap();
        let mut buf = Vec::new();
        r.read_to_end(&mut buf).await.expect("must succeed");

        assert_eq!(payload, &buf[..]);
    }

    /// Read bytes packets of various length, and ensure copy_buf reads the
    /// expected payload.
    #[rstest]
    #[case::empty(&[])] // empty bytes packet
    #[case::size_1b(&[0xff])] // 1 bytes payload
    #[case::size_8b(&hex!("0001020304050607"))] // 8 bytes payload (no padding)
    #[case::size_9b(&hex!("000102030405060708"))] // 9 bytes payload (7 bytes padding)
    #[case::size_1m(LARGE_PAYLOAD.as_slice())] // larger bytes packet
    #[tokio::test]
    async fn read_payload_correct_readbuf(#[case] payload: &[u8]) {
        let mut mock = BufReader::new(
            Builder::new()
                .read(&produce_packet_bytes(payload).await)
                .build(),
        );

        let mut r = BytesReader::new(&mut mock, ..=LARGE_PAYLOAD.len() as u64)
            .await
            .unwrap();

        let mut buf = Vec::new();
        tokio::io::copy_buf(&mut r, &mut buf)
            .await
            .expect("copy_buf must succeed");

        assert_eq!(payload, &buf[..]);
    }

    /// Fail if the bytes packet is larger than allowed
    #[tokio::test]
    async fn read_bigger_than_allowed_fail() {
        let payload = LARGE_PAYLOAD.as_slice();
        let mut mock = Builder::new()
            .read(&produce_packet_bytes(payload).await[0..8]) // We stop reading after the size packet
            .build();

        assert_eq!(
            BytesReader::new(&mut mock, ..2048)
                .await
                .unwrap_err()
                .kind(),
            io::ErrorKind::InvalidData
        );
    }

    /// Fail if the bytes packet is smaller than allowed
    #[tokio::test]
    async fn read_smaller_than_allowed_fail() {
        let payload = &[0x00, 0x01, 0x02];
        let mut mock = Builder::new()
            .read(&produce_packet_bytes(payload).await[0..8]) // We stop reading after the size packet
            .build();

        assert_eq!(
            BytesReader::new(&mut mock, 1024..2048)
                .await
                .unwrap_err()
                .kind(),
            io::ErrorKind::InvalidData
        );
    }

    /// Read the trailer immediately if there is no payload.
    #[cfg(feature = "async")]
    #[tokio::test]
    async fn read_trailer_immediately() {
        use crate::nar::wire::PadPar;

        let mut mock = Builder::new()
            .read(&[0; 8])
            .read(&PadPar::PATTERN[8..])
            .build();

        BytesReader::<_, PadPar>::new_internal(&mut mock, ..)
            .await
            .unwrap();

        // The mock reader will panic if dropped without reading all data.
    }

    /// Read the trailer even if we only read the exact payload size.
    #[cfg(feature = "async")]
    #[tokio::test]
    async fn read_exact_trailer() {
        use crate::nar::wire::PadPar;

        let mut mock = Builder::new()
            .read(&16u64.to_le_bytes())
            .read(&[0x55; 16])
            .read(&PadPar::PATTERN[8..])
            .build();

        let mut reader = BytesReader::<_, PadPar>::new_internal(&mut mock, ..)
            .await
            .unwrap();

        let mut buf = [0; 16];
        reader.read_exact(&mut buf).await.unwrap();
        assert_eq!(buf, [0x55; 16]);

        // The mock reader will panic if dropped without reading all data.
    }

    /// Fail if the padding is not all zeroes
    #[tokio::test]
    async fn read_fail_if_nonzero_padding() {
        let payload = &[0x00, 0x01, 0x02];
        let mut packet_bytes = produce_packet_bytes(payload).await;
        // Flip some bits in the padding
        packet_bytes[12] = 0xff;
        let mut mock = Builder::new().read(&packet_bytes).build(); // We stop reading after the faulty bit

        let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap();
        let mut buf = Vec::new();

        r.read_to_end(&mut buf).await.expect_err("must fail");
    }

    /// Start a 9 bytes payload packet, but have the underlying reader return
    /// EOF in the middle of the size packet (after 4 bytes).
    /// We should get an unexpected EOF error already when constructing the
    /// reader, i.e. before any payload byte can be read.
    #[tokio::test]
    async fn read_9b_eof_during_size() {
        let payload = &hex!("FF0102030405060708");
        let mut mock = Builder::new()
            .read(&produce_packet_bytes(payload).await[..4])
            .build();

        assert_eq!(
            BytesReader::new(&mut mock, ..MAX_LEN)
                .await
                .expect_err("must fail")
                .kind(),
            io::ErrorKind::UnexpectedEof
        );
    }

    /// Start a 9 bytes payload packet, but have the underlying reader return
    /// EOF in the middle of the payload (4 bytes into the payload).
    /// We should get an unexpected EOF error, after reading the first 4 bytes
    /// (successfully).
    #[tokio::test]
    async fn read_9b_eof_during_payload() {
        let payload = &hex!("FF0102030405060708");
        let mut mock = Builder::new()
            .read(&produce_packet_bytes(payload).await[..8 + 4])
            .build();

        let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap();
        let mut buf = [0; 9];

        r.read_exact(&mut buf[..4]).await.expect("must succeed");

        assert_eq!(
            r.read_exact(&mut buf[4..=4])
                .await
                .expect_err("must fail")
                .kind(),
            std::io::ErrorKind::UnexpectedEof
        );
    }

    /// Start a 9 bytes payload packet, but don't supply the necessary padding.
    /// This is expected to always fail before returning the final data.
    #[rstest]
    #[case::before_padding(8 + 9)]
    #[case::during_padding(8 + 9 + 2)]
    #[case::after_padding(8 + 9 + padding_len(9) as usize - 1)]
    #[tokio::test]
    async fn read_9b_eof_after_payload(#[case] offset: usize) {
        let payload = &hex!("FF0102030405060708");
        let mut mock = Builder::new()
            .read(&produce_packet_bytes(payload).await[..offset])
            .build();

        let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap();

        // read_exact of the payload *body* will succeed, but a subsequent read will
        // return UnexpectedEof error.
        assert_eq!(r.read_exact(&mut [0; 8]).await.unwrap(), 8);
        assert_eq!(
            r.read_exact(&mut [0]).await.unwrap_err().kind(),
            std::io::ErrorKind::UnexpectedEof
        );
    }

    /// Start a 9 bytes payload packet, but return an error after a certain position.
    /// Ensure that error is propagated.
    #[rstest]
    #[case::during_size(4)]
    #[case::before_payload(8)]
    #[case::during_payload(8 + 4)]
    // 8 bytes of size + 9 bytes of payload: the error hits right where the padding starts.
    #[case::before_padding(8 + 9)]
    #[case::during_padding(8 + 9 + 2)]
    #[tokio::test]
    async fn propagate_error_from_reader(#[case] offset: usize) {
        let payload = &hex!("FF0102030405060708");
        let mut mock = Builder::new()
            .read(&produce_packet_bytes(payload).await[..offset])
            .read_error(std::io::Error::new(std::io::ErrorKind::Other, "foo"))
            .build();

        // Either length reading or data reading can fail, depending on which test case we're in.
        let err: io::Error = async {
            let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await?;
            let mut buf = Vec::new();

            r.read_to_end(&mut buf).await?;

            Ok(())
        }
        .await
        .expect_err("must fail");

        assert_eq!(
            err.kind(),
            std::io::ErrorKind::Other,
            "error kind must match"
        );

        assert_eq!(
            err.into_inner().unwrap().to_string(),
            "foo",
            "error payload must contain foo"
        );
    }

    /// Start a 9 bytes payload packet, but return an error after a certain position.
    /// Ensure that error is propagated (AsyncBufRead case)
    #[rstest]
    #[case::during_size(4)]
    #[case::before_payload(8)]
    #[case::during_payload(8 + 4)]
    // 8 bytes of size + 9 bytes of payload: the error hits right where the padding starts.
    #[case::before_padding(8 + 9)]
    #[case::during_padding(8 + 9 + 2)]
    #[tokio::test]
    async fn propagate_error_from_reader_buffered(#[case] offset: usize) {
        let payload = &hex!("FF0102030405060708");
        let mock = Builder::new()
            .read(&produce_packet_bytes(payload).await[..offset])
            .read_error(std::io::Error::new(std::io::ErrorKind::Other, "foo"))
            .build();
        let mut mock = BufReader::new(mock);

        // Either length reading or data reading can fail, depending on which test case we're in.
        let err: io::Error = async {
            let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await?;
            let mut buf = Vec::new();

            tokio::io::copy_buf(&mut r, &mut buf).await?;

            Ok(())
        }
        .await
        .expect_err("must fail");

        assert_eq!(
            err.kind(),
            std::io::ErrorKind::Other,
            "error kind must match"
        );

        assert_eq!(
            err.into_inner().unwrap().to_string(),
            "foo",
            "error payload must contain foo"
        );
    }

    /// If there's an error right after the padding, we don't propagate it, as
    /// we're done reading. We just return EOF.
    #[tokio::test]
    async fn no_error_after_eof() {
        let payload = &hex!("FF0102030405060708");
        let mut mock = Builder::new()
            .read(&produce_packet_bytes(payload).await)
            .read_error(std::io::Error::new(std::io::ErrorKind::Other, "foo"))
            .build();

        let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap();
        let mut buf = Vec::new();

        r.read_to_end(&mut buf).await.expect("must succeed");
        assert_eq!(buf.as_slice(), payload);
    }

    /// If there's an error right after the padding, we don't propagate it, as
    /// we're done reading. We just return EOF.
    #[tokio::test]
    async fn no_error_after_eof_buffered() {
        let payload = &hex!("FF0102030405060708");
        let mock = Builder::new()
            .read(&produce_packet_bytes(payload).await)
            .read_error(std::io::Error::new(std::io::ErrorKind::Other, "foo"))
            .build();
        let mut mock = BufReader::new(mock);

        let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap();
        let mut buf = Vec::new();

        tokio::io::copy_buf(&mut r, &mut buf)
            .await
            .expect("must succeed");
        assert_eq!(buf.as_slice(), payload);
    }

    /// Introduce various stalls in various places of the packet, to ensure we
    /// handle these cases properly, too.
    #[rstest]
    #[case::beginning(0)]
    #[case::before_payload(8)]
    #[case::during_payload(8 + 4)]
    // 8 bytes of size + 9 bytes of payload: the stall hits right where the padding starts.
    #[case::before_padding(8 + 9)]
    #[case::during_padding(8 + 9 + 2)]
    #[tokio::test]
    async fn read_payload_correct_pending(#[case] offset: usize) {
        let payload = &hex!("FF0102030405060708");
        let mut mock = Builder::new()
            .read(&produce_packet_bytes(payload).await[..offset])
            .wait(Duration::from_nanos(0))
            .read(&produce_packet_bytes(payload).await[offset..])
            .build();

        let mut r = BytesReader::new(&mut mock, ..=LARGE_PAYLOAD.len() as u64)
            .await
            .unwrap();
        let mut buf = Vec::new();
        r.read_to_end(&mut buf).await.expect("must succeed");

        assert_eq!(payload, &buf[..]);
    }
}
687        assert_eq!(payload, &buf[..]);
688    }
689}