nix_compat/nar/reader/
mod.rs

1//! Parser for the Nix archive format, aka NAR.
2//!
3//! NAR files (and their hashed representations) are used in C++ Nix for
4//! a variety of things, including addressing fixed-output derivations
5//! and transferring store paths between Nix stores.
6
7use std::io::{
8    self, BufRead,
9    ErrorKind::{InvalidData, UnexpectedEof},
10    Read, Write,
11};
12
13#[cfg(not(debug_assertions))]
14use std::marker::PhantomData;
15
16// Required reading for understanding this module.
17use crate::nar::wire;
18
19#[cfg(all(feature = "async", feature = "wire"))]
20pub mod r#async;
21
22mod read;
23#[cfg(test)]
24mod test;
25
/// The byte source a NAR is parsed from: any buffered reader ([BufRead]) that is also [Send].
///
/// This is an unsized trait-object type, so it is only ever used behind a
/// reference (see the `&mut Reader` parameters and fields in this module).
pub type Reader<'a> = dyn BufRead + Send + 'a;
27
/// Internal handle on the underlying input, embedded in [FileReader] and [DirReader].
///
/// Pairs the borrowed input with the misuse-detection state described on `status`.
struct ArchiveReader<'a, 'r> {
    /// The underlying input that all wire-format reads are issued against.
    inner: &'a mut Reader<'r>,

    /// In debug mode, also track when we need to abandon this archive reader.
    ///
    /// The archive reader must be abandoned when:
    ///   * An error is encountered at any point
    ///   * A file or directory reader is dropped before being read entirely.
    ///
    /// All of these checks vanish in release mode.
    status: ArchiveReaderStatus<'a>,
}
40
/// Unwrap the `Result` produced by `$ex`, or poison `$it` and return early.
///
/// `$it` must be a place with a `status` field offering a `poison()` method.
/// On `Err`, the status is poisoned first and the error is then returned from
/// the *enclosing function*, converted with `.into()`.
macro_rules! try_or_poison {
    ($it:expr, $ex:expr) => {
        match $ex {
            Err(err) => {
                $it.status.poison();
                return Err(err.into());
            }
            Ok(value) => value,
        }
    };
}
52/// Start reading a NAR file from `reader`.
53pub fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> {
54    read::token(reader, &wire::TOK_NAR)?;
55    Node::new(ArchiveReader {
56        inner: reader,
57        status: ArchiveReaderStatus::top(),
58    })
59}
60
/// A single filesystem object in the archive, as returned by [open] or inside an [Entry].
pub enum Node<'a, 'r> {
    /// A symbolic link.
    Symlink {
        /// Raw link target bytes; the parser rejects empty targets and targets containing NUL.
        target: Vec<u8>,
    },
    /// A regular file whose contents are streamed through `reader`.
    File {
        /// `true` when the node was tagged [wire::Node::Exe] rather than [wire::Node::Reg].
        executable: bool,
        /// Streaming access to the file contents.
        reader: FileReader<'a, 'r>,
    },
    /// A directory, whose entries are obtained through [DirReader::next].
    Directory(DirReader<'a, 'r>),
}
71
72impl<'a, 'r> Node<'a, 'r> {
73    /// Start reading a [Node], matching the next [wire::Node].
74    ///
75    /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink],
76    /// but is otherwise left to [DirReader] or [FileReader].
77    fn new(mut reader: ArchiveReader<'a, 'r>) -> io::Result<Self> {
78        Ok(match read::tag(reader.inner)? {
79            wire::Node::Sym => {
80                let target =
81                    try_or_poison!(reader, read::bytes(reader.inner, wire::MAX_TARGET_LEN));
82
83                if target.is_empty() || target.contains(&0) {
84                    reader.status.poison();
85                    return Err(InvalidData.into());
86                }
87
88                try_or_poison!(reader, read::token(reader.inner, &wire::TOK_PAR));
89                reader.status.ready_parent(); // Immediately allow reading from parent again
90
91                Node::Symlink { target }
92            }
93            tag @ (wire::Node::Reg | wire::Node::Exe) => {
94                let len = try_or_poison!(&mut reader, read::u64(reader.inner));
95
96                Node::File {
97                    executable: tag == wire::Node::Exe,
98                    reader: FileReader::new(reader, len)?,
99                }
100            }
101            wire::Node::Dir => Node::Directory(DirReader::new(reader)),
102        })
103    }
104}
105
/// File contents, readable through the [Read] trait.
///
/// It comes with some caveats:
///  * You must always read the entire file, unless you intend to abandon the entire archive reader.
///  * You must abandon the entire archive reader upon the first error.
///
/// It's fine to read exactly `reader.len()` bytes without ever seeing an explicit EOF.
pub struct FileReader<'a, 'r> {
    /// Handle on the underlying input plus the debug-mode misuse tracking.
    reader: ArchiveReader<'a, 'r>,
    /// Number of content bytes that have not been consumed yet.
    /// Starts at the file size and is decremented as data is read.
    len: u64,
    /// Truncated original file length for padding computation.
    /// We only care about the 3 least significant bits; semantically, this is a u3.
    pad: u8,
}
120
121impl<'a, 'r> FileReader<'a, 'r> {
122    /// Instantiate a new reader, starting after [wire::TOK_REG] or [wire::TOK_EXE].
123    /// We handle the terminating [wire::TOK_PAR] on semantic EOF.
124    fn new(mut reader: ArchiveReader<'a, 'r>, len: u64) -> io::Result<Self> {
125        // For zero-length files, we have to read the terminating TOK_PAR
126        // immediately, since FileReader::read may never be called; we've
127        // already reached semantic EOF by definition.
128        if len == 0 {
129            read::token(reader.inner, &wire::TOK_PAR)?;
130            reader.status.ready_parent();
131        }
132
133        Ok(Self {
134            reader,
135            len,
136            pad: len as u8,
137        })
138    }
139
140    pub fn is_empty(&self) -> bool {
141        self.len == 0
142    }
143
144    pub fn len(&self) -> u64 {
145        self.len
146    }
147}
148
149impl FileReader<'_, '_> {
150    /// Equivalent to [BufRead::fill_buf]
151    ///
152    /// We can't directly implement [BufRead], because [FileReader::consume] needs
153    /// to perform fallible I/O.
154    pub fn fill_buf(&mut self) -> io::Result<&[u8]> {
155        if self.is_empty() {
156            return Ok(&[]);
157        }
158
159        self.reader.check_correct();
160
161        let mut buf = try_or_poison!(self.reader, self.reader.inner.fill_buf());
162
163        if buf.is_empty() {
164            self.reader.status.poison();
165            return Err(UnexpectedEof.into());
166        }
167
168        if buf.len() as u64 > self.len {
169            buf = &buf[..self.len as usize];
170        }
171
172        Ok(buf)
173    }
174
175    /// Analogous to [BufRead::consume], differing only in that it needs
176    /// to perform I/O in order to read padding and terminators.
177    pub fn consume(&mut self, n: usize) -> io::Result<()> {
178        if n == 0 {
179            return Ok(());
180        }
181
182        self.reader.check_correct();
183
184        self.len = self
185            .len
186            .checked_sub(n as u64)
187            .expect("consumed bytes past EOF");
188
189        self.reader.inner.consume(n);
190
191        if self.is_empty() {
192            self.finish()?;
193        }
194
195        Ok(())
196    }
197
198    /// Copy the (remaining) contents of the file into `dst`.
199    pub fn copy(&mut self, mut dst: impl Write) -> io::Result<()> {
200        while !self.is_empty() {
201            let buf = self.fill_buf()?;
202            let n = try_or_poison!(self.reader, dst.write(buf));
203            self.consume(n)?;
204        }
205
206        Ok(())
207    }
208}
209
210impl Read for FileReader<'_, '_> {
211    fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
212        if buf.is_empty() || self.is_empty() {
213            return Ok(0);
214        }
215
216        self.reader.check_correct();
217
218        if buf.len() as u64 > self.len {
219            buf = &mut buf[..self.len as usize];
220        }
221
222        let n = try_or_poison!(self.reader, self.reader.inner.read(buf));
223        self.len -= n as u64;
224
225        if n == 0 {
226            self.reader.status.poison();
227            return Err(UnexpectedEof.into());
228        }
229
230        if self.is_empty() {
231            self.finish()?;
232        }
233
234        Ok(n)
235    }
236}
237
238impl FileReader<'_, '_> {
239    /// We've reached semantic EOF, consume and verify the padding and terminating TOK_PAR.
240    /// Files are padded to 64 bits (8 bytes), just like any other byte string in the wire format.
241    fn finish(&mut self) -> io::Result<()> {
242        let pad = (self.pad & 7) as usize;
243
244        if pad != 0 {
245            let mut buf = [0; 8];
246            try_or_poison!(self.reader, self.reader.inner.read_exact(&mut buf[pad..]));
247
248            if buf != [0; 8] {
249                self.reader.status.poison();
250                return Err(InvalidData.into());
251            }
252        }
253
254        try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR));
255
256        // Done with reading this file, allow going back up the chain of readers
257        self.reader.status.ready_parent();
258
259        Ok(())
260    }
261}
262
/// A directory iterator, yielding a sequence of [Node]s.
/// It must be fully consumed before reading further from the [DirReader] that produced it, if any.
///
/// Entries are obtained with [DirReader::next]; see its documentation for the usage rules.
pub struct DirReader<'a, 'r> {
    /// Handle on the underlying input plus the debug-mode misuse tracking.
    reader: ArchiveReader<'a, 'r>,
    /// Previous directory entry name.
    /// We have to hang onto this to enforce name monotonicity.
    /// Empty until the first entry has been read.
    prev_name: Vec<u8>,
}
271
/// One directory entry, as returned by [DirReader::next].
pub struct Entry<'a, 'r> {
    /// Raw entry name, borrowed from the [DirReader] that produced this entry.
    pub name: &'a [u8],
    /// The filesystem object stored under `name`.
    pub node: Node<'a, 'r>,
}
276
277impl<'a, 'r> DirReader<'a, 'r> {
278    fn new(reader: ArchiveReader<'a, 'r>) -> Self {
279        Self {
280            reader,
281            prev_name: vec![],
282        }
283    }
284
285    /// Read the next [Entry] from the directory.
286    ///
287    /// We explicitly don't implement [Iterator], since treating this as
288    /// a regular Rust iterator will surely lead you astray.
289    ///
290    ///  * You must always consume the entire iterator, unless you abandon the entire archive reader.
291    ///  * You must abandon the entire archive reader on the first error.
292    ///  * You must abandon the directory reader upon the first [None].
293    ///  * Even if you know the amount of elements up front, you must keep reading until you encounter [None].
294    #[allow(clippy::should_implement_trait)]
295    pub fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> {
296        self.reader.check_correct();
297
298        // COME FROM the previous iteration: if we've already read an entry,
299        // read its terminating TOK_PAR here.
300        if !self.prev_name.is_empty() {
301            try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR));
302        }
303
304        // Determine if there are more entries to follow
305        if let wire::Entry::None = try_or_poison!(self.reader, read::tag(self.reader.inner)) {
306            // We've reached the end of this directory.
307            self.reader.status.ready_parent();
308            return Ok(None);
309        }
310
311        let mut name = [0; wire::MAX_NAME_LEN + 1];
312        let name = try_or_poison!(
313            self.reader,
314            read::bytes_buf(self.reader.inner, &mut name, wire::MAX_NAME_LEN)
315        );
316
317        if name.is_empty()
318            || name.contains(&0)
319            || name.contains(&b'/')
320            || name == b"."
321            || name == b".."
322        {
323            self.reader.status.poison();
324            return Err(InvalidData.into());
325        }
326
327        // Enforce strict monotonicity of directory entry names.
328        if &self.prev_name[..] >= name {
329            self.reader.status.poison();
330            return Err(InvalidData.into());
331        }
332
333        self.prev_name.clear();
334        self.prev_name.extend_from_slice(name);
335
336        try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD));
337
338        Ok(Some(Entry {
339            name: &self.prev_name,
340            // Don't need to worry about poisoning here: Node::new will do it for us if needed
341            node: Node::new(self.reader.child())?,
342        }))
343    }
344}
345
/// We use a stack of statuses to:
///   * Share poisoned state across all objects from the same underlying reader,
///     so we can check they are abandoned when an error occurs
///   * Make sure only the most recently created object is read from, and is fully exhausted
///     before anything it was created from is used again.
enum ArchiveReaderStatus<'a> {
    /// Release builds: no tracking at all, only the lifetime is kept.
    #[cfg(not(debug_assertions))]
    None(PhantomData<&'a ()>),
    /// Debug builds: bottom of the stack, owning the shared `poisoned` flag.
    #[cfg(debug_assertions)]
    StackTop { poisoned: bool, ready: bool },
    /// Debug builds: a nested status, borrowing the shared `poisoned` flag and
    /// its parent's `ready` flag.
    #[cfg(debug_assertions)]
    StackChild {
        poisoned: &'a mut bool,
        parent_ready: &'a mut bool,
        ready: bool,
    },
}

impl ArchiveReaderStatus<'_> {
    /// Status for the root of an archive: not poisoned, and ready to be read from.
    fn top() -> Self {
        #[cfg(debug_assertions)]
        let status = Self::StackTop {
            poisoned: false,
            ready: true,
        };

        #[cfg(not(debug_assertions))]
        let status = Self::None(PhantomData);

        status
    }

    /// Flag an error: every object sharing the same reader becomes poisoned.
    fn poison(&mut self) {
        match self {
            #[cfg(not(debug_assertions))]
            Self::None(_) => {}
            #[cfg(debug_assertions)]
            Self::StackTop { poisoned, .. } => *poisoned = true,
            #[cfg(debug_assertions)]
            Self::StackChild { poisoned, .. } => **poisoned = true,
        }
    }

    /// Retire this status and hand control back to the parent, if there is one.
    /// Afterwards this status reports itself as not ready.
    fn ready_parent(&mut self) {
        match self {
            #[cfg(not(debug_assertions))]
            Self::None(_) => {}
            #[cfg(debug_assertions)]
            Self::StackTop { ready, .. } => *ready = false,
            #[cfg(debug_assertions)]
            Self::StackChild {
                ready,
                parent_ready,
                ..
            } => {
                **parent_ready = true;
                *ready = false;
            }
        }
    }

    /// Whether an error was flagged on the shared reader. Always `false` in release builds.
    fn poisoned(&self) -> bool {
        match self {
            #[cfg(not(debug_assertions))]
            Self::None(_) => false,
            #[cfg(debug_assertions)]
            Self::StackTop { poisoned, .. } => *poisoned,
            #[cfg(debug_assertions)]
            Self::StackChild { poisoned, .. } => **poisoned,
        }
    }

    /// Whether this status is the one currently allowed to read. Always `true` in release builds.
    fn ready(&self) -> bool {
        match self {
            #[cfg(not(debug_assertions))]
            Self::None(_) => true,
            #[cfg(debug_assertions)]
            Self::StackTop { ready, .. } | Self::StackChild { ready, .. } => *ready,
        }
    }
}
433
434impl<'r> ArchiveReader<'_, 'r> {
435    /// Create a new child reader from this one.
436    /// In debug mode, this reader will panic if called before the new child is exhausted / calls `ready_parent`
437    fn child(&mut self) -> ArchiveReader<'_, 'r> {
438        ArchiveReader {
439            inner: self.inner,
440            #[cfg(not(debug_assertions))]
441            status: ArchiveReaderStatus::None(PhantomData),
442            #[cfg(debug_assertions)]
443            status: match &mut self.status {
444                ArchiveReaderStatus::StackTop { poisoned, ready } => {
445                    *ready = false;
446                    ArchiveReaderStatus::StackChild {
447                        poisoned,
448                        parent_ready: ready,
449                        ready: true,
450                    }
451                }
452                ArchiveReaderStatus::StackChild {
453                    poisoned, ready, ..
454                } => {
455                    *ready = false;
456                    ArchiveReaderStatus::StackChild {
457                        poisoned,
458                        parent_ready: ready,
459                        ready: true,
460                    }
461                }
462            },
463        }
464    }
465
466    /// Check the reader is in the correct status.
467    /// Only does anything when debug assertions are on.
468    #[inline(always)]
469    fn check_correct(&self) {
470        assert!(
471            !self.status.poisoned(),
472            "Archive reader used after it was meant to be abandoned!"
473        );
474        assert!(
475            self.status.ready(),
476            "Non-ready archive reader used! (Should've been reading from something else)"
477        );
478    }
479}