nix_compat/nar/wire/
mod.rs

1//! NAR wire format, without I/O details, since those differ between
2//! the synchronous and asynchronous implementations.
3//!
4//! The wire format is an S-expression format, encoded onto the wire
5//! using simple encoding rules.
6//!
7//! # Encoding
8//!
9//! Lengths are represented as 64-bit unsigned integers in little-endian
10//! format. Byte strings, including file contents and syntactic strings
11//! part of the grammar, are prefixed by their 64-bit length, and padded
12//! to 8-byte (64-bit) alignment with zero bytes. The zero-length string
13//! is therefore encoded as eight zero bytes representing its length.
14//!
15//! # Grammar
16//!
17//! The NAR grammar is as follows:
18//! ```plain
19//! archive ::= "nix-archive-1" node
20//!
21//! node ::= "(" "type" "symlink" "target" string ")"
22//!      ||= "(" "type" "regular" ("executable" "")? "contents" string ")"
23//!      ||= "(" "type" "directory" entry* ")"
24//!
25//! entry ::= "entry" "(" "name" string "node" node ")"
26//! ```
27//!
28//! We rewrite it to pull together the purely syntactic elements into
29//! unified tokens, producing an equivalent grammar that can be parsed
30//! and serialized more elegantly:
31//! ```plain
32//! archive ::= TOK_NAR node
33//! node ::= TOK_SYM string             TOK_PAR
34//!      ||= (TOK_REG | TOK_EXE) string TOK_PAR
35//!      ||= TOK_DIR entry*             TOK_PAR
36//!
37//! entry ::= TOK_ENT string TOK_NOD node TOK_PAR
38//!
39//! TOK_NAR ::= "nix-archive-1" "(" "type"
40//! TOK_SYM ::= "symlink" "target"
41//! TOK_REG ::= "regular" "contents"
42//! TOK_EXE ::= "regular" "executable" "" "contents"
43//! TOK_DIR ::= "directory"
44//! TOK_ENT ::= "entry" "(" "name"
45//! TOK_NOD ::= "node" "(" "type"
46//! TOK_PAR ::= ")"
47//! ```
48//!
49//! # Restrictions
50//!
51//! NOTE: These restrictions are not (and cannot be) enforced by this module,
52//! but must be enforced by its consumers, [super::reader] and [super::writer].
53//!
54//! Directory entry names cannot have the reserved names `.` and `..`, nor contain
55//! forward slashes. They must appear in strictly ascending lexicographic order
56//! within a directory, and can be at most [MAX_NAME_LEN] bytes in length.
57//!
58//! Symlink targets can be at most [MAX_TARGET_LEN] bytes in length.
59//!
60//! Neither is permitted to be empty, or contain null bytes.
61
// These values are the standard Linux length limits
/// Maximum length of a directory entry name, in bytes.
///
/// This module only defines the limit; per the module documentation it is
/// the reader and writer that must enforce it.
pub const MAX_NAME_LEN: usize = 255;
/// Maximum length of a symlink target, in bytes.
///
/// This module only defines the limit; per the module documentation it is
/// the reader and writer that must enforce it.
pub const MAX_TARGET_LEN: usize = 4095;
67
/// Test helper: encodes each string of `xs` in wire format and concatenates
/// the results.
///
/// Every string is written as its length (64-bit, little-endian), followed by
/// its bytes, followed by as many zero bytes as are needed to reach the next
/// 8-byte boundary. A string whose length is already a multiple of eight
/// (including the empty string) receives no padding.
#[cfg(test)]
fn token(xs: &[&str]) -> Vec<u8> {
    xs.iter().fold(Vec::new(), |mut wire, s| {
        let payload = s.as_bytes();

        wire.extend_from_slice(&(payload.len() as u64).to_le_bytes());
        wire.extend_from_slice(payload);

        // Number of zero bytes required to round the payload up to 8 bytes;
        // the outer `% 8` maps an already-aligned payload to zero padding.
        let padding = (8 - payload.len() % 8) % 8;
        wire.resize(wire.len() + padding, 0);

        wire
    })
}
84
// Each token below is the concatenation of one or more wire-encoded strings:
// an 8-byte little-endian length, the string bytes, and zero padding up to the
// next 8-byte boundary. The `tokens` test checks every constant against that
// encoding.

/// Archive header: `"nix-archive-1" "(" "type"`.
pub const TOK_NAR: [u8; 56] = *b"\x0d\0\0\0\0\0\0\0nix-archive-1\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0";
/// Symlink node: `"symlink" "target"`, to be followed by the target string.
pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0";
/// Regular (non-executable) file node: `"regular" "contents"`, to be followed
/// by the contents string.
pub const TOK_REG: [u8; 32] = *b"\x07\0\0\0\0\0\0\0regular\0\x08\0\0\0\0\0\0\0contents";
/// Executable file node: `"regular" "executable" "" "contents"`, to be
/// followed by the contents string. Identical to [TOK_REG] for its first
/// 16 bytes.
pub const TOK_EXE: [u8; 64] = *b"\x07\0\0\0\0\0\0\0regular\0\x0a\0\0\0\0\0\0\0executable\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0contents";
/// Directory node: `"directory"`, to be followed by zero or more entries.
pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0";
/// Start of a directory entry: `"entry" "(" "name"`, to be followed by the
/// entry name string.
pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0";
/// Start of a directory entry's node: `"node" "(" "type"`.
pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0";
/// Closing parenthesis: `")"`.
pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";
/// Eight zero bytes followed by [TOK_PAR]; byte-for-byte the same as the
/// encoding of the empty string followed by `")"`.
// NOTE(review): presumably lets the async reader consume padding and the
// closing parenthesis in one step — confirm against the reader. The
// `dead_code` allowance suggests some feature combinations leave it unused.
#[cfg(feature = "async")]
#[allow(dead_code)]
const TOK_PAD_PAR: [u8; 24] = *b"\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";
96
/// Uninhabited marker type (it has no variants, so no value of it can exist).
///
/// When both the `async` and `wire` features are enabled it implements
/// `crate::wire::reader::Tag` with [TOK_PAD_PAR] as its pattern.
// NOTE(review): `dead_code` is presumably allowed because the type is unused
// when `async` is enabled without `wire` — confirm.
#[cfg(feature = "async")]
#[allow(dead_code)]
#[derive(Debug)]
pub(crate) enum PadPar {}
101
/// Associates [PadPar] with the [TOK_PAD_PAR] byte pattern and a zeroed
/// buffer of exactly that pattern's length (24 bytes).
#[cfg(all(feature = "async", feature = "wire"))]
impl crate::wire::reader::Tag for PadPar {
    // Sized from the pattern itself so the two cannot drift apart.
    type Buf = [u8; TOK_PAD_PAR.len()];

    const PATTERN: &'static [u8] = &TOK_PAD_PAR;

    fn make_buf() -> Self::Buf {
        [0u8; TOK_PAD_PAR.len()]
    }
}
112
/// Verifies that every hand-written token constant is byte-identical to the
/// wire encoding of its constituent strings, as produced by `token`.
#[test]
fn tokens() {
    // (constant, the strings it is supposed to encode)
    let table: &[(&[u8], &[&str])] = &[
        (&TOK_NAR, &["nix-archive-1", "(", "type"]),
        (&TOK_SYM, &["symlink", "target"]),
        (&TOK_REG, &["regular", "contents"]),
        (&TOK_EXE, &["regular", "executable", "", "contents"]),
        (&TOK_DIR, &["directory"]),
        (&TOK_ENT, &["entry", "(", "name"]),
        (&TOK_NOD, &["node", "(", "type"]),
        (&TOK_PAR, &[")"]),
        // Only defined when the `async` feature is enabled.
        #[cfg(feature = "async")]
        (&TOK_PAD_PAR, &["", ")"]),
    ];

    table
        .iter()
        .for_each(|&(bytes, parts)| assert_eq!(bytes, token(parts)));
}
132
133pub use tag::Tag;
134mod tag;
135
tag::make! {
    // NOTE(review): the bracketed number looks like a byte offset into the
    // tokens used to tell the variants apart — confirm against `tag::make!`.
    // Byte 16 is the first position at which all four node tokens are
    // pairwise distinct (0x06, 0x08, 0x0a and b'y' respectively); [TOK_REG]
    // and [TOK_EXE] are identical before it.
    /// These are the node tokens, succeeding [TOK_NAR] or [TOK_NOD],
    /// and preceding the next variable-length element.
    pub enum Node[16] {
        /// Symlink ([TOK_SYM]), followed by the target string.
        Sym = TOK_SYM,
        /// Regular, non-executable file ([TOK_REG]), followed by the contents string.
        Reg = TOK_REG,
        /// Executable file ([TOK_EXE]), followed by the contents string.
        Exe = TOK_EXE,
        /// Directory ([TOK_DIR]), followed by zero or more entries.
        Dir = TOK_DIR,
    }

    // NOTE(review): [TOK_PAR] and [TOK_ENT] already differ in their first
    // byte (0x01 vs 0x05), which is consistent with the offset of 0 here.
    /// Directory entry or terminator
    pub enum Entry[0] {
        /// End of directory
        None = TOK_PAR,
        /// Directory entry
        /// Followed by a name string, [TOK_NOD], and a [Node].
        Some = TOK_ENT,
    }
}