nix_compat/aterm/
parser.rs

1//! This module implements parsing code for some basic building blocks
2//! of the [ATerm][] format, which is used by C++ Nix to serialize Derivations.
3//!
4//! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html
5use bstr::BString;
6use nom::branch::alt;
7use nom::bytes::streaming::{escaped_transform, is_not};
8use nom::character::streaming::char as nomchar;
9use nom::combinator::{map_res, opt, value};
10use nom::multi::separated_list0;
11use nom::sequence::delimited;
12use nom::{IResult, Parser};
13
14/// Parse a bstr and undo any escaping (which is why this needs to allocate).
15// FUTUREWORK: have a version for fields that are known to not need escaping
16// (like store paths), and use &str.
17fn parse_escaped_bytes(i: &[u8]) -> IResult<&[u8], BString> {
18    escaped_transform(
19        is_not("\"\\"),
20        '\\',
21        alt((
22            value("\\".as_bytes(), nomchar('\\')),
23            value("\n".as_bytes(), nomchar('n')),
24            value("\t".as_bytes(), nomchar('t')),
25            value("\r".as_bytes(), nomchar('r')),
26            value("\"".as_bytes(), nomchar('\"')),
27        )),
28    )(i)
29    .map(|(i, v)| (i, BString::new(v)))
30}
31
32/// Parse a field in double quotes, undo any escaping, and return the unquoted
33/// and decoded `Vec<u8>`.
34pub(crate) fn parse_bytes_field(i: &[u8]) -> IResult<&[u8], BString> {
35    delimited(
36        nomchar('\"'),
37        opt(parse_escaped_bytes).map(|opt_bstr| opt_bstr.unwrap_or_default()),
38        nomchar('\"'),
39    )
40    .parse(i)
41}
42
43/// Parse a field in double quotes, undo any escaping, and return the unquoted
44/// and decoded [String], if it's valid UTF-8.
45/// Or fail parsing if the bytes are no valid UTF-8.
46pub(crate) fn parse_string_field(i: &[u8]) -> IResult<&[u8], String> {
47    delimited(
48        nomchar('\"'),
49        map_res(
50            opt(parse_escaped_bytes).map(|opt_bstr| opt_bstr.unwrap_or_default()),
51            |bstr| String::from_utf8(bstr.to_vec()),
52        ),
53        nomchar('\"'),
54    )
55    .parse(i)
56}
57
58/// Parse a list of string fields (enclosed in brackets)
59pub(crate) fn parse_string_list(i: &[u8]) -> IResult<&[u8], Vec<String>> {
60    delimited(
61        nomchar('['),
62        separated_list0(nomchar(','), parse_string_field),
63        nomchar(']'),
64    )
65    .parse(i)
66}
67
#[cfg(test)]
mod tests {
    use rstest::rstest;

    // Byte fields: the quotes are stripped, escapes are resolved, and whatever
    // follows the closing quote is handed back as the remainder.
    #[rstest]
    #[case::empty(br#""""#, b"", b"")]
    #[case::hello_world(br#""Hello World""#, b"Hello World", b"")]
    #[case::doublequote(br#""\"""#, br#"""#, b"")]
    #[case::colon(br#"":""#, b":", b"")]
    #[case::doublequote_rest(br#""\""Rest"#, br#"""#, b"Rest")]
    #[case::backslash(br#""\\""#, b"\\", b"")]
    #[case::newline(br#""\n""#, b"\n", b"")]
    #[case::tab(br#""\t""#, b"\t", b"")]
    #[case::carriage_return(br#""\r""#, b"\r", b"")]
    #[case::mixed_escapes(br#""a\tb\\c""#, b"a\tb\\c", b"")]
    fn test_parse_bstr_field(
        #[case] input: &[u8],
        #[case] expected: &[u8],
        #[case] exp_rest: &[u8],
    ) {
        let (rest, parsed) = super::parse_bytes_field(input).expect("must parse");
        assert_eq!(exp_rest, rest, "expected remainder");
        assert_eq!(expected, parsed);
    }

    // A backslash followed by a character outside the supported escape set
    // must be rejected rather than passed through.
    #[test]
    fn parse_bytes_field_unknown_escape_fail() {
        super::parse_bytes_field(br#""\x""#).expect_err("must fail");
    }

    // String fields behave like byte fields, but yield a `String`.
    #[rstest]
    #[case::empty(br#""""#, "", b"")]
    #[case::hello_world(br#""Hello World""#, "Hello World", b"")]
    #[case::doublequote(br#""\"""#, r#"""#, b"")]
    #[case::colon(br#"":""#, ":", b"")]
    #[case::doublequote_rest(br#""\""Rest"#, r#"""#, b"Rest")]
    #[case::newline(br#""\n""#, "\n", b"")]
    #[case::non_ascii_utf8("\"\u{e4}\"".as_bytes(), "\u{e4}", b"")]
    fn parse_string_field(#[case] input: &[u8], #[case] expected: &str, #[case] exp_rest: &[u8]) {
        let (rest, parsed) = super::parse_string_field(input).expect("must parse");
        assert_eq!(exp_rest, rest, "expected remainder");
        assert_eq!(expected, &parsed);
    }

    // Bytes that are not valid UTF-8 must make the string parser fail.
    #[test]
    fn parse_string_field_invalid_encoding_fail() {
        let input: Vec<_> = vec![b'"', 0xc5, 0xc4, 0xd6, b'"'];

        super::parse_string_field(&input).expect_err("must fail");
    }

    #[rstest]
    #[case::single_foo(br#"["foo"]"#, vec!["foo".to_string()], b"")]
    #[case::two_elements(br#"["foo","bar"]"#, vec!["foo".to_string(), "bar".to_string()], b"")]
    #[case::empty_list(b"[]", vec![], b"")]
    #[case::empty_list_with_rest(b"[]blub", vec![], b"blub")]
    fn parse_list(#[case] input: &[u8], #[case] expected: Vec<String>, #[case] exp_rest: &[u8]) {
        let (rest, parsed) = super::parse_string_list(input).expect("must parse");
        assert_eq!(exp_rest, rest, "expected remainder");
        assert_eq!(expected, parsed);
    }
}