Skip to main content

nix_compat/derivation/
parser.rs

1//! This module constructs a [Derivation] by parsing its [ATerm][]
2//! serialization.
3//!
4//! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html
5
6use nom::Parser;
7use nom::bytes::streaming::tag;
8use nom::character::streaming::char as nomchar;
9use nom::combinator::{all_consuming, consumed, map_res};
10use nom::multi::{separated_list0, separated_list1};
11use nom::sequence::{delimited, preceded, separated_pair, terminated};
12use std::collections::{BTreeMap, BTreeSet, btree_map};
13use thiserror;
14
15use crate::derivation::output::OutputHash;
16use crate::derivation::parse_error::{ErrorKind, NomError, NomResult, into_nomerror};
17use crate::derivation::{Derivation, Output, OutputName, write};
18use crate::store_path::{self, StorePath};
19use crate::{aterm, nixhash};
20
21#[derive(Debug, thiserror::Error)]
22pub enum Error<I> {
23    #[error("parsing error: {0}")]
24    Parser(#[from] NomError<I>),
25    #[error("premature EOF")]
26    Incomplete,
27    #[error("validation error: {0}")]
28    Validation(super::DerivationError),
29}
30
31/// Convenience conversion of borrowed Error to an owned counterpart.
32impl From<Error<&[u8]>> for Error<Vec<u8>> {
33    fn from(value: Error<&[u8]>) -> Self {
34        match value {
35            Error::Parser(nom_error) => Error::Parser(NomError {
36                input: nom_error.input.to_vec(),
37                code: nom_error.code,
38            }),
39            Error::Incomplete => Error::Incomplete,
40            Error::Validation(e) => Error::Validation(e),
41        }
42    }
43}
44
45pub(crate) fn parse(i: &[u8]) -> Result<Derivation, Error<&[u8]>> {
46    match all_consuming(parse_derivation).parse(i) {
47        Ok((rest, derivation)) => {
48            // this shouldn't happen, as all_consuming shouldn't return.
49            debug_assert!(rest.is_empty());
50
51            // invoke validate
52            derivation.validate().map_err(Error::Validation)?;
53
54            Ok(derivation)
55        }
56        Err(nom::Err::Incomplete(_)) => Err(Error::Incomplete),
57        Err(nom::Err::Error(e) | nom::Err::Failure(e)) => Err(e.into()),
58    }
59}
60
61/// This parses a derivation in streaming fashion.
62/// If the parse is successful, it returns the leftover bytes which were not used for the parsing.
63/// If the parse is unsuccessful, either it returns incomplete or an error with the input as
64/// leftover.
65#[allow(dead_code)]
66pub fn parse_streaming(i: &[u8]) -> (Result<Derivation, Error<&[u8]>>, &[u8]) {
67    match consumed(parse_derivation).parse(i) {
68        Ok((_, (rest, derivation))) => {
69            // invoke validate
70            if let Err(e) = derivation.validate().map_err(Error::Validation) {
71                return (Err(e), i);
72            }
73
74            (Ok(derivation), rest)
75        }
76        Err(nom::Err::Incomplete(_)) => (Err(Error::Incomplete), i),
77        Err(nom::Err::Error(e) | nom::Err::Failure(e)) => (Err(e.into()), i),
78    }
79}
80
81/// Parse one output in ATerm. This is 4 string fields inside parans:
82/// output name, output path, algo (and mode), digest.
83/// Returns the output name and [Output] struct.
84fn parse_output(i: &[u8]) -> NomResult<&[u8], (OutputName, Output)> {
85    delimited(
86        nomchar('('),
87        map_res(
88            |i| {
89                (
90                    terminated(aterm::parse_string_field, nomchar(',')),
91                    terminated(aterm::parse_string_field, nomchar(',')),
92                    terminated(aterm::parse_string_field, nomchar(',')),
93                    aterm::parse_bytes_field,
94                )
95                    .parse(i)
96                    .map_err(into_nomerror)
97            },
98            |(output_name_str, output_path_str, algo_and_mode, encoded_digest)| {
99                let output_name: OutputName = output_name_str.parse().map_err(|err| {
100                    nom::Err::Failure(NomError {
101                        input: i,
102                        code: ErrorKind::InvalidOutputName(err),
103                    })
104                })?;
105
106                // This can't be an empty string in ATerms written to disk.
107                // This being an empty string can only occur during output path calculation.
108                let output_path = string_to_store_path(i, &output_path_str)?;
109
110                Ok::<_, nom::Err<NomError<&[u8]>>>((
111                    output_name,
112                    Output {
113                        path: Some(output_path),
114                        output_hash: if algo_and_mode.is_empty() && encoded_digest.is_empty() {
115                            None
116                        } else {
117                            let digest =
118                                data_encoding::HEXLOWER
119                                    .decode(&encoded_digest)
120                                    .map_err(|err| {
121                                        nom::Err::Failure(NomError {
122                                            input: i,
123                                            code: ErrorKind::NixHashError(
124                                                // TODO: do we still need the outer error?
125                                                nixhash::Error::InvalidBase16Encoding(err),
126                                            ),
127                                        })
128                                    })?;
129
130                            Some(
131                                OutputHash::from_mode_algo_and_digest(&algo_and_mode, digest)
132                                    .map_err(|err| {
133                                        nom::Err::Failure(NomError {
134                                            input: i,
135                                            code: ErrorKind::NixHashError(err),
136                                        })
137                                    })?,
138                            )
139                        },
140                    },
141                ))
142            },
143        ),
144        nomchar(')'),
145    )
146    .parse(i)
147}
148
149/// Parse multiple outputs in ATerm. This is a list of things acccepted by
150/// parse_output, and takes care of turning the (String, Output) returned from
151/// it to a BTreeMap.
152/// We don't use parse_kv here, as it's dealing with 2-tuples, and these are
153/// 4-tuples.
154fn parse_outputs(i: &[u8]) -> NomResult<&[u8], BTreeMap<OutputName, Output>> {
155    let res = delimited(
156        nomchar('['),
157        separated_list1(tag(","), parse_output),
158        nomchar(']'),
159    )
160    .parse(i);
161
162    match res {
163        Ok((rst, outputs_lst)) => {
164            let mut outputs = BTreeMap::default();
165            for (output_name, output) in outputs_lst.into_iter() {
166                if outputs.contains_key(&output_name) {
167                    return Err(nom::Err::Failure(NomError {
168                        input: i,
169                        code: ErrorKind::DuplicateMapKey(output_name.to_string()),
170                    }));
171                }
172                outputs.insert(output_name, output);
173            }
174            Ok((rst, outputs))
175        }
176        // pass regular parse errors along
177        Err(e) => Err(e),
178    }
179}
180
181fn parse_input_derivations(
182    i: &[u8],
183) -> NomResult<&[u8], BTreeMap<StorePath, BTreeSet<OutputName>>> {
184    let (i, input_derivations_list) = parse_kv(aterm::parse_string_list)(i)?;
185
186    // This is a HashMap of drv paths to a list of output names.
187    let mut input_derivations: BTreeMap<StorePath, BTreeSet<_>> = BTreeMap::new();
188
189    for (input_derivation, output_names_strings) in input_derivations_list {
190        let mut output_names = BTreeSet::<OutputName>::new();
191        for output_name_string in output_names_strings.into_iter() {
192            let output_name = OutputName::try_from(output_name_string).map_err(|err| {
193                nom::Err::Failure(NomError {
194                    input: i,
195                    code: ErrorKind::InvalidOutputName(err),
196                })
197            })?;
198
199            if output_names.contains(&output_name) {
200                return Err(nom::Err::Failure(NomError {
201                    input: i,
202                    code: ErrorKind::DuplicateInputDerivationOutputName(
203                        output_name,
204                        input_derivation,
205                    ),
206                }));
207            }
208            output_names.insert(output_name);
209        }
210
211        let input_derivation = string_to_store_path(i, input_derivation.as_str())?;
212
213        input_derivations.insert(input_derivation, output_names);
214    }
215
216    Ok((i, input_derivations))
217}
218
219fn parse_input_sources(i: &[u8]) -> NomResult<&[u8], BTreeSet<StorePath>> {
220    let (i, input_sources_lst) = aterm::parse_string_list(i).map_err(into_nomerror)?;
221
222    let mut input_sources: BTreeSet<_> = BTreeSet::new();
223    for input_source in input_sources_lst.into_iter() {
224        let input_source = string_to_store_path(i, input_source.as_str())?;
225        if input_sources.contains(&input_source) {
226            return Err(nom::Err::Failure(NomError {
227                input: i,
228                code: ErrorKind::DuplicateInputSource(input_source.to_owned()),
229            }));
230        } else {
231            input_sources.insert(input_source);
232        }
233    }
234
235    Ok((i, input_sources))
236}
237
238fn string_to_store_path<'i>(
239    i: &'i [u8],
240    path_str: &str,
241) -> Result<StorePath, nom::Err<NomError<&'i [u8]>>> {
242    let path = StorePath::from_absolute_path(path_str.as_bytes()).map_err(
243        |e: store_path::ParseStorePathError| {
244            nom::Err::Failure(NomError {
245                input: i,
246                code: e.into(),
247            })
248        },
249    )?;
250
251    #[cfg(debug_assertions)]
252    assert_eq!(path_str, path.to_absolute_path());
253
254    Ok(path)
255}
256
257pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> {
258    use nom::Parser;
259    preceded(
260        tag(write::DERIVATION_PREFIX),
261        delimited(
262            // inside parens
263            nomchar('('),
264            // tuple requires all errors to be of the same type, so we need to be a
265            // bit verbose here wrapping generic IResult into [NomATermResult].
266            (
267                // parse outputs
268                terminated(parse_outputs, nomchar(',')),
269                // // parse input derivations
270                terminated(parse_input_derivations, nomchar(',')),
271                // // parse input sources
272                terminated(parse_input_sources, nomchar(',')),
273                // // parse system
274                |i| {
275                    terminated(aterm::parse_string_field, nomchar(','))
276                        .parse(i)
277                        .map_err(into_nomerror)
278                },
279                // // parse builder
280                |i| {
281                    terminated(aterm::parse_string_field, nomchar(','))
282                        .parse(i)
283                        .map_err(into_nomerror)
284                },
285                // // parse arguments
286                |i| {
287                    terminated(aterm::parse_string_list, nomchar(','))
288                        .parse(i)
289                        .map_err(into_nomerror)
290                },
291                // parse environment
292                parse_kv(aterm::parse_bytes_field),
293            ),
294            nomchar(')'),
295        )
296        .map(
297            |(
298                outputs,
299                input_derivations,
300                input_sources,
301                system,
302                builder,
303                arguments,
304                environment,
305            )| {
306                Derivation {
307                    arguments,
308                    builder,
309                    environment,
310                    input_derivations,
311                    input_sources,
312                    outputs,
313                    system,
314                }
315            },
316        ),
317    )
318    .parse(i)
319}
320
321/// Parse a list of key/value pairs into a BTreeMap.
322/// The parser for the values can be passed in.
323/// In terms of ATerm, this is just a 2-tuple,
324/// but we have the additional restriction that the first element needs to be
325/// unique across all tuples.
326pub(crate) fn parse_kv<'a, V, VF>(
327    vf: VF,
328) -> impl FnMut(&'a [u8]) -> NomResult<&'a [u8], BTreeMap<String, V>> + 'static
329where
330    VF: FnMut(&'a [u8]) -> nom::IResult<&'a [u8], V, nom::error::Error<&'a [u8]>> + Clone + 'static,
331{
332    move |i|
333    // inside brackets
334    delimited(
335        nomchar('['),
336        |ii| {
337            let res = separated_list0(
338                nomchar(','),
339                // inside parens
340                delimited(
341                    nomchar('('),
342                    separated_pair(
343                        aterm::parse_string_field,
344                        nomchar(','),
345                        vf.clone(),
346                    ),
347                    nomchar(')'),
348                ),
349            ).parse(ii).map_err(into_nomerror);
350
351            match res {
352                Ok((rest, pairs)) => {
353                    let mut kvs: BTreeMap<String, V> = BTreeMap::new();
354                    for (k, v) in pairs.into_iter() {
355                        // collect the 2-tuple to a BTreeMap,
356                        // and fail if the key was already seen before.
357                        match kvs.entry(k) {
358                            btree_map::Entry::Vacant(e) => { e.insert(v); },
359                            btree_map::Entry::Occupied(e) => {
360                                return Err(nom::Err::Failure(NomError {
361                                    input: i,
362                                    code: ErrorKind::DuplicateMapKey(e.key().clone()),
363                                }));
364                            }
365                        }
366                    }
367                    Ok((rest, kvs))
368                }
369                Err(e) => Err(e),
370            }
371        },
372        nomchar(']'),
373    ).parse(i)
374}
375
376#[cfg(test)]
377mod tests {
378    use super::OutputHash;
379    use crate::derivation::{Output, OutputHashMode, OutputName};
380    use crate::store_path::StorePathRef;
381    use crate::{
382        derivation::{NixHash, parse_error::ErrorKind},
383        store_path::StorePath,
384    };
385    use std::collections::{BTreeMap, BTreeSet};
386    use std::sync::LazyLock;
387
388    use bstr::{BString, ByteSlice};
389    use hex_literal::hex;
390    use rstest::rstest;
391
392    static EXP_MULTI_OUTPUTS: LazyLock<BTreeMap<OutputName, Output>> = LazyLock::new(|| {
393        let mut b = BTreeMap::new();
394        b.insert(
395            "lib".parse().expect("valid OutputName"),
396            Output {
397                path: Some(
398                    StorePath::from_bytes(b"2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib")
399                        .unwrap(),
400                ),
401                output_hash: None,
402            },
403        );
404        b.insert(
405            "out".parse().expect("valid OutputName"),
406            Output {
407                path: Some(
408                    StorePath::from_bytes(
409                        b"55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out".as_bytes(),
410                    )
411                    .unwrap(),
412                ),
413                output_hash: None,
414            },
415        );
416        b
417    });
418
419    static EXP_AB_MAP: LazyLock<BTreeMap<String, BString>> = LazyLock::new(|| {
420        let mut b = BTreeMap::new();
421        b.insert("a".to_string(), b"1".into());
422        b.insert("b".to_string(), b"2".into());
423        b
424    });
425
426    static EXP_INPUT_DERIVATIONS_SIMPLE: LazyLock<BTreeMap<StorePath, BTreeSet<OutputName>>> =
427        LazyLock::new(|| {
428            let mut b = BTreeMap::new();
429            b.insert(
430                StorePath::from_bytes(b"8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv")
431                    .unwrap(),
432                BTreeSet::from([OutputName::out()]),
433            );
434            b.insert(
435                StorePath::from_bytes(b"p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv")
436                    .unwrap(),
437                BTreeSet::from([OutputName::out(), "lib".parse().expect("valid OutputName")]),
438            );
439            b
440        });
441
442    static EXP_INPUT_DERIVATIONS_SIMPLE_ATERM: LazyLock<String> = LazyLock::new(|| {
443        format!(
444            "[(\"{0}\",[\"out\"]),(\"{1}\",[\"out\",\"lib\"])]",
445            "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv",
446            "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv"
447        )
448    });
449
450    static EXP_INPUT_SOURCES_SIMPLE: LazyLock<BTreeSet<String>> = LazyLock::new(|| {
451        let mut b = BTreeSet::new();
452        b.insert("/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out".to_string());
453        b.insert("/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib".to_string());
454        b
455    });
456
457    /// Ensure parsing KVs works
458    #[rstest]
459    #[case::empty(b"[]", &BTreeMap::new(), b"")]
460    #[case::simple(b"[(\"a\",\"1\"),(\"b\",\"2\")]", &EXP_AB_MAP, b"")]
461    fn parse_kv(
462        #[case] input: &'static [u8],
463        #[case] expected: &BTreeMap<String, BString>,
464        #[case] exp_rest: &[u8],
465    ) {
466        let (rest, parsed) =
467            super::parse_kv(crate::aterm::parse_bytes_field)(input).expect("must parse");
468        assert_eq!(exp_rest, rest, "expected remainder");
469        assert_eq!(*expected, parsed);
470    }
471
472    #[rstest]
473    #[case::incomplete_empty(b"[")]
474    #[case::incomplete_simple(b"[(\"a\",\"1\")")]
475    #[case::incomplete_complicated_escape(b"[(\"a")]
476    #[case::incomplete_complicated_sep(b"[(\"a\",")]
477    #[case::incomplete_complicated_multi_escape(b"[(\"a\",\"")]
478    #[case::incomplete_complicated_multi_outer_sep(b"[(\"a\",\"b\"),")]
479    fn parse_kv_incomplete(#[case] input: &'static [u8]) {
480        assert!(matches!(
481            super::parse_kv(crate::aterm::parse_bytes_field)(input),
482            Err(nom::Err::Incomplete(_))
483        ));
484    }
485
486    /// Ensures the kv parser complains about duplicate map keys
487    #[test]
488    fn parse_kv_fail_dup_keys() {
489        let input: &'static [u8] = b"[(\"a\",\"1\"),(\"a\",\"2\")]";
490        let e = super::parse_kv(crate::aterm::parse_bytes_field)(input).expect_err("must fail");
491
492        match e {
493            nom::Err::Failure(e) => {
494                assert_eq!(ErrorKind::DuplicateMapKey("a".to_string()), e.code);
495            }
496            _ => panic!("unexpected error"),
497        }
498    }
499
500    /// Ensure parsing input derivations works.
501    #[rstest]
502    #[case::empty(b"[]", &BTreeMap::new())]
503    #[case::simple(EXP_INPUT_DERIVATIONS_SIMPLE_ATERM.as_bytes(), &EXP_INPUT_DERIVATIONS_SIMPLE)]
504    fn parse_input_derivations(
505        #[case] input: &'static [u8],
506        #[case] expected: &BTreeMap<StorePath, BTreeSet<OutputName>>,
507    ) {
508        let (rest, parsed) = super::parse_input_derivations(input).expect("must parse");
509
510        assert_eq!(expected, &parsed, "parsed mismatch");
511        assert!(rest.is_empty(), "rest must be empty");
512    }
513
514    /// Ensures the input derivation parser complains about duplicate output names
515    #[test]
516    fn parse_input_derivations_fail_dup_output_names() {
517        let input_str = format!(
518            "[(\"{0}\",[\"out\"]),(\"{1}\",[\"out\",\"out\"])]",
519            "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv",
520            "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv"
521        );
522        let e = super::parse_input_derivations(input_str.as_bytes()).expect_err("must fail");
523
524        match e {
525            nom::Err::Failure(e) => {
526                assert_eq!(
527                    ErrorKind::DuplicateInputDerivationOutputName(
528                        "out".parse().expect("Valid OutputName"),
529                        "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv".to_string(),
530                    ),
531                    e.code
532                );
533            }
534            _ => panic!("unexpected error"),
535        }
536    }
537
538    /// Ensure parsing input sources works
539    #[rstest]
540    #[case::empty(b"[]", &BTreeSet::new())]
541    #[case::simple(b"[\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out\",\"/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib\"]", &EXP_INPUT_SOURCES_SIMPLE)]
542    fn parse_input_sources(#[case] input: &'static [u8], #[case] expected: &BTreeSet<String>) {
543        let (rest, parsed) = super::parse_input_sources(input).expect("must parse");
544
545        assert_eq!(
546            expected,
547            &parsed
548                .iter()
549                .map(StorePath::to_absolute_path)
550                .collect::<BTreeSet<_>>(),
551            "parsed mismatch"
552        );
553        assert!(rest.is_empty(), "rest must be empty");
554    }
555
556    /// Ensures the input sources parser complains about duplicate input sources
557    #[test]
558    fn parse_input_sources_fail_dup_keys() {
559        let input: &'static [u8] = b"[\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo\",\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo\"]";
560        let e = super::parse_input_sources(input).expect_err("must fail");
561
562        match e {
563            nom::Err::Failure(e) => {
564                assert_eq!(
565                    ErrorKind::DuplicateInputSource(
566                        StorePathRef::from_absolute_path(
567                            "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo".as_bytes()
568                        )
569                        .unwrap()
570                        .to_owned()
571                    ),
572                    e.code
573                );
574            }
575            _ => panic!("unexpected error"),
576        }
577    }
578
579    #[rstest]
580    #[case::simple(
581        br#"("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")"#,
582        (OutputName::out(), Output {
583            path: Some(
584                StorePathRef::from_absolute_path("/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo".as_bytes()).unwrap().to_owned()),
585            output_hash: None
586        })
587    )]
588    #[case::fod(
589        br#"("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar","r:sha256","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba")"#,
590        (OutputName::out(), Output {
591            path: Some(
592                StorePathRef::from_absolute_path(
593                "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar".as_bytes()).unwrap().to_owned()),
594            output_hash: Some(OutputHash{
595                mode: OutputHashMode::Recursive,
596                hash: NixHash::Sha256(hex!("08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba")),
597            }),
598        })
599    )]
600    fn parse_output(#[case] input: &[u8], #[case] expected: (OutputName, Output)) {
601        let (rest, parsed) = super::parse_output(input).expect("must parse");
602        assert!(rest.is_empty());
603        assert_eq!(expected, parsed);
604    }
605
606    #[rstest]
607    #[case::multi_out(
608        br#"[("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib","",""),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out","","")]"#,
609        &EXP_MULTI_OUTPUTS
610    )]
611    fn parse_outputs(#[case] input: &[u8], #[case] expected: &BTreeMap<OutputName, Output>) {
612        let (rest, parsed) = super::parse_outputs(input).expect("must parse");
613        assert!(rest.is_empty());
614        assert_eq!(*expected, parsed);
615    }
616}