snix_eval/builtins/
to_xml.rs

1//! This module implements `builtins.toXML`, which is a serialisation
2//! of value information as well as internal snix state that several
3//! things in nixpkgs rely on.
4
5use bstr::ByteSlice;
6use rustc_hash::FxHashSet;
7use std::borrow::Cow;
8use std::{io::Write, rc::Rc};
9
10use crate::{ErrorKind, NixAttrs, NixContext, NixContextElement, NixString, Value};
11
12/// Recursively serialise a value to XML. The value *must* have been
13/// deep-forced before being passed to this function.
14/// On success, returns the NixContext.
15pub fn value_to_xml<W: Write>(mut writer: W, value: &Value) -> Result<NixContext, ErrorKind> {
16    // Write a literal document declaration, using C++-Nix-style
17    // single quotes.
18    writeln!(writer, "<?xml version='1.0' encoding='utf-8'?>")?;
19
20    let mut emitter = XmlEmitter::new(writer);
21
22    emitter.write_open_tag("expr", &[])?;
23    value_variant_to_xml(&mut emitter, value)?;
24    emitter.write_closing_tag("expr")?;
25
26    Ok(emitter.into_context())
27}
28
29fn write_typed_value<W: Write, V: ToString>(
30    w: &mut XmlEmitter<W>,
31    name_unescaped: &str,
32    value: V,
33) -> Result<(), ErrorKind> {
34    w.write_self_closing_tag(name_unescaped, &[("value", &value.to_string())])?;
35    Ok(())
36}
37
38fn write_attrs_naked<W: Write>(w: &mut XmlEmitter<W>, attrs: &NixAttrs) -> Result<(), ErrorKind> {
39    for (k, v) in attrs.iter_sorted() {
40        w.write_open_tag("attr", &[("name", &k.to_str_lossy())])?;
41        value_variant_to_xml(w, v)?;
42        w.write_closing_tag("attr")?;
43    }
44
45    Ok(())
46}
47
48fn write_derivation<W: Write>(w: &mut XmlEmitter<W>, attrs: &NixAttrs) -> Result<(), ErrorKind> {
49    if let Some(drv_path) = attrs
50        .select("drvPath")
51        .and_then(|val| val.to_contextful_str().ok())
52    {
53        match attrs
54            .select("outPath")
55            .and_then(|val| val.to_contextful_str().ok())
56        {
57            Some(out_path) => w.write_open_tag(
58                "derivation",
59                &[
60                    ("drvPath", &drv_path.to_str_lossy()),
61                    ("outPath", &out_path.to_str_lossy()),
62                ],
63            )?,
64            None => w.write_open_tag("derivation", &[("drvPath", &drv_path.to_str_lossy())])?,
65        };
66        if !drv_path.is_empty() && w.drvs_seen.insert(drv_path.clone()) {
67            write_attrs_naked(w, attrs)?;
68        } else {
69            w.write_self_closing_tag("repeated", &[])?;
70        }
71        return w.write_closing_tag("derivation").map_err(Into::into);
72    };
73
74    w.write_self_closing_tag("repeated", &[])?;
75    Ok(())
76}
77
78fn value_variant_to_xml<W: Write>(w: &mut XmlEmitter<W>, value: &Value) -> Result<(), ErrorKind> {
79    match value {
80        Value::Thunk(t) => return value_variant_to_xml(w, &t.value()),
81
82        Value::Null => {
83            w.write_open_tag("null", &[])?;
84            w.write_closing_tag("null")?;
85        }
86
87        Value::Bool(b) => return write_typed_value(w, "bool", b),
88        Value::Integer(i) => return write_typed_value(w, "int", i),
89        Value::Float(f) => return write_typed_value(w, "float", f),
90        Value::String(s) => {
91            if let Some(context) = s.context() {
92                w.extend_context(context.iter().cloned());
93            }
94            return write_typed_value(w, "string", s.to_str()?);
95        }
96        Value::Path(p) => return write_typed_value(w, "path", p.to_string_lossy()),
97
98        Value::List(list) => {
99            w.write_open_tag("list", &[])?;
100
101            for elem in list.into_iter() {
102                value_variant_to_xml(w, elem)?;
103            }
104
105            w.write_closing_tag("list")?;
106        }
107
108        Value::Attrs(attrs) => {
109            if attrs.is_derivation() {
110                write_derivation(w, attrs)?;
111            } else {
112                w.write_open_tag("attrs", &[])?;
113                write_attrs_naked(w, attrs)?;
114                w.write_closing_tag("attrs")?;
115            }
116        }
117
118        Value::Closure(c) => {
119            w.write_open_tag("function", &[])?;
120
121            match &c.lambda.formals {
122                Some(formals) => {
123                    let mut attrs: Vec<(&str, &str)> = Vec::with_capacity(2);
124                    if formals.ellipsis {
125                        attrs.push(("ellipsis", "1"));
126                    }
127                    if let Some(name) = &formals.name {
128                        attrs.push(("name", name.as_str()));
129                    }
130
131                    w.write_open_tag("attrspat", &attrs)?;
132                    for arg in formals.arguments.iter() {
133                        w.write_self_closing_tag("attr", &[("name", &arg.0.to_str_lossy())])?;
134                    }
135
136                    w.write_closing_tag("attrspat")?;
137                }
138                None => {
139                    // Use the stored parameter name
140                    w.write_self_closing_tag("varpat", &[("name", &c.lambda.param_name)])?;
141                }
142            }
143
144            w.write_closing_tag("function")?;
145        }
146
147        Value::Builtin(_) => {
148            w.write_open_tag("unevaluated", &[])?;
149            w.write_closing_tag("unevaluated")?;
150        }
151
152        Value::AttrNotFound
153        | Value::Blueprint(_)
154        | Value::DeferredUpvalue(_)
155        | Value::UnresolvedPath(_)
156        | Value::FinaliseRequest(_) => {
157            return Err(ErrorKind::SnixBug {
158                msg: "internal value variant encountered in builtins.toXML",
159                metadata: Some(Rc::new(value.clone())),
160            });
161        }
162
163        Value::Catchable(_) => {
164            panic!("Snix bug: value_to_xml() called on a value which had not been deep-forced")
165        }
166    };
167
168    Ok(())
169}
170
171/// A simple-stupid XML emitter, which implements only the subset needed for byte-by-byte compat with C++ nix’ `builtins.toXML`.
172struct XmlEmitter<W> {
173    /// The current indentation
174    cur_indent: usize,
175    writer: W,
176    context: NixContext,
177    drvs_seen: FxHashSet<NixString>,
178}
179
180impl<W: Write> XmlEmitter<W> {
181    pub fn new(writer: W) -> Self {
182        XmlEmitter {
183            cur_indent: 0,
184            writer,
185            context: Default::default(),
186            drvs_seen: Default::default(),
187        }
188    }
189
190    /// Write an open tag with the given name (which is not escaped!)
191    /// and attributes (Keys are not escaped! Only attribute values are.)
192    pub fn write_open_tag(
193        &mut self,
194        name_unescaped: &str,
195        attrs: &[(&str, &str)],
196    ) -> std::io::Result<()> {
197        self.add_indent()?;
198        self.writer.write_all(b"<")?;
199        self.writer.write_all(name_unescaped.as_bytes())?;
200        self.write_attrs_escape_vals(attrs)?;
201        self.writer.write_all(b">\n")?;
202        self.cur_indent += 2;
203        Ok(())
204    }
205
206    /// Write a self-closing open tag with the given name (which is not escaped!)
207    /// and attributes (Keys are not escaped! Only attribute values are.)
208    pub fn write_self_closing_tag(
209        &mut self,
210        name_unescaped: &str,
211        attrs: &[(&str, &str)],
212    ) -> std::io::Result<()> {
213        self.add_indent()?;
214        self.writer.write_all(b"<")?;
215        self.writer.write_all(name_unescaped.as_bytes())?;
216        self.write_attrs_escape_vals(attrs)?;
217        self.writer.write_all(b" />\n")?;
218        Ok(())
219    }
220
221    /// Write a closing tag with the given name (which is not escaped!)
222    pub fn write_closing_tag(&mut self, name_unescaped: &str) -> std::io::Result<()> {
223        self.cur_indent -= 2;
224        self.add_indent()?;
225        self.writer.write_all(b"</")?;
226        self.writer.write_all(name_unescaped.as_bytes())?;
227        self.writer.write_all(b">\n")?;
228        Ok(())
229    }
230
231    #[inline]
232    fn add_indent(&mut self) -> std::io::Result<()> {
233        self.writer.write_all(&b" ".repeat(self.cur_indent))
234    }
235
236    /// Write an attribute list
237    fn write_attrs_escape_vals(&mut self, attrs: &[(&str, &str)]) -> std::io::Result<()> {
238        for (name, val) in attrs {
239            self.writer.write_all(b" ")?;
240            self.writer.write_all(name.as_bytes())?;
241            self.writer.write_all(br#"=""#)?;
242            self.writer
243                .write_all(Self::escape_attr_value(val).as_bytes())?;
244            self.writer.write_all(b"\"")?;
245        }
246        Ok(())
247    }
248
249    /// Escape the given attribute value, making sure we only actually clone the string if we needed to replace something.
250    fn escape_attr_value(s: &str) -> Cow<str> {
251        let mut last_escape: usize = 0;
252        let mut res: Cow<str> = Cow::Borrowed("");
253        // iterating via char_indices gives us the ability to index the original string slice at character boundaries
254        for (idx, c) in s.char_indices() {
255            match Self::should_escape_char(c) {
256                None => {}
257                Some(new) => {
258                    // add characters since the last escape we did
259                    res += &s[last_escape..idx];
260                    // add the escaped value
261                    res += new;
262                    last_escape = idx + 1;
263                }
264            }
265        }
266        // we did not need to escape anything, so borrow original string
267        if last_escape == 0 {
268            Cow::Borrowed(s)
269        } else {
270            // add the remaining characters
271            res += &s[last_escape..];
272            res
273        }
274    }
275
276    fn should_escape_char(c: char) -> Option<&'static str> {
277        match c {
278            '<' => Some("&lt;"),
279            '>' => Some("&gt;"),
280            '"' => Some("&quot;"),
281            '\'' => Some("&apos;"),
282            '&' => Some("&amp;"),
283            '\n' => Some("&#xA;"),
284            '\r' => Some("&#xD;"),
285            _ => None,
286        }
287    }
288
289    /// Extends the existing context with more context elements.
290    fn extend_context<T>(&mut self, iter: T)
291    where
292        T: IntoIterator<Item = NixContextElement>,
293    {
294        self.context.extend(iter)
295    }
296
297    /// Consumes [Self] and returns the [NixContext] collected.
298    fn into_context(self) -> NixContext {
299        self.context
300    }
301}
302
#[cfg(test)]
mod tests {
    use bytes::buf::Writer;
    use pretty_assertions::assert_eq;

    use crate::builtins::to_xml::XmlEmitter;
    use std::borrow::Cow;

    /// Concrete emitter type, only needed to name the associated escaping
    /// function.
    type Emitter = XmlEmitter<Writer<Vec<u8>>>;

    /// Open tag, nested self-closing tag and closing tag come out on
    /// separate lines with the expected indentation and escaping.
    #[test]
    fn xml_gen() {
        let mut out = Vec::new();
        let mut emitter = XmlEmitter::new(&mut out);

        emitter
            .write_open_tag("hello", &[("hi", "it’s me"), ("no", "<escape>")])
            .unwrap();
        emitter
            .write_self_closing_tag("self-closing", &[("tag", "yay")])
            .unwrap();
        emitter.write_closing_tag("hello").unwrap();

        // The trailing empty entry yields the final newline.
        let expected = [
            r#"<hello hi="it’s me" no="&lt;escape&gt;">"#,
            r#"  <self-closing tag="yay" />"#,
            "</hello>",
            "",
        ]
        .join("\n");

        assert_eq!(std::str::from_utf8(&out).unwrap(), expected);
    }

    /// Escaping replaces the special characters and only allocates when a
    /// replacement was actually made.
    #[test]
    fn xml_escape() {
        // (input, expected output, expect an owned result, description)
        let cases: [(&str, &str, bool, &str); 4] = [
            ("ab<>c&de", "ab&lt;&gt;c&amp;de", true, "escape stuff"),
            ("", "", false, "empty escape is borrowed"),
            ("hi!ŷbla", "hi!ŷbla", false, "no escape is borrowed"),
            (
                "hi!<ŷ>bla",
                "hi!&lt;ŷ&gt;bla",
                true,
                "multi-byte chars are correctly used",
            ),
        ];

        for (input, expected, expect_owned, description) in cases {
            let escaped = Emitter::escape_attr_value(input);

            match (&escaped, expect_owned) {
                (Cow::Borrowed(s), true) => panic!("s should be owned {s}"),
                (Cow::Owned(s), false) => panic!("s should be borrowed {s}"),
                _ => {}
            }

            assert_eq!(&*escaped, expected, "{description}");
        }
    }

    /// A plain (non-pattern) lambda is serialised as a `<function>` element
    /// containing a `varpat` that mentions the parameter name.
    #[test]
    fn test_function_param_name_in_xml() {
        use crate::Evaluation;

        // A simple function whose parameter is called "myParam".
        let code = r#"builtins.toXML (myParam: myParam)"#;
        let eval = Evaluation::builder_pure().build();
        let result = eval.evaluate(code, None);

        assert!(
            result.errors.is_empty(),
            "Evaluation should succeed, but got errors: {:?}",
            result.errors
        );

        // Extract the generated XML as an owned string.
        let value = result.value.expect("Result should have a value");
        let xml_output = value
            .to_str()
            .expect("Result should be a contextless string")
            .to_string();

        // The varpat element must be present ...
        assert!(
            xml_output.contains("<varpat"),
            "XML should contain a varpat element, got: {xml_output}"
        );

        // ... the parameter name must show up ...
        assert!(
            xml_output.contains("myParam"),
            "XML should contain parameter name 'myParam', got: {xml_output}"
        );

        // ... and everything must be wrapped in the function structure.
        assert!(
            xml_output.contains("<function>"),
            "XML should contain a function element"
        );
    }
}