snix_eval/builtins/
to_xml.rs

1//! This module implements `builtins.toXML`, which is a serialisation
2//! of value information as well as internal snix state that several
3//! things in nixpkgs rely on.
4
5use bstr::ByteSlice;
6use std::borrow::Cow;
7use std::{io::Write, rc::Rc};
8
9use crate::{ErrorKind, NixContext, NixContextElement, Value};
10
11/// Recursively serialise a value to XML. The value *must* have been
12/// deep-forced before being passed to this function.
13/// On success, returns the NixContext.
14pub fn value_to_xml<W: Write>(mut writer: W, value: &Value) -> Result<NixContext, ErrorKind> {
15    // Write a literal document declaration, using C++-Nix-style
16    // single quotes.
17    writeln!(writer, "<?xml version='1.0' encoding='utf-8'?>")?;
18
19    let mut emitter = XmlEmitter::new(writer);
20
21    emitter.write_open_tag("expr", &[])?;
22    value_variant_to_xml(&mut emitter, value)?;
23    emitter.write_closing_tag("expr")?;
24
25    Ok(emitter.into_context())
26}
27
28fn write_typed_value<W: Write, V: ToString>(
29    w: &mut XmlEmitter<W>,
30    name_unescaped: &str,
31    value: V,
32) -> Result<(), ErrorKind> {
33    w.write_self_closing_tag(name_unescaped, &[("value", &value.to_string())])?;
34    Ok(())
35}
36
37fn value_variant_to_xml<W: Write>(w: &mut XmlEmitter<W>, value: &Value) -> Result<(), ErrorKind> {
38    match value {
39        Value::Thunk(t) => return value_variant_to_xml(w, &t.value()),
40
41        Value::Null => {
42            w.write_open_tag("null", &[])?;
43            w.write_closing_tag("null")?;
44        }
45
46        Value::Bool(b) => return write_typed_value(w, "bool", b),
47        Value::Integer(i) => return write_typed_value(w, "int", i),
48        Value::Float(f) => return write_typed_value(w, "float", f),
49        Value::String(s) => {
50            if let Some(context) = s.context() {
51                w.extend_context(context.iter().cloned());
52            }
53            return write_typed_value(w, "string", s.to_str()?);
54        }
55        Value::Path(p) => return write_typed_value(w, "path", p.to_string_lossy()),
56
57        Value::List(list) => {
58            w.write_open_tag("list", &[])?;
59
60            for elem in list.into_iter() {
61                value_variant_to_xml(w, elem)?;
62            }
63
64            w.write_closing_tag("list")?;
65        }
66
67        Value::Attrs(attrs) => {
68            w.write_open_tag("attrs", &[])?;
69
70            for elem in attrs.iter_sorted() {
71                w.write_open_tag("attr", &[("name", &elem.0.to_str_lossy())])?;
72                value_variant_to_xml(w, elem.1)?;
73                w.write_closing_tag("attr")?;
74            }
75
76            w.write_closing_tag("attrs")?;
77        }
78
79        Value::Closure(c) => {
80            w.write_open_tag("function", &[])?;
81
82            match &c.lambda.formals {
83                Some(formals) => {
84                    let mut attrs: Vec<(&str, &str)> = Vec::with_capacity(2);
85                    if formals.ellipsis {
86                        attrs.push(("ellipsis", "1"));
87                    }
88                    if let Some(ref name) = &formals.name {
89                        attrs.push(("name", name.as_str()));
90                    }
91
92                    w.write_open_tag("attrspat", &attrs)?;
93                    for arg in formals.arguments.iter() {
94                        w.write_self_closing_tag("attr", &[("name", &arg.0.to_str_lossy())])?;
95                    }
96
97                    w.write_closing_tag("attrspat")?;
98                }
99                None => {
100                    // TODO(tazjin): snix does not currently persist function
101                    // argument names anywhere (whereas we do for formals, as
102                    // that is required for other runtime behaviour). Because of
103                    // this the implementation here is fake, always returning
104                    // the same argument name.
105                    //
106                    // If we don't want to persist the data, we can re-parse the
107                    // AST from the spans of the lambda's bytecode and figure it
108                    // out that way, but it needs some investigating.
109                    w.write_self_closing_tag("varpat", &[("name", /* fake: */ "x")])?;
110                }
111            }
112
113            w.write_closing_tag("function")?;
114        }
115
116        Value::Builtin(_) => {
117            w.write_open_tag("unevaluated", &[])?;
118            w.write_closing_tag("unevaluated")?;
119        }
120
121        Value::AttrNotFound
122        | Value::Blueprint(_)
123        | Value::DeferredUpvalue(_)
124        | Value::UnresolvedPath(_)
125        | Value::FinaliseRequest(_) => {
126            return Err(ErrorKind::SnixBug {
127                msg: "internal value variant encountered in builtins.toXML",
128                metadata: Some(Rc::new(value.clone())),
129            })
130        }
131
132        Value::Catchable(_) => {
133            panic!("Snix bug: value_to_xml() called on a value which had not been deep-forced")
134        }
135    };
136
137    Ok(())
138}
139
140/// A simple-stupid XML emitter, which implements only the subset needed for byte-by-byte compat with C++ nix’ `builtins.toXML`.
141struct XmlEmitter<W> {
142    /// The current indentation
143    cur_indent: usize,
144    writer: W,
145    context: NixContext,
146}
147
148impl<W: Write> XmlEmitter<W> {
149    pub fn new(writer: W) -> Self {
150        XmlEmitter {
151            cur_indent: 0,
152            writer,
153            context: Default::default(),
154        }
155    }
156
157    /// Write an open tag with the given name (which is not escaped!)
158    /// and attributes (Keys are not escaped! Only attribute values are.)
159    pub fn write_open_tag(
160        &mut self,
161        name_unescaped: &str,
162        attrs: &[(&str, &str)],
163    ) -> std::io::Result<()> {
164        self.add_indent()?;
165        self.writer.write_all(b"<")?;
166        self.writer.write_all(name_unescaped.as_bytes())?;
167        self.write_attrs_escape_vals(attrs)?;
168        self.writer.write_all(b">\n")?;
169        self.cur_indent += 2;
170        Ok(())
171    }
172
173    /// Write a self-closing open tag with the given name (which is not escaped!)
174    /// and attributes (Keys are not escaped! Only attribute values are.)
175    pub fn write_self_closing_tag(
176        &mut self,
177        name_unescaped: &str,
178        attrs: &[(&str, &str)],
179    ) -> std::io::Result<()> {
180        self.add_indent()?;
181        self.writer.write_all(b"<")?;
182        self.writer.write_all(name_unescaped.as_bytes())?;
183        self.write_attrs_escape_vals(attrs)?;
184        self.writer.write_all(b" />\n")?;
185        Ok(())
186    }
187
188    /// Write a closing tag with the given name (which is not escaped!)
189    pub fn write_closing_tag(&mut self, name_unescaped: &str) -> std::io::Result<()> {
190        self.cur_indent -= 2;
191        self.add_indent()?;
192        self.writer.write_all(b"</")?;
193        self.writer.write_all(name_unescaped.as_bytes())?;
194        self.writer.write_all(b">\n")?;
195        Ok(())
196    }
197
198    #[inline]
199    fn add_indent(&mut self) -> std::io::Result<()> {
200        self.writer.write_all(&b" ".repeat(self.cur_indent))
201    }
202
203    /// Write an attribute list
204    fn write_attrs_escape_vals(&mut self, attrs: &[(&str, &str)]) -> std::io::Result<()> {
205        for (name, val) in attrs {
206            self.writer.write_all(b" ")?;
207            self.writer.write_all(name.as_bytes())?;
208            self.writer.write_all(br#"=""#)?;
209            self.writer
210                .write_all(Self::escape_attr_value(val).as_bytes())?;
211            self.writer.write_all(b"\"")?;
212        }
213        Ok(())
214    }
215
216    /// Escape the given attribute value, making sure we only actually clone the string if we needed to replace something.
217    fn escape_attr_value(s: &str) -> Cow<str> {
218        let mut last_escape: usize = 0;
219        let mut res: Cow<str> = Cow::Borrowed("");
220        // iterating via char_indices gives us the ability to index the original string slice at character boundaries
221        for (idx, c) in s.char_indices() {
222            match Self::should_escape_char(c) {
223                None => {}
224                Some(new) => {
225                    // add characters since the last escape we did
226                    res += &s[last_escape..idx];
227                    // add the escaped value
228                    res += new;
229                    last_escape = idx + 1;
230                }
231            }
232        }
233        // we did not need to escape anything, so borrow original string
234        if last_escape == 0 {
235            Cow::Borrowed(s)
236        } else {
237            // add the remaining characters
238            res += &s[last_escape..];
239            res
240        }
241    }
242
243    fn should_escape_char(c: char) -> Option<&'static str> {
244        match c {
245            '<' => Some("&lt;"),
246            '>' => Some("&gt;"),
247            '"' => Some("&quot;"),
248            '\'' => Some("&apos;"),
249            '&' => Some("&amp;"),
250            '\n' => Some("&#xA;"),
251            '\r' => Some("&#xD;"),
252            _ => None,
253        }
254    }
255
256    /// Extends the existing context with more context elements.
257    fn extend_context<T>(&mut self, iter: T)
258    where
259        T: IntoIterator<Item = NixContextElement>,
260    {
261        self.context.extend(iter)
262    }
263
264    /// Consumes [Self] and returns the [NixContext] collected.
265    fn into_context(self) -> NixContext {
266        self.context
267    }
268}
269
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::XmlEmitter;
    use std::borrow::Cow;

    /// `escape_attr_value` is an associated function; a concrete writer type
    /// is only needed to name it, so any standard-library writer will do.
    type Emitter = XmlEmitter<Vec<u8>>;

    /// Tags are indented according to their nesting and attribute values are
    /// escaped, while non-ASCII characters pass through untouched.
    #[test]
    fn xml_gen() {
        let mut buf = Vec::new();
        let mut x = XmlEmitter::new(&mut buf);
        x.write_open_tag("hello", &[("hi", "it’s me"), ("no", "<escape>")])
            .unwrap();
        x.write_self_closing_tag("self-closing", &[("tag", "yay")])
            .unwrap();
        x.write_closing_tag("hello").unwrap();

        assert_eq!(
            std::str::from_utf8(&buf).unwrap(),
            r##"<hello hi="it’s me" no="&lt;escape&gt;">
  <self-closing tag="yay" />
</hello>
"##
        );
    }

    /// Each nesting level adds two spaces, and closing tags line up with
    /// their opening tags.
    #[test]
    fn xml_nested_indent() {
        let mut buf = Vec::new();
        let mut x = XmlEmitter::new(&mut buf);
        x.write_open_tag("a", &[]).unwrap();
        x.write_open_tag("b", &[]).unwrap();
        x.write_self_closing_tag("c", &[]).unwrap();
        x.write_closing_tag("b").unwrap();
        x.write_closing_tag("a").unwrap();

        assert_eq!(
            std::str::from_utf8(&buf).unwrap(),
            "<a>\n  <b>\n    <c />\n  </b>\n</a>\n"
        );
    }

    /// Escaping only allocates when something had to be replaced, and slices
    /// the input at character boundaries.
    #[test]
    fn xml_escape() {
        match Emitter::escape_attr_value("ab<>c&de") {
            Cow::Owned(s) => assert_eq!(s, "ab&lt;&gt;c&amp;de".to_string(), "escape stuff"),
            Cow::Borrowed(s) => panic!("s should be owned {}", s),
        }
        match Emitter::escape_attr_value("") {
            Cow::Borrowed(s) => assert_eq!(s, "", "empty escape is borrowed"),
            Cow::Owned(s) => panic!("s should be borrowed {}", s),
        }
        match Emitter::escape_attr_value("hi!ŷbla") {
            Cow::Borrowed(s) => assert_eq!(s, "hi!ŷbla", "no escape is borrowed"),
            Cow::Owned(s) => panic!("s should be borrowed {}", s),
        }
        match Emitter::escape_attr_value("hi!<ŷ>bla") {
            Cow::Owned(s) => assert_eq!(
                s,
                "hi!&lt;ŷ&gt;bla".to_string(),
                "multi-byte chars are correctly used"
            ),
            Cow::Borrowed(s) => panic!("s should be owned {}", s),
        }
    }

    /// Quotes and line breaks are replaced as well, so a value can never
    /// terminate the attribute it is written into.
    #[test]
    fn xml_escape_quotes_and_newlines() {
        assert_eq!(
            Emitter::escape_attr_value("say \"hi\" & 'bye'\r\n").as_ref(),
            "say &quot;hi&quot; &amp; &apos;bye&apos;&#xD;&#xA;"
        );
    }
}