quick_xml/
writer.rs

1//! Contains high-level interface for an events-based XML emitter.
2
3use std::borrow::Cow;
4use std::io::Write;
5use std::result::Result as StdResult;
6
7use crate::encoding::UTF8_BOM;
8use crate::errors::{Error, Result};
9use crate::events::{attributes::Attribute, BytesCData, BytesPI, BytesStart, BytesText, Event};
10
11#[cfg(feature = "async-tokio")]
12mod async_tokio;
13
// Imports used by `Writer::write_serializable` (requires the `serialize` feature).
15#[cfg(feature = "serialize")]
16use {crate::de::DeError, serde::Serialize};
17
18/// XML writer. Writes XML [`Event`]s to a [`std::io::Write`] implementor.
19///
20/// # Examples
21///
22/// ```
23/// # use pretty_assertions::assert_eq;
24/// use quick_xml::events::{Event, BytesEnd, BytesStart};
25/// use quick_xml::reader::Reader;
26/// use quick_xml::writer::Writer;
27/// use std::io::Cursor;
28///
29/// let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
30/// let mut reader = Reader::from_str(xml);
31/// let mut writer = Writer::new(Cursor::new(Vec::new()));
32/// loop {
33///     match reader.read_event() {
34///         Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => {
35///
///             // creates a new element ... alternatively we could reuse `e` by calling
37///             // `e.into_owned()`
38///             let mut elem = BytesStart::new("my_elem");
39///
40///             // collect existing attributes
41///             elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
42///
43///             // copy existing attributes, adds a new my-key="some value" attribute
44///             elem.push_attribute(("my-key", "some value"));
45///
46///             // writes the event to the writer
47///             assert!(writer.write_event(Event::Start(elem)).is_ok());
48///         },
49///         Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
50///             assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok());
51///         },
52///         Ok(Event::Eof) => break,
53///         // we can either move or borrow the event to write, depending on your use-case
54///         Ok(e) => assert!(writer.write_event(e.borrow()).is_ok()),
55///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
56///     }
57/// }
58///
59/// let result = writer.into_inner().into_inner();
60/// let expected = r#"<my_elem k1="v1" k2="v2" my-key="some value"><child>text</child></my_elem>"#;
61/// assert_eq!(result, expected.as_bytes());
62/// ```
#[derive(Clone)]
pub struct Writer<W> {
    /// Underlying writer that receives the serialized XML bytes.
    writer: W,
    /// Indentation settings and state. `None` when the writer was created
    /// with `new`; in that case no line breaks or indents are written.
    indent: Option<Indentation>,
}
69
70impl<W> Writer<W> {
71    /// Creates a `Writer` from a generic writer.
72    pub const fn new(inner: W) -> Writer<W> {
73        Writer {
74            writer: inner,
75            indent: None,
76        }
77    }
78
79    /// Creates a `Writer` with configured indents from a generic writer.
80    pub fn new_with_indent(inner: W, indent_char: u8, indent_size: usize) -> Writer<W> {
81        Writer {
82            writer: inner,
83            indent: Some(Indentation::new(indent_char, indent_size)),
84        }
85    }
86
87    /// Consumes this `Writer`, returning the underlying writer.
88    pub fn into_inner(self) -> W {
89        self.writer
90    }
91
92    /// Get a mutable reference to the underlying writer.
93    pub fn get_mut(&mut self) -> &mut W {
94        &mut self.writer
95    }
96
97    /// Get a reference to the underlying writer.
98    pub const fn get_ref(&self) -> &W {
99        &self.writer
100    }
101
102    /// Provides a simple, high-level API for writing XML elements.
103    ///
104    /// Returns an [`ElementWriter`] that simplifies setting attributes and writing
105    /// content inside the element.
106    ///
107    /// # Example
108    ///
109    /// ```
110    /// # use quick_xml::Result;
111    /// # fn main() -> Result<()> {
112    /// use quick_xml::events::{BytesStart, BytesText, Event};
113    /// use quick_xml::writer::Writer;
114    /// use quick_xml::Error;
115    /// use std::io::Cursor;
116    ///
117    /// let mut writer = Writer::new(Cursor::new(Vec::new()));
118    ///
119    /// // writes <tag attr1="value1"/>
120    /// writer.create_element("tag")
121    ///     .with_attribute(("attr1", "value1"))  // chain `with_attribute()` calls to add many attributes
122    ///     .write_empty()?;
123    ///
124    /// // writes <tag attr1="value1" attr2="value2">with some text inside</tag>
125    /// writer.create_element("tag")
126    ///     .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter())  // or add attributes from an iterator
127    ///     .write_text_content(BytesText::new("with some text inside"))?;
128    ///
129    /// // writes <tag><fruit quantity="0">apple</fruit><fruit quantity="1">orange</fruit></tag>
130    /// writer.create_element("tag")
131    ///     // We need to provide error type, because it is not named somewhere explicitly
132    ///     .write_inner_content::<_, Error>(|writer| {
133    ///         let fruits = ["apple", "orange"];
134    ///         for (quant, item) in fruits.iter().enumerate() {
135    ///             writer
136    ///                 .create_element("fruit")
137    ///                 .with_attribute(("quantity", quant.to_string().as_str()))
138    ///                 .write_text_content(BytesText::new(item))?;
139    ///         }
140    ///         Ok(())
141    ///     })?;
142    /// # Ok(())
143    /// # }
144    /// ```
145    #[must_use]
146    pub fn create_element<'a, N>(&'a mut self, name: N) -> ElementWriter<'a, W>
147    where
148        N: Into<Cow<'a, str>>,
149    {
150        ElementWriter {
151            writer: self,
152            start_tag: BytesStart::new(name),
153            state: AttributeIndent::NoneAttributesWritten,
154            spaces: Vec::new(),
155        }
156    }
157}
158
159impl<W: Write> Writer<W> {
160    /// Write a [Byte-Order-Mark] character to the document.
161    ///
162    /// # Example
163    ///
164    /// ```rust
165    /// # use quick_xml::Result;
166    /// # fn main() -> Result<()> {
167    /// use quick_xml::events::{BytesStart, BytesText, Event};
168    /// use quick_xml::writer::Writer;
169    /// use quick_xml::Error;
170    /// use std::io::Cursor;
171    ///
172    /// let mut buffer = Vec::new();
173    /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4);
174    ///
175    /// writer.write_bom()?;
176    /// writer
177    ///     .create_element("empty")
178    ///     .with_attribute(("attr1", "value1"))
179    ///     .write_empty()
180    ///     .expect("failure");
181    ///
182    /// assert_eq!(
183    ///     std::str::from_utf8(&buffer).unwrap(),
184    ///     "\u{FEFF}<empty attr1=\"value1\"/>"
185    /// );
186    /// # Ok(())
187    /// # }
188    /// ```
189    /// [Byte-Order-Mark]: https://unicode.org/faq/utf_bom.html#BOM
190    pub fn write_bom(&mut self) -> Result<()> {
191        self.write(UTF8_BOM)
192    }
193
194    /// Writes the given event to the underlying writer.
195    pub fn write_event<'a, E: Into<Event<'a>>>(&mut self, event: E) -> Result<()> {
196        let mut next_should_line_break = true;
197        let result = match event.into() {
198            Event::Start(e) => {
199                let result = self.write_wrapped(b"<", &e, b">");
200                if let Some(i) = self.indent.as_mut() {
201                    i.grow();
202                }
203                result
204            }
205            Event::End(e) => {
206                if let Some(i) = self.indent.as_mut() {
207                    i.shrink();
208                }
209                self.write_wrapped(b"</", &e, b">")
210            }
211            Event::Empty(e) => self.write_wrapped(b"<", &e, b"/>"),
212            Event::Text(e) => {
213                next_should_line_break = false;
214                self.write(&e)
215            }
216            Event::Comment(e) => self.write_wrapped(b"<!--", &e, b"-->"),
217            Event::CData(e) => {
218                next_should_line_break = false;
219                self.write(b"<![CDATA[")?;
220                self.write(&e)?;
221                self.write(b"]]>")
222            }
223            Event::Decl(e) => self.write_wrapped(b"<?", &e, b"?>"),
224            Event::PI(e) => self.write_wrapped(b"<?", &e, b"?>"),
225            Event::DocType(e) => self.write_wrapped(b"<!DOCTYPE ", &e, b">"),
226            Event::Eof => Ok(()),
227        };
228        if let Some(i) = self.indent.as_mut() {
229            i.should_line_break = next_should_line_break;
230        }
231        result
232    }
233
234    /// Writes bytes
235    #[inline]
236    pub(crate) fn write(&mut self, value: &[u8]) -> Result<()> {
237        self.writer.write_all(value).map_err(Into::into)
238    }
239
240    #[inline]
241    fn write_wrapped(&mut self, before: &[u8], value: &[u8], after: &[u8]) -> Result<()> {
242        if let Some(ref i) = self.indent {
243            if i.should_line_break {
244                self.writer.write_all(b"\n")?;
245                self.writer.write_all(i.current())?;
246            }
247        }
248        self.write(before)?;
249        self.write(value)?;
250        self.write(after)?;
251        Ok(())
252    }
253
254    /// Manually write a newline and indentation at the proper level.
255    ///
256    /// This can be used when the heuristic to line break and indent after any
257    /// [`Event`] apart from [`Text`] fails such as when a [`Start`] occurs directly
258    /// after [`Text`].
259    ///
260    /// This method will do nothing if `Writer` was not constructed with [`new_with_indent`].
261    ///
262    /// [`Text`]: Event::Text
263    /// [`Start`]: Event::Start
264    /// [`new_with_indent`]: Self::new_with_indent
265    pub fn write_indent(&mut self) -> Result<()> {
266        if let Some(ref i) = self.indent {
267            self.writer.write_all(b"\n")?;
268            self.writer.write_all(i.current())?;
269        }
270        Ok(())
271    }
272
273    /// Write an arbitrary serializable type
274    ///
275    /// Note: If you are attempting to write XML in a non-UTF-8 encoding, this may not
276    /// be safe to use. Rust basic types assume UTF-8 encodings.
277    ///
278    /// ```rust
279    /// # use pretty_assertions::assert_eq;
280    /// # use serde::Serialize;
281    /// # use quick_xml::events::{BytesStart, Event};
282    /// # use quick_xml::writer::Writer;
283    /// # use quick_xml::DeError;
284    /// # fn main() -> Result<(), DeError> {
285    /// #[derive(Debug, PartialEq, Serialize)]
286    /// struct MyData {
287    ///     question: String,
288    ///     answer: u32,
289    /// }
290    ///
291    /// let data = MyData {
292    ///     question: "The Ultimate Question of Life, the Universe, and Everything".into(),
293    ///     answer: 42,
294    /// };
295    ///
296    /// let mut buffer = Vec::new();
297    /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4);
298    ///
299    /// let start = BytesStart::new("root");
300    /// let end = start.to_end();
301    ///
302    /// writer.write_event(Event::Start(start.clone()))?;
303    /// writer.write_serializable("my_data", &data)?;
304    /// writer.write_event(Event::End(end))?;
305    ///
306    /// assert_eq!(
307    ///     std::str::from_utf8(&buffer)?,
308    ///     r#"<root>
309    ///     <my_data>
310    ///         <question>The Ultimate Question of Life, the Universe, and Everything</question>
311    ///         <answer>42</answer>
312    ///     </my_data>
313    /// </root>"#
314    /// );
315    /// # Ok(())
316    /// # }
317    /// ```
318    #[cfg(feature = "serialize")]
319    pub fn write_serializable<T: Serialize>(
320        &mut self,
321        tag_name: &str,
322        content: &T,
323    ) -> std::result::Result<(), DeError> {
324        use crate::se::{Indent, Serializer};
325
326        self.write_indent()?;
327        let mut fmt = ToFmtWrite(&mut self.writer);
328        let mut serializer = Serializer::with_root(&mut fmt, Some(tag_name))?;
329
330        if let Some(indent) = &mut self.indent {
331            serializer.set_indent(Indent::Borrow(indent));
332        }
333
334        content.serialize(serializer)?;
335
336        Ok(())
337    }
338}
339
/// Tracks how attributes inside a start tag should be indented while an
/// `ElementWriter` is being built.
///
/// The state changes on `.with_attribute()` / `.with_attributes()` and on
/// `.new_line()` as shown below:
///
/// ```mermaid
/// stateDiagram-v2
///     [*] --> NoneAttributesWritten
///     NoneAttributesWritten --> Spaces : .with_attribute()
///     NoneAttributesWritten --> WriteConfigured : .new_line()
///
///     Spaces --> Spaces : .with_attribute()
///     Spaces --> WriteSpaces : .new_line()
///
///     WriteSpaces --> Spaces : .with_attribute()
///     WriteSpaces --> WriteSpaces : .new_line()
///
///     Configured --> Configured : .with_attribute()
///     Configured --> WriteConfigured : .new_line()
///
///     WriteConfigured --> Configured : .with_attribute()
///     WriteConfigured --> WriteConfigured : .new_line()
/// ```
#[derive(Debug)]
enum AttributeIndent {
    /// Initial state. `ElementWriter` was just created and no attributes written yet
    NoneAttributesWritten,
    /// A new line was requested: write the specified count of spaces as indent
    /// before writing the next attribute in `with_attribute()`
    WriteSpaces(usize),
    /// Remember the count of spaces that should be used as indent if
    /// `new_line()` is called
    Spaces(usize),
    /// A new line was requested: write the specified count of configured indent
    /// characters (in addition to the current element indent) before writing
    /// the next attribute in `with_attribute()`
    WriteConfigured(usize),
    /// Remember the count of configured indent characters that should be used
    /// if `new_line()` is called
    Configured(usize),
}
373
/// A struct to write an element. Contains methods to add attributes and inner
/// elements to the element
pub struct ElementWriter<'a, W> {
    /// Writer that receives the element once one of the `write_*` methods is called
    writer: &'a mut Writer<W>,
    /// Start tag under construction: the element name plus everything pushed so far
    start_tag: BytesStart<'a>,
    /// Current state of the attribute indentation state machine
    state: AttributeIndent,
    /// Contains spaces used to write space indents of attributes
    spaces: Vec<u8>,
}
383
384impl<'a, W> ElementWriter<'a, W> {
385    /// Adds an attribute to this element.
386    pub fn with_attribute<'b, I>(mut self, attr: I) -> Self
387    where
388        I: Into<Attribute<'b>>,
389    {
390        self.write_attr(attr.into());
391        self
392    }
393
394    /// Add additional attributes to this element using an iterator.
395    ///
396    /// The yielded items must be convertible to [`Attribute`] using `Into`.
397    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
398    where
399        I: IntoIterator,
400        I::Item: Into<Attribute<'b>>,
401    {
402        let mut iter = attributes.into_iter();
403        if let Some(attr) = iter.next() {
404            self.write_attr(attr.into());
405            self.start_tag.extend_attributes(iter);
406        }
407        self
408    }
409
410    /// Push a new line inside an element between attributes. Note, that this
411    /// method does nothing if [`Writer`] was created without indentation support.
412    ///
413    /// # Examples
414    ///
415    /// The following code
416    ///
417    /// ```
418    /// # use quick_xml::writer::Writer;
419    /// let mut buffer = Vec::new();
420    /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 2);
421    /// writer
422    ///   .create_element("element")
423    ///     //.new_line() (1)
424    ///     .with_attribute(("first", "1"))
425    ///     .with_attribute(("second", "2"))
426    ///     .new_line()
427    ///     .with_attributes([
428    ///         ("third", "3"),
429    ///         ("fourth", "4"),
430    ///     ])
431    ///     //.new_line() (2)
432    ///     .write_empty();
433    /// ```
    /// will produce the following XML:
    /// ```xml
    /// <!-- result of the code above. Spaces are always used -->
    /// <element first="1" second="2"
    ///          third="3" fourth="4"/>
    ///
    /// <!-- if only (1) is uncommented - the indent depends on the indentation
    ///      settings - 2 spaces here -->
    /// <element
    ///   first="1" second="2"
    ///   third="3" fourth="4"/>
    ///
    /// <!-- if only (2) is uncommented. Spaces are always used -->
    /// <element first="1" second="2"
    ///          third="3" fourth="4"
    /// />
    /// ```
451    pub fn new_line(mut self) -> Self {
452        if let Some(i) = self.writer.indent.as_mut() {
453            match self.state {
454                // .new_line() called just after .create_element().
455                // Use element indent to additionally indent attributes
456                AttributeIndent::NoneAttributesWritten => {
457                    self.state = AttributeIndent::WriteConfigured(i.indent_size)
458                }
459
460                AttributeIndent::WriteSpaces(_) => {}
461                // .new_line() called when .with_attribute() was called at least once.
462                // The spaces should be used to indent
463                // Plan saved indent
464                AttributeIndent::Spaces(indent) => {
465                    self.state = AttributeIndent::WriteSpaces(indent)
466                }
467
468                AttributeIndent::WriteConfigured(_) => {}
469                // .new_line() called when .with_attribute() was called at least once.
470                // The configured indent characters should be used to indent
471                // Plan saved indent
472                AttributeIndent::Configured(indent) => {
473                    self.state = AttributeIndent::WriteConfigured(indent)
474                }
475            }
476            self.start_tag.push_newline();
477        };
478        self
479    }
480
481    /// Writes attribute and maintain indentation state
482    fn write_attr<'b>(&mut self, attr: Attribute<'b>) {
483        if let Some(i) = self.writer.indent.as_mut() {
484            // Save the indent that we should use next time when .new_line() be called
485            self.state = match self.state {
486                // Neither .new_line() or .with_attribute() yet called
487                // If newline inside attributes will be requested, we should indent them
488                // by the length of tag name and +1 for `<` and +1 for one space
489                AttributeIndent::NoneAttributesWritten => {
490                    self.start_tag.push_attribute(attr);
491                    AttributeIndent::Spaces(self.start_tag.name().as_ref().len() + 2)
492                }
493
494                // Indent was requested by previous call to .new_line(), write it
495                // New line was already written
496                AttributeIndent::WriteSpaces(indent) => {
497                    if self.spaces.len() < indent {
498                        self.spaces.resize(indent, b' ');
499                    }
500                    self.start_tag.push_indent(&self.spaces[..indent]);
501                    self.start_tag.push_attr(attr.into());
502                    AttributeIndent::Spaces(indent)
503                }
504                // .new_line() was not called, but .with_attribute() was.
505                // use the previously calculated indent
506                AttributeIndent::Spaces(indent) => {
507                    self.start_tag.push_attribute(attr);
508                    AttributeIndent::Spaces(indent)
509                }
510
511                // Indent was requested by previous call to .new_line(), write it
512                // New line was already written
513                AttributeIndent::WriteConfigured(indent) => {
514                    self.start_tag.push_indent(i.additional(indent));
515                    self.start_tag.push_attr(attr.into());
516                    AttributeIndent::Configured(indent)
517                }
518                // .new_line() was not called, but .with_attribute() was.
519                // use the previously calculated indent
520                AttributeIndent::Configured(indent) => {
521                    self.start_tag.push_attribute(attr);
522                    AttributeIndent::Configured(indent)
523                }
524            };
525        } else {
526            self.start_tag.push_attribute(attr);
527        }
528    }
529}
530
531impl<'a, W: Write> ElementWriter<'a, W> {
532    /// Write some text inside the current element.
533    pub fn write_text_content(self, text: BytesText) -> Result<&'a mut Writer<W>> {
534        self.writer
535            .write_event(Event::Start(self.start_tag.borrow()))?;
536        self.writer.write_event(Event::Text(text))?;
537        self.writer
538            .write_event(Event::End(self.start_tag.to_end()))?;
539        Ok(self.writer)
540    }
541
542    /// Write a CData event `<![CDATA[...]]>` inside the current element.
543    pub fn write_cdata_content(self, text: BytesCData) -> Result<&'a mut Writer<W>> {
544        self.writer
545            .write_event(Event::Start(self.start_tag.borrow()))?;
546        self.writer.write_event(Event::CData(text))?;
547        self.writer
548            .write_event(Event::End(self.start_tag.to_end()))?;
549        Ok(self.writer)
550    }
551
552    /// Write a processing instruction `<?...?>` inside the current element.
553    pub fn write_pi_content(self, pi: BytesPI) -> Result<&'a mut Writer<W>> {
554        self.writer
555            .write_event(Event::Start(self.start_tag.borrow()))?;
556        self.writer.write_event(Event::PI(pi))?;
557        self.writer
558            .write_event(Event::End(self.start_tag.to_end()))?;
559        Ok(self.writer)
560    }
561
562    /// Write an empty (self-closing) tag.
563    pub fn write_empty(self) -> Result<&'a mut Writer<W>> {
564        self.writer.write_event(Event::Empty(self.start_tag))?;
565        Ok(self.writer)
566    }
567
568    /// Create a new scope for writing XML inside the current element.
569    pub fn write_inner_content<F, E>(self, closure: F) -> StdResult<&'a mut Writer<W>, E>
570    where
571        F: FnOnce(&mut Writer<W>) -> StdResult<(), E>,
572        E: From<Error>,
573    {
574        self.writer
575            .write_event(Event::Start(self.start_tag.borrow()))?;
576        closure(self.writer)?;
577        self.writer
578            .write_event(Event::End(self.start_tag.to_end()))?;
579        Ok(self.writer)
580    }
581}
/// Adapter that lets a [`std::io::Write`] sink be used where a
/// [`std::fmt::Write`] is expected.
#[cfg(feature = "serialize")]
struct ToFmtWrite<T>(pub T);

#[cfg(feature = "serialize")]
impl<T> std::fmt::Write for ToFmtWrite<T>
where
    T: std::io::Write,
{
    /// Forwards `s` as UTF-8 bytes to the wrapped writer. The details of an
    /// I/O failure are dropped, because [`std::fmt::Error`] carries no payload.
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        match self.0.write_all(s.as_bytes()) {
            Ok(()) => Ok(()),
            Err(_) => Err(std::fmt::Error),
        }
    }
}
594
/// Indentation settings and state of a pretty-printing writer.
#[derive(Clone)]
pub(crate) struct Indentation {
    /// Whether a line break should be written before the next wrapped event.
    /// TODO: this is an awkward fit, as it has no impact on the indentation logic
    /// itself and only matters when an indentation exists. Potentially refactor later
    should_line_break: bool,
    /// The byte used for indentation (e.g. ` ` or `\t`)
    indent_char: u8,
    /// How many indent characters make up one level of indentation
    indent_size: usize,
    /// Cache of indent characters from which indent strings are sliced
    indents: Vec<u8>,
    /// Length, in bytes, of the indent of the current level
    current_indent_len: usize,
}

impl Indentation {
    /// Creates the state for level zero, with a cache of 128 indent characters.
    pub fn new(indent_char: u8, indent_size: usize) -> Self {
        let indents = vec![indent_char; 128];
        Self {
            should_line_break: false,
            indent_char,
            indent_size,
            indents,
            // invariant: never exceeds `indents.len()`, see `grow()`
            current_indent_len: 0,
        }
    }

    /// Increase indentation by one level
    pub fn grow(&mut self) {
        let deeper = self.current_indent_len + self.indent_size;
        self.current_indent_len = deeper;
        self.ensure(deeper);
    }

    /// Decrease indentation by one level. Do nothing, if level already zero
    pub fn shrink(&mut self) {
        self.current_indent_len = self
            .current_indent_len
            .checked_sub(self.indent_size)
            .unwrap_or(0);
    }

    /// Returns indent string for current level
    pub fn current(&self) -> &[u8] {
        let len = self.current_indent_len;
        &self.indents[..len]
    }

    /// Returns indent with current indent plus additional indent.
    /// The current level itself is not changed.
    pub fn additional(&mut self, additional_indent: usize) -> &[u8] {
        let total = self.current_indent_len + additional_indent;
        self.ensure(total);
        &self.indents[..total]
    }

    /// Makes sure the cache holds at least `new_len` indent characters.
    fn ensure(&mut self, new_len: usize) {
        let cached = self.indents.len();
        if new_len > cached {
            let missing = new_len - cached;
            self.indents
                .extend(std::iter::repeat(self.indent_char).take(missing));
        }
    }
}