quick_xml/de/mod.rs
1//! Serde `Deserializer` module.
2//!
3//! Due to the complexity of the XML standard and the fact that Serde was developed
4//! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to
5//! the fact that some XML concepts are inexpressible in terms of Serde derives
6//! and may require manual deserialization.
7//!
8//! The most notable restriction is the ability to distinguish between _elements_
9//! and _attributes_, as no other format used by serde has such a concept.
10//!
11//! Due to that the mapping is performed in a best effort manner.
12//!
13//!
14//!
15//! Table of Contents
16//! =================
17//! - [Mapping XML to Rust types](#mapping-xml-to-rust-types)
18//! - [Basics](#basics)
19//! - [Optional attributes and elements](#optional-attributes-and-elements)
20//! - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type)
21//! - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types)
22//! - [Generate Rust types from XML](#generate-rust-types-from-xml)
23//! - [Composition Rules](#composition-rules)
24//! - [Enum Representations](#enum-representations)
25//! - [Normal enum variant](#normal-enum-variant)
26//! - [`$text` enum variant](#text-enum-variant)
27//! - [Difference between `$text` and `$value` special names](#difference-between-text-and-value-special-names)
28//! - [`$text`](#text)
29//! - [`$value`](#value)
30//! - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives)
31//! - [Structs and sequences of structs](#structs-and-sequences-of-structs)
32//! - [Enums and sequences of enums](#enums-and-sequences-of-enums)
33//! - [Frequently Used Patterns](#frequently-used-patterns)
34//! - [`<element>` lists](#element-lists)
35//! - [Overlapped (Out-of-Order) Elements](#overlapped-out-of-order-elements)
36//! - [Internally Tagged Enums](#internally-tagged-enums)
37//!
38//!
39//!
40//! Mapping XML to Rust types
41//! =========================
42//!
43//! Type names are never considered when deserializing, so you can name your
44//! types as you wish. Other general rules:
45//! - `struct` field name could be represented in XML only as an attribute name
46//! or an element name;
47//! - `enum` variant name could be represented in XML only as an attribute name
48//! or an element name;
49//! - the unit struct, unit type `()` and unit enum variant can be deserialized
50//! from any valid XML content:
51//! - attribute and element names;
52//! - attribute and element values;
53//! - text or CDATA content (including mixed text and CDATA content).
54//!
55//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
56//!
57//! NOTE: All tests are marked with an `ignore` option, even though they do
58//! compile. This is because rustdoc marks such blocks with an information
59//! icon unlike `no_run` blocks.
60//!
61//! </div>
62//!
63//! <table>
64//! <thead>
65//! <tr><th colspan="2">
66//!
67//! ## Basics
68//!
69//! </th></tr>
70//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
71//! </thead>
72//! <tbody style="vertical-align:top;">
73//! <tr>
74//! <td>
75//! Content of attributes and text / CDATA content of elements (including mixed
76//! text and CDATA content):
77//!
78//! ```xml
79//! <... ...="content" />
80//! ```
81//! ```xml
82//! <...>content</...>
83//! ```
84//! ```xml
85//! <...><![CDATA[content]]></...>
86//! ```
87//! ```xml
88//! <...>text<![CDATA[cdata]]>text</...>
89//! ```
90//! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case.
91//! </td>
92//! <td>
93//!
94//! You can use any type that can be deserialized from an `&str`, for example:
95//! - [`String`] and [`&str`]
96//! - [`Cow<str>`]
97//! - [`u32`], [`f32`] and other numeric types
98//! - `enum`s, like
99//! ```
100//! # use pretty_assertions::assert_eq;
101//! # use serde::Deserialize;
102//! # #[derive(Debug, PartialEq)]
103//! #[derive(Deserialize)]
104//! enum Language {
105//! Rust,
106//! Cpp,
107//! #[serde(other)]
108//! Other,
109//! }
110//! # #[derive(Debug, PartialEq, Deserialize)]
111//! # struct X { #[serde(rename = "$text")] x: Language }
112//! # assert_eq!(X { x: Language::Rust }, quick_xml::de::from_str("<x>Rust</x>").unwrap());
113//! # assert_eq!(X { x: Language::Cpp }, quick_xml::de::from_str("<x>C<![CDATA[p]]>p</x>").unwrap());
114//! # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("<x><![CDATA[other]]></x>").unwrap());
115//! ```
116//!
117//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
118//!
119//! NOTE: deserialization to non-owned types (i.e. borrow from the input),
120//! such as `&str`, is possible only if you parse document in the UTF-8
121//! encoding and content does not contain entity references such as `&amp;`,
122//! or character references such as `&#xD;`, as well as text content represented
123//! by one piece of [text] or [CDATA] element.
124//! </div>
125//! <!-- TODO: document an error type returned -->
126//!
127//! [text]: Event::Text
128//! [CDATA]: Event::CData
129//! </td>
130//! </tr>
131//! <!-- 2 ===================================================================================== -->
132//! <tr>
133//! <td>
134//!
135//! Content of attributes and text / CDATA content of elements (including mixed
136//! text and CDATA content), which represents a space-delimited list, as
137//! specified in the XML Schema specification for [`xs:list`] `simpleType`:
138//!
139//! ```xml
140//! <... ...="element1 element2 ..." />
141//! ```
142//! ```xml
143//! <...>
144//! element1
145//! element2
146//! ...
147//! </...>
148//! ```
149//! ```xml
150//! <...><![CDATA[
151//! element1
152//! element2
153//! ...
154//! ]]></...>
155//! ```
156//!
157//! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes
158//! </td>
159//! <td>
160//!
161//! Use any type that is deserialized using a [`deserialize_seq()`] call, for example:
162//!
163//! ```
164//! type List = Vec<u32>;
165//! ```
166//!
167//! See the next row to learn where in your struct definition you should
168//! use that type.
169//!
170//! According to the XML Schema specification, the delimiter between elements is one
171//! or more whitespace (`' '`, `'\r'`, `'\n'`, and `'\t'`) characters.
172//!
173//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
174//!
175//! NOTE: according to the XML Schema restrictions, you cannot escape those
176//! white-space characters, so list elements will _never_ contain them.
177//! In practice you will usually use `xs:list`s for lists of numbers or enumerated
178//! values which look like identifiers in many languages, for example, `item`,
179//! `some_item` or `some-item`, so that shouldn't be a problem.
180//!
181//! NOTE: according to the XML Schema specification, list elements can be
182//! delimited only by spaces. Other delimiters (for example, commas) are not
183//! allowed.
184//!
185//! </div>
186//!
187//! [`deserialize_seq()`]: de::Deserializer::deserialize_seq
188//! </td>
189//! </tr>
190//! <!-- 3 ===================================================================================== -->
191//! <tr>
192//! <td>
193//! A typical XML with attributes. The root tag name does not matter:
194//!
195//! ```xml
196//! <any-tag one="..." two="..."/>
197//! ```
198//! </td>
199//! <td>
200//!
201//! A structure where each XML attribute is mapped to a field with a name
202//! starting with `@`. Because Rust identifiers do not permit the `@` character,
203//! you should use the `#[serde(rename = "@...")]` attribute to rename it.
204//! The name of the struct itself does not matter:
205//!
206//! ```
207//! # use serde::Deserialize;
208//! # type T = ();
209//! # type U = ();
210//! // Get both attributes
211//! # #[derive(Debug, PartialEq)]
212//! #[derive(Deserialize)]
213//! struct AnyName {
214//! #[serde(rename = "@one")]
215//! one: T,
216//!
217//! #[serde(rename = "@two")]
218//! two: U,
219//! }
220//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
221//! ```
222//! ```
223//! # use serde::Deserialize;
224//! # type T = ();
225//! // Get only the one attribute, ignore the other
226//! # #[derive(Debug, PartialEq)]
227//! #[derive(Deserialize)]
228//! struct AnyName {
229//! #[serde(rename = "@one")]
230//! one: T,
231//! }
232//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
233//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."/>"#).unwrap();
234//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
235//! ```
236//! ```
237//! # use serde::Deserialize;
238//! // Ignore all attributes
239//! // You can also use the `()` type (unit type)
240//! # #[derive(Debug, PartialEq)]
241//! #[derive(Deserialize)]
242//! struct AnyName;
243//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
244//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
245//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
246//! ```
247//!
248//! All these structs can be used to deserialize from an XML on the
249//! left side depending on amount of information that you want to get.
250//! Of course, you can combine them with elements extractor structs (see below).
251//!
252//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
253//!
254//! NOTE: XML allows you to have an attribute and an element with the same name
255//! inside the same element. quick-xml deals with that by prepending a `@` prefix
256//! to the name of attributes.
257//! </div>
258//! </td>
259//! </tr>
260//! <!-- 4 ===================================================================================== -->
261//! <tr>
262//! <td>
263//! A typical XML with child elements. The root tag name does not matter:
264//!
265//! ```xml
266//! <any-tag>
267//! <one>...</one>
268//! <two>...</two>
269//! </any-tag>
270//! ```
271//! </td>
272//! <td>
273//! A structure where each XML child element is mapped to the field.
274//! Each element name becomes a name of field. The name of the struct itself
275//! does not matter:
276//!
277//! ```
278//! # use serde::Deserialize;
279//! # type T = ();
280//! # type U = ();
281//! // Get both elements
282//! # #[derive(Debug, PartialEq)]
283//! #[derive(Deserialize)]
284//! struct AnyName {
285//! one: T,
286//! two: U,
287//! }
288//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
289//! #
290//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap_err();
291//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap_err();
292//! ```
293//! ```
294//! # use serde::Deserialize;
295//! # type T = ();
296//! // Get only the one element, ignore the other
297//! # #[derive(Debug, PartialEq)]
298//! #[derive(Deserialize)]
299//! struct AnyName {
300//! one: T,
301//! }
302//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
303//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
304//! ```
305//! ```
306//! # use serde::Deserialize;
307//! // Ignore all elements
308//! // You can also use the `()` type (unit type)
309//! # #[derive(Debug, PartialEq)]
310//! #[derive(Deserialize)]
311//! struct AnyName;
312//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
313//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
314//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap();
315//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
316//! ```
317//!
318//! All these structs can be used to deserialize from an XML on the
319//! left side depending on amount of information that you want to get.
320//! Of course, you can combine them with attributes extractor structs (see above).
321//!
322//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
323//!
324//! NOTE: XML allows you to have an attribute and an element with the same name
325//! inside the same element. quick-xml deals with that by prepending a `@` prefix
326//! to the name of attributes.
327//! </div>
328//! </td>
329//! </tr>
330//! <!-- 5 ===================================================================================== -->
331//! <tr>
332//! <td>
333//! An XML with an attribute and a child element named equally:
334//!
335//! ```xml
336//! <any-tag field="...">
337//! <field>...</field>
338//! </any-tag>
339//! ```
340//! </td>
341//! <td>
342//!
343//! You MUST specify `#[serde(rename = "@field")]` on a field that will be used
344//! for an attribute:
345//!
346//! ```
347//! # use pretty_assertions::assert_eq;
348//! # use serde::Deserialize;
349//! # type T = ();
350//! # type U = ();
351//! # #[derive(Debug, PartialEq)]
352//! #[derive(Deserialize)]
353//! struct AnyName {
354//! #[serde(rename = "@field")]
355//! attribute: T,
356//! field: U,
357//! }
358//! # assert_eq!(
359//! # AnyName { attribute: (), field: () },
360//! # quick_xml::de::from_str(r#"
361//! # <any-tag field="...">
362//! # <field>...</field>
363//! # </any-tag>
364//! # "#).unwrap(),
365//! # );
366//! ```
367//! </td>
368//! </tr>
369//! <!-- ======================================================================================= -->
370//! <tr><th colspan="2">
371//!
372//! ## Optional attributes and elements
373//!
374//! </th></tr>
375//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
376//! <!-- 6 ===================================================================================== -->
377//! <tr>
378//! <td>
379//! An optional XML attribute that you want to capture.
380//! The root tag name does not matter:
381//!
382//! ```xml
383//! <any-tag optional="..."/>
384//! ```
385//! ```xml
386//! <any-tag/>
387//! ```
388//! </td>
389//! <td>
390//!
391//! A structure with an optional field, renamed according to the requirements
392//! for attributes:
393//!
394//! ```
395//! # use pretty_assertions::assert_eq;
396//! # use serde::Deserialize;
397//! # type T = ();
398//! # #[derive(Debug, PartialEq)]
399//! #[derive(Deserialize)]
400//! struct AnyName {
401//! #[serde(rename = "@optional")]
402//! optional: Option<T>,
403//! }
404//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag optional="..."/>"#).unwrap());
405//! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
406//! ```
407//! When the XML attribute is present, type `T` will be deserialized from
408//! an attribute value (which is a string). Note, that if `T = String` or other
409//! string type, the empty attribute is mapped to a `Some("")`, whereas `None`
410//! represents the missing attribute:
411//! ```xml
412//! <any-tag optional="..."/><!-- Some("...") -->
413//! <any-tag optional=""/> <!-- Some("") -->
414//! <any-tag/> <!-- None -->
415//! ```
416//! </td>
417//! </tr>
418//! <!-- 7 ===================================================================================== -->
419//! <tr>
420//! <td>
421//! An optional XML element that you want to capture.
422//! The root tag name does not matter:
423//!
424//! ```xml
425//! <any-tag>
426//! <optional>...</optional>
427//! </any-tag>
428//! ```
429//! ```xml
430//! <any-tag>
431//! <optional/>
432//! </any-tag>
433//! ```
434//! ```xml
435//! <any-tag/>
436//! ```
437//! </td>
438//! <td>
439//!
440//! A structure with an optional field:
441//!
442//! ```
443//! # use pretty_assertions::assert_eq;
444//! # use serde::Deserialize;
445//! # type T = ();
446//! # #[derive(Debug, PartialEq)]
447//! #[derive(Deserialize)]
448//! struct AnyName {
449//! optional: Option<T>,
450//! }
451//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag><optional>...</optional></any-tag>"#).unwrap());
452//! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
453//! ```
454//! When the XML element is present, type `T` will be deserialized from an
455//! element (which is a string or a multi-mapping -- i.e. mapping which can have
456//! duplicated keys).
457//! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
458//!
459//! Currently some edge cases exist, as described in the issue [#497].
460//! </div>
461//! </td>
462//! </tr>
463//! <!-- ======================================================================================= -->
464//! <tr><th colspan="2">
465//!
466//! ## Choices (`xs:choice` XML Schema type)
467//!
468//! </th></tr>
469//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
470//! <!-- 8 ===================================================================================== -->
471//! <tr>
472//! <td>
473//! An XML with different root tag names, as well as text / CDATA content:
474//!
475//! ```xml
476//! <one field1="...">...</one>
477//! ```
478//! ```xml
479//! <two>
480//! <field2>...</field2>
481//! </two>
482//! ```
483//! ```xml
484//! Text <![CDATA[or (mixed)
485//! CDATA]]> content
486//! ```
487//! </td>
488//! <td>
489//!
490//! An enum where each variant has the name of a possible root tag. The name of
491//! the enum itself does not matter.
492//!
493//! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`.
494//!
495//! All these structs can be used to deserialize from any XML on the
496//! left side depending on amount of information that you want to get:
497//!
498//! ```
499//! # use pretty_assertions::assert_eq;
500//! # use serde::Deserialize;
501//! # type T = ();
502//! # type U = ();
503//! # #[derive(Debug, PartialEq)]
504//! #[derive(Deserialize)]
505//! #[serde(rename_all = "snake_case")]
506//! enum AnyName {
507//! One { #[serde(rename = "@field1")] field1: T },
508//! Two { field2: U },
509//!
510//! /// Use a unit variant if you do not care about the content.
511//! /// You can use a tuple variant if you want to parse
512//! /// textual content as an xs:list.
513//! /// Struct variants will pass a string to the
514//! /// struct enum variant visitor, which typically
515//! /// returns Err(Custom)
516//! #[serde(rename = "$text")]
517//! Text(String),
518//! }
519//! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
520//! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
521//! # assert_eq!(AnyName::Text("text cdata ".into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
522//! ```
523//! ```
524//! # use pretty_assertions::assert_eq;
525//! # use serde::Deserialize;
526//! # type T = ();
527//! # #[derive(Debug, PartialEq)]
528//! #[derive(Deserialize)]
529//! struct Two {
530//! field2: T,
531//! }
532//! # #[derive(Debug, PartialEq)]
533//! #[derive(Deserialize)]
534//! #[serde(rename_all = "snake_case")]
535//! enum AnyName {
536//! // `field1` content discarded
537//! One,
538//! Two(Two),
539//! #[serde(rename = "$text")]
540//! Text,
541//! }
542//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
543//! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
544//! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
545//! ```
546//! ```
547//! # use pretty_assertions::assert_eq;
548//! # use serde::Deserialize;
549//! # #[derive(Debug, PartialEq)]
550//! #[derive(Deserialize)]
551//! #[serde(rename_all = "snake_case")]
552//! enum AnyName {
553//! One,
554//! // the <two> and textual content will be mapped to this
555//! #[serde(other)]
556//! Other,
557//! }
558//! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
559//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
560//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
561//! ```
562//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
563//!
564//! NOTE: You should have variants for all possible tag names in your enum
565//! or have an `#[serde(other)]` variant.
566//! <!-- TODO: document an error type if that requirement is violated -->
567//! </div>
568//! </td>
569//! </tr>
570//! <!-- 9 ===================================================================================== -->
571//! <tr>
572//! <td>
573//!
574//! `<xs:choice>` embedded in the other element, and at the same time you want
575//! to get access to other attributes that can appear in the same container
576//! (`<any-tag>`). Also this case can be described, as if you want to choose
577//! Rust enum variant based on a tag name:
578//!
579//! ```xml
580//! <any-tag field="...">
581//! <one>...</one>
582//! </any-tag>
583//! ```
584//! ```xml
585//! <any-tag field="...">
586//! <two>...</two>
587//! </any-tag>
588//! ```
589//! ```xml
590//! <any-tag field="...">
591//! Text <![CDATA[or (mixed)
592//! CDATA]]> content
593//! </any-tag>
594//! ```
595//! </td>
596//! <td>
597//!
598//! A structure with a field whose type is an `enum`.
599//!
600//! If you need to get a textual content, mark a variant with `#[serde(rename = "$text")]`.
601//!
602//! Names of the enum, struct, and struct field with `Choice` type do not matter:
603//!
604//! ```
605//! # use pretty_assertions::assert_eq;
606//! # use serde::Deserialize;
607//! # type T = ();
608//! # #[derive(Debug, PartialEq)]
609//! #[derive(Deserialize)]
610//! #[serde(rename_all = "snake_case")]
611//! enum Choice {
612//! One,
613//! Two,
614//!
615//! /// Use a unit variant if you do not care about the content.
616//! /// You can use a tuple variant if you want to parse
617//! /// textual content as an xs:list.
618//! /// Struct variants will pass a string to the
619//! /// struct enum variant visitor, which typically
620//! /// returns Err(Custom)
621//! #[serde(rename = "$text")]
622//! Text(String),
623//! }
624//! # #[derive(Debug, PartialEq)]
625//! #[derive(Deserialize)]
626//! struct AnyName {
627//! #[serde(rename = "@field")]
628//! field: T,
629//!
630//! #[serde(rename = "$value")]
631//! any_name: Choice,
632//! }
633//! # assert_eq!(
634//! # AnyName { field: (), any_name: Choice::One },
635//! # quick_xml::de::from_str(r#"<any-tag field="..."><one>...</one></any-tag>"#).unwrap(),
636//! # );
637//! # assert_eq!(
638//! # AnyName { field: (), any_name: Choice::Two },
639//! # quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"#).unwrap(),
640//! # );
641//! # assert_eq!(
642//! # AnyName { field: (), any_name: Choice::Text("text cdata ".into()) },
643//! # quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"#).unwrap(),
644//! # );
645//! ```
646//! </td>
647//! </tr>
648//! <!-- 10 ==================================================================================== -->
649//! <tr>
650//! <td>
651//!
652//! `<xs:choice>` embedded in the other element, and at the same time you want
653//! to get access to other elements that can appear in the same container
654//! (`<any-tag>`). Also this case can be described, as if you want to choose
655//! Rust enum variant based on a tag name:
656//!
657//! ```xml
658//! <any-tag>
659//! <field>...</field>
660//! <one>...</one>
661//! </any-tag>
662//! ```
663//! ```xml
664//! <any-tag>
665//! <two>...</two>
666//! <field>...</field>
667//! </any-tag>
668//! ```
669//! </td>
670//! <td>
671//!
672//! A structure with a field whose type is an `enum`.
673//!
674//! Names of the enum, struct, and struct field with `Choice` type do not matter:
675//!
676//! ```
677//! # use pretty_assertions::assert_eq;
678//! # use serde::Deserialize;
679//! # type T = ();
680//! # #[derive(Debug, PartialEq)]
681//! #[derive(Deserialize)]
682//! #[serde(rename_all = "snake_case")]
683//! enum Choice {
684//! One,
685//! Two,
686//! }
687//! # #[derive(Debug, PartialEq)]
688//! #[derive(Deserialize)]
689//! struct AnyName {
690//! field: T,
691//!
692//! #[serde(rename = "$value")]
693//! any_name: Choice,
694//! }
695//! # assert_eq!(
696//! # AnyName { field: (), any_name: Choice::One },
697//! # quick_xml::de::from_str(r#"<any-tag><field>...</field><one>...</one></any-tag>"#).unwrap(),
698//! # );
699//! # assert_eq!(
700//! # AnyName { field: (), any_name: Choice::Two },
701//! # quick_xml::de::from_str(r#"<any-tag><two>...</two><field>...</field></any-tag>"#).unwrap(),
702//! # );
703//! ```
704//!
705//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
706//!
707//! NOTE: if your `Choice` enum would contain an `#[serde(other)]`
708//! variant, element `<field>` will be mapped to the `field` and not to the enum
709//! variant.
710//! </div>
711//!
712//! </td>
713//! </tr>
714//! <!-- 11 ==================================================================================== -->
715//! <tr>
716//! <td>
717//!
718//! `<xs:choice>` encapsulated in other element with a fixed name:
719//!
720//! ```xml
721//! <any-tag field="...">
722//! <choice>
723//! <one>...</one>
724//! </choice>
725//! </any-tag>
726//! ```
727//! ```xml
728//! <any-tag field="...">
729//! <choice>
730//! <two>...</two>
731//! </choice>
732//! </any-tag>
733//! ```
734//! </td>
735//! <td>
736//!
737//! A structure with a field of an intermediate type with one field of `enum` type.
738//! Actually, this example is not necessary, because you can construct it by yourself
739//! using the composition rules that were described above. However the XML construction
740//! described here is very common, so it is shown explicitly.
741//!
742//! Names of the enum and struct do not matter:
743//!
744//! ```
745//! # use pretty_assertions::assert_eq;
746//! # use serde::Deserialize;
747//! # type T = ();
748//! # #[derive(Debug, PartialEq)]
749//! #[derive(Deserialize)]
750//! #[serde(rename_all = "snake_case")]
751//! enum Choice {
752//! One,
753//! Two,
754//! }
755//! # #[derive(Debug, PartialEq)]
756//! #[derive(Deserialize)]
757//! struct Holder {
758//! #[serde(rename = "$value")]
759//! any_name: Choice,
760//! }
761//! # #[derive(Debug, PartialEq)]
762//! #[derive(Deserialize)]
763//! struct AnyName {
764//! #[serde(rename = "@field")]
765//! field: T,
766//!
767//! choice: Holder,
768//! }
769//! # assert_eq!(
770//! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
771//! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><one>...</one></choice></any-tag>"#).unwrap(),
772//! # );
773//! # assert_eq!(
774//! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
775//! # quick_xml::de::from_str(r#"<any-tag field="..."><choice><two>...</two></choice></any-tag>"#).unwrap(),
776//! # );
777//! ```
778//! </td>
779//! </tr>
780//! <!-- 12 ==================================================================================== -->
781//! <tr>
782//! <td>
783//!
784//! `<xs:choice>` encapsulated in other element with a fixed name:
785//!
786//! ```xml
787//! <any-tag>
788//! <field>...</field>
789//! <choice>
790//! <one>...</one>
791//! </choice>
792//! </any-tag>
793//! ```
794//! ```xml
795//! <any-tag>
796//! <choice>
797//! <two>...</two>
798//! </choice>
799//! <field>...</field>
800//! </any-tag>
801//! ```
802//! </td>
803//! <td>
804//!
805//! A structure with a field of an intermediate type with one field of `enum` type.
806//! Actually, this example is not necessary, because you can construct it by yourself
807//! using the composition rules that were described above. However the XML construction
808//! described here is very common, so it is shown explicitly.
809//!
810//! Names of the enum and struct do not matter:
811//!
812//! ```
813//! # use pretty_assertions::assert_eq;
814//! # use serde::Deserialize;
815//! # type T = ();
816//! # #[derive(Debug, PartialEq)]
817//! #[derive(Deserialize)]
818//! #[serde(rename_all = "snake_case")]
819//! enum Choice {
820//! One,
821//! Two,
822//! }
823//! # #[derive(Debug, PartialEq)]
824//! #[derive(Deserialize)]
825//! struct Holder {
826//! #[serde(rename = "$value")]
827//! any_name: Choice,
828//! }
829//! # #[derive(Debug, PartialEq)]
830//! #[derive(Deserialize)]
831//! struct AnyName {
832//! field: T,
833//!
834//! choice: Holder,
835//! }
836//! # assert_eq!(
837//! # AnyName { field: (), choice: Holder { any_name: Choice::One } },
838//! # quick_xml::de::from_str(r#"<any-tag><field>...</field><choice><one>...</one></choice></any-tag>"#).unwrap(),
839//! # );
840//! # assert_eq!(
841//! # AnyName { field: (), choice: Holder { any_name: Choice::Two } },
842//! # quick_xml::de::from_str(r#"<any-tag><choice><two>...</two></choice><field>...</field></any-tag>"#).unwrap(),
843//! # );
844//! ```
845//! </td>
846//! </tr>
847//! <!-- ======================================================================================== -->
848//! <tr><th colspan="2">
849//!
850//! ## Sequences (`xs:all` and `xs:sequence` XML Schema types)
851//!
852//! </th></tr>
853//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
854//! <!-- 13 ==================================================================================== -->
855//! <tr>
856//! <td>
857//! A sequence inside of a tag without a dedicated name:
858//!
859//! ```xml
860//! <any-tag/>
861//! ```
862//! ```xml
863//! <any-tag>
864//! <item/>
865//! </any-tag>
866//! ```
867//! ```xml
868//! <any-tag>
869//! <item/>
870//! <item/>
871//! <item/>
872//! </any-tag>
873//! ```
874//! </td>
875//! <td>
876//!
877//! A structure with a field which is a sequence type, for example, [`Vec`].
878//! Because XML syntax does not distinguish between empty sequences and missing
879//! elements, we should indicate that on the Rust side, because serde will require
880//! that field `item` exists. You can do that in two possible ways:
881//!
882//! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]:
883//! ```
884//! # use pretty_assertions::assert_eq;
885//! # use serde::Deserialize;
886//! # type Item = ();
887//! # #[derive(Debug, PartialEq)]
888//! #[derive(Deserialize)]
889//! struct AnyName {
890//! #[serde(default)]
891//! item: Vec<Item>,
892//! }
893//! # assert_eq!(
894//! # AnyName { item: vec![] },
895//! # quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
896//! # );
897//! # assert_eq!(
898//! # AnyName { item: vec![()] },
899//! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
900//! # );
901//! # assert_eq!(
902//! # AnyName { item: vec![(), (), ()] },
903//! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
904//! # );
905//! ```
906//!
907//! Use the [`Option`]. In that case the inner array will always contain at least one
908//! element after deserialization:
909//! ```ignore
910//! # use pretty_assertions::assert_eq;
911//! # use serde::Deserialize;
912//! # type Item = ();
913//! # #[derive(Debug, PartialEq)]
914//! #[derive(Deserialize)]
915//! struct AnyName {
916//! item: Option<Vec<Item>>,
917//! }
918//! # assert_eq!(
919//! # AnyName { item: None },
920//! # quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
921//! # );
922//! # assert_eq!(
923//! # AnyName { item: Some(vec![()]) },
924//! # quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
925//! # );
926//! # assert_eq!(
927//! # AnyName { item: Some(vec![(), (), ()]) },
928//! # quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
929//! # );
930//! ```
931//!
932//! See also [Frequently Used Patterns](#element-lists).
933//!
934//! [field]: https://serde.rs/field-attrs.html#default
935//! [struct]: https://serde.rs/container-attrs.html#default
936//! </td>
937//! </tr>
938//! <!-- 14 ==================================================================================== -->
939//! <tr>
940//! <td>
941//! A sequence with a strict order, probably with mixed content
942//! (text / CDATA and tags):
943//!
944//! ```xml
945//! <one>...</one>
946//! text
947//! <![CDATA[cdata]]>
948//! <two>...</two>
949//! <one>...</one>
950//! ```
951//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
952//!
953//! NOTE: this is just an example for showing mapping. XML does not allow
954//! multiple root tags -- you should wrap the sequence into a tag.
955//! </div>
956//! </td>
957//! <td>
958//!
959//! All elements are mapped to a heterogeneous sequential type: a tuple or a named tuple.
960//! Each element of the tuple should be able to be deserialized from the nested
961//! element content (`...`), except the enum types which would be deserialized
962//! from the full element (`<one>...</one>`), so they could use the element name
963//! to choose the right variant:
964//!
965//! ```
966//! # use pretty_assertions::assert_eq;
967//! # use serde::Deserialize;
968//! # type One = ();
969//! # type Two = ();
970//! # /*
971//! type One = ...;
972//! type Two = ...;
973//! # */
974//! # #[derive(Debug, PartialEq)]
975//! #[derive(Deserialize)]
976//! struct AnyName(One, String, Two, One);
977//! # assert_eq!(
978//! # AnyName((), "text cdata".into(), (), ()),
979//! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
980//! # );
981//! ```
982//! ```
983//! # use pretty_assertions::assert_eq;
984//! # use serde::Deserialize;
985//! # #[derive(Debug, PartialEq)]
986//! #[derive(Deserialize)]
987//! #[serde(rename_all = "snake_case")]
988//! enum Choice {
989//! One,
990//! }
991//! # type Two = ();
992//! # /*
993//! type Two = ...;
994//! # */
995//! type AnyName = (Choice, String, Two, Choice);
996//! # assert_eq!(
997//! # (Choice::One, "text cdata".to_string(), (), Choice::One),
998//! # quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
999//! # );
1000//! ```
1001//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1002//!
1003//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1004//! so you cannot have two adjacent string types in your sequence.
1005//!
1006//! NOTE: If the list might contain tags that are overlapped (interleaved) with tags
1007//! that do not belong to the list, you should enable the [`overlapped-lists`] feature.
1008//! </div>
1009//! </td>
1010//! </tr>
1011//! <!-- 15 ==================================================================================== -->
1012//! <tr>
1013//! <td>
1014//! A sequence with a non-strict order, probably with a mixed content
1015//! (text / CDATA and tags).
1016//!
1017//! ```xml
1018//! <one>...</one>
1019//! text
1020//! <![CDATA[cdata]]>
1021//! <two>...</two>
1022//! <one>...</one>
1023//! ```
1024//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1025//!
1026//! NOTE: this is just an example for showing mapping. XML does not allow
1027//! multiple root tags -- you should wrap the sequence into a tag.
1028//! </div>
1029//! </td>
1030//! <td>
1031//! A homogeneous sequence of elements with a fixed or dynamic size:
1032//!
1033//! ```
1034//! # use pretty_assertions::assert_eq;
1035//! # use serde::Deserialize;
1036//! # #[derive(Debug, PartialEq)]
1037//! #[derive(Deserialize)]
1038//! #[serde(rename_all = "snake_case")]
1039//! enum Choice {
1040//! One,
1041//! Two,
1042//! #[serde(other)]
1043//! Other,
1044//! }
1045//! type AnyName = [Choice; 4];
1046//! # assert_eq!(
1047//! # [Choice::One, Choice::Other, Choice::Two, Choice::One],
1048//! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1049//! # );
1050//! ```
1051//! ```
1052//! # use pretty_assertions::assert_eq;
1053//! # use serde::Deserialize;
1054//! # #[derive(Debug, PartialEq)]
1055//! #[derive(Deserialize)]
1056//! #[serde(rename_all = "snake_case")]
1057//! enum Choice {
1058//! One,
1059//! Two,
1060//! #[serde(rename = "$text")]
1061//! Other(String),
1062//! }
1063//! type AnyName = Vec<Choice>;
1064//! # assert_eq!(
1065//! # vec![
1066//! # Choice::One,
1067//! # Choice::Other("text cdata".into()),
1068//! # Choice::Two,
1069//! # Choice::One,
1070//! # ],
1071//! # quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1072//! # );
1073//! ```
1074//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1075//!
1076//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1077//! so you cannot have two adjacent string types in your sequence.
1078//! </div>
1079//! </td>
1080//! </tr>
1081//! <!-- 16 ==================================================================================== -->
1082//! <tr>
1083//! <td>
1084//! A sequence with a strict order, probably with a mixed content,
1085//! (text and tags) inside of the other element:
1086//!
1087//! ```xml
1088//! <any-tag attribute="...">
1089//! <one>...</one>
1090//! text
1091//! <![CDATA[cdata]]>
1092//! <two>...</two>
1093//! <one>...</one>
1094//! </any-tag>
1095//! ```
1096//! </td>
1097//! <td>
1098//!
1099//! A structure where all child elements are mapped to a single field which has
1100//! a heterogeneous sequential type: a tuple or a named tuple. Each element of the
1101//! tuple should be able to be deserialized from the full element (`<one>...</one>`).
1102//!
1103//! You MUST specify `#[serde(rename = "$value")]` on that field:
1104//!
1105//! ```
1106//! # use pretty_assertions::assert_eq;
1107//! # use serde::Deserialize;
1108//! # type One = ();
1109//! # type Two = ();
1110//! # /*
1111//! type One = ...;
1112//! type Two = ...;
1113//! # */
1114//!
1115//! # #[derive(Debug, PartialEq)]
1116//! #[derive(Deserialize)]
1117//! struct AnyName {
1118//! #[serde(rename = "@attribute")]
1119//! # attribute: (),
1120//! # /*
1121//! attribute: ...,
1122//! # */
1123//! // Not (yet?) supported by serde
1124//! // https://github.com/serde-rs/serde/issues/1905
1125//! // #[serde(flatten)]
1126//! #[serde(rename = "$value")]
1127//! any_name: (One, String, Two, One),
1128//! }
1129//! # assert_eq!(
1130//! # AnyName { attribute: (), any_name: ((), "text cdata".into(), (), ()) },
1131//! # quick_xml::de::from_str("\
1132//! # <any-tag attribute='...'>\
1133//! # <one>...</one>\
1134//! # text \
1135//! # <![CDATA[cdata]]>\
1136//! # <two>...</two>\
1137//! # <one>...</one>\
1138//! # </any-tag>"
1139//! # ).unwrap(),
1140//! # );
1141//! ```
1142//! ```
1143//! # use pretty_assertions::assert_eq;
1144//! # use serde::Deserialize;
1145//! # type One = ();
1146//! # type Two = ();
1147//! # /*
1148//! type One = ...;
1149//! type Two = ...;
1150//! # */
1151//!
1152//! # #[derive(Debug, PartialEq)]
1153//! #[derive(Deserialize)]
1154//! struct NamedTuple(One, String, Two, One);
1155//!
1156//! # #[derive(Debug, PartialEq)]
1157//! #[derive(Deserialize)]
1158//! struct AnyName {
1159//! #[serde(rename = "@attribute")]
1160//! # attribute: (),
1161//! # /*
1162//! attribute: ...,
1163//! # */
1164//! // Not (yet?) supported by serde
1165//! // https://github.com/serde-rs/serde/issues/1905
1166//! // #[serde(flatten)]
1167//! #[serde(rename = "$value")]
1168//! any_name: NamedTuple,
1169//! }
1170//! # assert_eq!(
1171//! # AnyName { attribute: (), any_name: NamedTuple((), "text cdata".into(), (), ()) },
1172//! # quick_xml::de::from_str("\
1173//! # <any-tag attribute='...'>\
1174//! # <one>...</one>\
1175//! # text \
1176//! # <![CDATA[cdata]]>\
1177//! # <two>...</two>\
1178//! # <one>...</one>\
1179//! # </any-tag>"
1180//! # ).unwrap(),
1181//! # );
1182//! ```
1183//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1184//!
1185//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1186//! so you cannot have two adjacent string types in your sequence.
1187//! </div>
1188//! </td>
1189//! </tr>
1190//! <!-- 17 ==================================================================================== -->
1191//! <tr>
1192//! <td>
1193//! A sequence with a non-strict order, probably with a mixed content
1194//! (text / CDATA and tags) inside of the other element:
1195//!
1196//! ```xml
1197//! <any-tag>
1198//! <one>...</one>
1199//! text
1200//! <![CDATA[cdata]]>
1201//! <two>...</two>
1202//! <one>...</one>
1203//! </any-tag>
1204//! ```
1205//! </td>
1206//! <td>
1207//!
1208//! A structure where all child elements are mapped to a single field which has
1209//! a homogeneous sequential type: an array-like container. The item type `T` of
1210//! the container should be able to be deserialized from the nested element content
1211//! (`...`), except if it is an enum type, which would be deserialized from the full
1212//! element (`<one>...</one>`).
1213//!
1214//! You MUST specify `#[serde(rename = "$value")]` on that field:
1215//!
1216//! ```
1217//! # use pretty_assertions::assert_eq;
1218//! # use serde::Deserialize;
1219//! # #[derive(Debug, PartialEq)]
1220//! #[derive(Deserialize)]
1221//! #[serde(rename_all = "snake_case")]
1222//! enum Choice {
1223//! One,
1224//! Two,
1225//! #[serde(rename = "$text")]
1226//! Other(String),
1227//! }
1228//! # #[derive(Debug, PartialEq)]
1229//! #[derive(Deserialize)]
1230//! struct AnyName {
1231//! #[serde(rename = "@attribute")]
1232//! # attribute: (),
1233//! # /*
1234//! attribute: ...,
1235//! # */
1236//! // Not (yet?) supported by serde
1237//! // https://github.com/serde-rs/serde/issues/1905
1238//! // #[serde(flatten)]
1239//! #[serde(rename = "$value")]
1240//! any_name: [Choice; 4],
1241//! }
1242//! # assert_eq!(
1243//! # AnyName { attribute: (), any_name: [
1244//! # Choice::One,
1245//! # Choice::Other("text cdata".into()),
1246//! # Choice::Two,
1247//! # Choice::One,
1248//! # ] },
1249//! # quick_xml::de::from_str("\
1250//! # <any-tag attribute='...'>\
1251//! # <one>...</one>\
1252//! # text \
1253//! # <![CDATA[cdata]]>\
1254//! # <two>...</two>\
1255//! # <one>...</one>\
1256//! # </any-tag>"
1257//! # ).unwrap(),
1258//! # );
1259//! ```
1260//! ```
1261//! # use pretty_assertions::assert_eq;
1262//! # use serde::Deserialize;
1263//! # #[derive(Debug, PartialEq)]
1264//! #[derive(Deserialize)]
1265//! #[serde(rename_all = "snake_case")]
1266//! enum Choice {
1267//! One,
1268//! Two,
1269//! #[serde(rename = "$text")]
1270//! Other(String),
1271//! }
1272//! # #[derive(Debug, PartialEq)]
1273//! #[derive(Deserialize)]
1274//! struct AnyName {
1275//! #[serde(rename = "@attribute")]
1276//! # attribute: (),
1277//! # /*
1278//! attribute: ...,
1279//! # */
1280//! // Not (yet?) supported by serde
1281//! // https://github.com/serde-rs/serde/issues/1905
1282//! // #[serde(flatten)]
1283//! #[serde(rename = "$value")]
1284//! any_name: Vec<Choice>,
1285//! }
1286//! # assert_eq!(
1287//! # AnyName { attribute: (), any_name: vec![
1288//! # Choice::One,
1289//! # Choice::Other("text cdata".into()),
1290//! # Choice::Two,
1291//! # Choice::One,
1292//! # ] },
1293//! # quick_xml::de::from_str("\
1294//! # <any-tag attribute='...'>\
1295//! # <one>...</one>\
1296//! # text \
1297//! # <![CDATA[cdata]]>\
1298//! # <two>...</two>\
1299//! # <one>...</one>\
1300//! # </any-tag>"
1301//! # ).unwrap(),
1302//! # );
1303//! ```
1304//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1305//!
1306//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1307//! so you cannot have two adjacent string types in your sequence.
1308//! </div>
1309//! </td>
1310//! </tr>
1311//! </tbody>
1312//! </table>
1313//!
1314//!
1315//! Generate Rust types from XML
1316//! ============================
1317//!
1318//! To speed up the creation of Rust types that represent a given XML file you can
1319//! use the [xml_schema_generator](https://github.com/Thomblin/xml_schema_generator).
1320//! It provides a standalone binary and a Rust library that parses one or more XML files
1321//! and generates a collection of structs that are compatible with quick_xml::de.
1322//!
1323//!
1324//!
1325//! Composition Rules
1326//! =================
1327//!
1328//! The XML format is very different from other formats supported by `serde`.
1329//! One such difference is how data in the serialized form is related to
1330//! the Rust type. Usually each byte in the data can be associated only with
1331//! one field in the data structure. However, XML is an exception.
1332//!
1333//! For example, take this XML:
1334//!
1335//! ```xml
1336//! <any>
1337//! <key attr="value"/>
1338//! </any>
1339//! ```
1340//!
1341//! and try to deserialize it to the struct `AnyName`:
1342//!
1343//! ```no_run
1344//! # use serde::Deserialize;
1345//! #[derive(Deserialize)]
1346//! struct AnyName { // AnyName calls `deserialize_struct` on `<any><key attr="value"/></any>`
1347//! // Used data: ^^^^^^^^^^^^^^^^^^^
1348//! key: Inner, // Inner calls `deserialize_struct` on `<key attr="value"/>`
1349//! // Used data: ^^^^^^^^^^^^
1350//! }
1351//! #[derive(Deserialize)]
1352//! struct Inner {
1353//! #[serde(rename = "@attr")]
1354//! attr: String, // String calls `deserialize_string` on `value`
1355//! // Used data: ^^^^^
1356//! }
1357//! ```
1358//!
1359//! The comments show which methods of a [`Deserializer`] are called by each struct's
1360//! `deserialize` method and which input they see. **Used data** shows what
1361//! content is actually used for deserializing. As you can see, the name of the inner
1362//! `<key>` tag is used both as a map key / outer struct field name and as part
1363//! of the inner struct (although the _value_ of the tag, i.e. `key`, is not used
1364//! by it).
1365//!
1366//!
1367//!
1368//! Enum Representations
1369//! ====================
1370//!
1371//! `quick-xml` represents enums differently in normal fields, `$text` fields and
1372//! `$value` fields. The normal representation is compatible with serde's adjacent
1373//! and internal tags feature -- the tags for adjacently and internally tagged enums
1374//! are serialized using [`Serializer::serialize_unit_variant`] and deserialized
1375//! using [`Deserializer::deserialize_enum`].
1376//!
1377//! Use these simple rules to remember how an enum will be represented in XML:
1378//! - In a `$value` field the representation is always the same as the top-level representation;
1379//! - In a `$text` field the representation is always the same as in a normal field,
1380//! but the surrounding tags with the field name are removed;
1381//! - In a normal field the representation always contains a tag with the field name.
1382//!
1383//! Normal enum variant
1384//! -------------------
1385//!
1386//! To model an `xs:choice` XML construct use `$value` field.
1387//! To model a top-level `xs:choice` just use the enum type.
1388//!
1389//! |Kind |Top-level and in `$value` field |In normal field |In `$text` field |
1390//! |-------|-----------------------------------------|---------------------|---------------------|
1391//! |Unit |`<Unit/>` |`<field>Unit</field>`|`Unit` |
1392//! |Newtype|`<Newtype>42</Newtype>` |Err(Custom) [^0] |Err(Custom) [^0] |
1393//! |Tuple |`<Tuple>42</Tuple><Tuple>answer</Tuple>` |Err(Custom) [^0] |Err(Custom) [^0] |
1394//! |Struct |`<Struct><q>42</q><a>answer</a></Struct>`|Err(Custom) [^0] |Err(Custom) [^0] |
1395//!
1396//! `$text` enum variant
1397//! --------------------
1398//!
1399//! |Kind |Top-level and in `$value` field |In normal field |In `$text` field |
1400//! |-------|-----------------------------------------|---------------------|---------------------|
1401//! |Unit |_(empty)_ |`<field/>` |_(empty)_ |
1402//! |Newtype|`42` |Err(Custom) [^0] [^1]|Err(Custom) [^0] [^2]|
1403//! |Tuple |`42 answer` |Err(Custom) [^0] [^3]|Err(Custom) [^0] [^4]|
1404//! |Struct |Err(Custom) [^0] |Err(Custom) [^0] |Err(Custom) [^0] |
1405//!
1406//! [^0]: The error is returned by the deserialized type. In the case of a derived implementation
1407//! a `Custom` error will be returned, but a custom deserialize implementation can successfully
1408//! deserialize the value from the string which will be passed to it.
1409//!
1410//! [^1]: If this were serialized as `<field>42</field>`, it would be ambiguous during deserialization,
1411//! because it would clash with the `Unit` representation in a normal field.
1412//!
1413//! [^2]: If this were serialized as `42`, it would be ambiguous during deserialization,
1414//! because it would clash with the `Unit` representation in a `$text` field.
1415//!
1416//! [^3]: If this were serialized as `<field>42 answer</field>`, it would be ambiguous during deserialization,
1417//! because it would clash with the `Unit` representation in a normal field.
1418//!
1419//! [^4]: If this were serialized as `42 answer`, it would be ambiguous during deserialization,
1420//! because it would clash with the `Unit` representation in a `$text` field.
1421//!
1422//!
1423//!
1424//! Difference between `$text` and `$value` special names
1425//! =====================================================
1426//!
1427//! quick-xml supports two special names for fields -- `$text` and `$value`.
1428//! Although they may seem the same, there is a distinction. Two different
1429//! names are required mostly for serialization, because quick-xml should know
1430//! how you want to serialize certain constructs, which could be represented
1431//! through XML in multiple different ways.
1432//!
1433//! The only difference is in how complex types and sequences are serialized.
1434//! If you are in doubt about which one to choose, begin with [`$value`](#value).
1435//!
1436//! ## `$text`
1437//! `$text` is used when you want to write your XML as text or CDATA content.
1438//! More formally, a field with that name represents a simple type definition with
1439//! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic,
1440//! as described in the [specification].
1441//!
1442//! As a result, not all types of such fields can be serialized. Serialization
1443//! of only the following types is supported:
1444//! - all primitive types (strings, numbers, booleans)
1445//! - unit variants of enumerations (serializes to a name of a variant)
1446//! - newtypes (delegates serialization to inner type)
1447//! - [`Option`] of above (`None` serializes to nothing)
1448//! - sequences (including tuples and tuple variants of enumerations) of above,
1449//! excluding `None` and empty string elements (because it will not be possible
1450//! to deserialize them back). The elements are separated by space(s)
1451//! - unit type `()` and unit structs (serializes to nothing)
1452//!
1453//! Complex types, such as structs and maps, are not supported in this field.
1454//! If you want them, you should use `$value`.
1455//!
1456//! Sequences are serialized to a space-delimited string, that is why only certain
1457//! types are allowed in this mode:
1458//!
1459//! ```
1460//! # use serde::{Deserialize, Serialize};
1461//! # use quick_xml::de::from_str;
1462//! # use quick_xml::se::to_string;
1463//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1464//! struct AnyName {
1465//! #[serde(rename = "$text")]
1466//! field: Vec<usize>,
1467//! }
1468//!
1469//! let obj = AnyName { field: vec![1, 2, 3] };
1470//! let xml = to_string(&obj).unwrap();
1471//! assert_eq!(xml, "<AnyName>1 2 3</AnyName>");
1472//!
1473//! let object: AnyName = from_str(&xml).unwrap();
1474//! assert_eq!(object, obj);
1475//! ```
1476//!
1477//! ## `$value`
1478//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1479//!
1480//! NOTE: a name `#content` would better explain the purpose of that field,
1481//! but `$value` is used for compatibility with other XML serde crates, which
1482//! use that name. This will allow you to switch XML crates more smoothly if required.
1483//! </div>
1484//!
1485//! Representation of primitive types in `$value` does not differ from their
1486//! representation in `$text` field. The difference is how sequences are serialized.
1487//! `$value` serializes each sequence item as a separate XML element. The name
1488//! of that element is taken from serialized type, and because only `enum`s provide
1489//! such name (their variant name), only they should be used for such fields.
1490//!
1491//! `$value` fields do not support `struct` types with fields; the serialization
1492//! of such types would end with an `Err(Unsupported)`. Unit structs and the unit
1493//! type `()` serialize to nothing and can be deserialized from any content.
1494//!
1495//! Serialization and deserialization of a `$value` field is performed as usual, except
1496//! that the name for an XML element will be given by the serialized type, instead of
1497//! the field. This allows serializing enumerated types, where the variant is encoded
1498//! as a tag name, and so representing an XSD `xs:choice` schema by a Rust `enum`.
1499//!
1500//! In the example below, `field` will be serialized as `<field>A</field>`, `<field>B</field>`
1501//! or `<field>C</field>`, because elements get their names from the field name. It cannot be
1502//! deserialized from the elements `<A/>`, `<B/>` or `<C/>`, because `AnyName` looks only for `<field>`:
1503//!
1504//! ```
1505//! # use serde::{Deserialize, Serialize};
1506//! # use pretty_assertions::assert_eq;
1507//! # #[derive(PartialEq, Debug)]
1508//! #[derive(Deserialize, Serialize)]
1509//! enum Enum { A, B, C }
1510//!
1511//! # #[derive(PartialEq, Debug)]
1512//! #[derive(Deserialize, Serialize)]
1513//! struct AnyName {
1514//! // <field>A</field>, <field>B</field>, or <field>C</field>
1515//! field: Enum,
1516//! }
1517//! # assert_eq!(
1518//! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1519//! # "<AnyName><field>A</field></AnyName>",
1520//! # );
1521//! # assert_eq!(
1522//! # AnyName { field: Enum::B },
1523//! # quick_xml::de::from_str("<root><field>B</field></root>").unwrap(),
1524//! # );
1525//! ```
1526//!
1527//! If you rename field to `$value`, then `field` would be serialized as `<A/>`,
1528//! `<B/>` or `<C/>`, depending on its content. It is also possible to
1529//! deserialize it from the same elements:
1530//!
1531//! ```
1532//! # use serde::{Deserialize, Serialize};
1533//! # use pretty_assertions::assert_eq;
1534//! # #[derive(Deserialize, Serialize, PartialEq, Debug)]
1535//! # enum Enum { A, B, C }
1536//! #
1537//! # #[derive(PartialEq, Debug)]
1538//! #[derive(Deserialize, Serialize)]
1539//! struct AnyName {
1540//! // <A/>, <B/> or <C/>
1541//! #[serde(rename = "$value")]
1542//! field: Enum,
1543//! }
1544//! # assert_eq!(
1545//! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1546//! # "<AnyName><A/></AnyName>",
1547//! # );
1548//! # assert_eq!(
1549//! # AnyName { field: Enum::B },
1550//! # quick_xml::de::from_str("<root><B/></root>").unwrap(),
1551//! # );
1552//! ```
1553//!
1554//! ### Primitives and sequences of primitives
1555//!
1556//! Sequences are serialized to a list of elements. Note that types that do not
1557//! produce their own tag (i.e. primitives) are written as is, without delimiters:
1558//!
1559//! ```
1560//! # use serde::{Deserialize, Serialize};
1561//! # use pretty_assertions::assert_eq;
1562//! # use quick_xml::de::from_str;
1563//! # use quick_xml::se::to_string;
1564//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1565//! struct AnyName {
1566//! #[serde(rename = "$value")]
1567//! field: Vec<usize>,
1568//! }
1569//!
1570//! let obj = AnyName { field: vec![1, 2, 3] };
1571//! let xml = to_string(&obj).unwrap();
1572//! // Note that types that do not produce their own tag are written as is!
1573//! assert_eq!(xml, "<AnyName>123</AnyName>");
1574//!
1575//! let object: AnyName = from_str("<AnyName>123</AnyName>").unwrap();
1576//! assert_eq!(object, AnyName { field: vec![123] });
1577//!
1578//! // `1 2 3` is mapped to a single `usize` element
1579//! // It is impossible to deserialize list of primitives to such field
1580//! from_str::<AnyName>("<AnyName>1 2 3</AnyName>").unwrap_err();
1581//! ```
1582//!
1583//! A particular case of that example is a string `$value` field, which is probably
1584//! the most common use of that attribute:
1585//!
1586//! ```
1587//! # use serde::{Deserialize, Serialize};
1588//! # use pretty_assertions::assert_eq;
1589//! # use quick_xml::de::from_str;
1590//! # use quick_xml::se::to_string;
1591//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1592//! struct AnyName {
1593//! #[serde(rename = "$value")]
1594//! field: String,
1595//! }
1596//!
1597//! let obj = AnyName { field: "content".to_string() };
1598//! let xml = to_string(&obj).unwrap();
1599//! assert_eq!(xml, "<AnyName>content</AnyName>");
1600//! ```
1601//!
1602//! ### Structs and sequences of structs
1603//!
1604//! Note that structures do not have a serializable name either (the name of the
1605//! type is never used), so it is impossible to serialize a non-unit struct or a
1606//! sequence of non-unit structs in a `$value` field. (Sequences of) unit structs
1607//! are serialized as an empty string, because units themselves serialize
1608//! to nothing:
1609//!
1610//! ```
1611//! # use serde::{Deserialize, Serialize};
1612//! # use pretty_assertions::assert_eq;
1613//! # use quick_xml::de::from_str;
1614//! # use quick_xml::se::to_string;
1615//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1616//! struct Unit;
1617//!
1618//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1619//! struct AnyName {
1620//! // #[serde(default)] is required for deserialization of empty lists
1621//! // This is a general note, not related to $value
1622//! #[serde(rename = "$value", default)]
1623//! field: Vec<Unit>,
1624//! }
1625//!
1626//! let obj = AnyName { field: vec![Unit, Unit, Unit] };
1627//! let xml = to_string(&obj).unwrap();
1628//! assert_eq!(xml, "<AnyName/>");
1629//!
1630//! let object: AnyName = from_str("<AnyName/>").unwrap();
1631//! assert_eq!(object, AnyName { field: vec![] });
1632//!
1633//! let object: AnyName = from_str("<AnyName></AnyName>").unwrap();
1634//! assert_eq!(object, AnyName { field: vec![] });
1635//!
1636//! let object: AnyName = from_str("<AnyName><A/><B/><C/></AnyName>").unwrap();
1637//! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] });
1638//! ```
1639//!
1640//! ### Enums and sequences of enums
1641//!
1642//! Enumerations use the variant name as an element name:
1643//!
1644//! ```
1645//! # use serde::{Deserialize, Serialize};
1646//! # use pretty_assertions::assert_eq;
1647//! # use quick_xml::de::from_str;
1648//! # use quick_xml::se::to_string;
1649//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1650//! struct AnyName {
1651//! #[serde(rename = "$value")]
1652//! field: Vec<Enum>,
1653//! }
1654//!
1655//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1656//! enum Enum { A, B, C }
1657//!
1658//! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] };
1659//! let xml = to_string(&obj).unwrap();
1660//! assert_eq!(
1661//! xml,
1662//! "<AnyName>\
1663//! <A/>\
1664//! <B/>\
1665//! <C/>\
1666//! </AnyName>"
1667//! );
1668//!
1669//! let object: AnyName = from_str(&xml).unwrap();
1670//! assert_eq!(object, obj);
1671//! ```
1672//!
1673//! ----------------------------------------------------------------------------
1674//!
1675//! You can have either `$text` or `$value` field in your structs. Unfortunately,
1676//! that is not enforced, so you can theoretically have both, but you should
1677//! avoid that.
1678//!
1679//!
1680//!
1681//! Frequently Used Patterns
1682//! ========================
1683//!
1684//! Some XML constructs are used so frequently that it is worth documenting the recommended
1685//! way to represent them in Rust. The sections below describe them.
1686//!
1687//! `<element>` lists
1688//! -----------------
1689//! Many XML formats wrap lists of elements in an additional container,
1690//! although this is not required by the XML rules:
1691//!
1692//! ```xml
1693//! <root>
1694//! <field1/>
1695//! <field2/>
1696//! <list><!-- Container -->
1697//! <element/>
1698//! <element/>
1699//! <element/>
1700//! </list>
1701//! <field3/>
1702//! </root>
1703//! ```
1704//! In this case, there is a great desire to describe this XML in this way:
1705//! ```
1706//! /// Represents <element/>
1707//! type Element = ();
1708//!
1709//! /// Represents <root>...</root>
1710//! struct AnyName {
1711//! // Incorrect
1712//! list: Vec<Element>,
1713//! }
1714//! ```
1715//! This will not work, because the `<list>` element can potentially have attributes
1716//! and other elements inside. You should define the struct for the `<list>`
1717//! explicitly, as you would do in the XSD for that XML:
1718//! ```
1719//! /// Represents <element/>
1720//! type Element = ();
1721//!
1722//! /// Represents <root>...</root>
1723//! struct AnyName {
1724//! // Correct
1725//! list: List,
1726//! }
1727//! /// Represents <list>...</list>
1728//! struct List {
1729//! element: Vec<Element>,
1730//! }
1731//! ```
1732//!
1733//! If you want to simplify your API, you could write a simple function for unwrapping
1734//! inner list and apply it via [`deserialize_with`]:
1735//!
1736//! ```
1737//! # use pretty_assertions::assert_eq;
1738//! use quick_xml::de::from_str;
1739//! use serde::{Deserialize, Deserializer};
1740//!
1741//! /// Represents <element/>
1742//! type Element = ();
1743//!
1744//! /// Represents <root>...</root>
1745//! #[derive(Deserialize, Debug, PartialEq)]
1746//! struct AnyName {
1747//! #[serde(deserialize_with = "unwrap_list")]
1748//! list: Vec<Element>,
1749//! }
1750//!
1751//! fn unwrap_list<'de, D>(deserializer: D) -> Result<Vec<Element>, D::Error>
1752//! where
1753//! D: Deserializer<'de>,
1754//! {
1755//! /// Represents <list>...</list>
1756//! #[derive(Deserialize)]
1757//! struct List {
1758//! // default allows empty list
1759//! #[serde(default)]
1760//! element: Vec<Element>,
1761//! }
1762//! Ok(List::deserialize(deserializer)?.element)
1763//! }
1764//!
1765//! assert_eq!(
1766//! AnyName { list: vec![(), (), ()] },
1767//! from_str("
1768//! <root>
1769//! <list>
1770//! <element/>
1771//! <element/>
1772//! <element/>
1773//! </list>
1774//! </root>
1775//! ").unwrap(),
1776//! );
1777//! ```
1778//!
1779//! Instead of writing such functions manually, you also could try <https://lib.rs/crates/serde-query>.
1780//!
1781//! Overlapped (Out-of-Order) Elements
1782//! ----------------------------------
1783//! In the case that the list might contain tags that are overlapped with
1784//! tags that do not correspond to the list (this is a usual case in XML
1785//! documents) like this:
1786//! ```xml
1787//! <any-name>
1788//! <item/>
1789//! <another-item/>
1790//! <item/>
1791//! <item/>
1792//! </any-name>
1793//! ```
1794//! you should enable the [`overlapped-lists`] feature to make it possible
1795//! to deserialize this to:
1796//! ```no_run
1797//! # use serde::Deserialize;
1798//! #[derive(Deserialize)]
1799//! #[serde(rename_all = "kebab-case")]
1800//! struct AnyName {
1801//! item: Vec<()>,
1802//! another_item: (),
1803//! }
1804//! ```
1805//!
1806//!
1807//! Internally Tagged Enums
1808//! -----------------------
//! [Tagged enums] are currently not supported because of an issue in the Serde
//! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in
//! Serde which could be useful for XML parsing ([serde#1495]). This can be worked
//! around by manually implementing deserialization with
//! `#[serde(deserialize_with = "func")]` or by implementing [`Deserialize`], but
//! this gets tedious very quickly for files with a large number of tagged enums.
//! To help with this, quick-xml provides the
//! [`impl_deserialize_for_internally_tagged_enum!`] macro. See the macro
//! documentation for details.
1817//!
1818//!
1819//! [`overlapped-lists`]: ../index.html#overlapped-lists
1820//! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
1821//! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
1822//! [#497]: https://github.com/tafia/quick-xml/issues/497
1823//! [`Serializer::serialize_unit_variant`]: serde::Serializer::serialize_unit_variant
1824//! [`Deserializer::deserialize_enum`]: serde::Deserializer::deserialize_enum
1825//! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged
1826//! [serde#1183]: https://github.com/serde-rs/serde/issues/1183
1827//! [serde#1495]: https://github.com/serde-rs/serde/issues/1495
1828//! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586
1829//! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum
1830
// Macros must be defined before the modules that use them.
// Also, macros must be imported before they are used.
1833use serde::serde_if_integer128;
1834
/// Generates a deserializer method `$deserialize` for a type that is parsed
/// from its textual representation with [`str::parse`] and then handed to the
/// visitor method `$visit`.
///
/// The optional trailing token is inserted before `self` in the generated
/// signature. The expansion site must provide `self.read_string()`.
macro_rules! deserialize_type {
    ($deserialize:ident => $visit:ident, $($mut:tt)?) => {
        fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // No need to unescape because valid integer representations cannot be escaped
            let value = self.read_string()?.parse()?;
            visitor.$visit(value)
        }
    };
}
1847
/// Implement deserialization methods for scalar types, such as numbers, strings,
/// byte arrays, booleans and identifiers.
///
/// The optional argument is inserted before `self` in the signatures of the
/// generated methods that call `self.read_string()` (the fragment is named
/// `$mut`, so it is presumably intended for the `mut` keyword).
///
/// The generated code calls `self.read_string()`, `self.deserialize_any()`,
/// `self.deserialize_unit()`, `self.deserialize_seq()` and
/// `self.deserialize_struct()`, so they must be available at the expansion
/// site, together with `str2bool`, `Cow`, `Visitor` and `DeError`.
macro_rules! deserialize_primitives {
    ($($mut:tt)?) => {
        // Signed integers: parsed from the text returned by `read_string()`
        deserialize_type!(deserialize_i8 => visit_i8, $($mut)?);
        deserialize_type!(deserialize_i16 => visit_i16, $($mut)?);
        deserialize_type!(deserialize_i32 => visit_i32, $($mut)?);
        deserialize_type!(deserialize_i64 => visit_i64, $($mut)?);

        // Unsigned integers
        deserialize_type!(deserialize_u8 => visit_u8, $($mut)?);
        deserialize_type!(deserialize_u16 => visit_u16, $($mut)?);
        deserialize_type!(deserialize_u32 => visit_u32, $($mut)?);
        deserialize_type!(deserialize_u64 => visit_u64, $($mut)?);

        // 128-bit integers, wrapped in serde's `serde_if_integer128!` macro
        serde_if_integer128! {
            deserialize_type!(deserialize_i128 => visit_i128, $($mut)?);
            deserialize_type!(deserialize_u128 => visit_u128, $($mut)?);
        }

        // Floating point numbers
        deserialize_type!(deserialize_f32 => visit_f32, $($mut)?);
        deserialize_type!(deserialize_f64 => visit_f64, $($mut)?);

        /// Reads a string and converts it to a boolean with `str2bool`.
        fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            let text = self.read_string()?;

            str2bool(&text, visitor)
        }

        /// Character represented as [strings](#method.deserialize_str).
        #[inline]
        fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Reads a string and passes it to the visitor: borrowed data is passed
        /// to `visit_borrowed_str`, owned data is passed to `visit_string`.
        fn deserialize_str<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            let text = self.read_string()?;
            match text {
                Cow::Borrowed(string) => visitor.visit_borrowed_str(string),
                Cow::Owned(string) => visitor.visit_string(string),
            }
        }

        /// Representation of owned strings the same as [non-owned](#method.deserialize_str).
        #[inline]
        fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Forwards deserialization to the [`deserialize_any`](#method.deserialize_any).
        #[inline]
        fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_any(visitor)
        }

        /// Forwards deserialization to the [`deserialize_bytes`](#method.deserialize_bytes).
        #[inline]
        fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_bytes(visitor)
        }

        /// Representation of the named units the same as [unnamed units](#method.deserialize_unit).
        #[inline]
        fn deserialize_unit_struct<V>(
            self,
            _name: &'static str,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }

        /// Representation of tuples the same as [sequences](#method.deserialize_seq).
        #[inline]
        fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_seq(visitor)
        }

        /// Representation of named tuples the same as [unnamed tuples](#method.deserialize_tuple).
        #[inline]
        fn deserialize_tuple_struct<V>(
            self,
            _name: &'static str,
            len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_tuple(len, visitor)
        }

        /// Forwards deserialization to the [`deserialize_struct`](#method.deserialize_struct)
        /// with empty name and fields.
        #[inline]
        fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_struct("", &[], visitor)
        }

        /// Identifiers represented as [strings](#method.deserialize_str).
        #[inline]
        fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Forwards deserialization to the [`deserialize_unit`](#method.deserialize_unit).
        #[inline]
        fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }
    };
}
1991
1992mod key;
1993mod map;
1994mod resolver;
1995mod simple_type;
1996mod text;
1997mod var;
1998
1999pub use crate::errors::serialize::DeError;
2000pub use resolver::{EntityResolver, PredefinedEntityResolver};
2001
2002use crate::{
2003 de::map::ElementMapAccess,
2004 encoding::Decoder,
2005 errors::Error,
2006 events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
2007 name::QName,
2008 reader::Reader,
2009};
2010use serde::de::{self, Deserialize, DeserializeOwned, DeserializeSeed, SeqAccess, Visitor};
2011use std::borrow::Cow;
2012#[cfg(feature = "overlapped-lists")]
2013use std::collections::VecDeque;
2014use std::io::BufRead;
2015use std::mem::replace;
2016#[cfg(feature = "overlapped-lists")]
2017use std::num::NonZeroUsize;
2018use std::ops::Deref;
2019
/// Special field name for data represented by a text node or a CDATA node.
/// XML markup is not expected
pub(crate) const TEXT_KEY: &str = "$text";
/// Special field name for data represented by any XML markup inside
pub(crate) const VALUE_KEY: &str = "$value";
2024
/// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
/// events. _Consecutive_ means that the events follow each other or are
/// delimited only by (any number of) [`Comment`] or [`PI`] events.
///
/// Internally the text is stored in a `Cow<str>`. Cloning the text is cheap
/// while it is borrowed and copies the data when it is owned.
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Text<'a> {
    /// The decoded content, borrowed or owned
    text: Cow<'a, str>,
}
2040
2041impl<'a> Deref for Text<'a> {
2042 type Target = str;
2043
2044 #[inline]
2045 fn deref(&self) -> &Self::Target {
2046 self.text.deref()
2047 }
2048}
2049
2050impl<'a> From<&'a str> for Text<'a> {
2051 #[inline]
2052 fn from(text: &'a str) -> Self {
2053 Self {
2054 text: Cow::Borrowed(text),
2055 }
2056 }
2057}
2058
2059////////////////////////////////////////////////////////////////////////////////////////////////////
2060
/// Simplified event which contains only those variants that are used by the
/// deserializer
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DeEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
    /// events. _Consecutive_ means that the events follow each other or are
    /// delimited only by (any number of) [`Comment`] or [`PI`] events.
    ///
    /// [`Text`]: Event::Text
    /// [`CData`]: Event::CData
    /// [`Comment`]: Event::Comment
    /// [`PI`]: Event::PI
    Text(Text<'a>),
    /// End of XML document.
    Eof,
}
2080
2081////////////////////////////////////////////////////////////////////////////////////////////////////
2082
/// Simplified event which contains only those variants that are used by the
/// deserializer, but in which [`Text`] events are not yet fully processed.
///
/// [`Text`] events should be trimmed if they are not surrounded by other
/// [`Text`] or [`CData`] events. This event contains an intermediate state of
/// a [`Text`] event, where it is trimmed from the start, but not from the end.
/// To trim trailing spaces we have to look ahead by one deserializer event
/// (i. e. skip all comments and processing instructions).
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PayloadEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document.
    Eof,
}
2109
2110impl<'a> PayloadEvent<'a> {
2111 /// Ensures that all data is owned to extend the object's lifetime if necessary.
2112 #[inline]
2113 fn into_owned(self) -> PayloadEvent<'static> {
2114 match self {
2115 PayloadEvent::Start(e) => PayloadEvent::Start(e.into_owned()),
2116 PayloadEvent::End(e) => PayloadEvent::End(e.into_owned()),
2117 PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()),
2118 PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()),
2119 PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()),
2120 PayloadEvent::Eof => PayloadEvent::Eof,
2121 }
2122 }
2123}
2124
/// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s.
/// [`PayloadEvent::Text`] events that are followed by any event except
/// [`PayloadEvent::Text`] or [`PayloadEvent::CData`] are trimmed from the end.
struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolver> {
    /// A source of low-level XML events
    reader: R,
    /// Intermediate event that could be returned by the next call to `next()`.
    /// If that is a `Text` event then its leading spaces are already trimmed,
    /// but its trailing spaces are not. Before the event is returned, trimming
    /// of the trailing spaces could be necessary
    lookahead: Result<PayloadEvent<'i>, DeError>,

    /// Used to resolve unknown entities that would otherwise cause the parser
    /// to return an [`EscapeError::UnrecognizedEntity`] error.
    ///
    /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity
    entity_resolver: E,
}
2143
2144impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2145 fn new(mut reader: R, entity_resolver: E) -> Self {
2146 // Lookahead by one event immediately, so we do not need to check in the
2147 // loop if we need lookahead or not
2148 let lookahead = reader.next();
2149
2150 Self {
2151 reader,
2152 lookahead,
2153 entity_resolver,
2154 }
2155 }
2156
2157 /// Returns `true` if all events was consumed
2158 const fn is_empty(&self) -> bool {
2159 matches!(self.lookahead, Ok(PayloadEvent::Eof))
2160 }
2161
2162 /// Read next event and put it in lookahead, return the current lookahead
2163 #[inline(always)]
2164 fn next_impl(&mut self) -> Result<PayloadEvent<'i>, DeError> {
2165 replace(&mut self.lookahead, self.reader.next())
2166 }
2167
2168 /// Returns `true` when next event is not a text event in any form.
2169 #[inline(always)]
2170 const fn current_event_is_last_text(&self) -> bool {
2171 // If next event is a text or CDATA, we should not trim trailing spaces
2172 !matches!(
2173 self.lookahead,
2174 Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
2175 )
2176 }
2177
2178 /// Read all consequent [`Text`] and [`CData`] events until non-text event
2179 /// occurs. Content of all events would be appended to `result` and returned
2180 /// as [`DeEvent::Text`].
2181 ///
2182 /// [`Text`]: PayloadEvent::Text
2183 /// [`CData`]: PayloadEvent::CData
2184 fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
2185 loop {
2186 if self.current_event_is_last_text() {
2187 break;
2188 }
2189
2190 match self.next_impl()? {
2191 PayloadEvent::Text(mut e) => {
2192 if self.current_event_is_last_text() {
2193 // FIXME: Actually, we should trim after decoding text, but now we trim before
2194 e.inplace_trim_end();
2195 }
2196 result
2197 .to_mut()
2198 .push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
2199 }
2200 PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
2201
2202 // SAFETY: current_event_is_last_text checks that event is Text or CData
2203 _ => unreachable!("Only `Text` and `CData` events can come here"),
2204 }
2205 }
2206 Ok(DeEvent::Text(Text { text: result }))
2207 }
2208
2209 /// Return an input-borrowing event.
2210 fn next(&mut self) -> Result<DeEvent<'i>, DeError> {
2211 loop {
2212 return match self.next_impl()? {
2213 PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
2214 PayloadEvent::End(e) => Ok(DeEvent::End(e)),
2215 PayloadEvent::Text(mut e) => {
2216 if self.current_event_is_last_text() && e.inplace_trim_end() {
2217 // FIXME: Actually, we should trim after decoding text, but now we trim before
2218 continue;
2219 }
2220 self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2221 }
2222 PayloadEvent::CData(e) => self.drain_text(e.decode()?),
2223 PayloadEvent::DocType(e) => {
2224 self.entity_resolver
2225 .capture(e)
2226 .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?;
2227 continue;
2228 }
2229 PayloadEvent::Eof => Ok(DeEvent::Eof),
2230 };
2231 }
2232 }
2233
2234 #[inline]
2235 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2236 match self.lookahead {
2237 // We pre-read event with the same name that is required to be skipped.
2238 // First call of `read_to_end` will end out pre-read event, the second
2239 // will consume other events
2240 Ok(PayloadEvent::Start(ref e)) if e.name() == name => {
2241 let result1 = self.reader.read_to_end(name);
2242 let result2 = self.reader.read_to_end(name);
2243
2244 // In case of error `next_impl` returns `Eof`
2245 let _ = self.next_impl();
2246 result1?;
2247 result2?;
2248 }
2249 // We pre-read event with the same name that is required to be skipped.
2250 // Because this is end event, we already consume the whole tree, so
2251 // nothing to do, just update lookahead
2252 Ok(PayloadEvent::End(ref e)) if e.name() == name => {
2253 let _ = self.next_impl();
2254 }
2255 Ok(_) => {
2256 let result = self.reader.read_to_end(name);
2257
2258 // In case of error `next_impl` returns `Eof`
2259 let _ = self.next_impl();
2260 result?;
2261 }
2262 // Read next lookahead event, unpack error from the current lookahead
2263 Err(_) => {
2264 self.next_impl()?;
2265 }
2266 }
2267 Ok(())
2268 }
2269
2270 #[inline]
2271 fn decoder(&self) -> Decoder {
2272 self.reader.decoder()
2273 }
2274}
2275
2276////////////////////////////////////////////////////////////////////////////////////////////////////
2277
2278/// Deserialize an instance of type `T` from a string of XML text.
2279pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
2280where
2281 T: Deserialize<'de>,
2282{
2283 let mut de = Deserializer::from_str(s);
2284 T::deserialize(&mut de)
2285}
2286
2287/// Deserialize from a reader. This method will do internal copies of data
2288/// readed from `reader`. If you want have a `&str` input and want to borrow
2289/// as much as possible, use [`from_str`].
2290pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
2291where
2292 R: BufRead,
2293 T: DeserializeOwned,
2294{
2295 let mut de = Deserializer::from_reader(reader);
2296 T::deserialize(&mut de)
2297}
2298
2299// TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean,
2300// valid boolean representations are only "true", "false", "1", and "0"
2301fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
2302where
2303 V: de::Visitor<'de>,
2304{
2305 match value {
2306 "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
2307 visitor.visit_bool(true)
2308 }
2309 "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => {
2310 visitor.visit_bool(false)
2311 }
2312 _ => Err(DeError::InvalidBoolean(value.into())),
2313 }
2314}
2315
2316fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
2317where
2318 V: Visitor<'de>,
2319{
2320 #[cfg(feature = "encoding")]
2321 {
2322 let value = decoder.decode(value)?;
2323 // No need to unescape because valid boolean representations cannot be escaped
2324 str2bool(value.as_ref(), visitor)
2325 }
2326
2327 #[cfg(not(feature = "encoding"))]
2328 {
2329 // No need to unescape because valid boolean representations cannot be escaped
2330 match value {
2331 b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => {
2332 visitor.visit_bool(true)
2333 }
2334 b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => {
2335 visitor.visit_bool(false)
2336 }
2337 e => Err(DeError::InvalidBoolean(decoder.decode(e)?.into())),
2338 }
2339 }
2340}
2341
2342////////////////////////////////////////////////////////////////////////////////////////////////////
2343
/// A structure that deserializes XML into Rust values.
pub struct Deserializer<'de, R, E: EntityResolver = PredefinedEntityResolver>
where
    R: XmlRead<'de>,
{
    /// An XML reader that streams events into this deserializer
    reader: XmlReader<'de, R, E>,

    /// When deserializing sequences, we sometimes have to skip unwanted events.
    /// Those events should be stored and then replayed. This is the replay
    /// buffer, which streams events while it is not empty. When it is
    /// exhausted, events are requested from [`Self::reader`].
    #[cfg(feature = "overlapped-lists")]
    read: VecDeque<DeEvent<'de>>,
    /// When deserializing sequences, we sometimes have to skip events, because
    /// XML is tolerant of element order: even if the order is strictly
    /// specified in the XSD (using `xs:sequence`), most XML parsers allow
    /// order violations. That means that the elements that form a sequence
    /// could be overlapped with other elements not related to that sequence.
    ///
    /// In order to support this, the deserializer scans events, skips unwanted
    /// events and stores them here. After a call to [`Self::start_replay()`]
    /// all events are moved from this buffer to [`Self::read`].
    #[cfg(feature = "overlapped-lists")]
    write: VecDeque<DeEvent<'de>>,
    /// Maximum number of events that can be skipped when processing sequences
    /// that occur out-of-order. This field is used to prevent potential
    /// denial-of-service (DoS) attacks which could cause infinite memory
    /// consumption when parsing a very large amount of XML into a sequence field.
    #[cfg(feature = "overlapped-lists")]
    limit: Option<NonZeroUsize>,

    /// An event that was read from [`Self::reader`] by `peek()`, but not yet
    /// consumed by `next()`
    #[cfg(not(feature = "overlapped-lists"))]
    peek: Option<DeEvent<'de>>,

    /// Buffer to store attribute name as a field name exposed to serde consumers
    key_buf: String,
}
2382
2383impl<'de, R, E> Deserializer<'de, R, E>
2384where
2385 R: XmlRead<'de>,
2386 E: EntityResolver,
2387{
2388 /// Create an XML deserializer from one of the possible quick_xml input sources.
2389 ///
2390 /// Typically it is more convenient to use one of these methods instead:
2391 ///
2392 /// - [`Deserializer::from_str`]
2393 /// - [`Deserializer::from_reader`]
2394 fn new(reader: R, entity_resolver: E) -> Self {
2395 Self {
2396 reader: XmlReader::new(reader, entity_resolver),
2397
2398 #[cfg(feature = "overlapped-lists")]
2399 read: VecDeque::new(),
2400 #[cfg(feature = "overlapped-lists")]
2401 write: VecDeque::new(),
2402 #[cfg(feature = "overlapped-lists")]
2403 limit: None,
2404
2405 #[cfg(not(feature = "overlapped-lists"))]
2406 peek: None,
2407
2408 key_buf: String::new(),
2409 }
2410 }
2411
2412 /// Returns `true` if all events was consumed.
2413 pub fn is_empty(&self) -> bool {
2414 #[cfg(feature = "overlapped-lists")]
2415 if self.read.is_empty() {
2416 return self.reader.is_empty();
2417 }
2418 #[cfg(not(feature = "overlapped-lists"))]
2419 if self.peek.is_none() {
2420 return self.reader.is_empty();
2421 }
2422 false
2423 }
2424
    /// Returns a reference to the underlying XML reader.
    ///
    /// The example below uses it to get the error position after a failed
    /// deserialization:
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use serde::Deserialize;
    /// use quick_xml::de::Deserializer;
    /// use quick_xml::Reader;
    ///
    /// #[derive(Deserialize)]
    /// struct SomeStruct {
    ///     field1: String,
    ///     field2: String,
    /// }
    ///
    /// // Try to deserialize from broken XML
    /// let mut de = Deserializer::from_str(
    ///     "<SomeStruct><field1><field2></SomeStruct>"
    /// //   0                           ^= 28        ^= 41
    /// );
    ///
    /// let err = SomeStruct::deserialize(&mut de);
    /// assert!(err.is_err());
    ///
    /// let reader: &Reader<_> = de.get_ref().get_ref();
    ///
    /// assert_eq!(reader.error_position(), 28);
    /// assert_eq!(reader.buffer_position(), 41);
    /// ```
    pub const fn get_ref(&self) -> &R {
        &self.reader.reader
    }
2456
    /// Sets the maximum number of events that can be skipped during
    /// deserialization of sequences. `None` means no limit.
    ///
    /// If `<element>` contains more than the specified number of nested
    /// elements, `$text` or CDATA nodes, then [`DeError::TooManyEvents`] will
    /// be returned during deserialization of a sequence field (any type that
    /// uses [`deserialize_seq`] for the deserialization, for example, `Vec<T>`).
    ///
    /// This method can be used to prevent a [DoS] attack and infinite memory
    /// consumption when parsing a very large XML to a sequence field.
    ///
    /// It is strongly recommended to set the limit to some value when you parse
    /// data from untrusted sources. You should choose a value that your typical
    /// XMLs can have _between_ different elements that correspond to the same
    /// sequence.
    ///
    /// Returns `self` to allow chaining of calls.
    ///
    /// # Examples
    ///
    /// Let's imagine that we deserialize such a structure:
    /// ```
    /// struct List {
    ///     item: Vec<()>,
    /// }
    /// ```
    ///
    /// The XML that we try to parse looks like this:
    /// ```xml
    /// <any-name>
    ///   <item/>
    ///   <!-- Bufferization starts at this point -->
    ///   <another-item>
    ///     <some-element>with text</some-element>
    ///     <yet-another-element/>
    ///   </another-item>
    ///   <!-- Buffer will be emptied at this point; 7 events were buffered -->
    ///   <item/>
    ///   <!-- There is nothing to buffer, because elements follows each other -->
    ///   <item/>
    /// </any-name>
    /// ```
    ///
    /// There, when we deserialize the `item` field, we need to buffer 7 events
    /// before we can deserialize the second `<item/>`:
    ///
    /// - `<another-item>`
    /// - `<some-element>`
    /// - `$text(with text)`
    /// - `</some-element>`
    /// - `<yet-another-element/>` (virtual start event)
    /// - `<yet-another-element/>` (virtual end event)
    /// - `</another-item>`
    ///
    /// Note that `<yet-another-element/>` is internally represented as 2 events:
    /// one for the start tag and one for the end tag. In the future this can be
    /// eliminated, but for now we use the [auto-expanding feature] of a reader,
    /// because this simplifies the deserializer code.
    ///
    /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
    /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
    /// [auto-expanding feature]: crate::reader::Config::expand_empty_elements
    #[cfg(feature = "overlapped-lists")]
    pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
        self.limit = limit;
        self
    }
2521
2522 #[cfg(feature = "overlapped-lists")]
2523 fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2524 if self.read.is_empty() {
2525 self.read.push_front(self.reader.next()?);
2526 }
2527 if let Some(event) = self.read.front() {
2528 return Ok(event);
2529 }
2530 // SAFETY: `self.read` was filled in the code above.
2531 // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2532 // if unsafe code will be allowed
2533 unreachable!()
2534 }
2535 #[cfg(not(feature = "overlapped-lists"))]
2536 fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2537 if self.peek.is_none() {
2538 self.peek = Some(self.reader.next()?);
2539 }
2540 match self.peek.as_ref() {
2541 Some(v) => Ok(v),
2542 // SAFETY: a `None` variant for `self.peek` would have been replaced
2543 // by a `Some` variant in the code above.
2544 // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2545 // if unsafe code will be allowed
2546 None => unreachable!(),
2547 }
2548 }
2549
2550 fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
2551 // Replay skipped or peeked events
2552 #[cfg(feature = "overlapped-lists")]
2553 if let Some(event) = self.read.pop_front() {
2554 return Ok(event);
2555 }
2556 #[cfg(not(feature = "overlapped-lists"))]
2557 if let Some(e) = self.peek.take() {
2558 return Ok(e);
2559 }
2560 self.reader.next()
2561 }
2562
    /// Returns the mark after which all events, skipped by [`Self::skip()`] call,
    /// should be replayed after calling [`Self::start_replay()`].
    ///
    /// The mark is the current number of events in the [`Self::write`] buffer.
    #[cfg(feature = "overlapped-lists")]
    #[inline]
    #[must_use = "returned checkpoint should be used in `start_replay`"]
    fn skip_checkpoint(&self) -> usize {
        self.write.len()
    }
2571
2572 /// Extracts XML tree of events from and stores them in the skipped events
2573 /// buffer from which they can be retrieved later. You MUST call
2574 /// [`Self::start_replay()`] after calling this to give access to the skipped
2575 /// events and release internal buffers.
2576 #[cfg(feature = "overlapped-lists")]
2577 fn skip(&mut self) -> Result<(), DeError> {
2578 let event = self.next()?;
2579 self.skip_event(event)?;
2580 match self.write.back() {
2581 // Skip all subtree, if we skip a start event
2582 Some(DeEvent::Start(e)) => {
2583 let end = e.name().as_ref().to_owned();
2584 let mut depth = 0;
2585 loop {
2586 let event = self.next()?;
2587 match event {
2588 DeEvent::Start(ref e) if e.name().as_ref() == end => {
2589 self.skip_event(event)?;
2590 depth += 1;
2591 }
2592 DeEvent::End(ref e) if e.name().as_ref() == end => {
2593 self.skip_event(event)?;
2594 if depth == 0 {
2595 break;
2596 }
2597 depth -= 1;
2598 }
2599 DeEvent::Eof => {
2600 self.skip_event(event)?;
2601 break;
2602 }
2603 _ => self.skip_event(event)?,
2604 }
2605 }
2606 }
2607 _ => (),
2608 }
2609 Ok(())
2610 }
2611
2612 #[cfg(feature = "overlapped-lists")]
2613 #[inline]
2614 fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> {
2615 if let Some(max) = self.limit {
2616 if self.write.len() >= max.get() {
2617 return Err(DeError::TooManyEvents(max));
2618 }
2619 }
2620 self.write.push_back(event);
2621 Ok(())
2622 }
2623
2624 /// Moves buffered events, skipped after given `checkpoint` from [`Self::write`]
2625 /// skip buffer to [`Self::read`] buffer.
2626 ///
2627 /// After calling this method, [`Self::peek()`] and [`Self::next()`] starts
2628 /// return events that was skipped previously by calling [`Self::skip()`],
2629 /// and only when all that events will be consumed, the deserializer starts
2630 /// to drain events from underlying reader.
2631 ///
2632 /// This method MUST be called if any number of [`Self::skip()`] was called
2633 /// after [`Self::new()`] or `start_replay()` or you'll lost events.
2634 #[cfg(feature = "overlapped-lists")]
2635 fn start_replay(&mut self, checkpoint: usize) {
2636 if checkpoint == 0 {
2637 self.write.append(&mut self.read);
2638 std::mem::swap(&mut self.read, &mut self.write);
2639 } else {
2640 let mut read = self.write.split_off(checkpoint);
2641 read.append(&mut self.read);
2642 self.read = read;
2643 }
2644 }
2645
    /// Reads a text content, allowing it to be wrapped in one element.
    /// Calls [`Self::read_string_impl()`] with `allow_start` set to `true`.
    #[inline]
    fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
        self.read_string_impl(true)
    }
2650
2651 /// Consumes consequent [`Text`] and [`CData`] (both a referred below as a _text_)
2652 /// events, merge them into one string. If there are no such events, returns
2653 /// an empty string.
2654 ///
2655 /// If `allow_start` is `false`, then only text events are consumed, for other
2656 /// events an error is returned (see table below).
2657 ///
2658 /// If `allow_start` is `true`, then two or three events are expected:
2659 /// - [`DeEvent::Start`];
2660 /// - _(optional)_ [`DeEvent::Text`] which content is returned;
2661 /// - [`DeEvent::End`]. If text event was missed, an empty string is returned.
2662 ///
2663 /// Corresponding events are consumed.
2664 ///
2665 /// # Handling events
2666 ///
2667 /// The table below shows how events is handled by this method:
2668 ///
2669 /// |Event |XML |Handling
2670 /// |------------------|---------------------------|----------------------------------------
2671 /// |[`DeEvent::Start`]|`<tag>...</tag>` |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
2672 /// |[`DeEvent::End`] |`</any-tag>` |This is impossible situation, the method will panic if it happens
2673 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged
2674 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2675 ///
2676 /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
2677 ///
2678 /// |Event |XML |Handling
2679 /// |------------------|---------------------------|----------------------------------------------------------------------------------
2680 /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
2681 /// |[`DeEvent::End`] |`</tag>` |Returns an empty slice. The reader guarantee that tag will match the open one
2682 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, expects the `</tag>` after that
2683 /// |[`DeEvent::Eof`] | |Emits [`InvalidXml(IllFormed(MissingEndTag))`](DeError::InvalidXml)
2684 ///
2685 /// [`Text`]: Event::Text
2686 /// [`CData`]: Event::CData
2687 fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
2688 match self.next()? {
2689 DeEvent::Text(e) => Ok(e.text),
2690 // allow one nested level
2691 DeEvent::Start(e) if allow_start => self.read_text(e.name()),
2692 DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2693 // SAFETY: The reader is guaranteed that we don't have unmatched tags
2694 // If we here, then out deserializer has a bug
2695 DeEvent::End(e) => unreachable!("{:?}", e),
2696 DeEvent::Eof => Err(DeError::UnexpectedEof),
2697 }
2698 }
2699 /// Consumes one [`DeEvent::Text`] event and ensures that it is followed by the
2700 /// [`DeEvent::End`] event.
2701 ///
2702 /// # Parameters
2703 /// - `name`: name of a tag opened before reading text. The corresponding end tag
2704 /// should present in input just after the text
2705 fn read_text(&mut self, name: QName) -> Result<Cow<'de, str>, DeError> {
2706 match self.next()? {
2707 DeEvent::Text(e) => match self.next()? {
2708 // The matching tag name is guaranteed by the reader
2709 DeEvent::End(_) => Ok(e.text),
2710 // SAFETY: Cannot be two consequent Text events, they would be merged into one
2711 DeEvent::Text(_) => unreachable!(),
2712 DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2713 DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
2714 },
2715 // We can get End event in case of `<tag></tag>` or `<tag/>` input
2716 // Return empty text in that case
2717 // The matching tag name is guaranteed by the reader
2718 DeEvent::End(_) => Ok("".into()),
2719 DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
2720 DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
2721 }
2722 }
2723
/// Discards events up to and including the [`DeEvent::End`] event that closes
/// the element with [name](BytesEnd::name()) `name`. This method should be
/// called after [`Self::next()`] returned the corresponding start event.
///
/// Events that were buffered for replay are consumed first; once the replay
/// buffer is exhausted the rest is skipped directly in the underlying reader.
#[cfg(feature = "overlapped-lists")]
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
    // Number of nested elements with the same name that are currently open
    let mut depth = 0;

    // Phase 1: drain previously skipped (buffered) events
    while let Some(event) = self.read.pop_front() {
        match event {
            DeEvent::Start(e) if e.name() == name => depth += 1,
            DeEvent::End(e) if e.name() == name => {
                if depth == 0 {
                    // This is the end tag we were looking for
                    return Ok(());
                }
                depth -= 1;
            }
            // Any other buffered event is simply dropped
            _ => {}
        }
    }

    // Phase 2: nothing is buffered anymore, so use the effective reading
    // that does not allocate memory for events.
    //
    // We should close all opened tags, because we could buffer Start events,
    // but not the corresponding End events. So we keep skipping until we exit
    // all nested tags. The reader's `read_to_end()` will return an error if
    // an Eof was encountered prematurely (in case of malformed XML).
    //
    // <tag><tag></tag></tag>
    // ^^^^^^^^^^             - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2
    //           ^^^^^^       - read by the first call of `self.reader.read_to_end()`
    //                 ^^^^^^ - read by the second call of `self.reader.read_to_end()`
    for _ in 0..=depth {
        self.reader.read_to_end(name)?;
    }
    Ok(())
}
2770 #[cfg(not(feature = "overlapped-lists"))]
2771 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2772 // First one might be in self.peek
2773 match self.next()? {
2774 DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
2775 DeEvent::End(e) if e.name() == name => return Ok(()),
2776 _ => (),
2777 }
2778 self.reader.read_to_end(name)
2779 }
2780}
2781
2782impl<'de> Deserializer<'de, SliceReader<'de>> {
2783 /// Create new deserializer that will borrow data from the specified string.
2784 ///
2785 /// Deserializer created with this method will not resolve custom entities.
2786 #[allow(clippy::should_implement_trait)]
2787 pub fn from_str(source: &'de str) -> Self {
2788 Self::from_str_with_resolver(source, PredefinedEntityResolver)
2789 }
2790}
2791
2792impl<'de, E> Deserializer<'de, SliceReader<'de>, E>
2793where
2794 E: EntityResolver,
2795{
2796 /// Create new deserializer that will borrow data from the specified string
2797 /// and use specified entity resolver.
2798 pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
2799 let mut reader = Reader::from_str(source);
2800 let config = reader.config_mut();
2801 config.expand_empty_elements = true;
2802
2803 Self::new(
2804 SliceReader {
2805 reader,
2806 start_trimmer: StartTrimmer::default(),
2807 },
2808 entity_resolver,
2809 )
2810 }
2811}
2812
2813impl<'de, R> Deserializer<'de, IoReader<R>>
2814where
2815 R: BufRead,
2816{
2817 /// Create new deserializer that will copy data from the specified reader
2818 /// into internal buffer.
2819 ///
2820 /// If you already have a string use [`Self::from_str`] instead, because it
2821 /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2822 /// UTF-8, you can decode it first before using [`from_str`].
2823 ///
2824 /// Deserializer created with this method will not resolve custom entities.
2825 pub fn from_reader(reader: R) -> Self {
2826 Self::with_resolver(reader, PredefinedEntityResolver)
2827 }
2828}
2829
2830impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
2831where
2832 R: BufRead,
2833 E: EntityResolver,
2834{
2835 /// Create new deserializer that will copy data from the specified reader
2836 /// into internal buffer and use specified entity resolver.
2837 ///
2838 /// If you already have a string use [`Self::from_str`] instead, because it
2839 /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2840 /// UTF-8, you can decode it first before using [`from_str`].
2841 pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
2842 let mut reader = Reader::from_reader(reader);
2843 let config = reader.config_mut();
2844 config.expand_empty_elements = true;
2845
2846 Self::new(
2847 IoReader {
2848 reader,
2849 start_trimmer: StartTrimmer::default(),
2850 buf: Vec::new(),
2851 },
2852 entity_resolver,
2853 )
2854 }
2855}
2856
2857impl<'de, 'a, R, E> de::Deserializer<'de> for &'a mut Deserializer<'de, R, E>
2858where
2859 R: XmlRead<'de>,
2860 E: EntityResolver,
2861{
2862 type Error = DeError;
2863
2864 deserialize_primitives!();
2865
2866 fn deserialize_struct<V>(
2867 self,
2868 _name: &'static str,
2869 fields: &'static [&'static str],
2870 visitor: V,
2871 ) -> Result<V::Value, DeError>
2872 where
2873 V: Visitor<'de>,
2874 {
2875 match self.next()? {
2876 DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self, e, fields)?),
2877 // SAFETY: The reader is guaranteed that we don't have unmatched tags
2878 // If we here, then out deserializer has a bug
2879 DeEvent::End(e) => unreachable!("{:?}", e),
2880 // Deserializer methods are only hints, if deserializer could not satisfy
2881 // request, it should return the data that it has. It is responsibility
2882 // of a Visitor to return an error if it does not understand the data
2883 DeEvent::Text(e) => match e.text {
2884 Cow::Borrowed(s) => visitor.visit_borrowed_str(s),
2885 Cow::Owned(s) => visitor.visit_string(s),
2886 },
2887 DeEvent::Eof => Err(DeError::UnexpectedEof),
2888 }
2889 }
2890
2891 /// Unit represented in XML as a `xs:element` or text/CDATA content.
2892 /// Any content inside `xs:element` is ignored and skipped.
2893 ///
2894 /// Produces unit struct from any of following inputs:
2895 /// - any `<tag ...>...</tag>`
2896 /// - any `<tag .../>`
2897 /// - any consequent text / CDATA content (can consist of several parts
2898 /// delimited by comments and processing instructions)
2899 ///
2900 /// # Events handling
2901 ///
2902 /// |Event |XML |Handling
2903 /// |------------------|---------------------------|-------------------------------------------
2904 /// |[`DeEvent::Start`]|`<tag>...</tag>` |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event
2905 /// |[`DeEvent::End`] |`</tag>` |This is impossible situation, the method will panic if it happens
2906 /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored
2907 /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2908 fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
2909 where
2910 V: Visitor<'de>,
2911 {
2912 match self.next()? {
2913 DeEvent::Start(s) => {
2914 self.read_to_end(s.name())?;
2915 visitor.visit_unit()
2916 }
2917 DeEvent::Text(_) => visitor.visit_unit(),
2918 // SAFETY: The reader is guaranteed that we don't have unmatched tags
2919 // If we here, then out deserializer has a bug
2920 DeEvent::End(e) => unreachable!("{:?}", e),
2921 DeEvent::Eof => Err(DeError::UnexpectedEof),
2922 }
2923 }
2924
2925 /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
2926 /// with the same deserializer.
2927 fn deserialize_newtype_struct<V>(
2928 self,
2929 _name: &'static str,
2930 visitor: V,
2931 ) -> Result<V::Value, DeError>
2932 where
2933 V: Visitor<'de>,
2934 {
2935 visitor.visit_newtype_struct(self)
2936 }
2937
2938 fn deserialize_enum<V>(
2939 self,
2940 _name: &'static str,
2941 _variants: &'static [&'static str],
2942 visitor: V,
2943 ) -> Result<V::Value, DeError>
2944 where
2945 V: Visitor<'de>,
2946 {
2947 visitor.visit_enum(var::EnumAccess::new(self))
2948 }
2949
2950 fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
2951 where
2952 V: Visitor<'de>,
2953 {
2954 visitor.visit_seq(self)
2955 }
2956
2957 fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
2958 where
2959 V: Visitor<'de>,
2960 {
2961 match self.peek()? {
2962 DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
2963 DeEvent::Eof => visitor.visit_none(),
2964 _ => visitor.visit_some(self),
2965 }
2966 }
2967
2968 fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
2969 where
2970 V: Visitor<'de>,
2971 {
2972 match self.peek()? {
2973 DeEvent::Text(_) => self.deserialize_str(visitor),
2974 _ => self.deserialize_map(visitor),
2975 }
2976 }
2977}
2978
2979/// An accessor to sequence elements forming a value for top-level sequence of XML
2980/// elements.
2981///
2982/// Technically, multiple top-level elements violates XML rule of only one top-level
2983/// element, but we consider this as several concatenated XML documents.
2984impl<'de, 'a, R, E> SeqAccess<'de> for &'a mut Deserializer<'de, R, E>
2985where
2986 R: XmlRead<'de>,
2987 E: EntityResolver,
2988{
2989 type Error = DeError;
2990
2991 fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
2992 where
2993 T: DeserializeSeed<'de>,
2994 {
2995 match self.peek()? {
2996 DeEvent::Eof => {
2997 // We need to consume event in order to self.is_empty() worked
2998 self.next()?;
2999 Ok(None)
3000 }
3001
3002 // Start(tag), End(tag), Text
3003 _ => seed.deserialize(&mut **self).map(Some),
3004 }
3005 }
3006}
3007
3008////////////////////////////////////////////////////////////////////////////////////////////////////
3009
/// Helper struct that holds the state of the algorithm which converts raw
/// reader events into semi-trimmed payload events. The algorithm does not
/// depend on the way the events are read, so it is shared by all readers.
struct StartTrimmer {
    /// If `true`, leading whitespace will be removed from the next returned
    /// [`Event::Text`]. The flag is set to `true` after each [`Event::DocType`],
    /// [`Event::Start`], [`Event::End`] and [`Event::Eof`], and to `false` after
    /// each returned [`Event::Text`] and [`Event::CData`], so an [`Event::Text`]
    /// read right after text or CDATA is not trimmed. Skipped events leave the
    /// flag unchanged.
    trim_start: bool,
}
3020
3021impl StartTrimmer {
3022 /// Converts raw reader's event into a payload event.
3023 /// Returns `None`, if event should be skipped.
3024 #[inline(always)]
3025 fn trim<'a>(&mut self, event: Event<'a>) -> Option<PayloadEvent<'a>> {
3026 let (event, trim_next_event) = match event {
3027 Event::DocType(e) => (PayloadEvent::DocType(e), true),
3028 Event::Start(e) => (PayloadEvent::Start(e), true),
3029 Event::End(e) => (PayloadEvent::End(e), true),
3030 Event::Eof => (PayloadEvent::Eof, true),
3031
3032 // Do not trim next text event after Text or CDATA event
3033 Event::CData(e) => (PayloadEvent::CData(e), false),
3034 Event::Text(mut e) => {
3035 // If event is empty after trimming, skip it
3036 if self.trim_start && e.inplace_trim_start() {
3037 return None;
3038 }
3039 (PayloadEvent::Text(e), false)
3040 }
3041
3042 _ => return None,
3043 };
3044 self.trim_start = trim_next_event;
3045 Some(event)
3046 }
3047}
3048
3049impl Default for StartTrimmer {
3050 #[inline]
3051 fn default() -> Self {
3052 Self { trim_start: true }
3053 }
3054}
3055
3056////////////////////////////////////////////////////////////////////////////////////////////////////
3057
/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over `&[u8]`.
///
/// You do not need to implement this trait. It exists only to abstract over
/// the [borrowing](SliceReader) and [copying](IoReader) data sources, so the
/// deserializer code can be reused for both.
pub trait XmlRead<'i> {
    /// Returns the next payload event, which may borrow from the input.
    fn next(&mut self) -> Result<PayloadEvent<'i>, DeError>;

    /// Skips events until the end element with the specified `name` is found.
    /// Unlike `next()` it will not allocate when it cannot satisfy the lifetime.
    fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;

    /// Returns a copy of the reader's decoder, which is used to decode strings.
    fn decoder(&self) -> Decoder;
}
3075
/// XML input source that reads from a `std::io` input stream.
///
/// You cannot create it directly; it is created automatically when you call
/// [`Deserializer::from_reader`].
pub struct IoReader<R: BufRead> {
    // Underlying XML reader that produces raw events
    reader: Reader<R>,
    // State used to trim leading whitespace of text events
    start_trimmer: StartTrimmer,
    // Scratch buffer the reader stores event data in; cleared before each
    // event is read
    buf: Vec<u8>,
}
3085
impl<R: BufRead> IoReader<R> {
    /// Returns a reference to the underlying XML reader.
    ///
    /// This is useful, for example, to query the position at which an error
    /// has occurred:
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use serde::Deserialize;
    /// use std::io::Cursor;
    /// use quick_xml::de::Deserializer;
    /// use quick_xml::Reader;
    ///
    /// #[derive(Deserialize)]
    /// struct SomeStruct {
    ///     field1: String,
    ///     field2: String,
    /// }
    ///
    /// // Try to deserialize from broken XML
    /// let mut de = Deserializer::from_reader(Cursor::new(
    ///     "<SomeStruct><field1><field2></SomeStruct>"
    /// //   0                           ^= 28        ^= 41
    /// ));
    ///
    /// let err = SomeStruct::deserialize(&mut de);
    /// assert!(err.is_err());
    ///
    /// let reader: &Reader<Cursor<&str>> = de.get_ref().get_ref();
    ///
    /// assert_eq!(reader.error_position(), 28);
    /// assert_eq!(reader.buffer_position(), 41);
    /// ```
    pub const fn get_ref(&self) -> &Reader<R> {
        &self.reader
    }
}
3120
3121impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
3122 fn next(&mut self) -> Result<PayloadEvent<'static>, DeError> {
3123 loop {
3124 self.buf.clear();
3125
3126 let event = self.reader.read_event_into(&mut self.buf)?;
3127 if let Some(event) = self.start_trimmer.trim(event) {
3128 return Ok(event.into_owned());
3129 }
3130 }
3131 }
3132
3133 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3134 match self.reader.read_to_end_into(name, &mut self.buf) {
3135 Err(e) => Err(e.into()),
3136 Ok(_) => Ok(()),
3137 }
3138 }
3139
3140 fn decoder(&self) -> Decoder {
3141 self.reader.decoder()
3142 }
3143}
3144
/// XML input source that reads from a slice of bytes and can borrow from it.
///
/// You cannot create it directly; it is created automatically when you call
/// [`Deserializer::from_str`].
pub struct SliceReader<'de> {
    // Underlying XML reader that produces raw events borrowing from the input
    reader: Reader<&'de [u8]>,
    // State used to trim leading whitespace of text events
    start_trimmer: StartTrimmer,
}
3153
impl<'de> SliceReader<'de> {
    /// Returns a reference to the underlying XML reader.
    ///
    /// This is useful, for example, to query the position at which an error
    /// has occurred:
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use serde::Deserialize;
    /// use quick_xml::de::Deserializer;
    /// use quick_xml::Reader;
    ///
    /// #[derive(Deserialize)]
    /// struct SomeStruct {
    ///     field1: String,
    ///     field2: String,
    /// }
    ///
    /// // Try to deserialize from broken XML
    /// let mut de = Deserializer::from_str(
    ///     "<SomeStruct><field1><field2></SomeStruct>"
    /// //   0                           ^= 28        ^= 41
    /// );
    ///
    /// let err = SomeStruct::deserialize(&mut de);
    /// assert!(err.is_err());
    ///
    /// let reader: &Reader<&[u8]> = de.get_ref().get_ref();
    ///
    /// assert_eq!(reader.error_position(), 28);
    /// assert_eq!(reader.buffer_position(), 41);
    /// ```
    pub const fn get_ref(&self) -> &Reader<&'de [u8]> {
        &self.reader
    }
}
3187
3188impl<'de> XmlRead<'de> for SliceReader<'de> {
3189 fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
3190 loop {
3191 let event = self.reader.read_event()?;
3192 if let Some(event) = self.start_trimmer.trim(event) {
3193 return Ok(event);
3194 }
3195 }
3196 }
3197
3198 fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3199 match self.reader.read_to_end(name) {
3200 Err(e) => Err(e.into()),
3201 Ok(_) => Ok(()),
3202 }
3203 }
3204
3205 fn decoder(&self) -> Decoder {
3206 self.reader.decoder()
3207 }
3208}
3209
3210#[cfg(test)]
3211mod tests {
3212 use super::*;
3213 use crate::errors::IllFormedError;
3214 use pretty_assertions::assert_eq;
3215
/// Creates a borrowing deserializer over `source` for use in tests.
/// The source is printed with `dbg!` before the deserializer is created.
fn make_de<'de>(source: &'de str) -> Deserializer<'de, SliceReader<'de>> {
    dbg!(source);
    Deserializer::from_str(source)
}
3220
3221 #[cfg(feature = "overlapped-lists")]
3222 mod skip {
3223 use super::*;
3224 use crate::de::DeEvent::*;
3225 use crate::events::BytesEnd;
3226 use pretty_assertions::assert_eq;
3227
/// Checks that `peek()` and `next()` behave correctly after `skip()`
#[test]
fn read_and_peek() {
    let mut de = make_de(
        r#"
<root>
<inner>
text
<inner/>
</inner>
<next/>
<target/>
</root>
"#,
    );

    // Initial conditions - both buffers are empty
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, vec![]);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
    assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner")));

    // Mark that start_replay() should begin replay from this point
    let checkpoint = de.skip_checkpoint();
    assert_eq!(checkpoint, 0);

    // Should skip the first <inner> tree
    de.skip().unwrap();
    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("inner")),
            Text("text".into()),
            Start(BytesStart::new("inner")),
            End(BytesEnd::new("inner")),
            End(BytesEnd::new("inner")),
        ]
    );

    // Consume <next/>. Now the unconsumed XML looks like:
    //
    //   <inner>
    //     text
    //     <inner/>
    //   </inner>
    //   <target/>
    // </root>
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("next")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("next")));

    // We finished writing. The next call to `next()` should start replaying
    // these events:
    //
    //   <inner>
    //     text
    //     <inner/>
    //   </inner>
    //
    // and after that stream these events:
    //
    //   <target/>
    // </root>
    de.start_replay(checkpoint);
    assert_eq!(
        de.read,
        vec![
            Start(BytesStart::new("inner")),
            Text("text".into()),
            Start(BytesStart::new("inner")),
            End(BytesEnd::new("inner")),
            End(BytesEnd::new("inner")),
        ]
    );
    assert_eq!(de.write, vec![]);
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));

    // Mark that start_replay() should begin replay from this point
    let checkpoint = de.skip_checkpoint();
    assert_eq!(checkpoint, 0);

    // Skip the `$text` node and consume <inner/> after it
    de.skip().unwrap();
    assert_eq!(
        de.read,
        vec![
            Start(BytesStart::new("inner")),
            End(BytesEnd::new("inner")),
            End(BytesEnd::new("inner")),
        ]
    );
    assert_eq!(
        de.write,
        vec![
            // This comment is here to keep the same formatting of both arrays,
            // otherwise rustfmt suggests to put it on one line
            Text("text".into()),
        ]
    );

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));

    // We finished writing. The next call to `next()` should start replaying
    // these events:
    //
    //     text
    //   </inner>
    //
    // and after that stream these events:
    //
    //   <target/>
    // </root>
    de.start_replay(checkpoint);
    assert_eq!(
        de.read,
        vec![
            // This comment is here to keep the same formatting as the others,
            // otherwise rustfmt suggests to put it on one line
            Text("text".into()),
            End(BytesEnd::new("inner")),
        ]
    );
    assert_eq!(de.write, vec![]);
    assert_eq!(de.next().unwrap(), Text("text".into()));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("target")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
    assert_eq!(de.next().unwrap(), Eof);
}
3358
/// Checks that `read_to_end()` behaves correctly after `skip()`
#[test]
fn read_to_end() {
    let mut de = make_de(
        r#"
<root>
<skip>
text
<skip/>
</skip>
<target>
<target/>
</target>
</root>
"#,
    );

    // Initial conditions - both buffers are empty
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, vec![]);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

    // Mark that start_replay() should begin replay from this point
    let checkpoint = de.skip_checkpoint();
    assert_eq!(checkpoint, 0);

    // Skip the <skip> tree
    de.skip().unwrap();
    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skip")),
            Text("text".into()),
            Start(BytesStart::new("skip")),
            End(BytesEnd::new("skip")),
            End(BytesEnd::new("skip")),
        ]
    );

    // Drop all events that represent the <target> tree. Now the unconsumed
    // XML looks like:
    //
    //   <skip>
    //     text
    //     <skip/>
    //   </skip>
    // </root>
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
    de.read_to_end(QName(b"target")).unwrap();
    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skip")),
            Text("text".into()),
            Start(BytesStart::new("skip")),
            End(BytesEnd::new("skip")),
            End(BytesEnd::new("skip")),
        ]
    );

    // We finished writing. The next call to `next()` should start replaying
    // these events:
    //
    //   <skip>
    //     text
    //     <skip/>
    //   </skip>
    //
    // and after that stream these events:
    //
    // </root>
    de.start_replay(checkpoint);
    assert_eq!(
        de.read,
        vec![
            Start(BytesStart::new("skip")),
            Text("text".into()),
            Start(BytesStart::new("skip")),
            End(BytesEnd::new("skip")),
            End(BytesEnd::new("skip")),
        ]
    );
    assert_eq!(de.write, vec![]);

    // Dropping the replayed <skip> tree must leave only the end of <root>
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip")));
    de.read_to_end(QName(b"skip")).unwrap();

    assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
    assert_eq!(de.next().unwrap(), Eof);
}
3450
/// Checks that a replay started from a later checkpoint replays only the
/// events recorded after that checkpoint.
/// Test for https://github.com/tafia/quick-xml/issues/435
#[test]
fn partial_replay() {
    let mut de = make_de(
        r#"
<root>
<skipped-1/>
<skipped-2/>
<inner>
<skipped-3/>
<skipped-4/>
<target-2/>
</inner>
<target-1/>
</root>
"#,
    );

    // Initial conditions - both buffers are empty
    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, vec![]);

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

    // start_replay() should start replay from this point
    let checkpoint1 = de.skip_checkpoint();
    assert_eq!(checkpoint1, 0);

    // Should skip the first and second <skipped-N/> elements
    de.skip().unwrap(); // skipped-1
    de.skip().unwrap(); // skipped-2
    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    );

    ////////////////////////////////////////////////////////////////////////////////////////

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
    assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3")));
    assert_eq!(
        de.read,
        vec![
            // This comment is here to keep the same formatting of both arrays,
            // otherwise rustfmt suggests to put it on one line
            Start(BytesStart::new("skipped-3")),
        ]
    );
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    );

    // start_replay() should start replay from this point
    let checkpoint2 = de.skip_checkpoint();
    assert_eq!(checkpoint2, 4);

    // Should skip the third and fourth <skipped-N/> elements
    de.skip().unwrap(); // skipped-3
    de.skip().unwrap(); // skipped-4
    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            // checkpoint 1
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
            // checkpoint 2
            Start(BytesStart::new("skipped-3")),
            End(BytesEnd::new("skipped-3")),
            Start(BytesStart::new("skipped-4")),
            End(BytesEnd::new("skipped-4")),
        ]
    );
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2")));
    assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner")));
    assert_eq!(
        de.read,
        vec![
            // This comment is here to keep the same formatting of both arrays,
            // otherwise rustfmt suggests to put it on one line
            End(BytesEnd::new("inner")),
        ]
    );
    assert_eq!(
        de.write,
        vec![
            // checkpoint 1
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
            // checkpoint 2
            Start(BytesStart::new("skipped-3")),
            End(BytesEnd::new("skipped-3")),
            Start(BytesStart::new("skipped-4")),
            End(BytesEnd::new("skipped-4")),
        ]
    );

    // Start replaying events from checkpoint 2: only the events recorded
    // after it move to the read buffer, in front of the already peeked event
    de.start_replay(checkpoint2);
    assert_eq!(
        de.read,
        vec![
            Start(BytesStart::new("skipped-3")),
            End(BytesEnd::new("skipped-3")),
            Start(BytesStart::new("skipped-4")),
            End(BytesEnd::new("skipped-4")),
            End(BytesEnd::new("inner")),
        ]
    );
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    );

    // Replayed events
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-3")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-3")));
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-4")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-4")));

    assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    );

    ////////////////////////////////////////////////////////////////////////////////////////

    // New events
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1")));

    assert_eq!(de.read, vec![]);
    assert_eq!(
        de.write,
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    );

    // Start replaying events from checkpoint 1
    de.start_replay(checkpoint1);
    assert_eq!(
        de.read,
        vec![
            Start(BytesStart::new("skipped-1")),
            End(BytesEnd::new("skipped-1")),
            Start(BytesStart::new("skipped-2")),
            End(BytesEnd::new("skipped-2")),
        ]
    );
    assert_eq!(de.write, vec![]);

    // Replayed events
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-1")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-1")));
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-2")));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-2")));

    assert_eq!(de.read, vec![]);
    assert_eq!(de.write, vec![]);

    // New events
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
    assert_eq!(de.next().unwrap(), Eof);
}
3649
/// Checks that limiting the size of the event buffer works correctly:
/// deserialization must fail with `TooManyEvents` carrying the configured limit
#[test]
fn limit() {
    use serde::Deserialize;

    #[derive(Debug, Deserialize)]
    #[allow(unused)]
    struct List {
        item: Vec<()>,
    }

    let mut de = make_de(
        r#"
<any-name>
<item/>
<another-item>
<some-element>with text</some-element>
<yet-another-element/>
</another-item>
<item/>
<item/>
</any-name>
"#,
    );
    // Limit the event buffer to 3 events
    de.event_buffer_size(NonZeroUsize::new(3));

    match List::deserialize(&mut de) {
        Err(DeError::TooManyEvents(count)) => assert_eq!(count.get(), 3),
        e => panic!("Expected `Err(TooManyEvents(3))`, but got `{:?}`", e),
    }
}
3681
/// Without handling of Eof in `skip` this test failed with a memory allocation
/// error. The Eof event must be buffered like any other skipped event.
#[test]
fn invalid_xml() {
    use crate::de::DeEvent::*;

    // The root element is never closed
    let mut de = make_de("<root>");

    // Cache all events
    let checkpoint = de.skip_checkpoint();
    de.skip().unwrap();
    de.start_replay(checkpoint);
    assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]);
}
3695 }
3696
3697 mod read_to_end {
3698 use super::*;
3699 use crate::de::DeEvent::*;
3700 use pretty_assertions::assert_eq;
3701
/// Checks that `read_to_end()` skips nested elements with the same name, text
/// content and self-closed elements, and that reading continues normally
/// after it.
#[test]
fn complex() {
    let mut de = make_de(
        r#"
<root>
<tag a="1"><tag>text</tag>content</tag>
<tag a="2"><![CDATA[cdata content]]></tag>
<self-closed/>
</root>
"#,
    );

    assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

    // The first <tag> contains a nested <tag> and text; all of it is skipped
    assert_eq!(
        de.next().unwrap(),
        Start(BytesStart::from_content(r#"tag a="1""#, 3))
    );
    assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());

    // The second <tag> is read event by event
    assert_eq!(
        de.next().unwrap(),
        Start(BytesStart::from_content(r#"tag a="2""#, 3))
    );
    assert_eq!(de.next().unwrap(), Text("cdata content".into()));
    assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));

    // A self-closed element can be skipped as well
    assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
    assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());

    assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
    assert_eq!(de.next().unwrap(), Eof);
}
3735
/// `read_to_end()` must report a missing end tag when the outer element is
/// never closed and the next event is a peeked start of a nested element.
#[test]
fn invalid_xml1() {
    let mut de = make_de("<tag><tag></tag>");

    let outer = de.next().unwrap();
    assert_eq!(outer, Start(BytesStart::new("tag")));
    // Put the nested start event into the lookahead
    assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag")));

    let skipped = de.read_to_end(QName(b"tag"));
    match skipped {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
            assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
        }
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    }

    let last = de.next().unwrap();
    assert_eq!(last, Eof);
}
3754
/// `read_to_end()` must report a missing end tag when the outer element is
/// never closed and the next event is a peeked (empty) text.
#[test]
fn invalid_xml2() {
    let mut de = make_de("<tag><![CDATA[]]><tag></tag>");

    let outer = de.next().unwrap();
    assert_eq!(outer, Start(BytesStart::new("tag")));
    // Put the empty text produced by the CDATA section into the lookahead
    assert_eq!(de.peek().unwrap(), &Text("".into()));

    let skipped = de.read_to_end(QName(b"tag"));
    match skipped {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
            assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
        }
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    }

    let last = de.next().unwrap();
    assert_eq!(last, Eof);
}
3773 }
3774
/// Feeds the same document to the buffered (`IoReader`) and the borrowing
/// (`SliceReader`) readers and checks that they yield equal events
/// until both of them report `Eof`.
#[test]
fn borrowing_reader_parity() {
    let xml = r#"
        <item name="hello" source="world.rs">Some text</item>
        <item2/>
        <item3 value="world" />
    "#;

    let mut io_reader = IoReader {
        reader: Reader::from_reader(xml.as_bytes()),
        start_trimmer: StartTrimmer::default(),
        buf: Vec::new(),
    };
    let mut slice_reader = SliceReader {
        reader: Reader::from_str(xml),
        start_trimmer: StartTrimmer::default(),
    };

    loop {
        let from_io = io_reader.next().unwrap();
        let from_slice = slice_reader.next().unwrap();

        // Stop only when both readers are exhausted; if just one of them
        // is at `Eof`, the comparison below fails the test
        let both_done = matches!(
            (&from_io, &from_slice),
            (PayloadEvent::Eof, PayloadEvent::Eof)
        );
        if both_done {
            break;
        }

        assert_eq!(from_io, from_slice);
    }
}
3804
/// Checks the exact list of events produced by `SliceReader` when
/// `expand_empty_elements` is switched on: every self-closed element
/// is expected as a `Start` + `End` pair.
#[test]
fn borrowing_reader_events() {
    use crate::de::PayloadEvent::*;

    let xml = r#"
        <item name="hello" source="world.rs">Some text</item>
        <item2></item2>
        <item3/>
        <item4 value="world" />
    "#;

    let mut reader = SliceReader {
        reader: Reader::from_str(xml),
        start_trimmer: StartTrimmer::default(),
    };
    reader.reader.config_mut().expand_empty_elements = true;

    // Drain the reader; the terminating `Eof` itself is not recorded
    let mut events = Vec::new();
    loop {
        match reader.next().unwrap() {
            PayloadEvent::Eof => break,
            event => events.push(event),
        }
    }

    let expected = vec![
        Start(BytesStart::from_content(
            r#"item name="hello" source="world.rs""#,
            4,
        )),
        Text(BytesText::from_escaped("Some text")),
        End(BytesEnd::new("item")),
        Start(BytesStart::from_content("item2", 5)),
        End(BytesEnd::new("item2")),
        Start(BytesStart::from_content("item3", 5)),
        End(BytesEnd::new("item3")),
        Start(BytesStart::from_content(r#"item4 value="world" "#, 5)),
        End(BytesEnd::new("item4")),
    ];
    assert_eq!(events, expected)
}
3852
/// Ensures that [`Deserializer::read_string()`] can never get an `End` event,
/// because the parser reports an error earlier.
#[test]
fn read_string() {
    // A lone end tag is rejected as unmatched
    match from_str::<String>(r#"</root>"#) {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
            assert_eq!(cause, IllFormedError::UnmatchedEndTag("root".into()));
        }
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    }

    // An empty element deserializes into an empty string
    let s: String = from_str(r#"<root></root>"#).unwrap();
    assert_eq!(s, "");

    // An end tag with a different name is rejected as mismatched
    match from_str::<String>(r#"<root></other>"#) {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => assert_eq!(
            cause,
            IllFormedError::MismatchedEndTag {
                expected: "root".into(),
                found: "other".into(),
            }
        ),
        x => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            x
        ),
    }
}
3881
3882 /// Tests for https://github.com/tafia/quick-xml/issues/474.
3883 ///
3884 /// That tests ensures that comments and processed instructions is ignored
3885 /// and can split one logical string in pieces.
3886 mod merge_text {
3887 use super::*;
3888 use pretty_assertions::assert_eq;
3889
3890 #[test]
3891 fn text() {
3892 let mut de = make_de("text");
3893 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
3894 }
3895
3896 #[test]
3897 fn cdata() {
3898 let mut de = make_de("<![CDATA[cdata]]>");
3899 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata".into()));
3900 }
3901
3902 #[test]
3903 fn text_and_cdata() {
3904 let mut de = make_de("text and <![CDATA[cdata]]>");
3905 assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata".into()));
3906 }
3907
3908 #[test]
3909 fn text_and_empty_cdata() {
3910 let mut de = make_de("text and <![CDATA[]]>");
3911 assert_eq!(de.next().unwrap(), DeEvent::Text("text and ".into()));
3912 }
3913
3914 #[test]
3915 fn cdata_and_text() {
3916 let mut de = make_de("<![CDATA[cdata]]> and text");
3917 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text".into()));
3918 }
3919
3920 #[test]
3921 fn empty_cdata_and_text() {
3922 let mut de = make_de("<![CDATA[]]> and text");
3923 assert_eq!(de.next().unwrap(), DeEvent::Text(" and text".into()));
3924 }
3925
3926 #[test]
3927 fn cdata_and_cdata() {
3928 let mut de = make_de(
3929 "\
3930 <![CDATA[cdata]]]]>\
3931 <![CDATA[>cdata]]>\
3932 ",
3933 );
3934 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3935 }
3936
3937 mod comment_between {
3938 use super::*;
3939 use pretty_assertions::assert_eq;
3940
3941 #[test]
3942 fn text() {
3943 let mut de = make_de(
3944 "\
3945 text \
3946 <!--comment 1--><!--comment 2--> \
3947 text\
3948 ",
3949 );
3950 assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into()));
3951 }
3952
3953 #[test]
3954 fn cdata() {
3955 let mut de = make_de(
3956 "\
3957 <![CDATA[cdata]]]]>\
3958 <!--comment 1--><!--comment 2-->\
3959 <![CDATA[>cdata]]>\
3960 ",
3961 );
3962 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3963 }
3964
3965 #[test]
3966 fn text_and_cdata() {
3967 let mut de = make_de(
3968 "\
3969 text \
3970 <!--comment 1--><!--comment 2-->\
3971 <![CDATA[ cdata]]>\
3972 ",
3973 );
3974 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into()));
3975 }
3976
3977 #[test]
3978 fn text_and_empty_cdata() {
3979 let mut de = make_de(
3980 "\
3981 text \
3982 <!--comment 1--><!--comment 2-->\
3983 <![CDATA[]]>\
3984 ",
3985 );
3986 assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
3987 }
3988
3989 #[test]
3990 fn cdata_and_text() {
3991 let mut de = make_de(
3992 "\
3993 <![CDATA[cdata ]]>\
3994 <!--comment 1--><!--comment 2--> \
3995 text \
3996 ",
3997 );
3998 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text".into()));
3999 }
4000
4001 #[test]
4002 fn empty_cdata_and_text() {
4003 let mut de = make_de(
4004 "\
4005 <![CDATA[]]>\
4006 <!--comment 1--><!--comment 2--> \
4007 text \
4008 ",
4009 );
4010 assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
4011 }
4012
4013 #[test]
4014 fn cdata_and_cdata() {
4015 let mut de = make_de(
4016 "\
4017 <![CDATA[cdata]]]>\
4018 <!--comment 1--><!--comment 2-->\
4019 <![CDATA[]>cdata]]>\
4020 ",
4021 );
4022 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4023 }
4024 }
4025
4026 mod pi_between {
4027 use super::*;
4028 use pretty_assertions::assert_eq;
4029
4030 #[test]
4031 fn text() {
4032 let mut de = make_de(
4033 "\
4034 text \
4035 <?pi 1?><?pi 2?> \
4036 text\
4037 ",
4038 );
4039 assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into()));
4040 }
4041
4042 #[test]
4043 fn cdata() {
4044 let mut de = make_de(
4045 "\
4046 <![CDATA[cdata]]]]>\
4047 <?pi 1?><?pi 2?>\
4048 <![CDATA[>cdata]]>\
4049 ",
4050 );
4051 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4052 }
4053
4054 #[test]
4055 fn text_and_cdata() {
4056 let mut de = make_de(
4057 "\
4058 text \
4059 <?pi 1?><?pi 2?>\
4060 <![CDATA[ cdata]]>\
4061 ",
4062 );
4063 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into()));
4064 }
4065
4066 #[test]
4067 fn text_and_empty_cdata() {
4068 let mut de = make_de(
4069 "\
4070 text \
4071 <?pi 1?><?pi 2?>\
4072 <![CDATA[]]>\
4073 ",
4074 );
4075 assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
4076 }
4077
4078 #[test]
4079 fn cdata_and_text() {
4080 let mut de = make_de(
4081 "\
4082 <![CDATA[cdata ]]>\
4083 <?pi 1?><?pi 2?> \
4084 text \
4085 ",
4086 );
4087 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text".into()));
4088 }
4089
4090 #[test]
4091 fn empty_cdata_and_text() {
4092 let mut de = make_de(
4093 "\
4094 <![CDATA[]]>\
4095 <?pi 1?><?pi 2?> \
4096 text \
4097 ",
4098 );
4099 assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
4100 }
4101
4102 #[test]
4103 fn cdata_and_cdata() {
4104 let mut de = make_de(
4105 "\
4106 <![CDATA[cdata]]]>\
4107 <?pi 1?><?pi 2?>\
4108 <![CDATA[]>cdata]]>\
4109 ",
4110 );
4111 assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4112 }
4113 }
4114 }
4115
4116 /// Tests for https://github.com/tafia/quick-xml/issues/474.
4117 ///
/// These tests ensure that any combination of payload data is processed
/// as expected.
4120 mod triples {
4121 use super::*;
4122 use pretty_assertions::assert_eq;
4123
4124 mod start {
4125 use super::*;
4126
4127 /// <tag1><tag2>...
4128 mod start {
4129 use super::*;
4130 use pretty_assertions::assert_eq;
4131
4132 #[test]
4133 fn start() {
4134 let mut de = make_de("<tag1><tag2><tag3>");
4135 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4136 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4137 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3")));
4138 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4139 }
4140
4141 /// Not matching end tag will result to error
4142 #[test]
4143 fn end() {
4144 let mut de = make_de("<tag1><tag2></tag2>");
4145 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4146 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4147 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2")));
4148 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4149 }
4150
4151 #[test]
4152 fn text() {
4153 let mut de = make_de("<tag1><tag2> text ");
4154 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4155 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4156 // Text is trimmed from both sides
4157 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4158 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4159 }
4160
4161 #[test]
4162 fn cdata() {
4163 let mut de = make_de("<tag1><tag2><![CDATA[ cdata ]]>");
4164 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4165 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4166 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4167 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4168 }
4169
4170 #[test]
4171 fn eof() {
4172 let mut de = make_de("<tag1><tag2>");
4173 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4174 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4175 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4176 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4177 }
4178 }
4179
4180 /// <tag></tag>...
4181 mod end {
4182 use super::*;
4183 use pretty_assertions::assert_eq;
4184
4185 #[test]
4186 fn start() {
4187 let mut de = make_de("<tag></tag><tag2>");
4188 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4189 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4190 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4191 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4192 }
4193
4194 #[test]
4195 fn end() {
4196 let mut de = make_de("<tag></tag></tag2>");
4197 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4198 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4199 match de.next() {
4200 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4201 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag2".into()));
4202 }
4203 x => panic!(
4204 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4205 x
4206 ),
4207 }
4208 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4209 }
4210
4211 #[test]
4212 fn text() {
4213 let mut de = make_de("<tag></tag> text ");
4214 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4215 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4216 // Text is trimmed from both sides
4217 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4218 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4219 }
4220
4221 #[test]
4222 fn cdata() {
4223 let mut de = make_de("<tag></tag><![CDATA[ cdata ]]>");
4224 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4225 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4226 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4227 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4228 }
4229
4230 #[test]
4231 fn eof() {
4232 let mut de = make_de("<tag></tag>");
4233 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4234 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4235 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4236 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4237 }
4238 }
4239
4240 /// <tag> text ...
4241 mod text {
4242 use super::*;
4243 use pretty_assertions::assert_eq;
4244
4245 #[test]
4246 fn start() {
4247 let mut de = make_de("<tag> text <tag2>");
4248 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4249 // Text is trimmed from both sides
4250 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4251 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4252 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4253 }
4254
4255 #[test]
4256 fn end() {
4257 let mut de = make_de("<tag> text </tag>");
4258 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4259 // Text is trimmed from both sides
4260 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4261 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4262 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4263 }
4264
4265 // start::text::text has no difference from start::text
4266
4267 #[test]
4268 fn cdata() {
4269 let mut de = make_de("<tag> text <![CDATA[ cdata ]]>");
4270 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4271 // Text is trimmed from the start
4272 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4273 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4274 }
4275
4276 #[test]
4277 fn eof() {
4278 let mut de = make_de("<tag> text ");
4279 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4280 // Text is trimmed from both sides
4281 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4282 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4283 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4284 }
4285 }
4286
4287 /// <tag><![CDATA[ cdata ]]>...
4288 mod cdata {
4289 use super::*;
4290 use pretty_assertions::assert_eq;
4291
4292 #[test]
4293 fn start() {
4294 let mut de = make_de("<tag><![CDATA[ cdata ]]><tag2>");
4295 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4296 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4297 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4298 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4299 }
4300
4301 #[test]
4302 fn end() {
4303 let mut de = make_de("<tag><![CDATA[ cdata ]]></tag>");
4304 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4305 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4306 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4307 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4308 }
4309
4310 #[test]
4311 fn text() {
4312 let mut de = make_de("<tag><![CDATA[ cdata ]]> text ");
4313 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4314 // Text is trimmed from the end
4315 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4316 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4317 }
4318
4319 #[test]
4320 fn cdata() {
4321 let mut de = make_de("<tag><![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4322 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4323 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4324 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4325 }
4326
4327 #[test]
4328 fn eof() {
4329 let mut de = make_de("<tag><![CDATA[ cdata ]]>");
4330 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4331 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4332 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4333 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4334 }
4335 }
4336 }
4337
/// A document that begins with an `End` event is always rejected:
/// the very first `next()` reports an unmatched end tag
#[test]
fn end() {
    let mut de = make_de("</tag>");

    // Extract the cause first, then compare it outside of the `match`
    let cause = match de.next() {
        Err(DeError::InvalidXml(Error::IllFormed(cause))) => cause,
        other => panic!(
            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
            other
        ),
    };
    assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));

    // After the error only `Eof` is left
    assert_eq!(de.next().unwrap(), DeEvent::Eof);
}
4353
4354 mod text {
4355 use super::*;
4356 use pretty_assertions::assert_eq;
4357
4358 mod start {
4359 use super::*;
4360 use pretty_assertions::assert_eq;
4361
4362 #[test]
4363 fn start() {
4364 let mut de = make_de(" text <tag1><tag2>");
4365 // Text is trimmed from both sides
4366 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4367 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4368 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4369 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4370 }
4371
4372 /// Not matching end tag will result in error
4373 #[test]
4374 fn end() {
4375 let mut de = make_de(" text <tag></tag>");
4376 // Text is trimmed from both sides
4377 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4378 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4379 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4380 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4381 }
4382
4383 #[test]
4384 fn text() {
4385 let mut de = make_de(" text <tag> text2 ");
4386 // Text is trimmed from both sides
4387 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4388 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4389 // Text is trimmed from both sides
4390 assert_eq!(de.next().unwrap(), DeEvent::Text("text2".into()));
4391 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4392 }
4393
4394 #[test]
4395 fn cdata() {
4396 let mut de = make_de(" text <tag><![CDATA[ cdata ]]>");
4397 // Text is trimmed from both sides
4398 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4399 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4400 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4401 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4402 }
4403
4404 #[test]
4405 fn eof() {
4406 // Text is trimmed from both sides
4407 let mut de = make_de(" text <tag>");
4408 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4409 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4410 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4411 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4412 }
4413 }
4414
4415 /// End event without corresponding start event will always generate an error
4416 #[test]
4417 fn end() {
4418 let mut de = make_de(" text </tag>");
4419 // Text is trimmed from both sides
4420 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4421 match de.next() {
4422 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4423 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4424 }
4425 x => panic!(
4426 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4427 x
4428 ),
4429 }
4430 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4431 }
4432
4433 // text::text::something is equivalent to text::something
4434
4435 mod cdata {
4436 use super::*;
4437 use pretty_assertions::assert_eq;
4438
4439 #[test]
4440 fn start() {
4441 let mut de = make_de(" text <![CDATA[ cdata ]]><tag>");
4442 // Text is trimmed from the start
4443 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4444 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4445 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4446 }
4447
4448 #[test]
4449 fn end() {
4450 let mut de = make_de(" text <![CDATA[ cdata ]]></tag>");
4451 // Text is trimmed from the start
4452 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4453 match de.next() {
4454 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4455 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4456 }
4457 x => panic!(
4458 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4459 x
4460 ),
4461 }
4462 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4463 }
4464
4465 #[test]
4466 fn text() {
4467 let mut de = make_de(" text <![CDATA[ cdata ]]> text2 ");
4468 // Text is trimmed from the start and from the end
4469 assert_eq!(
4470 de.next().unwrap(),
4471 DeEvent::Text("text cdata text2".into())
4472 );
4473 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4474 }
4475
4476 #[test]
4477 fn cdata() {
4478 let mut de = make_de(" text <![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4479 // Text is trimmed from the start
4480 assert_eq!(
4481 de.next().unwrap(),
4482 DeEvent::Text("text cdata cdata2 ".into())
4483 );
4484 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4485 }
4486
4487 #[test]
4488 fn eof() {
4489 let mut de = make_de(" text <![CDATA[ cdata ]]>");
4490 // Text is trimmed from the start
4491 assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
4492 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4493 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4494 }
4495 }
4496 }
4497
4498 mod cdata {
4499 use super::*;
4500 use pretty_assertions::assert_eq;
4501
4502 mod start {
4503 use super::*;
4504 use pretty_assertions::assert_eq;
4505
4506 #[test]
4507 fn start() {
4508 let mut de = make_de("<![CDATA[ cdata ]]><tag1><tag2>");
4509 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4510 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4511 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4512 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4513 }
4514
4515 /// Not matching end tag will result in error
4516 #[test]
4517 fn end() {
4518 let mut de = make_de("<![CDATA[ cdata ]]><tag></tag>");
4519 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4520 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4521 assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4522 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4523 }
4524
4525 #[test]
4526 fn text() {
4527 let mut de = make_de("<![CDATA[ cdata ]]><tag> text ");
4528 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4529 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4530 // Text is trimmed from both sides
4531 assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4532 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4533 }
4534
4535 #[test]
4536 fn cdata() {
4537 let mut de = make_de("<![CDATA[ cdata ]]><tag><![CDATA[ cdata2 ]]>");
4538 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4539 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4540 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 ".into()));
4541 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4542 }
4543
4544 #[test]
4545 fn eof() {
4546 let mut de = make_de("<![CDATA[ cdata ]]><tag>");
4547 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4548 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4549 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4550 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4551 }
4552 }
4553
4554 /// End event without corresponding start event will always generate an error
4555 #[test]
4556 fn end() {
4557 let mut de = make_de("<![CDATA[ cdata ]]></tag>");
4558 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4559 match de.next() {
4560 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4561 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4562 }
4563 x => panic!(
4564 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4565 x
4566 ),
4567 }
4568 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4569 }
4570
4571 mod text {
4572 use super::*;
4573 use pretty_assertions::assert_eq;
4574
4575 #[test]
4576 fn start() {
4577 let mut de = make_de("<![CDATA[ cdata ]]> text <tag>");
4578 // Text is trimmed from the end
4579 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4580 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4581 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4582 }
4583
4584 #[test]
4585 fn end() {
4586 let mut de = make_de("<![CDATA[ cdata ]]> text </tag>");
4587 // Text is trimmed from the end
4588 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4589 match de.next() {
4590 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4591 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4592 }
4593 x => panic!(
4594 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4595 x
4596 ),
4597 }
4598 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4599 }
4600
4601 // cdata::text::text is equivalent to cdata::text
4602
4603 #[test]
4604 fn cdata() {
4605 let mut de = make_de("<![CDATA[ cdata ]]> text <![CDATA[ cdata2 ]]>");
4606 assert_eq!(
4607 de.next().unwrap(),
4608 DeEvent::Text(" cdata text cdata2 ".into())
4609 );
4610 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4611 }
4612
4613 #[test]
4614 fn eof() {
4615 let mut de = make_de("<![CDATA[ cdata ]]> text ");
4616 // Text is trimmed from the end
4617 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
4618 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4619 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4620 }
4621 }
4622
4623 mod cdata {
4624 use super::*;
4625 use pretty_assertions::assert_eq;
4626
4627 #[test]
4628 fn start() {
4629 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><tag>");
4630 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4631 assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4632 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4633 }
4634
4635 #[test]
4636 fn end() {
4637 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]></tag>");
4638 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4639 match de.next() {
4640 Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4641 assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4642 }
4643 x => panic!(
4644 "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4645 x
4646 ),
4647 }
4648 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4649 }
4650
4651 #[test]
4652 fn text() {
4653 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]> text ");
4654 // Text is trimmed from the end
4655 assert_eq!(
4656 de.next().unwrap(),
4657 DeEvent::Text(" cdata cdata2 text".into())
4658 );
4659 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4660 }
4661
4662 #[test]
4663 fn cdata() {
4664 let mut de =
4665 make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><![CDATA[ cdata3 ]]>");
4666 assert_eq!(
4667 de.next().unwrap(),
4668 DeEvent::Text(" cdata cdata2 cdata3 ".into())
4669 );
4670 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4671 }
4672
4673 #[test]
4674 fn eof() {
4675 let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4676 assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
4677 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4678 assert_eq!(de.next().unwrap(), DeEvent::Eof);
4679 }
4680 }
4681 }
4682 }
4683}