quick_xml/de/
mod.rs

1//! Serde `Deserializer` module.
2//!
3//! Due to the complexity of the XML standard and the fact that Serde was developed
4//! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to
5//! the fact that some XML concepts are inexpressible in terms of Serde derives
6//! and may require manual deserialization.
7//!
8//! The most notable restriction is the ability to distinguish between _elements_
9//! and _attributes_, as no other format used by serde has such a concept.
10//!
11//! Due to that the mapping is performed in a best effort manner.
12//!
13//!
14//!
15//! Table of Contents
16//! =================
17//! - [Mapping XML to Rust types](#mapping-xml-to-rust-types)
18//!   - [Basics](#basics)
19//!   - [Optional attributes and elements](#optional-attributes-and-elements)
20//!   - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type)
21//!   - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types)
22//! - [Generate Rust types from XML](#generate-rust-types-from-xml)
23//! - [Composition Rules](#composition-rules)
24//! - [Enum Representations](#enum-representations)
25//!   - [Normal enum variant](#normal-enum-variant)
26//!   - [`$text` enum variant](#text-enum-variant)
27//! - [Difference between `$text` and `$value` special names](#difference-between-text-and-value-special-names)
28//!   - [`$text`](#text)
29//!   - [`$value`](#value)
30//!     - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives)
31//!     - [Structs and sequences of structs](#structs-and-sequences-of-structs)
32//!     - [Enums and sequences of enums](#enums-and-sequences-of-enums)
33//! - [Frequently Used Patterns](#frequently-used-patterns)
34//!   - [`<element>` lists](#element-lists)
35//!   - [Overlapped (Out-of-Order) Elements](#overlapped-out-of-order-elements)
36//!   - [Internally Tagged Enums](#internally-tagged-enums)
37//!
38//!
39//!
40//! Mapping XML to Rust types
41//! =========================
42//!
43//! Type names are never considered when deserializing, so you can name your
44//! types as you wish. Other general rules:
45//! - `struct` field name could be represented in XML only as an attribute name
46//!   or an element name;
47//! - `enum` variant name could be represented in XML only as an attribute name
48//!   or an element name;
49//! - the unit struct, unit type `()` and unit enum variant can be deserialized
50//!   from any valid XML content:
51//!   - attribute and element names;
52//!   - attribute and element values;
53//!   - text or CDATA content (including mixed text and CDATA content).
54//!
55//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
56//!
57//! NOTE: All tests are marked with an `ignore` option, even though they do
58//! compile. This is because rustdoc marks such blocks with an information
59//! icon unlike `no_run` blocks.
60//!
61//! </div>
62//!
63//! <table>
64//! <thead>
65//! <tr><th colspan="2">
66//!
67//! ## Basics
68//!
69//! </th></tr>
70//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
71//! </thead>
72//! <tbody style="vertical-align:top;">
73//! <tr>
74//! <td>
75//! Content of attributes and text / CDATA content of elements (including mixed
76//! text and CDATA content):
77//!
78//! ```xml
79//! <... ...="content" />
80//! ```
81//! ```xml
82//! <...>content</...>
83//! ```
84//! ```xml
85//! <...><![CDATA[content]]></...>
86//! ```
87//! ```xml
88//! <...>text<![CDATA[cdata]]>text</...>
89//! ```
90//! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case.
91//! </td>
92//! <td>
93//!
94//! You can use any type that can be deserialized from an `&str`, for example:
95//! - [`String`] and [`&str`]
96//! - [`Cow<str>`]
97//! - [`u32`], [`f32`] and other numeric types
98//! - `enum`s, like
99//!   ```
100//!   # use pretty_assertions::assert_eq;
101//!   # use serde::Deserialize;
102//!   # #[derive(Debug, PartialEq)]
103//!   #[derive(Deserialize)]
104//!   enum Language {
105//!     Rust,
106//!     Cpp,
107//!     #[serde(other)]
108//!     Other,
109//!   }
110//!   # #[derive(Debug, PartialEq, Deserialize)]
111//!   # struct X { #[serde(rename = "$text")] x: Language }
112//!   # assert_eq!(X { x: Language::Rust  }, quick_xml::de::from_str("<x>Rust</x>").unwrap());
113//!   # assert_eq!(X { x: Language::Cpp   }, quick_xml::de::from_str("<x>C<![CDATA[p]]>p</x>").unwrap());
114//!   # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("<x><![CDATA[other]]></x>").unwrap());
115//!   ```
116//!
117//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
118//!
119//! NOTE: deserialization to non-owned types (i.e. borrow from the input),
120//! such as `&str`, is possible only if you parse a document in the UTF-8
121//! encoding, the content does not contain entity references such as `&amp;`
122//! or character references such as `&#xD;`, and the text content is represented
123//! by a single [text] or [CDATA] event.
124//! </div>
125//! <!-- TODO: document an error type returned -->
126//!
127//! [text]: Event::Text
128//! [CDATA]: Event::CData
129//! </td>
130//! </tr>
131//! <!-- 2 ===================================================================================== -->
132//! <tr>
133//! <td>
134//!
135//! Content of attributes and text / CDATA content of elements (including mixed
136//! text and CDATA content), which represents a space-delimited list, as
137//! specified in the XML Schema specification for [`xs:list`] `simpleType`:
138//!
139//! ```xml
140//! <... ...="element1 element2 ..." />
141//! ```
142//! ```xml
143//! <...>
144//!   element1
145//!   element2
146//!   ...
147//! </...>
148//! ```
149//! ```xml
150//! <...><![CDATA[
151//!   element1
152//!   element2
153//!   ...
154//! ]]></...>
155//! ```
156//!
157//! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes
158//! </td>
159//! <td>
160//!
161//! Use any type that is deserialized using a [`deserialize_seq()`] call, for example:
162//!
163//! ```
164//! type List = Vec<u32>;
165//! ```
166//!
167//! See the next row to learn where in your struct definition you should
168//! use that type.
169//!
170//! According to the XML Schema specification, the delimiter between elements is
171//! one or more space (`' '`, `'\r'`, `'\n'`, and `'\t'`) characters.
172//!
173//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
174//!
175//! NOTE: according to the XML Schema restrictions, you cannot escape those
176//! white-space characters, so list elements will _never_ contain them.
177//! In practice you will usually use `xs:list`s for lists of numbers or enumerated
178//! values which look like identifiers in many languages, for example, `item`,
179//! `some_item` or `some-item`, so that shouldn't be a problem.
180//!
181//! NOTE: according to the XML Schema specification, list elements can be
182//! delimited only by spaces. Other delimiters (for example, commas) are not
183//! allowed.
184//!
185//! </div>
186//!
187//! [`deserialize_seq()`]: de::Deserializer::deserialize_seq
188//! </td>
189//! </tr>
190//! <!-- 3 ===================================================================================== -->
191//! <tr>
192//! <td>
193//! A typical XML with attributes. The root tag name does not matter:
194//!
195//! ```xml
196//! <any-tag one="..." two="..."/>
197//! ```
198//! </td>
199//! <td>
200//!
201//! A structure where each XML attribute is mapped to a field with a name
202//! starting with `@`. Because Rust identifiers do not permit the `@` character,
203//! you should use the `#[serde(rename = "@...")]` attribute to rename it.
204//! The name of the struct itself does not matter:
205//!
206//! ```
207//! # use serde::Deserialize;
208//! # type T = ();
209//! # type U = ();
210//! // Get both attributes
211//! # #[derive(Debug, PartialEq)]
212//! #[derive(Deserialize)]
213//! struct AnyName {
214//!   #[serde(rename = "@one")]
215//!   one: T,
216//!
217//!   #[serde(rename = "@two")]
218//!   two: U,
219//! }
220//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
221//! ```
222//! ```
223//! # use serde::Deserialize;
224//! # type T = ();
225//! // Get only the one attribute, ignore the other
226//! # #[derive(Debug, PartialEq)]
227//! #[derive(Deserialize)]
228//! struct AnyName {
229//!   #[serde(rename = "@one")]
230//!   one: T,
231//! }
232//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
233//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."/>"#).unwrap();
234//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
235//! ```
236//! ```
237//! # use serde::Deserialize;
238//! // Ignore all attributes
239//! // You can also use the `()` type (unit type)
240//! # #[derive(Debug, PartialEq)]
241//! #[derive(Deserialize)]
242//! struct AnyName;
243//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
244//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
245//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
246//! ```
247//!
248//! All these structs can be used to deserialize from an XML on the
249//! left side depending on amount of information that you want to get.
250//! Of course, you can combine them with elements extractor structs (see below).
251//!
252//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
253//!
254//! NOTE: XML allows you to have an attribute and an element with the same name
255//! inside the same element. quick-xml deals with that by prepending a `@` prefix
256//! to the name of attributes.
257//! </div>
258//! </td>
259//! </tr>
260//! <!-- 4 ===================================================================================== -->
261//! <tr>
262//! <td>
263//! A typical XML with child elements. The root tag name does not matter:
264//!
265//! ```xml
266//! <any-tag>
267//!   <one>...</one>
268//!   <two>...</two>
269//! </any-tag>
270//! ```
271//! </td>
272//! <td>
273//! A structure where each XML child element is mapped to the field.
274//! Each element name becomes a name of field. The name of the struct itself
275//! does not matter:
276//!
277//! ```
278//! # use serde::Deserialize;
279//! # type T = ();
280//! # type U = ();
281//! // Get both elements
282//! # #[derive(Debug, PartialEq)]
283//! #[derive(Deserialize)]
284//! struct AnyName {
285//!   one: T,
286//!   two: U,
287//! }
288//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
289//! #
290//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap_err();
291//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap_err();
292//! ```
293//! ```
294//! # use serde::Deserialize;
295//! # type T = ();
296//! // Get only the one element, ignore the other
297//! # #[derive(Debug, PartialEq)]
298//! #[derive(Deserialize)]
299//! struct AnyName {
300//!   one: T,
301//! }
302//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
303//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
304//! ```
305//! ```
306//! # use serde::Deserialize;
307//! // Ignore all elements
308//! // You can also use the `()` type (unit type)
309//! # #[derive(Debug, PartialEq)]
310//! #[derive(Deserialize)]
311//! struct AnyName;
312//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..." two="..."/>"#).unwrap();
313//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag><one>...</one><two>...</two></any-tag>"#).unwrap();
314//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><two>...</two></any-tag>"#).unwrap();
315//! # quick_xml::de::from_str::<AnyName>(r#"<any-tag one="..."><one>...</one></any-tag>"#).unwrap();
316//! ```
317//!
318//! All these structs can be used to deserialize from an XML on the
319//! left side depending on amount of information that you want to get.
320//! Of course, you can combine them with attributes extractor structs (see above).
321//!
322//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
323//!
324//! NOTE: XML allows you to have an attribute and an element with the same name
325//! inside the same element. quick-xml deals with that by prepending a `@` prefix
326//! to the name of attributes.
327//! </div>
328//! </td>
329//! </tr>
330//! <!-- 5 ===================================================================================== -->
331//! <tr>
332//! <td>
333//! An XML with an attribute and a child element named equally:
334//!
335//! ```xml
336//! <any-tag field="...">
337//!   <field>...</field>
338//! </any-tag>
339//! ```
340//! </td>
341//! <td>
342//!
343//! You MUST specify `#[serde(rename = "@field")]` on a field that will be used
344//! for an attribute:
345//!
346//! ```
347//! # use pretty_assertions::assert_eq;
348//! # use serde::Deserialize;
349//! # type T = ();
350//! # type U = ();
351//! # #[derive(Debug, PartialEq)]
352//! #[derive(Deserialize)]
353//! struct AnyName {
354//!   #[serde(rename = "@field")]
355//!   attribute: T,
356//!   field: U,
357//! }
358//! # assert_eq!(
359//! #   AnyName { attribute: (), field: () },
360//! #   quick_xml::de::from_str(r#"
361//! #     <any-tag field="...">
362//! #       <field>...</field>
363//! #     </any-tag>
364//! #   "#).unwrap(),
365//! # );
366//! ```
367//! </td>
368//! </tr>
369//! <!-- ======================================================================================= -->
370//! <tr><th colspan="2">
371//!
372//! ## Optional attributes and elements
373//!
374//! </th></tr>
375//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
376//! <!-- 6 ===================================================================================== -->
377//! <tr>
378//! <td>
379//! An optional XML attribute that you want to capture.
380//! The root tag name does not matter:
381//!
382//! ```xml
383//! <any-tag optional="..."/>
384//! ```
385//! ```xml
386//! <any-tag/>
387//! ```
388//! </td>
389//! <td>
390//!
391//! A structure with an optional field, renamed according to the requirements
392//! for attributes:
393//!
394//! ```
395//! # use pretty_assertions::assert_eq;
396//! # use serde::Deserialize;
397//! # type T = ();
398//! # #[derive(Debug, PartialEq)]
399//! #[derive(Deserialize)]
400//! struct AnyName {
401//!   #[serde(rename = "@optional")]
402//!   optional: Option<T>,
403//! }
404//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag optional="..."/>"#).unwrap());
405//! # assert_eq!(AnyName { optional: None     }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
406//! ```
407//! When the XML attribute is present, type `T` will be deserialized from
408//! an attribute value (which is a string). Note, that if `T = String` or other
409//! string type, the empty attribute is mapped to a `Some("")`, whereas `None`
410//! represents the missed attribute:
411//! ```xml
412//! <any-tag optional="..."/><!-- Some("...") -->
413//! <any-tag optional=""/>   <!-- Some("") -->
414//! <any-tag/>               <!-- None -->
415//! ```
416//! </td>
417//! </tr>
418//! <!-- 7 ===================================================================================== -->
419//! <tr>
420//! <td>
421//! An optional XML element that you want to capture.
422//! The root tag name does not matter:
423//!
424//! ```xml
425//! <any-tag>
426//!   <optional>...</optional>
427//! </any-tag>
428//! ```
429//! ```xml
430//! <any-tag>
431//!   <optional/>
432//! </any-tag>
433//! ```
434//! ```xml
435//! <any-tag/>
436//! ```
437//! </td>
438//! <td>
439//!
440//! A structure with an optional field:
441//!
442//! ```
443//! # use pretty_assertions::assert_eq;
444//! # use serde::Deserialize;
445//! # type T = ();
446//! # #[derive(Debug, PartialEq)]
447//! #[derive(Deserialize)]
448//! struct AnyName {
449//!   optional: Option<T>,
450//! }
451//! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"<any-tag><optional>...</optional></any-tag>"#).unwrap());
452//! # assert_eq!(AnyName { optional: None     }, quick_xml::de::from_str(r#"<any-tag/>"#).unwrap());
453//! ```
454//! When the XML element is present, type `T` will be deserialized from an
455//! element (which is a string or a multi-mapping -- i.e. mapping which can have
456//! duplicated keys).
457//! <div style="background:rgba(80, 240, 100, 0.20);padding:0.75em;">
458//!
459//! Currently some edge cases exist, as described in the issue [#497].
460//! </div>
461//! </td>
462//! </tr>
463//! <!-- ======================================================================================= -->
464//! <tr><th colspan="2">
465//!
466//! ## Choices (`xs:choice` XML Schema type)
467//!
468//! </th></tr>
469//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
470//! <!-- 8 ===================================================================================== -->
471//! <tr>
472//! <td>
473//! An XML with different root tag names, as well as text / CDATA content:
474//!
475//! ```xml
476//! <one field1="...">...</one>
477//! ```
478//! ```xml
479//! <two>
480//!   <field2>...</field2>
481//! </two>
482//! ```
483//! ```xml
484//! Text <![CDATA[or (mixed)
485//! CDATA]]> content
486//! ```
487//! </td>
488//! <td>
489//!
490//! An enum where each variant has the name of a possible root tag. The name of
491//! the enum itself does not matter.
492//!
493//! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`.
494//!
495//! All these structs can be used to deserialize from any XML on the
496//! left side depending on amount of information that you want to get:
497//!
498//! ```
499//! # use pretty_assertions::assert_eq;
500//! # use serde::Deserialize;
501//! # type T = ();
502//! # type U = ();
503//! # #[derive(Debug, PartialEq)]
504//! #[derive(Deserialize)]
505//! #[serde(rename_all = "snake_case")]
506//! enum AnyName {
507//!   One { #[serde(rename = "@field1")] field1: T },
508//!   Two { field2: U },
509//!
510//!   /// Use unit variant, if you do not care about the content.
511//!   /// You can use tuple variant if you want to parse
512//!   /// textual content as an xs:list.
513//!   /// Struct variants will pass a string to the
514//!   /// struct enum variant visitor, which typically
515//!   /// returns Err(Custom)
516//!   #[serde(rename = "$text")]
517//!   Text(String),
518//! }
519//! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
520//! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
521//! # assert_eq!(AnyName::Text("text  cdata ".into()), quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
522//! ```
523//! ```
524//! # use pretty_assertions::assert_eq;
525//! # use serde::Deserialize;
526//! # type T = ();
527//! # #[derive(Debug, PartialEq)]
528//! #[derive(Deserialize)]
529//! struct Two {
530//!   field2: T,
531//! }
532//! # #[derive(Debug, PartialEq)]
533//! #[derive(Deserialize)]
534//! #[serde(rename_all = "snake_case")]
535//! enum AnyName {
536//!   // `field1` content discarded
537//!   One,
538//!   Two(Two),
539//!   #[serde(rename = "$text")]
540//!   Text,
541//! }
542//! # assert_eq!(AnyName::One,                     quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
543//! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
544//! # assert_eq!(AnyName::Text,                    quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
545//! ```
546//! ```
547//! # use pretty_assertions::assert_eq;
548//! # use serde::Deserialize;
549//! # #[derive(Debug, PartialEq)]
550//! #[derive(Deserialize)]
551//! #[serde(rename_all = "snake_case")]
552//! enum AnyName {
553//!   One,
554//!   // the <two> and textual content will be mapped to this
555//!   #[serde(other)]
556//!   Other,
557//! }
558//! # assert_eq!(AnyName::One,   quick_xml::de::from_str(r#"<one field1="...">...</one>"#).unwrap());
559//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"<two><field2>...</field2></two>"#).unwrap());
560//! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text <![CDATA[ cdata ]]>"#).unwrap());
561//! ```
562//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
563//!
564//! NOTE: You should have variants for all possible tag names in your enum
565//! or have an `#[serde(other)]` variant.
566//! <!-- TODO: document an error type if that requirement is violated -->
567//! </div>
568//! </td>
569//! </tr>
570//! <!-- 9 ===================================================================================== -->
571//! <tr>
572//! <td>
573//!
574//! `<xs:choice>` embedded in the other element, and at the same time you want
575//! to get access to other attributes that can appear in the same container
576//! (`<any-tag>`). Also this case can be described, as if you want to choose
577//! Rust enum variant based on a tag name:
578//!
579//! ```xml
580//! <any-tag field="...">
581//!   <one>...</one>
582//! </any-tag>
583//! ```
584//! ```xml
585//! <any-tag field="...">
586//!   <two>...</two>
587//! </any-tag>
588//! ```
589//! ```xml
590//! <any-tag field="...">
591//!   Text <![CDATA[or (mixed)
592//!   CDATA]]> content
593//! </any-tag>
594//! ```
595//! </td>
596//! <td>
597//!
598//! A structure with a field whose type is an `enum`.
599//!
600//! If you need to get a textual content, mark a variant with `#[serde(rename = "$text")]`.
601//!
602//! Names of the enum, struct, and struct field with `Choice` type do not matter:
603//!
604//! ```
605//! # use pretty_assertions::assert_eq;
606//! # use serde::Deserialize;
607//! # type T = ();
608//! # #[derive(Debug, PartialEq)]
609//! #[derive(Deserialize)]
610//! #[serde(rename_all = "snake_case")]
611//! enum Choice {
612//!   One,
613//!   Two,
614//!
615//!   /// Use unit variant, if you do not care about the content.
616//!   /// You can use tuple variant if you want to parse
617//!   /// textual content as an xs:list.
618//!   /// Struct variants will pass a string to the
619//!   /// struct enum variant visitor, which typically
620//!   /// returns Err(Custom)
621//!   #[serde(rename = "$text")]
622//!   Text(String),
623//! }
624//! # #[derive(Debug, PartialEq)]
625//! #[derive(Deserialize)]
626//! struct AnyName {
627//!   #[serde(rename = "@field")]
628//!   field: T,
629//!
630//!   #[serde(rename = "$value")]
631//!   any_name: Choice,
632//! }
633//! # assert_eq!(
634//! #   AnyName { field: (), any_name: Choice::One },
635//! #   quick_xml::de::from_str(r#"<any-tag field="..."><one>...</one></any-tag>"#).unwrap(),
636//! # );
637//! # assert_eq!(
638//! #   AnyName { field: (), any_name: Choice::Two },
639//! #   quick_xml::de::from_str(r#"<any-tag field="..."><two>...</two></any-tag>"#).unwrap(),
640//! # );
641//! # assert_eq!(
642//! #   AnyName { field: (), any_name: Choice::Text("text  cdata ".into()) },
643//! #   quick_xml::de::from_str(r#"<any-tag field="...">text <![CDATA[ cdata ]]></any-tag>"#).unwrap(),
644//! # );
645//! ```
646//! </td>
647//! </tr>
648//! <!-- 10 ==================================================================================== -->
649//! <tr>
650//! <td>
651//!
652//! `<xs:choice>` embedded in the other element, and at the same time you want
653//! to get access to other elements that can appear in the same container
654//! (`<any-tag>`). Also this case can be described, as if you want to choose
655//! Rust enum variant based on a tag name:
656//!
657//! ```xml
658//! <any-tag>
659//!   <field>...</field>
660//!   <one>...</one>
661//! </any-tag>
662//! ```
663//! ```xml
664//! <any-tag>
665//!   <two>...</two>
666//!   <field>...</field>
667//! </any-tag>
668//! ```
669//! </td>
670//! <td>
671//!
672//! A structure with a field whose type is an `enum`.
673//!
674//! Names of the enum, struct, and struct field with `Choice` type do not matter:
675//!
676//! ```
677//! # use pretty_assertions::assert_eq;
678//! # use serde::Deserialize;
679//! # type T = ();
680//! # #[derive(Debug, PartialEq)]
681//! #[derive(Deserialize)]
682//! #[serde(rename_all = "snake_case")]
683//! enum Choice {
684//!   One,
685//!   Two,
686//! }
687//! # #[derive(Debug, PartialEq)]
688//! #[derive(Deserialize)]
689//! struct AnyName {
690//!   field: T,
691//!
692//!   #[serde(rename = "$value")]
693//!   any_name: Choice,
694//! }
695//! # assert_eq!(
696//! #   AnyName { field: (), any_name: Choice::One },
697//! #   quick_xml::de::from_str(r#"<any-tag><field>...</field><one>...</one></any-tag>"#).unwrap(),
698//! # );
699//! # assert_eq!(
700//! #   AnyName { field: (), any_name: Choice::Two },
701//! #   quick_xml::de::from_str(r#"<any-tag><two>...</two><field>...</field></any-tag>"#).unwrap(),
702//! # );
703//! ```
704//!
705//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
706//!
707//! NOTE: if your `Choice` enum would contain an `#[serde(other)]`
708//! variant, element `<field>` will be mapped to the `field` and not to the enum
709//! variant.
710//! </div>
711//!
712//! </td>
713//! </tr>
714//! <!-- 11 ==================================================================================== -->
715//! <tr>
716//! <td>
717//!
718//! `<xs:choice>` encapsulated in another element with a fixed name:
719//!
720//! ```xml
721//! <any-tag field="...">
722//!   <choice>
723//!     <one>...</one>
724//!   </choice>
725//! </any-tag>
726//! ```
727//! ```xml
728//! <any-tag field="...">
729//!   <choice>
730//!     <two>...</two>
731//!   </choice>
732//! </any-tag>
733//! ```
734//! </td>
735//! <td>
736//!
737//! A structure with a field of an intermediate type with one field of `enum` type.
738//! Actually, this example is not necessary, because you can construct it by yourself
739//! using the composition rules that were described above. However the XML construction
740//! described here is very common, so it is shown explicitly.
741//!
742//! Names of the enum and struct do not matter:
743//!
744//! ```
745//! # use pretty_assertions::assert_eq;
746//! # use serde::Deserialize;
747//! # type T = ();
748//! # #[derive(Debug, PartialEq)]
749//! #[derive(Deserialize)]
750//! #[serde(rename_all = "snake_case")]
751//! enum Choice {
752//!   One,
753//!   Two,
754//! }
755//! # #[derive(Debug, PartialEq)]
756//! #[derive(Deserialize)]
757//! struct Holder {
758//!   #[serde(rename = "$value")]
759//!   any_name: Choice,
760//! }
761//! # #[derive(Debug, PartialEq)]
762//! #[derive(Deserialize)]
763//! struct AnyName {
764//!   #[serde(rename = "@field")]
765//!   field: T,
766//!
767//!   choice: Holder,
768//! }
769//! # assert_eq!(
770//! #   AnyName { field: (), choice: Holder { any_name: Choice::One } },
771//! #   quick_xml::de::from_str(r#"<any-tag field="..."><choice><one>...</one></choice></any-tag>"#).unwrap(),
772//! # );
773//! # assert_eq!(
774//! #   AnyName { field: (), choice: Holder { any_name: Choice::Two } },
775//! #   quick_xml::de::from_str(r#"<any-tag field="..."><choice><two>...</two></choice></any-tag>"#).unwrap(),
776//! # );
777//! ```
778//! </td>
779//! </tr>
780//! <!-- 12 ==================================================================================== -->
781//! <tr>
782//! <td>
783//!
784//! `<xs:choice>` encapsulated in another element with a fixed name:
785//!
786//! ```xml
787//! <any-tag>
788//!   <field>...</field>
789//!   <choice>
790//!     <one>...</one>
791//!   </choice>
792//! </any-tag>
793//! ```
794//! ```xml
795//! <any-tag>
796//!   <choice>
797//!     <two>...</two>
798//!   </choice>
799//!   <field>...</field>
800//! </any-tag>
801//! ```
802//! </td>
803//! <td>
804//!
805//! A structure with a field of an intermediate type with one field of `enum` type.
806//! Actually, this example is not necessary, because you can construct it by yourself
807//! using the composition rules that were described above. However the XML construction
808//! described here is very common, so it is shown explicitly.
809//!
810//! Names of the enum and struct do not matter:
811//!
812//! ```
813//! # use pretty_assertions::assert_eq;
814//! # use serde::Deserialize;
815//! # type T = ();
816//! # #[derive(Debug, PartialEq)]
817//! #[derive(Deserialize)]
818//! #[serde(rename_all = "snake_case")]
819//! enum Choice {
820//!   One,
821//!   Two,
822//! }
823//! # #[derive(Debug, PartialEq)]
824//! #[derive(Deserialize)]
825//! struct Holder {
826//!   #[serde(rename = "$value")]
827//!   any_name: Choice,
828//! }
829//! # #[derive(Debug, PartialEq)]
830//! #[derive(Deserialize)]
831//! struct AnyName {
832//!   field: T,
833//!
834//!   choice: Holder,
835//! }
836//! # assert_eq!(
837//! #   AnyName { field: (), choice: Holder { any_name: Choice::One } },
838//! #   quick_xml::de::from_str(r#"<any-tag><field>...</field><choice><one>...</one></choice></any-tag>"#).unwrap(),
839//! # );
840//! # assert_eq!(
841//! #   AnyName { field: (), choice: Holder { any_name: Choice::Two } },
842//! #   quick_xml::de::from_str(r#"<any-tag><choice><two>...</two></choice><field>...</field></any-tag>"#).unwrap(),
843//! # );
844//! ```
845//! </td>
846//! </tr>
847//! <!-- ======================================================================================== -->
848//! <tr><th colspan="2">
849//!
850//! ## Sequences (`xs:all` and `xs:sequence` XML Schema types)
851//!
852//! </th></tr>
853//! <tr><th>To parse all these XML's...</th><th>...use these Rust type(s)</th></tr>
854//! <!-- 13 ==================================================================================== -->
855//! <tr>
856//! <td>
857//! A sequence inside of a tag without a dedicated name:
858//!
859//! ```xml
860//! <any-tag/>
861//! ```
862//! ```xml
863//! <any-tag>
864//!   <item/>
865//! </any-tag>
866//! ```
867//! ```xml
868//! <any-tag>
869//!   <item/>
870//!   <item/>
871//!   <item/>
872//! </any-tag>
873//! ```
874//! </td>
875//! <td>
876//!
877//! A structure with a field which is a sequence type, for example, [`Vec`].
878//! Because XML syntax does not distinguish between empty sequences and missed
879//! elements, we should indicate that on the Rust side, because serde will require
880//! that field `item` exists. You can do that in two possible ways:
881//!
882//! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]:
883//! ```
884//! # use pretty_assertions::assert_eq;
885//! # use serde::Deserialize;
886//! # type Item = ();
887//! # #[derive(Debug, PartialEq)]
888//! #[derive(Deserialize)]
889//! struct AnyName {
890//!   #[serde(default)]
891//!   item: Vec<Item>,
892//! }
893//! # assert_eq!(
894//! #   AnyName { item: vec![] },
895//! #   quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
896//! # );
897//! # assert_eq!(
898//! #   AnyName { item: vec![()] },
899//! #   quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
900//! # );
901//! # assert_eq!(
902//! #   AnyName { item: vec![(), (), ()] },
903//! #   quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
904//! # );
905//! ```
906//!
907//! Use the [`Option`]. In that case the inner array will always contain at least one
908//! element after deserialization:
909//! ```ignore
910//! # use pretty_assertions::assert_eq;
911//! # use serde::Deserialize;
912//! # type Item = ();
913//! # #[derive(Debug, PartialEq)]
914//! #[derive(Deserialize)]
915//! struct AnyName {
916//!   item: Option<Vec<Item>>,
917//! }
918//! # assert_eq!(
919//! #   AnyName { item: None },
920//! #   quick_xml::de::from_str(r#"<any-tag/>"#).unwrap(),
921//! # );
922//! # assert_eq!(
923//! #   AnyName { item: Some(vec![()]) },
924//! #   quick_xml::de::from_str(r#"<any-tag><item/></any-tag>"#).unwrap(),
925//! # );
926//! # assert_eq!(
927//! #   AnyName { item: Some(vec![(), (), ()]) },
928//! #   quick_xml::de::from_str(r#"<any-tag><item/><item/><item/></any-tag>"#).unwrap(),
929//! # );
930//! ```
931//!
932//! See also [Frequently Used Patterns](#element-lists).
933//!
934//! [field]: https://serde.rs/field-attrs.html#default
935//! [struct]: https://serde.rs/container-attrs.html#default
936//! </td>
937//! </tr>
938//! <!-- 14 ==================================================================================== -->
939//! <tr>
940//! <td>
941//! A sequence with a strict order, probably with mixed content
942//! (text / CDATA and tags):
943//!
944//! ```xml
945//! <one>...</one>
946//! text
947//! <![CDATA[cdata]]>
948//! <two>...</two>
949//! <one>...</one>
950//! ```
951//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
952//!
953//! NOTE: this is just an example for showing mapping. XML does not allow
954//! multiple root tags -- you should wrap the sequence into a tag.
955//! </div>
956//! </td>
957//! <td>
958//!
959//! All elements are mapped to a heterogeneous sequential type: a tuple or a named tuple.
960//! Each element of the tuple should be able to be deserialized from the nested
961//! element content (`...`), except the enum types which would be deserialized
962//! from the full element (`<one>...</one>`), so they could use the element name
963//! to choose the right variant:
964//!
965//! ```
966//! # use pretty_assertions::assert_eq;
967//! # use serde::Deserialize;
968//! # type One = ();
969//! # type Two = ();
970//! # /*
971//! type One = ...;
972//! type Two = ...;
973//! # */
974//! # #[derive(Debug, PartialEq)]
975//! #[derive(Deserialize)]
976//! struct AnyName(One, String, Two, One);
977//! # assert_eq!(
978//! #   AnyName((), "text cdata".into(), (), ()),
979//! #   quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
980//! # );
981//! ```
982//! ```
983//! # use pretty_assertions::assert_eq;
984//! # use serde::Deserialize;
985//! # #[derive(Debug, PartialEq)]
986//! #[derive(Deserialize)]
987//! #[serde(rename_all = "snake_case")]
988//! enum Choice {
989//!   One,
990//! }
991//! # type Two = ();
992//! # /*
993//! type Two = ...;
994//! # */
995//! type AnyName = (Choice, String, Two, Choice);
996//! # assert_eq!(
997//! #   (Choice::One, "text cdata".to_string(), (), Choice::One),
998//! #   quick_xml::de::from_str(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
999//! # );
1000//! ```
1001//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1002//!
1003//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1004//! so you cannot have two adjacent string types in your sequence.
1005//!
1006//! NOTE: In the case that the list might contain tags that are overlapped with
1007//! tags that do not correspond to the list you should add the feature [`overlapped-lists`].
1008//! </div>
1009//! </td>
1010//! </tr>
1011//! <!-- 15 ==================================================================================== -->
1012//! <tr>
1013//! <td>
1014//! A sequence with a non-strict order, probably with a mixed content
1015//! (text / CDATA and tags).
1016//!
1017//! ```xml
1018//! <one>...</one>
1019//! text
1020//! <![CDATA[cdata]]>
1021//! <two>...</two>
1022//! <one>...</one>
1023//! ```
1024//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1025//!
1026//! NOTE: this is just an example for showing mapping. XML does not allow
1027//! multiple root tags -- you should wrap the sequence into a tag.
1028//! </div>
1029//! </td>
1030//! <td>
1031//! A homogeneous sequence of elements with a fixed or dynamic size:
1032//!
1033//! ```
1034//! # use pretty_assertions::assert_eq;
1035//! # use serde::Deserialize;
1036//! # #[derive(Debug, PartialEq)]
1037//! #[derive(Deserialize)]
1038//! #[serde(rename_all = "snake_case")]
1039//! enum Choice {
1040//!   One,
1041//!   Two,
1042//!   #[serde(other)]
1043//!   Other,
1044//! }
1045//! type AnyName = [Choice; 4];
1046//! # assert_eq!(
1047//! #   [Choice::One, Choice::Other, Choice::Two, Choice::One],
1048//! #   quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1049//! # );
1050//! ```
1051//! ```
1052//! # use pretty_assertions::assert_eq;
1053//! # use serde::Deserialize;
1054//! # #[derive(Debug, PartialEq)]
1055//! #[derive(Deserialize)]
1056//! #[serde(rename_all = "snake_case")]
1057//! enum Choice {
1058//!   One,
1059//!   Two,
1060//!   #[serde(rename = "$text")]
1061//!   Other(String),
1062//! }
1063//! type AnyName = Vec<Choice>;
1064//! # assert_eq!(
1065//! #   vec![
1066//! #     Choice::One,
1067//! #     Choice::Other("text cdata".into()),
1068//! #     Choice::Two,
1069//! #     Choice::One,
1070//! #   ],
1071//! #   quick_xml::de::from_str::<AnyName>(r#"<one>...</one>text <![CDATA[cdata]]><two>...</two><one>...</one>"#).unwrap(),
1072//! # );
1073//! ```
1074//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1075//!
1076//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1077//! so you cannot have two adjacent string types in your sequence.
1078//! </div>
1079//! </td>
1080//! </tr>
1081//! <!-- 16 ==================================================================================== -->
1082//! <tr>
1083//! <td>
1084//! A sequence with a strict order, probably with a mixed content,
1085//! (text and tags) inside of the other element:
1086//!
1087//! ```xml
1088//! <any-tag attribute="...">
1089//!   <one>...</one>
1090//!   text
1091//!   <![CDATA[cdata]]>
1092//!   <two>...</two>
1093//!   <one>...</one>
1094//! </any-tag>
1095//! ```
1096//! </td>
1097//! <td>
1098//!
1099//! A structure where all child elements are mapped to one field which has
1100//! a heterogeneous sequential type: a tuple or a named tuple. Each element of the
1101//! tuple should be able to be deserialized from the full element (`<one>...</one>`).
1102//!
1103//! You MUST specify `#[serde(rename = "$value")]` on that field:
1104//!
1105//! ```
1106//! # use pretty_assertions::assert_eq;
1107//! # use serde::Deserialize;
1108//! # type One = ();
1109//! # type Two = ();
1110//! # /*
1111//! type One = ...;
1112//! type Two = ...;
1113//! # */
1114//!
1115//! # #[derive(Debug, PartialEq)]
1116//! #[derive(Deserialize)]
1117//! struct AnyName {
1118//!   #[serde(rename = "@attribute")]
1119//! # attribute: (),
1120//! # /*
1121//!   attribute: ...,
1122//! # */
1123//!   // Does not (yet?) supported by the serde
1124//!   // https://github.com/serde-rs/serde/issues/1905
1125//!   // #[serde(flatten)]
1126//!   #[serde(rename = "$value")]
1127//!   any_name: (One, String, Two, One),
1128//! }
1129//! # assert_eq!(
1130//! #   AnyName { attribute: (), any_name: ((), "text cdata".into(), (), ()) },
1131//! #   quick_xml::de::from_str("\
1132//! #     <any-tag attribute='...'>\
1133//! #       <one>...</one>\
1134//! #       text \
1135//! #       <![CDATA[cdata]]>\
1136//! #       <two>...</two>\
1137//! #       <one>...</one>\
1138//! #     </any-tag>"
1139//! #   ).unwrap(),
1140//! # );
1141//! ```
1142//! ```
1143//! # use pretty_assertions::assert_eq;
1144//! # use serde::Deserialize;
1145//! # type One = ();
1146//! # type Two = ();
1147//! # /*
1148//! type One = ...;
1149//! type Two = ...;
1150//! # */
1151//!
1152//! # #[derive(Debug, PartialEq)]
1153//! #[derive(Deserialize)]
1154//! struct NamedTuple(One, String, Two, One);
1155//!
1156//! # #[derive(Debug, PartialEq)]
1157//! #[derive(Deserialize)]
1158//! struct AnyName {
1159//!   #[serde(rename = "@attribute")]
1160//! # attribute: (),
1161//! # /*
1162//!   attribute: ...,
1163//! # */
1164//!   // Does not (yet?) supported by the serde
1165//!   // https://github.com/serde-rs/serde/issues/1905
1166//!   // #[serde(flatten)]
1167//!   #[serde(rename = "$value")]
1168//!   any_name: NamedTuple,
1169//! }
1170//! # assert_eq!(
1171//! #   AnyName { attribute: (), any_name: NamedTuple((), "text cdata".into(), (), ()) },
1172//! #   quick_xml::de::from_str("\
1173//! #     <any-tag attribute='...'>\
1174//! #       <one>...</one>\
1175//! #       text \
1176//! #       <![CDATA[cdata]]>\
1177//! #       <two>...</two>\
1178//! #       <one>...</one>\
1179//! #     </any-tag>"
1180//! #   ).unwrap(),
1181//! # );
1182//! ```
1183//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1184//!
1185//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1186//! so you cannot have two adjacent string types in your sequence.
1187//! </div>
1188//! </td>
1189//! </tr>
1190//! <!-- 17 ==================================================================================== -->
1191//! <tr>
1192//! <td>
1193//! A sequence with a non-strict order, probably with a mixed content
1194//! (text / CDATA and tags) inside of the other element:
1195//!
1196//! ```xml
1197//! <any-tag>
1198//!   <one>...</one>
1199//!   text
1200//!   <![CDATA[cdata]]>
1201//!   <two>...</two>
1202//!   <one>...</one>
1203//! </any-tag>
1204//! ```
1205//! </td>
1206//! <td>
1207//!
1208//! A structure where all child elements are mapped to one field which has
1209//! a homogeneous sequential type: an array-like container. A container type `T`
1210//! should be able to be deserialized from the nested element content (`...`),
1211//! except if it is an enum type which would be deserialized from the full
1212//! element (`<one>...</one>`).
1213//!
1214//! You MUST specify `#[serde(rename = "$value")]` on that field:
1215//!
1216//! ```
1217//! # use pretty_assertions::assert_eq;
1218//! # use serde::Deserialize;
1219//! # #[derive(Debug, PartialEq)]
1220//! #[derive(Deserialize)]
1221//! #[serde(rename_all = "snake_case")]
1222//! enum Choice {
1223//!   One,
1224//!   Two,
1225//!   #[serde(rename = "$text")]
1226//!   Other(String),
1227//! }
1228//! # #[derive(Debug, PartialEq)]
1229//! #[derive(Deserialize)]
1230//! struct AnyName {
1231//!   #[serde(rename = "@attribute")]
1232//! # attribute: (),
1233//! # /*
1234//!   attribute: ...,
1235//! # */
1236//!   // Does not (yet?) supported by the serde
1237//!   // https://github.com/serde-rs/serde/issues/1905
1238//!   // #[serde(flatten)]
1239//!   #[serde(rename = "$value")]
1240//!   any_name: [Choice; 4],
1241//! }
1242//! # assert_eq!(
1243//! #   AnyName { attribute: (), any_name: [
1244//! #     Choice::One,
1245//! #     Choice::Other("text cdata".into()),
1246//! #     Choice::Two,
1247//! #     Choice::One,
1248//! #   ] },
1249//! #   quick_xml::de::from_str("\
1250//! #     <any-tag attribute='...'>\
1251//! #       <one>...</one>\
1252//! #       text \
1253//! #       <![CDATA[cdata]]>\
1254//! #       <two>...</two>\
1255//! #       <one>...</one>\
1256//! #     </any-tag>"
1257//! #   ).unwrap(),
1258//! # );
1259//! ```
1260//! ```
1261//! # use pretty_assertions::assert_eq;
1262//! # use serde::Deserialize;
1263//! # #[derive(Debug, PartialEq)]
1264//! #[derive(Deserialize)]
1265//! #[serde(rename_all = "snake_case")]
1266//! enum Choice {
1267//!   One,
1268//!   Two,
1269//!   #[serde(rename = "$text")]
1270//!   Other(String),
1271//! }
1272//! # #[derive(Debug, PartialEq)]
1273//! #[derive(Deserialize)]
1274//! struct AnyName {
1275//!   #[serde(rename = "@attribute")]
1276//! # attribute: (),
1277//! # /*
1278//!   attribute: ...,
1279//! # */
1280//!   // Does not (yet?) supported by the serde
1281//!   // https://github.com/serde-rs/serde/issues/1905
1282//!   // #[serde(flatten)]
1283//!   #[serde(rename = "$value")]
1284//!   any_name: Vec<Choice>,
1285//! }
1286//! # assert_eq!(
1287//! #   AnyName { attribute: (), any_name: vec![
1288//! #     Choice::One,
1289//! #     Choice::Other("text cdata".into()),
1290//! #     Choice::Two,
1291//! #     Choice::One,
1292//! #   ] },
1293//! #   quick_xml::de::from_str("\
1294//! #     <any-tag attribute='...'>\
1295//! #       <one>...</one>\
1296//! #       text \
1297//! #       <![CDATA[cdata]]>\
1298//! #       <two>...</two>\
1299//! #       <one>...</one>\
1300//! #     </any-tag>"
1301//! #   ).unwrap(),
1302//! # );
1303//! ```
1304//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1305//!
1306//! NOTE: consecutive text and CDATA nodes are merged into one text node,
1307//! so you cannot have two adjacent string types in your sequence.
1308//! </div>
1309//! </td>
1310//! </tr>
1311//! </tbody>
1312//! </table>
1313//!
1314//!
1315//! Generate Rust types from XML
1316//! ============================
1317//!
1318//! To speed up the creation of Rust types that represent a given XML file you can
1319//! use the [xml_schema_generator](https://github.com/Thomblin/xml_schema_generator).
1320//! It provides a standalone binary and a Rust library that parses one or more XML files
1321//! and generates a collection of structs that are compatible with quick_xml::de.
1322//!
1323//!
1324//!
1325//! Composition Rules
1326//! =================
1327//!
1328//! The XML format is very different from other formats supported by `serde`.
1329//! One such difference is how data in the serialized form is related to
1330//! the Rust type. Usually each byte in the data can be associated only with
1331//! one field in the data structure. However, XML is an exception.
1332//!
1333//! For example, take this XML:
1334//!
1335//! ```xml
1336//! <any>
1337//!   <key attr="value"/>
1338//! </any>
1339//! ```
1340//!
1341//! and try to deserialize it to the struct `AnyName`:
1342//!
1343//! ```no_run
1344//! # use serde::Deserialize;
1345//! #[derive(Deserialize)]
1346//! struct AnyName { // AnyName calls `deserialize_struct` on `<any><key attr="value"/></any>`
1347//!                  //                         Used data:          ^^^^^^^^^^^^^^^^^^^
1348//!   key: Inner,    // Inner   calls `deserialize_struct` on `<key attr="value"/>`
1349//!                  //                         Used data:          ^^^^^^^^^^^^
1350//! }
1351//! #[derive(Deserialize)]
1352//! struct Inner {
1353//!   #[serde(rename = "@attr")]
1354//!   attr: String,  // String  calls `deserialize_string` on `value`
1355//!                  //                         Used data:     ^^^^^
1356//! }
1357//! ```
1358//!
1359//! The comments show which methods of a [`Deserializer`] are called by each struct's
1360//! `deserialize` method and which input they see. **Used data** shows what
1361//! content is actually used for deserializing. As you can see, the name of the inner
1362//! `<key>` tag is used both as a map key / outer struct field name and as part
1363//! of the inner struct (although the _value_ of the tag, i.e. `key`, is not used
1364//! by it).
1365//!
1366//!
1367//!
1368//! Enum Representations
1369//! ====================
1370//!
1371//! `quick-xml` represents enums differently in normal fields, `$text` fields and
1372//! `$value` fields. A normal representation is compatible with serde's adjacent
1373//! and internal tags feature -- tags for adjacently and internally tagged enums
1374//! are serialized using [`Serializer::serialize_unit_variant`] and deserialized
1375//! using [`Deserializer::deserialize_enum`].
1376//!
1377//! Use these simple rules to remember how an enum would be represented in XML:
1378//! - In a `$value` field the representation is always the same as the top-level representation;
1379//! - In a `$text` field the representation is always the same as in a normal field,
1380//!   but the surrounding tags with the field name are removed;
1381//! - In a normal field the representation always contains a tag with the field name.
1382//!
1383//! Normal enum variant
1384//! -------------------
1385//!
1386//! To model an `xs:choice` XML construct use `$value` field.
1387//! To model a top-level `xs:choice` just use the enum type.
1388//!
1389//! |Kind   |Top-level and in `$value` field          |In normal field      |In `$text` field     |
1390//! |-------|-----------------------------------------|---------------------|---------------------|
1391//! |Unit   |`<Unit/>`                                |`<field>Unit</field>`|`Unit`               |
1392//! |Newtype|`<Newtype>42</Newtype>`                  |Err(Custom) [^0]     |Err(Custom) [^0]     |
1393//! |Tuple  |`<Tuple>42</Tuple><Tuple>answer</Tuple>` |Err(Custom) [^0]     |Err(Custom) [^0]     |
1394//! |Struct |`<Struct><q>42</q><a>answer</a></Struct>`|Err(Custom) [^0]     |Err(Custom) [^0]     |
1395//!
1396//! `$text` enum variant
1397//! --------------------
1398//!
1399//! |Kind   |Top-level and in `$value` field          |In normal field      |In `$text` field     |
1400//! |-------|-----------------------------------------|---------------------|---------------------|
1401//! |Unit   |_(empty)_                                |`<field/>`           |_(empty)_            |
1402//! |Newtype|`42`                                     |Err(Custom) [^0] [^1]|Err(Custom) [^0] [^2]|
1403//! |Tuple  |`42 answer`                              |Err(Custom) [^0] [^3]|Err(Custom) [^0] [^4]|
1404//! |Struct |Err(Custom) [^0]                         |Err(Custom) [^0]     |Err(Custom) [^0]     |
1405//!
1406//! [^0]: Error is returned by the deserialized type. In case of derived implementation a `Custom`
1407//!       error will be returned, but custom deserialize implementation can successfully deserialize
1408//!       value from a string which will be passed to it.
1409//!
1410//! [^1]: If this were serialized as `<field>42</field>`, it would be ambiguous during deserialization,
1411//!       because it would clash with the `Unit` representation in a normal field.
1412//!
1413//! [^2]: If this were serialized as `42`, it would be ambiguous during deserialization,
1414//!       because it would clash with the `Unit` representation in a `$text` field.
1415//!
1416//! [^3]: If this were serialized as `<field>42 answer</field>`, it would be ambiguous during deserialization,
1417//!       because it would clash with the `Unit` representation in a normal field.
1418//!
1419//! [^4]: If this were serialized as `42 answer`, it would be ambiguous during deserialization,
1420//!       because it would clash with the `Unit` representation in a `$text` field.
1421//!
1422//!
1423//!
1424//! Difference between `$text` and `$value` special names
1425//! =====================================================
1426//!
1427//! quick-xml supports two special names for fields -- `$text` and `$value`.
1428//! Although they may seem the same, there is a distinction. Two different
1429//! names are required mostly for serialization, because quick-xml should know
1430//! how you want to serialize certain constructs, which could be represented
1431//! through XML in multiple different ways.
1432//!
1433//! The only difference is in how complex types and sequences are serialized.
1434//! If you doubt which one you should select, begin with [`$value`](#value).
1435//!
1436//! ## `$text`
1437//! `$text` is used when you want to write your XML as a text or a CDATA content.
1438//! More formally, field with that name represents simple type definition with
1439//! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic,
1440//! as described in the [specification].
1441//!
1442//! As a result, not all types of such fields can be serialized. Only serialization
1443//! of the following types is supported:
1444//! - all primitive types (strings, numbers, booleans)
1445//! - unit variants of enumerations (serializes to a name of a variant)
1446//! - newtypes (delegates serialization to inner type)
1447//! - [`Option`] of above (`None` serializes to nothing)
1448//! - sequences (including tuples and tuple variants of enumerations) of above,
1449//!   excluding `None` and empty string elements (because it will not be possible
1450//!   to deserialize them back). The elements are separated by space(s)
1451//! - unit type `()` and unit structs (serializes to nothing)
1452//!
1453//! Complex types, such as structs and maps, are not supported in this field.
1454//! If you want them, you should use `$value`.
1455//!
1456//! Sequences are serialized to a space-delimited string, which is why only certain
1457//! types are allowed in this mode:
1458//!
1459//! ```
1460//! # use serde::{Deserialize, Serialize};
1461//! # use quick_xml::de::from_str;
1462//! # use quick_xml::se::to_string;
1463//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1464//! struct AnyName {
1465//!     #[serde(rename = "$text")]
1466//!     field: Vec<usize>,
1467//! }
1468//!
1469//! let obj = AnyName { field: vec![1, 2, 3] };
1470//! let xml = to_string(&obj).unwrap();
1471//! assert_eq!(xml, "<AnyName>1 2 3</AnyName>");
1472//!
1473//! let object: AnyName = from_str(&xml).unwrap();
1474//! assert_eq!(object, obj);
1475//! ```
1476//!
1477//! ## `$value`
1478//! <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
1479//!
1480//! NOTE: a name `#content` would better explain the purpose of that field,
1481//! but `$value` is used for compatibility with other XML serde crates, which
1482//! use that name. This will allow you to switch XML crates more smoothly if required.
1483//! </div>
1484//!
1485//! Representation of primitive types in `$value` does not differ from their
1486//! representation in `$text` field. The difference is how sequences are serialized.
1487//! `$value` serializes each sequence item as a separate XML element. The name
1488//! of that element is taken from serialized type, and because only `enum`s provide
1489//! such name (their variant name), only they should be used for such fields.
1490//!
1491//! `$value` fields do not support `struct` types with fields; the serialization
1492//! of such types would end with an `Err(Unsupported)`. Unit structs and the unit
1493//! type `()` serialize to nothing and can be deserialized from any content.
1494//!
1495//! Serialization and deserialization of a `$value` field is performed as usual, except
1496//! that the name for an XML element will be given by the serialized type, instead of
1497//! the field. The latter allows serializing enumerated types, where the variant is encoded
1498//! as a tag name, and so represents an XSD `xs:choice` schema by the Rust `enum`.
1499//!
1500//! In the example below, `field` will be serialized as `<field>A</field>`,
1501//! `<field>B</field>` or `<field>C</field>`, because the element gets its name from
1502//! the field name and the variant name is written as the text content of that element:
1503//!
1504//! ```
1505//! # use serde::{Deserialize, Serialize};
1506//! # use pretty_assertions::assert_eq;
1507//! # #[derive(PartialEq, Debug)]
1508//! #[derive(Deserialize, Serialize)]
1509//! enum Enum { A, B, C }
1510//!
1511//! # #[derive(PartialEq, Debug)]
1512//! #[derive(Deserialize, Serialize)]
1513//! struct AnyName {
1514//!     // <field>A</field>, <field>B</field>, or <field>C</field>
1515//!     field: Enum,
1516//! }
1517//! # assert_eq!(
1518//! #     quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1519//! #     "<AnyName><field>A</field></AnyName>",
1520//! # );
1521//! # assert_eq!(
1522//! #     AnyName { field: Enum::B },
1523//! #     quick_xml::de::from_str("<root><field>B</field></root>").unwrap(),
1524//! # );
1525//! ```
1526//!
1527//! If you rename the field to `$value`, then `field` would be serialized as `<A/>`,
1528//! `<B/>` or `<C/>`, depending on its content. It is also possible to
1529//! deserialize it from the same elements:
1530//!
1531//! ```
1532//! # use serde::{Deserialize, Serialize};
1533//! # use pretty_assertions::assert_eq;
1534//! # #[derive(Deserialize, Serialize, PartialEq, Debug)]
1535//! # enum Enum { A, B, C }
1536//! #
1537//! # #[derive(PartialEq, Debug)]
1538//! #[derive(Deserialize, Serialize)]
1539//! struct AnyName {
1540//!     // <A/>, <B/> or <C/>
1541//!     #[serde(rename = "$value")]
1542//!     field: Enum,
1543//! }
1544//! # assert_eq!(
1545//! #     quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(),
1546//! #     "<AnyName><A/></AnyName>",
1547//! # );
1548//! # assert_eq!(
1549//! #     AnyName { field: Enum::B },
1550//! #     quick_xml::de::from_str("<root><B/></root>").unwrap(),
1551//! # );
1552//! ```
1553//!
1554//! ### Primitives and sequences of primitives
1555//!
1556//! Sequences are serialized to a list of elements. Note that types that do not
1557//! produce their own tag (i.e. primitives) are written as is, without delimiters:
1558//!
1559//! ```
1560//! # use serde::{Deserialize, Serialize};
1561//! # use pretty_assertions::assert_eq;
1562//! # use quick_xml::de::from_str;
1563//! # use quick_xml::se::to_string;
1564//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1565//! struct AnyName {
1566//!     #[serde(rename = "$value")]
1567//!     field: Vec<usize>,
1568//! }
1569//!
1570//! let obj = AnyName { field: vec![1, 2, 3] };
1571//! let xml = to_string(&obj).unwrap();
1572//! // Note, that types that does not produce their own tag are written as is!
1573//! assert_eq!(xml, "<AnyName>123</AnyName>");
1574//!
1575//! let object: AnyName = from_str("<AnyName>123</AnyName>").unwrap();
1576//! assert_eq!(object, AnyName { field: vec![123] });
1577//!
1578//! // `1 2 3` is mapped to a single `usize` element
1579//! // It is impossible to deserialize list of primitives to such field
1580//! from_str::<AnyName>("<AnyName>1 2 3</AnyName>").unwrap_err();
1581//! ```
1582//!
1583//! A particular case of that example is a string `$value` field, which probably
1584//! would be the most used example of that attribute:
1585//!
1586//! ```
1587//! # use serde::{Deserialize, Serialize};
1588//! # use pretty_assertions::assert_eq;
1589//! # use quick_xml::de::from_str;
1590//! # use quick_xml::se::to_string;
1591//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1592//! struct AnyName {
1593//!     #[serde(rename = "$value")]
1594//!     field: String,
1595//! }
1596//!
1597//! let obj = AnyName { field: "content".to_string() };
1598//! let xml = to_string(&obj).unwrap();
1599//! assert_eq!(xml, "<AnyName>content</AnyName>");
1600//! ```
1601//!
1602//! ### Structs and sequences of structs
1603//!
1604//! Note that structures do not have a serializable name either (the name of the
1605//! type is never used), so it is impossible to serialize a non-unit struct or a
1606//! sequence of non-unit structs in a `$value` field. (Sequences of) unit structs
1607//! are serialized as an empty string, because units themselves serialize
1608//! to nothing:
1609//!
1610//! ```
1611//! # use serde::{Deserialize, Serialize};
1612//! # use pretty_assertions::assert_eq;
1613//! # use quick_xml::de::from_str;
1614//! # use quick_xml::se::to_string;
1615//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1616//! struct Unit;
1617//!
1618//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1619//! struct AnyName {
1620//!     // #[serde(default)] is required for deserialization of empty lists
1621//!     // This is a general note, not related to $value
1622//!     #[serde(rename = "$value", default)]
1623//!     field: Vec<Unit>,
1624//! }
1625//!
1626//! let obj = AnyName { field: vec![Unit, Unit, Unit] };
1627//! let xml = to_string(&obj).unwrap();
1628//! assert_eq!(xml, "<AnyName/>");
1629//!
1630//! let object: AnyName = from_str("<AnyName/>").unwrap();
1631//! assert_eq!(object, AnyName { field: vec![] });
1632//!
1633//! let object: AnyName = from_str("<AnyName></AnyName>").unwrap();
1634//! assert_eq!(object, AnyName { field: vec![] });
1635//!
1636//! let object: AnyName = from_str("<AnyName><A/><B/><C/></AnyName>").unwrap();
1637//! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] });
1638//! ```
1639//!
1640//! ### Enums and sequences of enums
1641//!
1642//! Enumerations use the variant name as an element name:
1643//!
1644//! ```
1645//! # use serde::{Deserialize, Serialize};
1646//! # use pretty_assertions::assert_eq;
1647//! # use quick_xml::de::from_str;
1648//! # use quick_xml::se::to_string;
1649//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1650//! struct AnyName {
1651//!     #[serde(rename = "$value")]
1652//!     field: Vec<Enum>,
1653//! }
1654//!
1655//! #[derive(Deserialize, Serialize, PartialEq, Debug)]
1656//! enum Enum { A, B, C }
1657//!
1658//! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] };
1659//! let xml = to_string(&obj).unwrap();
1660//! assert_eq!(
1661//!     xml,
1662//!     "<AnyName>\
1663//!         <A/>\
1664//!         <B/>\
1665//!         <C/>\
1666//!      </AnyName>"
1667//! );
1668//!
1669//! let object: AnyName = from_str(&xml).unwrap();
1670//! assert_eq!(object, obj);
1671//! ```
1672//!
1673//! ----------------------------------------------------------------------------
1674//!
1675//! You can have either `$text` or `$value` field in your structs. Unfortunately,
1676//! that is not enforced, so you can theoretically have both, but you should
1677//! avoid that.
1678//!
1679//!
1680//!
1681//! Frequently Used Patterns
1682//! ========================
1683//!
1684//! Some XML constructs are used so frequently that it is worth documenting the recommended
1685//! way to represent them in Rust. The sections below describe them.
1686//!
1687//! `<element>` lists
1688//! -----------------
1689//! Many XML formats wrap lists of elements in the additional container,
1690//! although this is not required by the XML rules:
1691//!
1692//! ```xml
1693//! <root>
1694//!   <field1/>
1695//!   <field2/>
1696//!   <list><!-- Container -->
1697//!     <element/>
1698//!     <element/>
1699//!     <element/>
1700//!   </list>
1701//!   <field3/>
1702//! </root>
1703//! ```
1704//! In this case, there is a great desire to describe this XML in this way:
1705//! ```
1706//! /// Represents <element/>
1707//! type Element = ();
1708//!
1709//! /// Represents <root>...</root>
1710//! struct AnyName {
1711//!     // Incorrect
1712//!     list: Vec<Element>,
1713//! }
1714//! ```
1715//! This will not work, because the `<list>` element can potentially have attributes
1716//! and other elements inside. You should define the struct for the `<list>`
1717//! explicitly, as you would do in the XSD for that XML:
1718//! ```
1719//! /// Represents <element/>
1720//! type Element = ();
1721//!
1722//! /// Represents <root>...</root>
1723//! struct AnyName {
1724//!     // Correct
1725//!     list: List,
1726//! }
1727//! /// Represents <list>...</list>
1728//! struct List {
1729//!     element: Vec<Element>,
1730//! }
1731//! ```
1732//!
1733//! If you want to simplify your API, you could write a simple function for unwrapping
1734//! inner list and apply it via [`deserialize_with`]:
1735//!
1736//! ```
1737//! # use pretty_assertions::assert_eq;
1738//! use quick_xml::de::from_str;
1739//! use serde::{Deserialize, Deserializer};
1740//!
1741//! /// Represents <element/>
1742//! type Element = ();
1743//!
1744//! /// Represents <root>...</root>
1745//! #[derive(Deserialize, Debug, PartialEq)]
1746//! struct AnyName {
1747//!     #[serde(deserialize_with = "unwrap_list")]
1748//!     list: Vec<Element>,
1749//! }
1750//!
1751//! fn unwrap_list<'de, D>(deserializer: D) -> Result<Vec<Element>, D::Error>
1752//! where
1753//!     D: Deserializer<'de>,
1754//! {
1755//!     /// Represents <list>...</list>
1756//!     #[derive(Deserialize)]
1757//!     struct List {
1758//!         // default allows empty list
1759//!         #[serde(default)]
1760//!         element: Vec<Element>,
1761//!     }
1762//!     Ok(List::deserialize(deserializer)?.element)
1763//! }
1764//!
1765//! assert_eq!(
1766//!     AnyName { list: vec![(), (), ()] },
1767//!     from_str("
1768//!         <root>
1769//!           <list>
1770//!             <element/>
1771//!             <element/>
1772//!             <element/>
1773//!           </list>
1774//!         </root>
1775//!     ").unwrap(),
1776//! );
1777//! ```
1778//!
//! Instead of writing such functions manually, you could also try <https://lib.rs/crates/serde-query>.
1780//!
1781//! Overlapped (Out-of-Order) Elements
1782//! ----------------------------------
//! If the elements of a list may be interleaved with elements that do not
//! belong to the list (which is a common case in XML documents), like this:
1786//! ```xml
1787//! <any-name>
1788//!   <item/>
1789//!   <another-item/>
1790//!   <item/>
1791//!   <item/>
1792//! </any-name>
1793//! ```
1794//! you should enable the [`overlapped-lists`] feature to make it possible
1795//! to deserialize this to:
1796//! ```no_run
1797//! # use serde::Deserialize;
1798//! #[derive(Deserialize)]
1799//! #[serde(rename_all = "kebab-case")]
1800//! struct AnyName {
1801//!     item: Vec<()>,
1802//!     another_item: (),
1803//! }
1804//! ```
1805//!
1806//!
1807//! Internally Tagged Enums
1808//! -----------------------
//! [Tagged enums] are currently not supported because of an issue in the Serde
//! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in
//! Serde which could be useful for XML parsing ([serde#1495]). This can be worked
//! around by manually implementing deserialization with `#[serde(deserialize_with = "func")]`
//! or by implementing [`Deserialize`], but this can get very tedious very fast for
//! files with a large number of tagged enums. To help with this issue quick-xml
//! provides a macro [`impl_deserialize_for_internally_tagged_enum!`]. See the
//! macro documentation for details.
1817//!
1818//!
1819//! [`overlapped-lists`]: ../index.html#overlapped-lists
1820//! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
1821//! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
1822//! [#497]: https://github.com/tafia/quick-xml/issues/497
1823//! [`Serializer::serialize_unit_variant`]: serde::Serializer::serialize_unit_variant
1824//! [`Deserializer::deserialize_enum`]: serde::Deserializer::deserialize_enum
1825//! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged
1826//! [serde#1183]: https://github.com/serde-rs/serde/issues/1183
1827//! [serde#1495]: https://github.com/serde-rs/serde/issues/1495
1828//! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586
1829//! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum
1830
// Macros must be defined before the modules that use them.
// Also, macros must be imported before they are used.
1833use serde::serde_if_integer128;
1834
/// Generates a single `$deserialize` method which reads the current value as a
/// string, parses it with `str::parse` and hands the result over to the
/// visitor's `$visit` method.
///
/// The optional trailing token is placed before `self` in the generated
/// signature (the callers in this file forward their own `$($mut)?` there).
/// The surrounding `impl` must provide `read_string()` and have the `'de`
/// lifetime in scope.
macro_rules! deserialize_type {
    ($deserialize:ident => $visit:ident, $($mut:tt)?) => {
        fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // The text is parsed as-is, without unescaping: a valid numeric
            // representation does not contain anything that could be escaped.
            // The target type of `parse` is inferred from the `$visit` parameter.
            let parsed = self.read_string()?.parse()?;
            visitor.$visit(parsed)
        }
    };
}
1847
/// Generates the `deserialize_*` methods for scalar types (numbers, booleans,
/// characters, strings, byte arrays and identifiers) and the methods that only
/// forward to another `deserialize_*` method.
///
/// The optional token passed to the macro is placed before `self` in the
/// generated methods that call `read_string()`. The surrounding `impl` is
/// expected to provide `read_string`, `deserialize_any`, `deserialize_unit`,
/// `deserialize_seq` and `deserialize_struct`, because the generated code calls
/// them but does not define them.
macro_rules! deserialize_primitives {
    ($($mut:tt)?) => {
        // Signed integers
        deserialize_type!(deserialize_i8 => visit_i8, $($mut)?);
        deserialize_type!(deserialize_i16 => visit_i16, $($mut)?);
        deserialize_type!(deserialize_i32 => visit_i32, $($mut)?);
        deserialize_type!(deserialize_i64 => visit_i64, $($mut)?);

        // Unsigned integers
        deserialize_type!(deserialize_u8 => visit_u8, $($mut)?);
        deserialize_type!(deserialize_u16 => visit_u16, $($mut)?);
        deserialize_type!(deserialize_u32 => visit_u32, $($mut)?);
        deserialize_type!(deserialize_u64 => visit_u64, $($mut)?);

        // 128-bit integers are generated only inside `serde_if_integer128!`
        serde_if_integer128! {
            deserialize_type!(deserialize_i128 => visit_i128, $($mut)?);
            deserialize_type!(deserialize_u128 => visit_u128, $($mut)?);
        }

        // Floating point numbers
        deserialize_type!(deserialize_f32 => visit_f32, $($mut)?);
        deserialize_type!(deserialize_f64 => visit_f64, $($mut)?);

        fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // Recognition of the boolean literals is delegated to `str2bool`
            str2bool(&self.read_string()?, visitor)
        }

        /// A character is deserialized in the same way as a [string](#method.deserialize_str).
        #[inline]
        fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        fn deserialize_str<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // Hand over a borrowed string when possible, so the visitor is
            // given the owned variant only when the text is already owned
            match self.read_string()? {
                Cow::Borrowed(borrowed) => visitor.visit_borrowed_str(borrowed),
                Cow::Owned(owned) => visitor.visit_string(owned),
            }
        }

        /// An owned string is deserialized in the same way as a
        /// [non-owned](#method.deserialize_str) one.
        #[inline]
        fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Delegates to [`deserialize_any`](#method.deserialize_any).
        #[inline]
        fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_any(visitor)
        }

        /// Delegates to [`deserialize_bytes`](#method.deserialize_bytes).
        #[inline]
        fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_bytes(visitor)
        }

        /// A named unit is deserialized in the same way as an
        /// [unnamed unit](#method.deserialize_unit); the name is ignored.
        #[inline]
        fn deserialize_unit_struct<V>(
            self,
            _name: &'static str,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }

        /// A tuple is deserialized in the same way as a
        /// [sequence](#method.deserialize_seq); the length is ignored.
        #[inline]
        fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_seq(visitor)
        }

        /// A named tuple is deserialized in the same way as an
        /// [unnamed tuple](#method.deserialize_tuple); the name is ignored.
        #[inline]
        fn deserialize_tuple_struct<V>(
            self,
            _name: &'static str,
            len: usize,
            visitor: V,
        ) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_tuple(len, visitor)
        }

        /// Delegates to [`deserialize_struct`](#method.deserialize_struct),
        /// passing an empty name and an empty list of fields.
        #[inline]
        fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_struct("", &[], visitor)
        }

        /// An identifier is deserialized in the same way as a [string](#method.deserialize_str).
        #[inline]
        fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Delegates to [`deserialize_unit`](#method.deserialize_unit).
        #[inline]
        fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_unit(visitor)
        }
    };
}
1991
1992mod key;
1993mod map;
1994mod resolver;
1995mod simple_type;
1996mod text;
1997mod var;
1998
1999pub use crate::errors::serialize::DeError;
2000pub use resolver::{EntityResolver, PredefinedEntityResolver};
2001
2002use crate::{
2003    de::map::ElementMapAccess,
2004    encoding::Decoder,
2005    errors::Error,
2006    events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
2007    name::QName,
2008    reader::Reader,
2009};
2010use serde::de::{self, Deserialize, DeserializeOwned, DeserializeSeed, SeqAccess, Visitor};
2011use std::borrow::Cow;
2012#[cfg(feature = "overlapped-lists")]
2013use std::collections::VecDeque;
2014use std::io::BufRead;
2015use std::mem::replace;
2016#[cfg(feature = "overlapped-lists")]
2017use std::num::NonZeroUsize;
2018use std::ops::Deref;
2019
/// Special name (`$text`) for data represented by a text node or a CDATA node.
/// XML markup is not expected
pub(crate) const TEXT_KEY: &str = "$text";
/// Special name (`$value`) for data represented by any XML markup inside
pub(crate) const VALUE_KEY: &str = "$value";
2024
/// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
/// events. _Consecutive_ means that the events follow each other directly or
/// are separated only by (any number of) [`Comment`] or [`PI`] events.
///
/// Internally the text is stored in a `Cow<str>`. Cloning is cheap while the
/// text is borrowed, and copies the data when it is owned.
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Text<'a> {
    /// The text itself, either borrowed or owned
    text: Cow<'a, str>,
}

/// Gives read-only access to the content as a plain `str`.
impl<'a> Deref for Text<'a> {
    type Target = str;

    #[inline]
    fn deref(&self) -> &str {
        // `&Cow<str>` coerces to `&str`
        &self.text
    }
}

/// Wraps a string slice without copying it.
impl<'a> From<&'a str> for Text<'a> {
    #[inline]
    fn from(text: &'a str) -> Self {
        let text = Cow::Borrowed(text);
        Text { text }
    }
}
2058
2059////////////////////////////////////////////////////////////////////////////////////////////////////
2060
/// Simplified event which contains only those variants that are used by the deserializer
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DeEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Decoded and concatenated content of consecutive [`Text`] and [`CData`]
    /// events. _Consecutive_ means that the events follow each other directly
    /// or are separated only by (any number of) [`Comment`] or [`PI`] events.
    ///
    /// [`Text`]: Event::Text
    /// [`CData`]: Event::CData
    /// [`Comment`]: Event::Comment
    /// [`PI`]: Event::PI
    Text(Text<'a>),
    /// End of XML document.
    Eof,
}
2080
2081////////////////////////////////////////////////////////////////////////////////////////////////////
2082
/// Simplified event which contains only those variants that are used by the
/// deserializer, but in which [`Text`] events are not yet fully processed.
///
/// [`Text`] events should be trimmed if they are not surrounded by other
/// [`Text`] or [`CData`] events. This event contains the intermediate state of
/// a [`Text`] event, where it is trimmed from the start, but not from the end.
/// To trim the trailing spaces we should look ahead by one deserializer event
/// (i. e. skip all comments and processing instructions).
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PayloadEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document.
    Eof,
}
2109
2110impl<'a> PayloadEvent<'a> {
2111    /// Ensures that all data is owned to extend the object's lifetime if necessary.
2112    #[inline]
2113    fn into_owned(self) -> PayloadEvent<'static> {
2114        match self {
2115            PayloadEvent::Start(e) => PayloadEvent::Start(e.into_owned()),
2116            PayloadEvent::End(e) => PayloadEvent::End(e.into_owned()),
2117            PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()),
2118            PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()),
2119            PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()),
2120            PayloadEvent::Eof => PayloadEvent::Eof,
2121        }
2122    }
2123}
2124
/// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s.
/// [`PayloadEvent::Text`] events that are followed by any event except
/// [`PayloadEvent::Text`] or [`PayloadEvent::CData`] are trimmed from the end.
struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolver> {
    /// A source of low-level XML events
    reader: R,
    /// Intermediate event that could be returned by the next call to `next()`.
    /// If that is a `Text` event, then its leading spaces are already trimmed,
    /// but its trailing spaces are not. Before the event is returned, trimming
    /// of the trailing spaces could be necessary
    lookahead: Result<PayloadEvent<'i>, DeError>,

    /// Used to resolve unknown entities that would otherwise cause the parser
    /// to return an [`EscapeError::UnrecognizedEntity`] error.
    ///
    /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity
    entity_resolver: E,
}
2143
2144impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
2145    fn new(mut reader: R, entity_resolver: E) -> Self {
2146        // Lookahead by one event immediately, so we do not need to check in the
2147        // loop if we need lookahead or not
2148        let lookahead = reader.next();
2149
2150        Self {
2151            reader,
2152            lookahead,
2153            entity_resolver,
2154        }
2155    }
2156
2157    /// Returns `true` if all events was consumed
2158    const fn is_empty(&self) -> bool {
2159        matches!(self.lookahead, Ok(PayloadEvent::Eof))
2160    }
2161
2162    /// Read next event and put it in lookahead, return the current lookahead
2163    #[inline(always)]
2164    fn next_impl(&mut self) -> Result<PayloadEvent<'i>, DeError> {
2165        replace(&mut self.lookahead, self.reader.next())
2166    }
2167
2168    /// Returns `true` when next event is not a text event in any form.
2169    #[inline(always)]
2170    const fn current_event_is_last_text(&self) -> bool {
2171        // If next event is a text or CDATA, we should not trim trailing spaces
2172        !matches!(
2173            self.lookahead,
2174            Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_))
2175        )
2176    }
2177
2178    /// Read all consequent [`Text`] and [`CData`] events until non-text event
2179    /// occurs. Content of all events would be appended to `result` and returned
2180    /// as [`DeEvent::Text`].
2181    ///
2182    /// [`Text`]: PayloadEvent::Text
2183    /// [`CData`]: PayloadEvent::CData
2184    fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result<DeEvent<'i>, DeError> {
2185        loop {
2186            if self.current_event_is_last_text() {
2187                break;
2188            }
2189
2190            match self.next_impl()? {
2191                PayloadEvent::Text(mut e) => {
2192                    if self.current_event_is_last_text() {
2193                        // FIXME: Actually, we should trim after decoding text, but now we trim before
2194                        e.inplace_trim_end();
2195                    }
2196                    result
2197                        .to_mut()
2198                        .push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?);
2199                }
2200                PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
2201
2202                // SAFETY: current_event_is_last_text checks that event is Text or CData
2203                _ => unreachable!("Only `Text` and `CData` events can come here"),
2204            }
2205        }
2206        Ok(DeEvent::Text(Text { text: result }))
2207    }
2208
2209    /// Return an input-borrowing event.
2210    fn next(&mut self) -> Result<DeEvent<'i>, DeError> {
2211        loop {
2212            return match self.next_impl()? {
2213                PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
2214                PayloadEvent::End(e) => Ok(DeEvent::End(e)),
2215                PayloadEvent::Text(mut e) => {
2216                    if self.current_event_is_last_text() && e.inplace_trim_end() {
2217                        // FIXME: Actually, we should trim after decoding text, but now we trim before
2218                        continue;
2219                    }
2220                    self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?)
2221                }
2222                PayloadEvent::CData(e) => self.drain_text(e.decode()?),
2223                PayloadEvent::DocType(e) => {
2224                    self.entity_resolver
2225                        .capture(e)
2226                        .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?;
2227                    continue;
2228                }
2229                PayloadEvent::Eof => Ok(DeEvent::Eof),
2230            };
2231        }
2232    }
2233
2234    #[inline]
2235    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2236        match self.lookahead {
2237            // We pre-read event with the same name that is required to be skipped.
2238            // First call of `read_to_end` will end out pre-read event, the second
2239            // will consume other events
2240            Ok(PayloadEvent::Start(ref e)) if e.name() == name => {
2241                let result1 = self.reader.read_to_end(name);
2242                let result2 = self.reader.read_to_end(name);
2243
2244                // In case of error `next_impl` returns `Eof`
2245                let _ = self.next_impl();
2246                result1?;
2247                result2?;
2248            }
2249            // We pre-read event with the same name that is required to be skipped.
2250            // Because this is end event, we already consume the whole tree, so
2251            // nothing to do, just update lookahead
2252            Ok(PayloadEvent::End(ref e)) if e.name() == name => {
2253                let _ = self.next_impl();
2254            }
2255            Ok(_) => {
2256                let result = self.reader.read_to_end(name);
2257
2258                // In case of error `next_impl` returns `Eof`
2259                let _ = self.next_impl();
2260                result?;
2261            }
2262            // Read next lookahead event, unpack error from the current lookahead
2263            Err(_) => {
2264                self.next_impl()?;
2265            }
2266        }
2267        Ok(())
2268    }
2269
2270    #[inline]
2271    fn decoder(&self) -> Decoder {
2272        self.reader.decoder()
2273    }
2274}
2275
2276////////////////////////////////////////////////////////////////////////////////////////////////////
2277
2278/// Deserialize an instance of type `T` from a string of XML text.
2279pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
2280where
2281    T: Deserialize<'de>,
2282{
2283    let mut de = Deserializer::from_str(s);
2284    T::deserialize(&mut de)
2285}
2286
2287/// Deserialize from a reader. This method will do internal copies of data
2288/// readed from `reader`. If you want have a `&str` input and want to borrow
2289/// as much as possible, use [`from_str`].
2290pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
2291where
2292    R: BufRead,
2293    T: DeserializeOwned,
2294{
2295    let mut de = Deserializer::from_reader(reader);
2296    T::deserialize(&mut de)
2297}
2298
2299// TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean,
2300// valid boolean representations are only "true", "false", "1", and "0"
2301fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
2302where
2303    V: de::Visitor<'de>,
2304{
2305    match value {
2306        "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
2307            visitor.visit_bool(true)
2308        }
2309        "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => {
2310            visitor.visit_bool(false)
2311        }
2312        _ => Err(DeError::InvalidBoolean(value.into())),
2313    }
2314}
2315
2316fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
2317where
2318    V: Visitor<'de>,
2319{
2320    #[cfg(feature = "encoding")]
2321    {
2322        let value = decoder.decode(value)?;
2323        // No need to unescape because valid boolean representations cannot be escaped
2324        str2bool(value.as_ref(), visitor)
2325    }
2326
2327    #[cfg(not(feature = "encoding"))]
2328    {
2329        // No need to unescape because valid boolean representations cannot be escaped
2330        match value {
2331            b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => {
2332                visitor.visit_bool(true)
2333            }
2334            b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => {
2335                visitor.visit_bool(false)
2336            }
2337            e => Err(DeError::InvalidBoolean(decoder.decode(e)?.into())),
2338        }
2339    }
2340}
2341
2342////////////////////////////////////////////////////////////////////////////////////////////////////
2343
/// A structure that deserializes XML into Rust values.
pub struct Deserializer<'de, R, E: EntityResolver = PredefinedEntityResolver>
where
    R: XmlRead<'de>,
{
    /// An XML reader that streams events into this deserializer
    reader: XmlReader<'de, R, E>,

    /// When deserializing sequences, sometimes we have to skip unwanted events.
    /// Those events should be stored and then replayed. This is the replay
    /// buffer, which streams events while it is not empty. When it is exhausted,
    /// events will be requested from [`Self::reader`].
    #[cfg(feature = "overlapped-lists")]
    read: VecDeque<DeEvent<'de>>,
    /// When deserializing sequences, sometimes we have to skip events, because
    /// XML is tolerant to the order of elements and, even if the order is
    /// strictly specified in the XSD (using `xs:sequence`), most XML parsers
    /// allow order violations. That means that the elements forming a sequence
    /// could be interleaved with other elements that are not related to that
    /// sequence.
    ///
    /// In order to support this, the deserializer will scan events, skip the
    /// unwanted ones and store them here. After a call to
    /// [`Self::start_replay()`] all events are moved from this buffer to
    /// [`Self::read`].
    #[cfg(feature = "overlapped-lists")]
    write: VecDeque<DeEvent<'de>>,
    /// Maximum number of events that can be skipped when processing sequences
    /// that occur out-of-order. This field is used to prevent potential
    /// denial-of-service (DoS) attacks which could cause infinite memory
    /// consumption when parsing a very large amount of XML into a sequence field.
    #[cfg(feature = "overlapped-lists")]
    limit: Option<NonZeroUsize>,

    /// An event that was already requested from [`Self::reader`] by `peek()`,
    /// but was not yet consumed by `next()`
    #[cfg(not(feature = "overlapped-lists"))]
    peek: Option<DeEvent<'de>>,

    /// Buffer to store attribute name as a field name exposed to serde consumers
    key_buf: String,
}
2382
2383impl<'de, R, E> Deserializer<'de, R, E>
2384where
2385    R: XmlRead<'de>,
2386    E: EntityResolver,
2387{
2388    /// Create an XML deserializer from one of the possible quick_xml input sources.
2389    ///
2390    /// Typically it is more convenient to use one of these methods instead:
2391    ///
2392    ///  - [`Deserializer::from_str`]
2393    ///  - [`Deserializer::from_reader`]
2394    fn new(reader: R, entity_resolver: E) -> Self {
2395        Self {
2396            reader: XmlReader::new(reader, entity_resolver),
2397
2398            #[cfg(feature = "overlapped-lists")]
2399            read: VecDeque::new(),
2400            #[cfg(feature = "overlapped-lists")]
2401            write: VecDeque::new(),
2402            #[cfg(feature = "overlapped-lists")]
2403            limit: None,
2404
2405            #[cfg(not(feature = "overlapped-lists"))]
2406            peek: None,
2407
2408            key_buf: String::new(),
2409        }
2410    }
2411
2412    /// Returns `true` if all events was consumed.
2413    pub fn is_empty(&self) -> bool {
2414        #[cfg(feature = "overlapped-lists")]
2415        if self.read.is_empty() {
2416            return self.reader.is_empty();
2417        }
2418        #[cfg(not(feature = "overlapped-lists"))]
2419        if self.peek.is_none() {
2420            return self.reader.is_empty();
2421        }
2422        false
2423    }
2424
    /// Returns a shared reference to the underlying XML reader, i. e. to the
    /// source of XML events that this deserializer was created over.
    ///
    /// This can be used, for example, to find out the position of an error:
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use serde::Deserialize;
    /// use quick_xml::de::Deserializer;
    /// use quick_xml::Reader;
    ///
    /// #[derive(Deserialize)]
    /// struct SomeStruct {
    ///     field1: String,
    ///     field2: String,
    /// }
    ///
    /// // Try to deserialize from broken XML
    /// let mut de = Deserializer::from_str(
    ///     "<SomeStruct><field1><field2></SomeStruct>"
    /// //   0                           ^= 28        ^= 41
    /// );
    ///
    /// let err = SomeStruct::deserialize(&mut de);
    /// assert!(err.is_err());
    ///
    /// let reader: &Reader<_> = de.get_ref().get_ref();
    ///
    /// assert_eq!(reader.error_position(), 28);
    /// assert_eq!(reader.buffer_position(), 41);
    /// ```
    pub const fn get_ref(&self) -> &R {
        &self.reader.reader
    }
2456
    /// Sets the maximum number of events that could be skipped during
    /// deserialization of sequences and returns `self` to allow chaining.
    /// `None` means that no limit is set.
    ///
    /// If `<element>` contains more nested elements, `$text` or CDATA nodes
    /// than specified, then [`DeError::TooManyEvents`] will be returned during
    /// deserialization of a sequence field (any type that uses [`deserialize_seq`]
    /// for the deserialization, for example, `Vec<T>`).
    ///
    /// This method can be used to prevent a [DoS] attack and infinite memory
    /// consumption when parsing a very large XML to a sequence field.
    ///
    /// It is strongly recommended to set the limit to some value when you parse
    /// data from untrusted sources. You should choose a value that covers the
    /// number of events your typical XMLs can have _between_ different elements
    /// that correspond to the same sequence.
    ///
    /// # Examples
    ///
    /// Let's imagine that we deserialize such a structure:
    /// ```
    /// struct List {
    ///   item: Vec<()>,
    /// }
    /// ```
    ///
    /// The XML that we try to parse looks like this:
    /// ```xml
    /// <any-name>
    ///   <item/>
    ///   <!-- Bufferization starts at this point -->
    ///   <another-item>
    ///     <some-element>with text</some-element>
    ///     <yet-another-element/>
    ///   </another-item>
    ///   <!-- Buffer will be emptied at this point; 7 events were buffered -->
    ///   <item/>
    ///   <!-- There is nothing to buffer, because elements follows each other -->
    ///   <item/>
    /// </any-name>
    /// ```
    ///
    /// There, when we deserialize the `item` field, we need to buffer 7 events
    /// before we can deserialize the second `<item/>`:
    ///
    /// - `<another-item>`
    /// - `<some-element>`
    /// - `$text(with text)`
    /// - `</some-element>`
    /// - `<yet-another-element/>` (virtual start event)
    /// - `<yet-another-element/>` (virtual end event)
    /// - `</another-item>`
    ///
    /// Note that `<yet-another-element/>` is internally represented as 2 events:
    /// one for the start tag and one for the end tag. In the future this can be
    /// eliminated, but for now we use the [auto-expanding feature] of a reader,
    /// because this simplifies the deserializer code.
    ///
    /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
    /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
    /// [auto-expanding feature]: crate::reader::Config::expand_empty_elements
    #[cfg(feature = "overlapped-lists")]
    pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
        self.limit = limit;
        self
    }
2521
2522    #[cfg(feature = "overlapped-lists")]
2523    fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2524        if self.read.is_empty() {
2525            self.read.push_front(self.reader.next()?);
2526        }
2527        if let Some(event) = self.read.front() {
2528            return Ok(event);
2529        }
2530        // SAFETY: `self.read` was filled in the code above.
2531        // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2532        // if unsafe code will be allowed
2533        unreachable!()
2534    }
2535    #[cfg(not(feature = "overlapped-lists"))]
2536    fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
2537        if self.peek.is_none() {
2538            self.peek = Some(self.reader.next()?);
2539        }
2540        match self.peek.as_ref() {
2541            Some(v) => Ok(v),
2542            // SAFETY: a `None` variant for `self.peek` would have been replaced
2543            // by a `Some` variant in the code above.
2544            // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
2545            // if unsafe code will be allowed
2546            None => unreachable!(),
2547        }
2548    }
2549
2550    fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
2551        // Replay skipped or peeked events
2552        #[cfg(feature = "overlapped-lists")]
2553        if let Some(event) = self.read.pop_front() {
2554            return Ok(event);
2555        }
2556        #[cfg(not(feature = "overlapped-lists"))]
2557        if let Some(e) = self.peek.take() {
2558            return Ok(e);
2559        }
2560        self.reader.next()
2561    }
2562
    /// Returns the mark after which all events skipped by a [`Self::skip()`]
    /// call should be replayed after calling [`Self::start_replay()`].
    ///
    /// The mark is the current length of the buffer of skipped events.
    #[cfg(feature = "overlapped-lists")]
    #[inline]
    #[must_use = "returned checkpoint should be used in `start_replay`"]
    fn skip_checkpoint(&self) -> usize {
        self.write.len()
    }
2571
2572    /// Extracts XML tree of events from and stores them in the skipped events
2573    /// buffer from which they can be retrieved later. You MUST call
2574    /// [`Self::start_replay()`] after calling this to give access to the skipped
2575    /// events and release internal buffers.
2576    #[cfg(feature = "overlapped-lists")]
2577    fn skip(&mut self) -> Result<(), DeError> {
2578        let event = self.next()?;
2579        self.skip_event(event)?;
2580        match self.write.back() {
2581            // Skip all subtree, if we skip a start event
2582            Some(DeEvent::Start(e)) => {
2583                let end = e.name().as_ref().to_owned();
2584                let mut depth = 0;
2585                loop {
2586                    let event = self.next()?;
2587                    match event {
2588                        DeEvent::Start(ref e) if e.name().as_ref() == end => {
2589                            self.skip_event(event)?;
2590                            depth += 1;
2591                        }
2592                        DeEvent::End(ref e) if e.name().as_ref() == end => {
2593                            self.skip_event(event)?;
2594                            if depth == 0 {
2595                                break;
2596                            }
2597                            depth -= 1;
2598                        }
2599                        DeEvent::Eof => {
2600                            self.skip_event(event)?;
2601                            break;
2602                        }
2603                        _ => self.skip_event(event)?,
2604                    }
2605                }
2606            }
2607            _ => (),
2608        }
2609        Ok(())
2610    }
2611
2612    #[cfg(feature = "overlapped-lists")]
2613    #[inline]
2614    fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> {
2615        if let Some(max) = self.limit {
2616            if self.write.len() >= max.get() {
2617                return Err(DeError::TooManyEvents(max));
2618            }
2619        }
2620        self.write.push_back(event);
2621        Ok(())
2622    }
2623
2624    /// Moves buffered events, skipped after given `checkpoint` from [`Self::write`]
2625    /// skip buffer to [`Self::read`] buffer.
2626    ///
2627    /// After calling this method, [`Self::peek()`] and [`Self::next()`] starts
2628    /// return events that was skipped previously by calling [`Self::skip()`],
2629    /// and only when all that events will be consumed, the deserializer starts
2630    /// to drain events from underlying reader.
2631    ///
2632    /// This method MUST be called if any number of [`Self::skip()`] was called
2633    /// after [`Self::new()`] or `start_replay()` or you'll lost events.
2634    #[cfg(feature = "overlapped-lists")]
2635    fn start_replay(&mut self, checkpoint: usize) {
2636        if checkpoint == 0 {
2637            self.write.append(&mut self.read);
2638            std::mem::swap(&mut self.read, &mut self.write);
2639        } else {
2640            let mut read = self.write.split_off(checkpoint);
2641            read.append(&mut self.read);
2642            self.read = read;
2643        }
2644    }
2645
2646    #[inline]
2647    fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
2648        self.read_string_impl(true)
2649    }
2650
2651    /// Consumes consequent [`Text`] and [`CData`] (both a referred below as a _text_)
2652    /// events, merge them into one string. If there are no such events, returns
2653    /// an empty string.
2654    ///
2655    /// If `allow_start` is `false`, then only text events are consumed, for other
2656    /// events an error is returned (see table below).
2657    ///
2658    /// If `allow_start` is `true`, then two or three events are expected:
2659    /// - [`DeEvent::Start`];
2660    /// - _(optional)_ [`DeEvent::Text`] which content is returned;
2661    /// - [`DeEvent::End`]. If text event was missed, an empty string is returned.
2662    ///
2663    /// Corresponding events are consumed.
2664    ///
2665    /// # Handling events
2666    ///
2667    /// The table below shows how events is handled by this method:
2668    ///
2669    /// |Event             |XML                        |Handling
2670    /// |------------------|---------------------------|----------------------------------------
2671    /// |[`DeEvent::Start`]|`<tag>...</tag>`           |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
2672    /// |[`DeEvent::End`]  |`</any-tag>`               |This is impossible situation, the method will panic if it happens
2673    /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged
2674    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2675    ///
2676    /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
2677    ///
2678    /// |Event             |XML                        |Handling
2679    /// |------------------|---------------------------|----------------------------------------------------------------------------------
2680    /// |[`DeEvent::Start`]|`<any-tag>...</any-tag>`   |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
2681    /// |[`DeEvent::End`]  |`</tag>`                   |Returns an empty slice. The reader guarantee that tag will match the open one
2682    /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, expects the `</tag>` after that
2683    /// |[`DeEvent::Eof`]  |                           |Emits [`InvalidXml(IllFormed(MissingEndTag))`](DeError::InvalidXml)
2684    ///
2685    /// [`Text`]: Event::Text
2686    /// [`CData`]: Event::CData
2687    fn read_string_impl(&mut self, allow_start: bool) -> Result<Cow<'de, str>, DeError> {
2688        match self.next()? {
2689            DeEvent::Text(e) => Ok(e.text),
2690            // allow one nested level
2691            DeEvent::Start(e) if allow_start => self.read_text(e.name()),
2692            DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2693            // SAFETY: The reader is guaranteed that we don't have unmatched tags
2694            // If we here, then out deserializer has a bug
2695            DeEvent::End(e) => unreachable!("{:?}", e),
2696            DeEvent::Eof => Err(DeError::UnexpectedEof),
2697        }
2698    }
2699    /// Consumes one [`DeEvent::Text`] event and ensures that it is followed by the
2700    /// [`DeEvent::End`] event.
2701    ///
2702    /// # Parameters
2703    /// - `name`: name of a tag opened before reading text. The corresponding end tag
2704    ///   should present in input just after the text
2705    fn read_text(&mut self, name: QName) -> Result<Cow<'de, str>, DeError> {
2706        match self.next()? {
2707            DeEvent::Text(e) => match self.next()? {
2708                // The matching tag name is guaranteed by the reader
2709                DeEvent::End(_) => Ok(e.text),
2710                // SAFETY: Cannot be two consequent Text events, they would be merged into one
2711                DeEvent::Text(_) => unreachable!(),
2712                DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
2713                DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
2714            },
2715            // We can get End event in case of `<tag></tag>` or `<tag/>` input
2716            // Return empty text in that case
2717            // The matching tag name is guaranteed by the reader
2718            DeEvent::End(_) => Ok("".into()),
2719            DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())),
2720            DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()),
2721        }
2722    }
2723
2724    /// Drops all events until event with [name](BytesEnd::name()) `name` won't be
2725    /// dropped. This method should be called after [`Self::next()`]
2726    #[cfg(feature = "overlapped-lists")]
2727    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2728        let mut depth = 0;
2729        loop {
2730            match self.read.pop_front() {
2731                Some(DeEvent::Start(e)) if e.name() == name => {
2732                    depth += 1;
2733                }
2734                Some(DeEvent::End(e)) if e.name() == name => {
2735                    if depth == 0 {
2736                        break;
2737                    }
2738                    depth -= 1;
2739                }
2740
2741                // Drop all other skipped events
2742                Some(_) => continue,
2743
2744                // If we do not have skipped events, use effective reading that will
2745                // not allocate memory for events
2746                None => {
2747                    // We should close all opened tags, because we could buffer
2748                    // Start events, but not the corresponding End events. So we
2749                    // keep reading events until we exit all nested tags.
2750                    // `read_to_end()` will return an error if an Eof was encountered
2751                    // preliminary (in case of malformed XML).
2752                    //
2753                    // <tag><tag></tag></tag>
2754                    // ^^^^^^^^^^             - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2
2755                    //           ^^^^^^       - read by the first call of `self.reader.read_to_end()`
2756                    //                 ^^^^^^ - read by the second call of `self.reader.read_to_end()`
2757                    loop {
2758                        self.reader.read_to_end(name)?;
2759                        if depth == 0 {
2760                            break;
2761                        }
2762                        depth -= 1;
2763                    }
2764                    break;
2765                }
2766            }
2767        }
2768        Ok(())
2769    }
2770    #[cfg(not(feature = "overlapped-lists"))]
2771    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
2772        // First one might be in self.peek
2773        match self.next()? {
2774            DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
2775            DeEvent::End(e) if e.name() == name => return Ok(()),
2776            _ => (),
2777        }
2778        self.reader.read_to_end(name)
2779    }
2780}
2781
2782impl<'de> Deserializer<'de, SliceReader<'de>> {
2783    /// Create new deserializer that will borrow data from the specified string.
2784    ///
2785    /// Deserializer created with this method will not resolve custom entities.
2786    #[allow(clippy::should_implement_trait)]
2787    pub fn from_str(source: &'de str) -> Self {
2788        Self::from_str_with_resolver(source, PredefinedEntityResolver)
2789    }
2790}
2791
2792impl<'de, E> Deserializer<'de, SliceReader<'de>, E>
2793where
2794    E: EntityResolver,
2795{
2796    /// Create new deserializer that will borrow data from the specified string
2797    /// and use specified entity resolver.
2798    pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
2799        let mut reader = Reader::from_str(source);
2800        let config = reader.config_mut();
2801        config.expand_empty_elements = true;
2802
2803        Self::new(
2804            SliceReader {
2805                reader,
2806                start_trimmer: StartTrimmer::default(),
2807            },
2808            entity_resolver,
2809        )
2810    }
2811}
2812
2813impl<'de, R> Deserializer<'de, IoReader<R>>
2814where
2815    R: BufRead,
2816{
2817    /// Create new deserializer that will copy data from the specified reader
2818    /// into internal buffer.
2819    ///
2820    /// If you already have a string use [`Self::from_str`] instead, because it
2821    /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2822    /// UTF-8, you can decode it first before using [`from_str`].
2823    ///
2824    /// Deserializer created with this method will not resolve custom entities.
2825    pub fn from_reader(reader: R) -> Self {
2826        Self::with_resolver(reader, PredefinedEntityResolver)
2827    }
2828}
2829
2830impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
2831where
2832    R: BufRead,
2833    E: EntityResolver,
2834{
2835    /// Create new deserializer that will copy data from the specified reader
2836    /// into internal buffer and use specified entity resolver.
2837    ///
2838    /// If you already have a string use [`Self::from_str`] instead, because it
2839    /// will borrow instead of copy. If you have `&[u8]` which is known to represent
2840    /// UTF-8, you can decode it first before using [`from_str`].
2841    pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
2842        let mut reader = Reader::from_reader(reader);
2843        let config = reader.config_mut();
2844        config.expand_empty_elements = true;
2845
2846        Self::new(
2847            IoReader {
2848                reader,
2849                start_trimmer: StartTrimmer::default(),
2850                buf: Vec::new(),
2851            },
2852            entity_resolver,
2853        )
2854    }
2855}
2856
2857impl<'de, 'a, R, E> de::Deserializer<'de> for &'a mut Deserializer<'de, R, E>
2858where
2859    R: XmlRead<'de>,
2860    E: EntityResolver,
2861{
2862    type Error = DeError;
2863
2864    deserialize_primitives!();
2865
2866    fn deserialize_struct<V>(
2867        self,
2868        _name: &'static str,
2869        fields: &'static [&'static str],
2870        visitor: V,
2871    ) -> Result<V::Value, DeError>
2872    where
2873        V: Visitor<'de>,
2874    {
2875        match self.next()? {
2876            DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self, e, fields)?),
2877            // SAFETY: The reader is guaranteed that we don't have unmatched tags
2878            // If we here, then out deserializer has a bug
2879            DeEvent::End(e) => unreachable!("{:?}", e),
2880            // Deserializer methods are only hints, if deserializer could not satisfy
2881            // request, it should return the data that it has. It is responsibility
2882            // of a Visitor to return an error if it does not understand the data
2883            DeEvent::Text(e) => match e.text {
2884                Cow::Borrowed(s) => visitor.visit_borrowed_str(s),
2885                Cow::Owned(s) => visitor.visit_string(s),
2886            },
2887            DeEvent::Eof => Err(DeError::UnexpectedEof),
2888        }
2889    }
2890
2891    /// Unit represented in XML as a `xs:element` or text/CDATA content.
2892    /// Any content inside `xs:element` is ignored and skipped.
2893    ///
2894    /// Produces unit struct from any of following inputs:
2895    /// - any `<tag ...>...</tag>`
2896    /// - any `<tag .../>`
2897    /// - any consequent text / CDATA content (can consist of several parts
2898    ///   delimited by comments and processing instructions)
2899    ///
2900    /// # Events handling
2901    ///
2902    /// |Event             |XML                        |Handling
2903    /// |------------------|---------------------------|-------------------------------------------
2904    /// |[`DeEvent::Start`]|`<tag>...</tag>`           |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event
2905    /// |[`DeEvent::End`]  |`</tag>`                   |This is impossible situation, the method will panic if it happens
2906    /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored
2907    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
2908    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
2909    where
2910        V: Visitor<'de>,
2911    {
2912        match self.next()? {
2913            DeEvent::Start(s) => {
2914                self.read_to_end(s.name())?;
2915                visitor.visit_unit()
2916            }
2917            DeEvent::Text(_) => visitor.visit_unit(),
2918            // SAFETY: The reader is guaranteed that we don't have unmatched tags
2919            // If we here, then out deserializer has a bug
2920            DeEvent::End(e) => unreachable!("{:?}", e),
2921            DeEvent::Eof => Err(DeError::UnexpectedEof),
2922        }
2923    }
2924
2925    /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
2926    /// with the same deserializer.
2927    fn deserialize_newtype_struct<V>(
2928        self,
2929        _name: &'static str,
2930        visitor: V,
2931    ) -> Result<V::Value, DeError>
2932    where
2933        V: Visitor<'de>,
2934    {
2935        visitor.visit_newtype_struct(self)
2936    }
2937
2938    fn deserialize_enum<V>(
2939        self,
2940        _name: &'static str,
2941        _variants: &'static [&'static str],
2942        visitor: V,
2943    ) -> Result<V::Value, DeError>
2944    where
2945        V: Visitor<'de>,
2946    {
2947        visitor.visit_enum(var::EnumAccess::new(self))
2948    }
2949
2950    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
2951    where
2952        V: Visitor<'de>,
2953    {
2954        visitor.visit_seq(self)
2955    }
2956
2957    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
2958    where
2959        V: Visitor<'de>,
2960    {
2961        match self.peek()? {
2962            DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
2963            DeEvent::Eof => visitor.visit_none(),
2964            _ => visitor.visit_some(self),
2965        }
2966    }
2967
2968    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
2969    where
2970        V: Visitor<'de>,
2971    {
2972        match self.peek()? {
2973            DeEvent::Text(_) => self.deserialize_str(visitor),
2974            _ => self.deserialize_map(visitor),
2975        }
2976    }
2977}
2978
2979/// An accessor to sequence elements forming a value for top-level sequence of XML
2980/// elements.
2981///
2982/// Technically, multiple top-level elements violates XML rule of only one top-level
2983/// element, but we consider this as several concatenated XML documents.
2984impl<'de, 'a, R, E> SeqAccess<'de> for &'a mut Deserializer<'de, R, E>
2985where
2986    R: XmlRead<'de>,
2987    E: EntityResolver,
2988{
2989    type Error = DeError;
2990
2991    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
2992    where
2993        T: DeserializeSeed<'de>,
2994    {
2995        match self.peek()? {
2996            DeEvent::Eof => {
2997                // We need to consume event in order to self.is_empty() worked
2998                self.next()?;
2999                Ok(None)
3000            }
3001
3002            // Start(tag), End(tag), Text
3003            _ => seed.deserialize(&mut **self).map(Some),
3004        }
3005    }
3006}
3007
3008////////////////////////////////////////////////////////////////////////////////////////////////////
3009
/// Helper struct that holds the state of the algorithm which converts raw
/// events to semi-trimmed events. The algorithm does not depend on the way
/// in which events are read.
struct StartTrimmer {
    /// If `true`, then leading whitespace will be removed from the next returned
    /// [`Event::Text`]. This field is set to `true` after reading each event
    /// except [`Event::Text`] and [`Event::CData`], so [`Event::Text`] events
    /// read right after them are not trimmed.
    trim_start: bool,
}
3020
3021impl StartTrimmer {
3022    /// Converts raw reader's event into a payload event.
3023    /// Returns `None`, if event should be skipped.
3024    #[inline(always)]
3025    fn trim<'a>(&mut self, event: Event<'a>) -> Option<PayloadEvent<'a>> {
3026        let (event, trim_next_event) = match event {
3027            Event::DocType(e) => (PayloadEvent::DocType(e), true),
3028            Event::Start(e) => (PayloadEvent::Start(e), true),
3029            Event::End(e) => (PayloadEvent::End(e), true),
3030            Event::Eof => (PayloadEvent::Eof, true),
3031
3032            // Do not trim next text event after Text or CDATA event
3033            Event::CData(e) => (PayloadEvent::CData(e), false),
3034            Event::Text(mut e) => {
3035                // If event is empty after trimming, skip it
3036                if self.trim_start && e.inplace_trim_start() {
3037                    return None;
3038                }
3039                (PayloadEvent::Text(e), false)
3040            }
3041
3042            _ => return None,
3043        };
3044        self.trim_start = trim_next_event;
3045        Some(event)
3046    }
3047}
3048
3049impl Default for StartTrimmer {
3050    #[inline]
3051    fn default() -> Self {
3052        Self { trim_start: true }
3053    }
3054}
3055
3056////////////////////////////////////////////////////////////////////////////////////////////////////
3057
3058/// Trait used by the deserializer for iterating over input. This is manually
3059/// "specialized" for iterating over `&[u8]`.
3060///
3061/// You do not need to implement this trait, it is needed to abstract from
3062/// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in
3063/// deserializer
3064pub trait XmlRead<'i> {
3065    /// Return an input-borrowing event.
3066    fn next(&mut self) -> Result<PayloadEvent<'i>, DeError>;
3067
3068    /// Skips until end element is found. Unlike `next()` it will not allocate
3069    /// when it cannot satisfy the lifetime.
3070    fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;
3071
3072    /// A copy of the reader's decoder used to decode strings.
3073    fn decoder(&self) -> Decoder;
3074}
3075
3076/// XML input source that reads from a std::io input stream.
3077///
3078/// You cannot create it, it is created automatically when you call
3079/// [`Deserializer::from_reader`]
3080pub struct IoReader<R: BufRead> {
3081    reader: Reader<R>,
3082    start_trimmer: StartTrimmer,
3083    buf: Vec<u8>,
3084}
3085
3086impl<R: BufRead> IoReader<R> {
3087    /// Returns the underlying XML reader.
3088    ///
3089    /// ```
3090    /// # use pretty_assertions::assert_eq;
3091    /// use serde::Deserialize;
3092    /// use std::io::Cursor;
3093    /// use quick_xml::de::Deserializer;
3094    /// use quick_xml::Reader;
3095    ///
3096    /// #[derive(Deserialize)]
3097    /// struct SomeStruct {
3098    ///     field1: String,
3099    ///     field2: String,
3100    /// }
3101    ///
3102    /// // Try to deserialize from broken XML
3103    /// let mut de = Deserializer::from_reader(Cursor::new(
3104    ///     "<SomeStruct><field1><field2></SomeStruct>"
3105    /// //   0                           ^= 28        ^= 41
3106    /// ));
3107    ///
3108    /// let err = SomeStruct::deserialize(&mut de);
3109    /// assert!(err.is_err());
3110    ///
3111    /// let reader: &Reader<Cursor<&str>> = de.get_ref().get_ref();
3112    ///
3113    /// assert_eq!(reader.error_position(), 28);
3114    /// assert_eq!(reader.buffer_position(), 41);
3115    /// ```
3116    pub const fn get_ref(&self) -> &Reader<R> {
3117        &self.reader
3118    }
3119}
3120
3121impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
3122    fn next(&mut self) -> Result<PayloadEvent<'static>, DeError> {
3123        loop {
3124            self.buf.clear();
3125
3126            let event = self.reader.read_event_into(&mut self.buf)?;
3127            if let Some(event) = self.start_trimmer.trim(event) {
3128                return Ok(event.into_owned());
3129            }
3130        }
3131    }
3132
3133    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3134        match self.reader.read_to_end_into(name, &mut self.buf) {
3135            Err(e) => Err(e.into()),
3136            Ok(_) => Ok(()),
3137        }
3138    }
3139
3140    fn decoder(&self) -> Decoder {
3141        self.reader.decoder()
3142    }
3143}
3144
3145/// XML input source that reads from a slice of bytes and can borrow from it.
3146///
3147/// You cannot create it, it is created automatically when you call
3148/// [`Deserializer::from_str`].
3149pub struct SliceReader<'de> {
3150    reader: Reader<&'de [u8]>,
3151    start_trimmer: StartTrimmer,
3152}
3153
3154impl<'de> SliceReader<'de> {
3155    /// Returns the underlying XML reader.
3156    ///
3157    /// ```
3158    /// # use pretty_assertions::assert_eq;
3159    /// use serde::Deserialize;
3160    /// use quick_xml::de::Deserializer;
3161    /// use quick_xml::Reader;
3162    ///
3163    /// #[derive(Deserialize)]
3164    /// struct SomeStruct {
3165    ///     field1: String,
3166    ///     field2: String,
3167    /// }
3168    ///
3169    /// // Try to deserialize from broken XML
3170    /// let mut de = Deserializer::from_str(
3171    ///     "<SomeStruct><field1><field2></SomeStruct>"
3172    /// //   0                           ^= 28        ^= 41
3173    /// );
3174    ///
3175    /// let err = SomeStruct::deserialize(&mut de);
3176    /// assert!(err.is_err());
3177    ///
3178    /// let reader: &Reader<&[u8]> = de.get_ref().get_ref();
3179    ///
3180    /// assert_eq!(reader.error_position(), 28);
3181    /// assert_eq!(reader.buffer_position(), 41);
3182    /// ```
3183    pub const fn get_ref(&self) -> &Reader<&'de [u8]> {
3184        &self.reader
3185    }
3186}
3187
3188impl<'de> XmlRead<'de> for SliceReader<'de> {
3189    fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
3190        loop {
3191            let event = self.reader.read_event()?;
3192            if let Some(event) = self.start_trimmer.trim(event) {
3193                return Ok(event);
3194            }
3195        }
3196    }
3197
3198    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
3199        match self.reader.read_to_end(name) {
3200            Err(e) => Err(e.into()),
3201            Ok(_) => Ok(()),
3202        }
3203    }
3204
3205    fn decoder(&self) -> Decoder {
3206        self.reader.decoder()
3207    }
3208}
3209
3210#[cfg(test)]
3211mod tests {
3212    use super::*;
3213    use crate::errors::IllFormedError;
3214    use pretty_assertions::assert_eq;
3215
3216    fn make_de<'de>(source: &'de str) -> Deserializer<'de, SliceReader<'de>> {
3217        dbg!(source);
3218        Deserializer::from_str(source)
3219    }
3220
3221    #[cfg(feature = "overlapped-lists")]
3222    mod skip {
3223        use super::*;
3224        use crate::de::DeEvent::*;
3225        use crate::events::BytesEnd;
3226        use pretty_assertions::assert_eq;
3227
        /// Checks that `peek()` and `next()` behave correctly after `skip()`:
        /// skipped events are moved to the `write` buffer, `start_replay()`
        /// moves them to the `read` buffer, and after that they are returned
        /// before the events that are still in the underlying reader.
        #[test]
        fn read_and_peek() {
            let mut de = make_de(
                r#"
                <root>
                    <inner>
                        text
                        <inner/>
                    </inner>
                    <next/>
                    <target/>
                </root>
                "#,
            );

            // Initial conditions - both buffers are empty
            assert_eq!(de.read, vec![]);
            assert_eq!(de.write, vec![]);

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner")));

            // Mark that start_replay() should begin replay from this point
            let checkpoint = de.skip_checkpoint();
            assert_eq!(checkpoint, 0);

            // Should skip the first <inner> tree. The peeked event is taken from
            // the `read` buffer, so that buffer becomes empty
            de.skip().unwrap();
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("inner")),
                    Text("text".into()),
                    Start(BytesStart::new("inner")),
                    End(BytesEnd::new("inner")),
                    End(BytesEnd::new("inner")),
                ]
            );

            // Consume <next/>. Now unconsumed XML looks like:
            //
            //   <inner>
            //     text
            //     <inner/>
            //   </inner>
            //   <target/>
            // </root>
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("next")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("next")));

            // We have finished writing. The next call to `next()` should start
            // replaying these events:
            //
            //   <inner>
            //     text
            //     <inner/>
            //   </inner>
            //
            // and after that stream these events:
            //
            //   <target/>
            // </root>
            de.start_replay(checkpoint);
            assert_eq!(
                de.read,
                vec![
                    Start(BytesStart::new("inner")),
                    Text("text".into()),
                    Start(BytesStart::new("inner")),
                    End(BytesEnd::new("inner")),
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(de.write, vec![]);
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));

            // Mark that start_replay() should begin replay from this point
            let checkpoint = de.skip_checkpoint();
            assert_eq!(checkpoint, 0);

            // Skip the `$text` node; the nested <inner/> after it is consumed below
            de.skip().unwrap();
            assert_eq!(
                de.read,
                vec![
                    Start(BytesStart::new("inner")),
                    End(BytesEnd::new("inner")),
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(
                de.write,
                vec![
                    // This comment is here to keep the same formatting of both arrays,
                    // otherwise rustfmt suggests to put it on one line
                    Text("text".into()),
                ]
            );

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));

            // We have finished writing. The next call to `next()` should start
            // replaying these events:
            //
            //     text
            //   </inner>
            //
            // and after that stream these events:
            //
            //   <target/>
            // </root>
            de.start_replay(checkpoint);
            assert_eq!(
                de.read,
                vec![
                    // This comment is here to keep the same formatting as others,
                    // otherwise rustfmt suggests to put it on one line
                    Text("text".into()),
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(de.write, vec![]);
            assert_eq!(de.next().unwrap(), Text("text".into()));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
            assert_eq!(de.next().unwrap(), Eof);
        }
3358
3359        /// Checks that `read_to_end()` behaves correctly after `skip()`
3360        #[test]
3361        fn read_to_end() {
3362            let mut de = make_de(
3363                r#"
3364                <root>
3365                    <skip>
3366                        text
3367                        <skip/>
3368                    </skip>
3369                    <target>
3370                        <target/>
3371                    </target>
3372                </root>
3373                "#,
3374            );
3375
3376            // Initial conditions - both are empty
3377            assert_eq!(de.read, vec![]);
3378            assert_eq!(de.write, vec![]);
3379
3380            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3381
3382            // Mark that start_replay() should begin replay from this point
3383            let checkpoint = de.skip_checkpoint();
3384            assert_eq!(checkpoint, 0);
3385
3386            // Skip the <skip> tree
3387            de.skip().unwrap();
3388            assert_eq!(de.read, vec![]);
3389            assert_eq!(
3390                de.write,
3391                vec![
3392                    Start(BytesStart::new("skip")),
3393                    Text("text".into()),
3394                    Start(BytesStart::new("skip")),
3395                    End(BytesEnd::new("skip")),
3396                    End(BytesEnd::new("skip")),
3397                ]
3398            );
3399
3400            // Drop all events that represents <target> tree. Now unconsumed XML looks like:
3401            //
3402            //   <skip>
3403            //     text
3404            //     <skip/>
3405            //   </skip>
3406            // </root>
3407            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
3408            de.read_to_end(QName(b"target")).unwrap();
3409            assert_eq!(de.read, vec![]);
3410            assert_eq!(
3411                de.write,
3412                vec![
3413                    Start(BytesStart::new("skip")),
3414                    Text("text".into()),
3415                    Start(BytesStart::new("skip")),
3416                    End(BytesEnd::new("skip")),
3417                    End(BytesEnd::new("skip")),
3418                ]
3419            );
3420
3421            // We finish writing. Next call to `next()` should start replay that messages:
3422            //
3423            //   <skip>
3424            //     text
3425            //     <skip/>
3426            //   </skip>
3427            //
3428            // and after that stream that messages:
3429            //
3430            // </root>
3431            de.start_replay(checkpoint);
3432            assert_eq!(
3433                de.read,
3434                vec![
3435                    Start(BytesStart::new("skip")),
3436                    Text("text".into()),
3437                    Start(BytesStart::new("skip")),
3438                    End(BytesEnd::new("skip")),
3439                    End(BytesEnd::new("skip")),
3440                ]
3441            );
3442            assert_eq!(de.write, vec![]);
3443
3444            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip")));
3445            de.read_to_end(QName(b"skip")).unwrap();
3446
3447            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3448            assert_eq!(de.next().unwrap(), Eof);
3449        }
3450
3451        /// Checks that replay replayes only part of events
3452        /// Test for https://github.com/tafia/quick-xml/issues/435
3453        #[test]
3454        fn partial_replay() {
3455            let mut de = make_de(
3456                r#"
3457                <root>
3458                    <skipped-1/>
3459                    <skipped-2/>
3460                    <inner>
3461                        <skipped-3/>
3462                        <skipped-4/>
3463                        <target-2/>
3464                    </inner>
3465                    <target-1/>
3466                </root>
3467                "#,
3468            );
3469
3470            // Initial conditions - both are empty
3471            assert_eq!(de.read, vec![]);
3472            assert_eq!(de.write, vec![]);
3473
3474            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3475
3476            // start_replay() should start replay from this point
3477            let checkpoint1 = de.skip_checkpoint();
3478            assert_eq!(checkpoint1, 0);
3479
3480            // Should skip first and second <skipped-N/> elements
3481            de.skip().unwrap(); // skipped-1
3482            de.skip().unwrap(); // skipped-2
3483            assert_eq!(de.read, vec![]);
3484            assert_eq!(
3485                de.write,
3486                vec![
3487                    Start(BytesStart::new("skipped-1")),
3488                    End(BytesEnd::new("skipped-1")),
3489                    Start(BytesStart::new("skipped-2")),
3490                    End(BytesEnd::new("skipped-2")),
3491                ]
3492            );
3493
3494            ////////////////////////////////////////////////////////////////////////////////////////
3495
3496            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
3497            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3")));
3498            assert_eq!(
3499                de.read,
3500                vec![
3501                    // This comment here to keep the same formatting of both arrays
3502                    // otherwise rustfmt suggest one-line it
3503                    Start(BytesStart::new("skipped-3")),
3504                ]
3505            );
3506            assert_eq!(
3507                de.write,
3508                vec![
3509                    Start(BytesStart::new("skipped-1")),
3510                    End(BytesEnd::new("skipped-1")),
3511                    Start(BytesStart::new("skipped-2")),
3512                    End(BytesEnd::new("skipped-2")),
3513                ]
3514            );
3515
3516            // start_replay() should start replay from this point
3517            let checkpoint2 = de.skip_checkpoint();
3518            assert_eq!(checkpoint2, 4);
3519
3520            // Should skip third and forth <skipped-N/> elements
3521            de.skip().unwrap(); // skipped-3
3522            de.skip().unwrap(); // skipped-4
3523            assert_eq!(de.read, vec![]);
3524            assert_eq!(
3525                de.write,
3526                vec![
3527                    // checkpoint 1
3528                    Start(BytesStart::new("skipped-1")),
3529                    End(BytesEnd::new("skipped-1")),
3530                    Start(BytesStart::new("skipped-2")),
3531                    End(BytesEnd::new("skipped-2")),
3532                    // checkpoint 2
3533                    Start(BytesStart::new("skipped-3")),
3534                    End(BytesEnd::new("skipped-3")),
3535                    Start(BytesStart::new("skipped-4")),
3536                    End(BytesEnd::new("skipped-4")),
3537                ]
3538            );
3539            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2")));
3540            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2")));
3541            assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner")));
3542            assert_eq!(
3543                de.read,
3544                vec![
3545                    // This comment here to keep the same formatting of both arrays
3546                    // otherwise rustfmt suggest one-line it
3547                    End(BytesEnd::new("inner")),
3548                ]
3549            );
3550            assert_eq!(
3551                de.write,
3552                vec![
3553                    // checkpoint 1
3554                    Start(BytesStart::new("skipped-1")),
3555                    End(BytesEnd::new("skipped-1")),
3556                    Start(BytesStart::new("skipped-2")),
3557                    End(BytesEnd::new("skipped-2")),
3558                    // checkpoint 2
3559                    Start(BytesStart::new("skipped-3")),
3560                    End(BytesEnd::new("skipped-3")),
3561                    Start(BytesStart::new("skipped-4")),
3562                    End(BytesEnd::new("skipped-4")),
3563                ]
3564            );
3565
3566            // Start replay events from checkpoint 2
3567            de.start_replay(checkpoint2);
3568            assert_eq!(
3569                de.read,
3570                vec![
3571                    Start(BytesStart::new("skipped-3")),
3572                    End(BytesEnd::new("skipped-3")),
3573                    Start(BytesStart::new("skipped-4")),
3574                    End(BytesEnd::new("skipped-4")),
3575                    End(BytesEnd::new("inner")),
3576                ]
3577            );
3578            assert_eq!(
3579                de.write,
3580                vec![
3581                    Start(BytesStart::new("skipped-1")),
3582                    End(BytesEnd::new("skipped-1")),
3583                    Start(BytesStart::new("skipped-2")),
3584                    End(BytesEnd::new("skipped-2")),
3585                ]
3586            );
3587
3588            // Replayed events
3589            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-3")));
3590            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-3")));
3591            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-4")));
3592            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-4")));
3593
3594            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
3595            assert_eq!(de.read, vec![]);
3596            assert_eq!(
3597                de.write,
3598                vec![
3599                    Start(BytesStart::new("skipped-1")),
3600                    End(BytesEnd::new("skipped-1")),
3601                    Start(BytesStart::new("skipped-2")),
3602                    End(BytesEnd::new("skipped-2")),
3603                ]
3604            );
3605
3606            ////////////////////////////////////////////////////////////////////////////////////////
3607
3608            // New events
3609            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1")));
3610            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1")));
3611
3612            assert_eq!(de.read, vec![]);
3613            assert_eq!(
3614                de.write,
3615                vec![
3616                    Start(BytesStart::new("skipped-1")),
3617                    End(BytesEnd::new("skipped-1")),
3618                    Start(BytesStart::new("skipped-2")),
3619                    End(BytesEnd::new("skipped-2")),
3620                ]
3621            );
3622
3623            // Start replay events from checkpoint 1
3624            de.start_replay(checkpoint1);
3625            assert_eq!(
3626                de.read,
3627                vec![
3628                    Start(BytesStart::new("skipped-1")),
3629                    End(BytesEnd::new("skipped-1")),
3630                    Start(BytesStart::new("skipped-2")),
3631                    End(BytesEnd::new("skipped-2")),
3632                ]
3633            );
3634            assert_eq!(de.write, vec![]);
3635
3636            // Replayed events
3637            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-1")));
3638            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-1")));
3639            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-2")));
3640            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-2")));
3641
3642            assert_eq!(de.read, vec![]);
3643            assert_eq!(de.write, vec![]);
3644
3645            // New events
3646            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3647            assert_eq!(de.next().unwrap(), Eof);
3648        }
3649
3650        /// Checks that limiting buffer size works correctly
3651        #[test]
3652        fn limit() {
3653            use serde::Deserialize;
3654
3655            #[derive(Debug, Deserialize)]
3656            #[allow(unused)]
3657            struct List {
3658                item: Vec<()>,
3659            }
3660
3661            let mut de = make_de(
3662                r#"
3663                <any-name>
3664                    <item/>
3665                    <another-item>
3666                        <some-element>with text</some-element>
3667                        <yet-another-element/>
3668                    </another-item>
3669                    <item/>
3670                    <item/>
3671                </any-name>
3672                "#,
3673            );
3674            de.event_buffer_size(NonZeroUsize::new(3));
3675
3676            match List::deserialize(&mut de) {
3677                Err(DeError::TooManyEvents(count)) => assert_eq!(count.get(), 3),
3678                e => panic!("Expected `Err(TooManyEvents(3))`, but got `{:?}`", e),
3679            }
3680        }
3681
3682        /// Without handling Eof in `skip` this test failed with memory allocation
3683        #[test]
3684        fn invalid_xml() {
3685            use crate::de::DeEvent::*;
3686
3687            let mut de = make_de("<root>");
3688
3689            // Cache all events
3690            let checkpoint = de.skip_checkpoint();
3691            de.skip().unwrap();
3692            de.start_replay(checkpoint);
3693            assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]);
3694        }
3695    }
3696
3697    mod read_to_end {
3698        use super::*;
3699        use crate::de::DeEvent::*;
3700        use pretty_assertions::assert_eq;
3701
3702        #[test]
3703        fn complex() {
3704            let mut de = make_de(
3705                r#"
3706                <root>
3707                    <tag a="1"><tag>text</tag>content</tag>
3708                    <tag a="2"><![CDATA[cdata content]]></tag>
3709                    <self-closed/>
3710                </root>
3711                "#,
3712            );
3713
3714            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
3715
3716            assert_eq!(
3717                de.next().unwrap(),
3718                Start(BytesStart::from_content(r#"tag a="1""#, 3))
3719            );
3720            assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
3721
3722            assert_eq!(
3723                de.next().unwrap(),
3724                Start(BytesStart::from_content(r#"tag a="2""#, 3))
3725            );
3726            assert_eq!(de.next().unwrap(), Text("cdata content".into()));
3727            assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));
3728
3729            assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
3730            assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
3731
3732            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
3733            assert_eq!(de.next().unwrap(), Eof);
3734        }
3735
3736        #[test]
3737        fn invalid_xml1() {
3738            let mut de = make_de("<tag><tag></tag>");
3739
3740            assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
3741            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag")));
3742
3743            match de.read_to_end(QName(b"tag")) {
3744                Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3745                    assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
3746                }
3747                x => panic!(
3748                    "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
3749                    x
3750                ),
3751            }
3752            assert_eq!(de.next().unwrap(), Eof);
3753        }
3754
3755        #[test]
3756        fn invalid_xml2() {
3757            let mut de = make_de("<tag><![CDATA[]]><tag></tag>");
3758
3759            assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
3760            assert_eq!(de.peek().unwrap(), &Text("".into()));
3761
3762            match de.read_to_end(QName(b"tag")) {
3763                Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3764                    assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
3765                }
3766                x => panic!(
3767                    "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
3768                    x
3769                ),
3770            }
3771            assert_eq!(de.next().unwrap(), Eof);
3772        }
3773    }
3774
3775    #[test]
3776    fn borrowing_reader_parity() {
3777        let s = r#"
3778            <item name="hello" source="world.rs">Some text</item>
3779            <item2/>
3780            <item3 value="world" />
3781        "#;
3782
3783        let mut reader1 = IoReader {
3784            reader: Reader::from_reader(s.as_bytes()),
3785            start_trimmer: StartTrimmer::default(),
3786            buf: Vec::new(),
3787        };
3788        let mut reader2 = SliceReader {
3789            reader: Reader::from_str(s),
3790            start_trimmer: StartTrimmer::default(),
3791        };
3792
3793        loop {
3794            let event1 = reader1.next().unwrap();
3795            let event2 = reader2.next().unwrap();
3796
3797            if let (PayloadEvent::Eof, PayloadEvent::Eof) = (&event1, &event2) {
3798                break;
3799            }
3800
3801            assert_eq!(event1, event2);
3802        }
3803    }
3804
3805    #[test]
3806    fn borrowing_reader_events() {
3807        let s = r#"
3808            <item name="hello" source="world.rs">Some text</item>
3809            <item2></item2>
3810            <item3/>
3811            <item4 value="world" />
3812        "#;
3813
3814        let mut reader = SliceReader {
3815            reader: Reader::from_str(s),
3816            start_trimmer: StartTrimmer::default(),
3817        };
3818
3819        let config = reader.reader.config_mut();
3820        config.expand_empty_elements = true;
3821
3822        let mut events = Vec::new();
3823
3824        loop {
3825            let event = reader.next().unwrap();
3826            if let PayloadEvent::Eof = event {
3827                break;
3828            }
3829            events.push(event);
3830        }
3831
3832        use crate::de::PayloadEvent::*;
3833
3834        assert_eq!(
3835            events,
3836            vec![
3837                Start(BytesStart::from_content(
3838                    r#"item name="hello" source="world.rs""#,
3839                    4
3840                )),
3841                Text(BytesText::from_escaped("Some text")),
3842                End(BytesEnd::new("item")),
3843                Start(BytesStart::from_content("item2", 5)),
3844                End(BytesEnd::new("item2")),
3845                Start(BytesStart::from_content("item3", 5)),
3846                End(BytesEnd::new("item3")),
3847                Start(BytesStart::from_content(r#"item4 value="world" "#, 5)),
3848                End(BytesEnd::new("item4")),
3849            ]
3850        )
3851    }
3852
3853    /// Ensures, that [`Deserializer::read_string()`] never can get an `End` event,
3854    /// because parser reports error early
3855    #[test]
3856    fn read_string() {
3857        match from_str::<String>(r#"</root>"#) {
3858            Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
3859                assert_eq!(cause, IllFormedError::UnmatchedEndTag("root".into()));
3860            }
3861            x => panic!(
3862                "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
3863                x
3864            ),
3865        }
3866
3867        let s: String = from_str(r#"<root></root>"#).unwrap();
3868        assert_eq!(s, "");
3869
3870        match from_str::<String>(r#"<root></other>"#) {
3871            Err(DeError::InvalidXml(Error::IllFormed(cause))) => assert_eq!(
3872                cause,
3873                IllFormedError::MismatchedEndTag {
3874                    expected: "root".into(),
3875                    found: "other".into(),
3876                }
3877            ),
3878            x => panic!("Expected `Err(InvalidXml(IllFormed(_))`, but got `{:?}`", x),
3879        }
3880    }
3881
3882    /// Tests for https://github.com/tafia/quick-xml/issues/474.
3883    ///
3884    /// That tests ensures that comments and processed instructions is ignored
3885    /// and can split one logical string in pieces.
3886    mod merge_text {
3887        use super::*;
3888        use pretty_assertions::assert_eq;
3889
3890        #[test]
3891        fn text() {
3892            let mut de = make_de("text");
3893            assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
3894        }
3895
3896        #[test]
3897        fn cdata() {
3898            let mut de = make_de("<![CDATA[cdata]]>");
3899            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata".into()));
3900        }
3901
3902        #[test]
3903        fn text_and_cdata() {
3904            let mut de = make_de("text and <![CDATA[cdata]]>");
3905            assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata".into()));
3906        }
3907
3908        #[test]
3909        fn text_and_empty_cdata() {
3910            let mut de = make_de("text and <![CDATA[]]>");
3911            assert_eq!(de.next().unwrap(), DeEvent::Text("text and ".into()));
3912        }
3913
3914        #[test]
3915        fn cdata_and_text() {
3916            let mut de = make_de("<![CDATA[cdata]]> and text");
3917            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text".into()));
3918        }
3919
3920        #[test]
3921        fn empty_cdata_and_text() {
3922            let mut de = make_de("<![CDATA[]]> and text");
3923            assert_eq!(de.next().unwrap(), DeEvent::Text(" and text".into()));
3924        }
3925
3926        #[test]
3927        fn cdata_and_cdata() {
3928            let mut de = make_de(
3929                "\
3930                    <![CDATA[cdata]]]]>\
3931                    <![CDATA[>cdata]]>\
3932                ",
3933            );
3934            assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3935        }
3936
3937        mod comment_between {
3938            use super::*;
3939            use pretty_assertions::assert_eq;
3940
3941            #[test]
3942            fn text() {
3943                let mut de = make_de(
3944                    "\
3945                        text \
3946                        <!--comment 1--><!--comment 2--> \
3947                        text\
3948                    ",
3949                );
3950                assert_eq!(de.next().unwrap(), DeEvent::Text("text  text".into()));
3951            }
3952
3953            #[test]
3954            fn cdata() {
3955                let mut de = make_de(
3956                    "\
3957                        <![CDATA[cdata]]]]>\
3958                        <!--comment 1--><!--comment 2-->\
3959                        <![CDATA[>cdata]]>\
3960                    ",
3961                );
3962                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
3963            }
3964
3965            #[test]
3966            fn text_and_cdata() {
3967                let mut de = make_de(
3968                    "\
3969                        text \
3970                        <!--comment 1--><!--comment 2-->\
3971                        <![CDATA[ cdata]]>\
3972                    ",
3973                );
3974                assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata".into()));
3975            }
3976
3977            #[test]
3978            fn text_and_empty_cdata() {
3979                let mut de = make_de(
3980                    "\
3981                        text \
3982                        <!--comment 1--><!--comment 2-->\
3983                        <![CDATA[]]>\
3984                    ",
3985                );
3986                assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
3987            }
3988
3989            #[test]
3990            fn cdata_and_text() {
3991                let mut de = make_de(
3992                    "\
3993                        <![CDATA[cdata ]]>\
3994                        <!--comment 1--><!--comment 2--> \
3995                        text \
3996                    ",
3997                );
3998                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata  text".into()));
3999            }
4000
4001            #[test]
4002            fn empty_cdata_and_text() {
4003                let mut de = make_de(
4004                    "\
4005                        <![CDATA[]]>\
4006                        <!--comment 1--><!--comment 2--> \
4007                        text \
4008                    ",
4009                );
4010                assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
4011            }
4012
4013            #[test]
4014            fn cdata_and_cdata() {
4015                let mut de = make_de(
4016                    "\
4017                        <![CDATA[cdata]]]>\
4018                        <!--comment 1--><!--comment 2-->\
4019                        <![CDATA[]>cdata]]>\
4020                    ",
4021                );
4022                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4023            }
4024        }
4025
4026        mod pi_between {
4027            use super::*;
4028            use pretty_assertions::assert_eq;
4029
4030            #[test]
4031            fn text() {
4032                let mut de = make_de(
4033                    "\
4034                        text \
4035                        <?pi 1?><?pi 2?> \
4036                        text\
4037                    ",
4038                );
4039                assert_eq!(de.next().unwrap(), DeEvent::Text("text  text".into()));
4040            }
4041
4042            #[test]
4043            fn cdata() {
4044                let mut de = make_de(
4045                    "\
4046                        <![CDATA[cdata]]]]>\
4047                        <?pi 1?><?pi 2?>\
4048                        <![CDATA[>cdata]]>\
4049                    ",
4050                );
4051                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4052            }
4053
4054            #[test]
4055            fn text_and_cdata() {
4056                let mut de = make_de(
4057                    "\
4058                        text \
4059                        <?pi 1?><?pi 2?>\
4060                        <![CDATA[ cdata]]>\
4061                    ",
4062                );
4063                assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata".into()));
4064            }
4065
4066            #[test]
4067            fn text_and_empty_cdata() {
4068                let mut de = make_de(
4069                    "\
4070                        text \
4071                        <?pi 1?><?pi 2?>\
4072                        <![CDATA[]]>\
4073                    ",
4074                );
4075                assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into()));
4076            }
4077
4078            #[test]
4079            fn cdata_and_text() {
4080                let mut de = make_de(
4081                    "\
4082                        <![CDATA[cdata ]]>\
4083                        <?pi 1?><?pi 2?> \
4084                        text \
4085                    ",
4086                );
4087                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata  text".into()));
4088            }
4089
4090            #[test]
4091            fn empty_cdata_and_text() {
4092                let mut de = make_de(
4093                    "\
4094                        <![CDATA[]]>\
4095                        <?pi 1?><?pi 2?> \
4096                        text \
4097                    ",
4098                );
4099                assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into()));
4100            }
4101
4102            #[test]
4103            fn cdata_and_cdata() {
4104                let mut de = make_de(
4105                    "\
4106                        <![CDATA[cdata]]]>\
4107                        <?pi 1?><?pi 2?>\
4108                        <![CDATA[]>cdata]]>\
4109                    ",
4110                );
4111                assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into()));
4112            }
4113        }
4114    }
4115
4116    /// Tests for https://github.com/tafia/quick-xml/issues/474.
4117    ///
    /// These tests ensure that any combination of payload data is processed
    /// as expected.
4120    mod triples {
4121        use super::*;
4122        use pretty_assertions::assert_eq;
4123
4124        mod start {
4125            use super::*;
4126
4127            /// <tag1><tag2>...
4128            mod start {
4129                use super::*;
4130                use pretty_assertions::assert_eq;
4131
4132                #[test]
4133                fn start() {
4134                    let mut de = make_de("<tag1><tag2><tag3>");
4135                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4136                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4137                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3")));
4138                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4139                }
4140
4141                /// Not matching end tag will result to error
4142                #[test]
4143                fn end() {
4144                    let mut de = make_de("<tag1><tag2></tag2>");
4145                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4146                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4147                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2")));
4148                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4149                }
4150
4151                #[test]
4152                fn text() {
4153                    let mut de = make_de("<tag1><tag2> text ");
4154                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4155                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4156                    // Text is trimmed from both sides
4157                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4158                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4159                }
4160
4161                #[test]
4162                fn cdata() {
4163                    let mut de = make_de("<tag1><tag2><![CDATA[ cdata ]]>");
4164                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4165                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4166                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4167                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4168                }
4169
4170                #[test]
4171                fn eof() {
4172                    let mut de = make_de("<tag1><tag2>");
4173                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4174                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4175                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4176                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4177                }
4178            }
4179
4180            /// <tag></tag>...
4181            mod end {
4182                use super::*;
4183                use pretty_assertions::assert_eq;
4184
4185                #[test]
4186                fn start() {
4187                    let mut de = make_de("<tag></tag><tag2>");
4188                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4189                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4190                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4191                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4192                }
4193
4194                #[test]
4195                fn end() {
4196                    let mut de = make_de("<tag></tag></tag2>");
4197                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4198                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4199                    match de.next() {
4200                        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4201                            assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag2".into()));
4202                        }
4203                        x => panic!(
4204                            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4205                            x
4206                        ),
4207                    }
4208                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4209                }
4210
4211                #[test]
4212                fn text() {
4213                    let mut de = make_de("<tag></tag> text ");
4214                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4215                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4216                    // Text is trimmed from both sides
4217                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4218                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4219                }
4220
4221                #[test]
4222                fn cdata() {
4223                    let mut de = make_de("<tag></tag><![CDATA[ cdata ]]>");
4224                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4225                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4226                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4227                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4228                }
4229
4230                #[test]
4231                fn eof() {
4232                    let mut de = make_de("<tag></tag>");
4233                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4234                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4235                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4236                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4237                }
4238            }
4239
4240            /// <tag> text ...
4241            mod text {
4242                use super::*;
4243                use pretty_assertions::assert_eq;
4244
4245                #[test]
4246                fn start() {
4247                    let mut de = make_de("<tag> text <tag2>");
4248                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4249                    // Text is trimmed from both sides
4250                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4251                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4252                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4253                }
4254
4255                #[test]
4256                fn end() {
4257                    let mut de = make_de("<tag> text </tag>");
4258                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4259                    // Text is trimmed from both sides
4260                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4261                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4262                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4263                }
4264
4265                // start::text::text has no difference from start::text
4266
4267                #[test]
4268                fn cdata() {
4269                    let mut de = make_de("<tag> text <![CDATA[ cdata ]]>");
4270                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4271                    // Text is trimmed from the start
4272                    assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata ".into()));
4273                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4274                }
4275
4276                #[test]
4277                fn eof() {
4278                    let mut de = make_de("<tag> text ");
4279                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4280                    // Text is trimmed from both sides
4281                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4282                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4283                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4284                }
4285            }
4286
4287            /// <tag><![CDATA[ cdata ]]>...
4288            mod cdata {
4289                use super::*;
4290                use pretty_assertions::assert_eq;
4291
4292                #[test]
4293                fn start() {
4294                    let mut de = make_de("<tag><![CDATA[ cdata ]]><tag2>");
4295                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4296                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4297                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4298                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4299                }
4300
4301                #[test]
4302                fn end() {
4303                    let mut de = make_de("<tag><![CDATA[ cdata ]]></tag>");
4304                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4305                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4306                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4307                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4308                }
4309
4310                #[test]
4311                fn text() {
4312                    let mut de = make_de("<tag><![CDATA[ cdata ]]> text ");
4313                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4314                    // Text is trimmed from the end
4315                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  text".into()));
4316                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4317                }
4318
4319                #[test]
4320                fn cdata() {
4321                    let mut de = make_de("<tag><![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4322                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4323                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  cdata2 ".into()));
4324                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4325                }
4326
4327                #[test]
4328                fn eof() {
4329                    let mut de = make_de("<tag><![CDATA[ cdata ]]>");
4330                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4331                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4332                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4333                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4334                }
4335            }
4336        }
4337
4338        /// Start from End event will always generate an error
4339        #[test]
4340        fn end() {
4341            let mut de = make_de("</tag>");
4342            match de.next() {
4343                Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4344                    assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4345                }
4346                x => panic!(
4347                    "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4348                    x
4349                ),
4350            }
4351            assert_eq!(de.next().unwrap(), DeEvent::Eof);
4352        }
4353
4354        mod text {
4355            use super::*;
4356            use pretty_assertions::assert_eq;
4357
4358            mod start {
4359                use super::*;
4360                use pretty_assertions::assert_eq;
4361
4362                #[test]
4363                fn start() {
4364                    let mut de = make_de(" text <tag1><tag2>");
4365                    // Text is trimmed from both sides
4366                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4367                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4368                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4369                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4370                }
4371
4372                /// Not matching end tag will result in error
4373                #[test]
4374                fn end() {
4375                    let mut de = make_de(" text <tag></tag>");
4376                    // Text is trimmed from both sides
4377                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4378                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4379                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4380                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4381                }
4382
4383                #[test]
4384                fn text() {
4385                    let mut de = make_de(" text <tag> text2 ");
4386                    // Text is trimmed from both sides
4387                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4388                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4389                    // Text is trimmed from both sides
4390                    assert_eq!(de.next().unwrap(), DeEvent::Text("text2".into()));
4391                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4392                }
4393
4394                #[test]
4395                fn cdata() {
4396                    let mut de = make_de(" text <tag><![CDATA[ cdata ]]>");
4397                    // Text is trimmed from both sides
4398                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4399                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4400                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4401                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4402                }
4403
4404                #[test]
4405                fn eof() {
4406                    // Text is trimmed from both sides
4407                    let mut de = make_de(" text <tag>");
4408                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4409                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4410                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4411                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4412                }
4413            }
4414
4415            /// End event without corresponding start event will always generate an error
4416            #[test]
4417            fn end() {
4418                let mut de = make_de(" text </tag>");
4419                // Text is trimmed from both sides
4420                assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4421                match de.next() {
4422                    Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4423                        assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4424                    }
4425                    x => panic!(
4426                        "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4427                        x
4428                    ),
4429                }
4430                assert_eq!(de.next().unwrap(), DeEvent::Eof);
4431            }
4432
4433            // text::text::something is equivalent to text::something
4434
4435            mod cdata {
4436                use super::*;
4437                use pretty_assertions::assert_eq;
4438
4439                #[test]
4440                fn start() {
4441                    let mut de = make_de(" text <![CDATA[ cdata ]]><tag>");
4442                    // Text is trimmed from the start
4443                    assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata ".into()));
4444                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4445                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4446                }
4447
4448                #[test]
4449                fn end() {
4450                    let mut de = make_de(" text <![CDATA[ cdata ]]></tag>");
4451                    // Text is trimmed from the start
4452                    assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata ".into()));
4453                    match de.next() {
4454                        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4455                            assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4456                        }
4457                        x => panic!(
4458                            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4459                            x
4460                        ),
4461                    }
4462                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4463                }
4464
4465                #[test]
4466                fn text() {
4467                    let mut de = make_de(" text <![CDATA[ cdata ]]> text2 ");
4468                    // Text is trimmed from the start and from the end
4469                    assert_eq!(
4470                        de.next().unwrap(),
4471                        DeEvent::Text("text  cdata  text2".into())
4472                    );
4473                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4474                }
4475
4476                #[test]
4477                fn cdata() {
4478                    let mut de = make_de(" text <![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4479                    // Text is trimmed from the start
4480                    assert_eq!(
4481                        de.next().unwrap(),
4482                        DeEvent::Text("text  cdata  cdata2 ".into())
4483                    );
4484                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4485                }
4486
4487                #[test]
4488                fn eof() {
4489                    let mut de = make_de(" text <![CDATA[ cdata ]]>");
4490                    // Text is trimmed from the start
4491                    assert_eq!(de.next().unwrap(), DeEvent::Text("text  cdata ".into()));
4492                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4493                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4494                }
4495            }
4496        }
4497
4498        mod cdata {
4499            use super::*;
4500            use pretty_assertions::assert_eq;
4501
4502            mod start {
4503                use super::*;
4504                use pretty_assertions::assert_eq;
4505
4506                #[test]
4507                fn start() {
4508                    let mut de = make_de("<![CDATA[ cdata ]]><tag1><tag2>");
4509                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4510                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
4511                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
4512                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4513                }
4514
4515                /// Not matching end tag will result in error
4516                #[test]
4517                fn end() {
4518                    let mut de = make_de("<![CDATA[ cdata ]]><tag></tag>");
4519                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4520                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4521                    assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
4522                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4523                }
4524
4525                #[test]
4526                fn text() {
4527                    let mut de = make_de("<![CDATA[ cdata ]]><tag> text ");
4528                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4529                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4530                    // Text is trimmed from both sides
4531                    assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4532                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4533                }
4534
4535                #[test]
4536                fn cdata() {
4537                    let mut de = make_de("<![CDATA[ cdata ]]><tag><![CDATA[ cdata2 ]]>");
4538                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4539                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4540                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 ".into()));
4541                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4542                }
4543
4544                #[test]
4545                fn eof() {
4546                    let mut de = make_de("<![CDATA[ cdata ]]><tag>");
4547                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4548                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4549                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4550                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4551                }
4552            }
4553
4554            /// End event without corresponding start event will always generate an error
4555            #[test]
4556            fn end() {
4557                let mut de = make_de("<![CDATA[ cdata ]]></tag>");
4558                assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
4559                match de.next() {
4560                    Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4561                        assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4562                    }
4563                    x => panic!(
4564                        "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4565                        x
4566                    ),
4567                }
4568                assert_eq!(de.next().unwrap(), DeEvent::Eof);
4569            }
4570
4571            mod text {
4572                use super::*;
4573                use pretty_assertions::assert_eq;
4574
4575                #[test]
4576                fn start() {
4577                    let mut de = make_de("<![CDATA[ cdata ]]> text <tag>");
4578                    // Text is trimmed from the end
4579                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  text".into()));
4580                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4581                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4582                }
4583
4584                #[test]
4585                fn end() {
4586                    let mut de = make_de("<![CDATA[ cdata ]]> text </tag>");
4587                    // Text is trimmed from the end
4588                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  text".into()));
4589                    match de.next() {
4590                        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4591                            assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4592                        }
4593                        x => panic!(
4594                            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4595                            x
4596                        ),
4597                    }
4598                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4599                }
4600
4601                // cdata::text::text is equivalent to cdata::text
4602
4603                #[test]
4604                fn cdata() {
4605                    let mut de = make_de("<![CDATA[ cdata ]]> text <![CDATA[ cdata2 ]]>");
4606                    assert_eq!(
4607                        de.next().unwrap(),
4608                        DeEvent::Text(" cdata  text  cdata2 ".into())
4609                    );
4610                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4611                }
4612
4613                #[test]
4614                fn eof() {
4615                    let mut de = make_de("<![CDATA[ cdata ]]> text ");
4616                    // Text is trimmed from the end
4617                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  text".into()));
4618                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4619                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4620                }
4621            }
4622
4623            mod cdata {
4624                use super::*;
4625                use pretty_assertions::assert_eq;
4626
4627                #[test]
4628                fn start() {
4629                    let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><tag>");
4630                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  cdata2 ".into()));
4631                    assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag")));
4632                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4633                }
4634
4635                #[test]
4636                fn end() {
4637                    let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]></tag>");
4638                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  cdata2 ".into()));
4639                    match de.next() {
4640                        Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
4641                            assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
4642                        }
4643                        x => panic!(
4644                            "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
4645                            x
4646                        ),
4647                    }
4648                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4649                }
4650
4651                #[test]
4652                fn text() {
4653                    let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]> text ");
4654                    // Text is trimmed from the end
4655                    assert_eq!(
4656                        de.next().unwrap(),
4657                        DeEvent::Text(" cdata  cdata2  text".into())
4658                    );
4659                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4660                }
4661
4662                #[test]
4663                fn cdata() {
4664                    let mut de =
4665                        make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]><![CDATA[ cdata3 ]]>");
4666                    assert_eq!(
4667                        de.next().unwrap(),
4668                        DeEvent::Text(" cdata  cdata2  cdata3 ".into())
4669                    );
4670                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4671                }
4672
4673                #[test]
4674                fn eof() {
4675                    let mut de = make_de("<![CDATA[ cdata ]]><![CDATA[ cdata2 ]]>");
4676                    assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata  cdata2 ".into()));
4677                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4678                    assert_eq!(de.next().unwrap(), DeEvent::Eof);
4679                }
4680            }
4681        }
4682    }
4683}