serde_qs/de/parse.rs
1use crate::utils::{replace_space, QS_ENCODE_SET};
2
3use super::*;
4
5use percent_encoding::percent_encode;
6use serde::de;
7
8use std::borrow::Cow;
9use std::iter::Iterator;
10use std::slice::Iter;
11use std::str;
12
13macro_rules! tu {
14 ($x:expr) => {
15 match $x {
16 Some(x) => *x,
17 None => return Err(de::Error::custom("query string ended before expected")),
18 }
19 };
20}
21
22impl<'a> Level<'a> {
23 /// If this `Level` value is indeed a map, then attempt to insert
24 /// `value` for key `key`.
25 /// Returns error if `self` is not a map, or already has an entry for that
26 /// key.
27 fn insert_map_value(&mut self, key: Cow<'a, str>, value: Cow<'a, str>) {
28 if let Level::Nested(ref mut map) = *self {
29 match map.entry(key) {
30 Entry::Occupied(mut o) => {
31 let key = o.key();
32 let error = if key.contains('[') {
33 let newkey = percent_encode(key.as_bytes(), QS_ENCODE_SET)
34 .map(replace_space)
35 .collect::<String>();
36 format!("Multiple values for one key: \"{}\"\nInvalid field contains an encoded bracket -- did you mean to use non-strict mode?\n https://docs.rs/serde_qs/latest/serde_qs/#strict-vs-non-strict-modes", newkey)
37 } else {
38 format!("Multiple values for one key: \"{}\"", key)
39 };
40 // Throw away old result; map is now invalid anyway.
41 let _ = o.insert(Level::Invalid(error));
42 }
43 Entry::Vacant(vm) => {
44 // Map is empty, result is None
45 let _ = vm.insert(Level::Flat(value));
46 }
47 }
48 } else if let Level::Uninitialised = *self {
49 let mut map = BTreeMap::default();
50 let _ = map.insert(key, Level::Flat(value));
51 *self = Level::Nested(map);
52 } else {
53 *self = Level::Invalid(
54 "Attempted to insert map value into \
55 non-map structure"
56 .to_string(),
57 );
58 }
59 }
60
61 /// If this `Level` value is indeed a seq, then push a new value
62 fn insert_ord_seq_value(&mut self, key: usize, value: Cow<'a, str>) {
63 if let Level::OrderedSeq(ref mut map) = *self {
64 match map.entry(key) {
65 Entry::Occupied(mut o) => {
66 // Throw away old result; map is now invalid anyway.
67 let _ = o.insert(Level::Invalid("Multiple values for one key".to_string()));
68 }
69 Entry::Vacant(vm) => {
70 // Map is empty, result is None
71 let _ = vm.insert(Level::Flat(value));
72 }
73 }
74 } else if let Level::Uninitialised = *self {
75 // To reach here, self is either an OrderedSeq or nothing.
76 let mut map = BTreeMap::default();
77 let _ = map.insert(key, Level::Flat(value));
78 *self = Level::OrderedSeq(map);
79 } else {
80 *self = Level::Invalid(
81 "Attempted to insert seq value into \
82 non-seq structure"
83 .to_string(),
84 );
85 }
86 }
87
88 /// If this `Level` value is indeed a seq, then attempt to insert
89 /// `value` for key `key`.
90 /// Returns error if `self` is not a seq, or already has an entry for that
91 /// key.
92 fn insert_seq_value(&mut self, value: Cow<'a, str>) {
93 // Reached the end of the key string
94 if let Level::Sequence(ref mut seq) = *self {
95 seq.push(Level::Flat(value));
96 } else if let Level::Uninitialised = *self {
97 let seq = vec![Level::Flat(value)];
98 *self = Level::Sequence(seq);
99 } else {
100 *self = Level::Invalid(
101 "Attempted to insert seq value into \
102 non-seq structure"
103 .to_string(),
104 );
105 }
106 }
107}
108
109/// The `Parser` struct is a stateful querystring parser.
110/// It iterates over a slice of bytes, with a range to track the current
111/// start/end points of a value.
112/// The parser additionally supports peeking values, which allows them to be
113/// re-used (precisely once, unlike with `Peekable` from `std::iter`).
114pub struct Parser<'a> {
115 inner: &'a [u8],
116 iter: Iter<'a, u8>,
117 index: usize,
118 acc: (usize, usize),
119 peeked: Option<&'a u8>,
120 depth: usize, // stores the current depth, for use in bounded-depth parsing
121 strict: bool,
122 state: ParsingState,
123}
124
125/// The parsing logic varies slightly based on whether it is a key or a value
126/// (determines how encoded brackets are parse in non-strict mode)
127/// This tracks the state.
128enum ParsingState {
129 Init,
130 Key,
131 Value,
132}
133
134impl<'a> Iterator for Parser<'a> {
135 type Item = &'a u8;
136 #[inline]
137 fn next(&mut self) -> Option<Self::Item> {
138 let preparse_brackets = match self.state {
139 ParsingState::Value => false,
140 _ => !self.strict,
141 };
142 if preparse_brackets {
143 // in non-strict mode, we will happily decode any bracket
144 match self.peeked.take() {
145 Some(v) => Some(v),
146 None => {
147 self.index += 1;
148 self.acc.1 += 1;
149 match self.iter.next() {
150 Some(v) if v == &b'%' && self.iter.len() >= 2 => {
151 match &self.iter.as_slice()[..2] {
152 b"5B" => {
153 // skip the next two characters
154 let _ = self.iter.next();
155 let _ = self.iter.next();
156 self.index += 2;
157 Some(&b'[')
158 }
159 b"5D" => {
160 // skip the next two characters
161 let _ = self.iter.next();
162 let _ = self.iter.next();
163 self.index += 2;
164 Some(&b']')
165 }
166 _ => Some(v),
167 }
168 }
169 Some(v) => Some(v),
170 None => None,
171 }
172 }
173 }
174 } else {
175 match self.peeked.take() {
176 Some(v) => Some(v),
177 None => {
178 self.index += 1;
179 self.acc.1 += 1;
180 self.iter.next()
181 }
182 }
183 }
184 }
185}
186
187impl<'a> Parser<'a> {
188 #[inline]
189 fn peek(&mut self) -> Option<<Self as Iterator>::Item> {
190 if self.peeked.is_some() {
191 self.peeked
192 } else if let Some(x) = self.next() {
193 self.peeked = Some(x);
194 Some(x)
195 } else {
196 None
197 }
198 }
199}
200
201/// Replace b'+' with b' '
202/// Copied from [`form_urlencoded`](https://github.com/servo/rust-url/blob/380be29859adb859e861c2d765897c22ec878e01/src/form_urlencoded.rs#L125).
203fn replace_plus(input: &[u8]) -> Cow<[u8]> {
204 match input.iter().position(|&b| b == b'+') {
205 None => Cow::Borrowed(input),
206 Some(first_position) => {
207 let mut replaced = input.to_owned();
208 replaced[first_position] = b' ';
209 for byte in &mut replaced[first_position + 1..] {
210 if *byte == b'+' {
211 *byte = b' ';
212 }
213 }
214
215 Cow::Owned(replaced)
216 }
217 }
218}
219
220impl<'a> Parser<'a> {
221 pub fn new(encoded: &'a [u8], depth: usize, strict: bool) -> Self {
222 Parser {
223 inner: encoded,
224 iter: encoded.iter(),
225 acc: (0, 0),
226 index: 0,
227 peeked: None,
228 depth,
229 strict,
230 state: ParsingState::Init,
231 }
232 }
233
234 /// Resets the accumulator range by setting `(start, end)` to `(end, end)`.
235 fn clear_acc(&mut self) {
236 self.acc = (self.index, self.index);
237 }
238
239 /// Extracts a string from the internal byte slice from the range tracked by
240 /// the parser.
241 /// Avoids allocations when neither percent encoded, nor `'+'` values are
242 /// present.
243 fn collect_str(&mut self) -> Result<Cow<'a, str>> {
244 let replaced = replace_plus(&self.inner[self.acc.0..self.acc.1 - 1]);
245 let decoder = percent_encoding::percent_decode(&replaced);
246
247 let maybe_decoded = if self.strict {
248 decoder.decode_utf8()?
249 } else {
250 decoder.decode_utf8_lossy()
251 };
252
253 let ret: Result<Cow<'a, str>> = match maybe_decoded {
254 Cow::Borrowed(_) => {
255 match replaced {
256 Cow::Borrowed(_) => {
257 // In this case, neither method made replacements, so we
258 // reuse the original bytes
259 let res = str::from_utf8(&self.inner[self.acc.0..self.acc.1 - 1])?;
260 Ok(Cow::Borrowed(res))
261 }
262 Cow::Owned(owned) => {
263 let res = String::from_utf8(owned)?;
264 Ok(Cow::Owned(res))
265 }
266 }
267 }
268 Cow::Owned(owned) => Ok(Cow::Owned(owned)),
269 };
270 self.clear_acc();
271 ret.map_err(Error::from)
272 }
273
274 /// In some ways the main way to use a `Parser`, this runs the parsing step
275 /// and outputs a simple `Deserializer` over the parsed map.
276 pub(crate) fn as_deserializer(&mut self) -> Result<QsDeserializer<'a>> {
277 let map = BTreeMap::default();
278 let mut root = Level::Nested(map);
279
280 // Parses all top level nodes into the `root` map.
281 while self.parse(&mut root)? {}
282 let iter = match root {
283 Level::Nested(map) => map.into_iter(),
284 _ => BTreeMap::default().into_iter(),
285 };
286 Ok(QsDeserializer { iter, value: None })
287 }
288
289 /// This is the top level parsing function. It checks the first character to
290 /// decide the type of key (nested, sequence, etc.) and to call the
291 /// approprate parsing function.
292 ///
293 /// Returns `Ok(false)` when there is no more string to parse.
294 fn parse(&mut self, node: &mut Level<'a>) -> Result<bool> {
295 // First character determines parsing type
296 if self.depth == 0 {
297 // Hit the maximum depth level, so parse everything as a key
298 let key = self.parse_key(b'=', false)?;
299 self.parse_map_value(key, node)?;
300 return Ok(true);
301 }
302 match self.next() {
303 Some(x) => {
304 match *x {
305 b'[' => {
306 loop {
307 self.clear_acc();
308 // Only peek at the next value to determine the key type.
309 match tu!(self.peek()) {
310 // key is of the form "[..=", not really allowed.
311 b'[' => {
312 // If we're in strict mode, error, otherwise just ignore it.
313 if self.strict {
314 return Err(super::Error::parse_err("found another opening bracket before the closed bracket", self.index));
315 } else {
316 let _ = self.next();
317 }
318 }
319 // key is simply "[]", so treat as a seq.
320 b']' => {
321 // throw away the bracket
322 let _ = self.next();
323 self.clear_acc();
324 self.parse_seq_value(node)?;
325 return Ok(true);
326 }
327 // First character is an integer, attempt to parse it as an integer key
328 b'0'..=b'9' => {
329 let key = self.parse_key(b']', true)?;
330 let key = key.parse().map_err(Error::from)?;
331 self.parse_ord_seq_value(key, node)?;
332 return Ok(true);
333 }
334 // Key is "[a..=" so parse up to the closing "]"
335 0x20..=0x2f | 0x3a..=0x5a | 0x5c | 0x5e..=0x7e => {
336 let key = self.parse_key(b']', true)?;
337 self.parse_map_value(key, node)?;
338 return Ok(true);
339 }
340 c => {
341 if self.strict {
342 return Err(super::Error::parse_err(
343 format!(
344 "unexpected character: {}",
345 String::from_utf8_lossy(&[c])
346 ),
347 self.index,
348 ));
349 } else {
350 let _ = self.next();
351 }
352 }
353 }
354 }
355 }
356 // Skip empty byte sequences (e.g. leading `&`, trailing `&`, `&&`, ...)
357 b'&' => {
358 self.clear_acc();
359 Ok(true)
360 }
361 // This means the key should be a root key
362 // of the form "abc" or "abc[..=]"
363 // We do actually allow integer keys here since they cannot
364 // be confused with sequences
365 _ => {
366 let key = { self.parse_key(b'[', false)? };
367 // Root keys are _always_ map values
368 self.parse_map_value(key, node)?;
369 Ok(true)
370 }
371 }
372 }
373 // Ran out of characters to parse
374 None => Ok(false),
375 }
376 }
377
378 /// The iterator is currently pointing at a key, so parse up until the
379 /// `end_on` value. This will either be `'['` when the key is the root key,
380 /// or `']'` when the key is a nested key. In the former case, `'='` will
381 /// also finish the key parsing.
382 ///
383 /// The `consume` flag determines whether the end character should be
384 /// returned to the buffer to be peeked. This is important when
385 /// parsing keys like `abc[def][ghi]` since the `'['` character is
386 /// needed to for the next iteration of `parse`.
387 fn parse_key(&mut self, end_on: u8, consume: bool) -> Result<Cow<'a, str>> {
388 self.state = ParsingState::Key;
389 loop {
390 if let Some(x) = self.next() {
391 match *x {
392 c if c == end_on => {
393 // Add this character back to the buffer for peek.
394 if !consume {
395 self.peeked = Some(x);
396 }
397 return self.collect_str();
398 }
399 b'=' => {
400 // Allow the '=' byte only when parsing keys within []
401 if end_on != b']' {
402 // Otherwise, we have reached the end of the key
403 // Add this character back to the buffer for peek.
404 self.peeked = Some(x);
405 return self.collect_str();
406 }
407
408 // otherwise do nothing, so '=' is accumulated
409 }
410 b'&' => {
411 // important to keep the `&` character so we know the
412 // key-value is of the form `key&..=` (i.e. no value)
413 self.peeked = Some(&b'&');
414 return self.collect_str();
415 }
416 _ => {
417 // for any other character
418 // do nothing, keep adding to key
419 }
420 }
421 } else {
422 // no more string to parse
423 return self.collect_str();
424 }
425 }
426 }
427
428 /// The `(key,value)` pair is determined to be corresponding to a map entry,
429 /// so parse it as such. The first part of the `key` has been parsed.
430 fn parse_map_value(&mut self, key: Cow<'a, str>, node: &mut Level<'a>) -> Result<()> {
431 self.state = ParsingState::Key;
432 let res = loop {
433 if let Some(x) = self.peek() {
434 match *x {
435 b'=' => {
436 // Key is finished, parse up until the '&' as the value
437 self.clear_acc();
438 self.state = ParsingState::Value;
439 for _ in self.take_while(|b| *b != &b'&') {}
440 let value: Cow<'a, str> = self.collect_str()?;
441 node.insert_map_value(key, value);
442 break Ok(());
443 }
444 b'&' => {
445 // No value
446 node.insert_map_value(key, Cow::Borrowed(""));
447 break Ok(());
448 }
449 b'[' => {
450 // The key continues to another level of nested.
451 // Add a new unitialised level for this node and continue.
452 if let Level::Uninitialised = *node {
453 *node = Level::Nested(BTreeMap::default());
454 }
455 if let Level::Nested(ref mut map) = *node {
456 // By parsing we drop down another level
457 self.depth -= 1;
458 // Either take the existing entry, or add a new
459 // unitialised level
460 // Use this new node to keep parsing
461 let _ = self.parse(map.entry(key).or_insert(Level::Uninitialised))?;
462 break Ok(());
463 } else {
464 // We expected to parse into a map here.
465 break Err(super::Error::parse_err(
466 format!(
467 "tried to insert a \
468 new key into {:?}",
469 node
470 ),
471 self.index,
472 ));
473 }
474 }
475 c => {
476 // Anything else is unexpected since we just finished
477 // parsing a key.
478 if self.strict {
479 break Err(super::Error::parse_err(
480 format!(
481 "Unexpected character: '{}' found when parsing",
482 String::from_utf8_lossy(&[c])
483 ),
484 self.index,
485 ));
486 } else {
487 let _ = self.next();
488 }
489 }
490 }
491 } else {
492 // The string has ended, so the value is empty.
493 node.insert_map_value(key, Cow::Borrowed(""));
494 break Ok(());
495 }
496 };
497 // We have finished parsing this level, so go back up a level.
498 self.depth += 1;
499 res
500 }
501
502 /// The `(key,value)` pair is determined to be corresponding to an
503 /// ordered sequence.
504 /// Basically the same as the above, but we insert into `OrderedSeq`
505 /// Can potentially be merged?
506 fn parse_ord_seq_value(&mut self, key: usize, node: &mut Level<'a>) -> Result<()> {
507 self.state = ParsingState::Key;
508 let res = loop {
509 if let Some(x) = self.peek() {
510 match *x {
511 b'=' => {
512 // Key is finished, parse up until the '&' as the value
513 self.clear_acc();
514 self.state = ParsingState::Value;
515 for _ in self.take_while(|b| *b != &b'&') {}
516 let value = self.collect_str()?;
517 // Reached the end of the key string
518 node.insert_ord_seq_value(key, value);
519 break Ok(());
520 }
521 b'&' => {
522 // No value
523 node.insert_ord_seq_value(key, Cow::Borrowed(""));
524 break Ok(());
525 }
526 b'[' => {
527 // The key continues to another level of nested.
528 // Add a new unitialised level for this node and continue.
529 if let Level::Uninitialised = *node {
530 *node = Level::OrderedSeq(BTreeMap::default());
531 }
532 if let Level::OrderedSeq(ref mut map) = *node {
533 // By parsing we drop down another level
534 self.depth -= 1;
535 let _ = self.parse(
536 // Either take the existing entry, or add a new
537 // unitialised level
538 // Use this new node to keep parsing
539 map.entry(key).or_insert(Level::Uninitialised),
540 )?;
541 break Ok(());
542 } else {
543 // We expected to parse into a seq here.
544 break Err(super::Error::parse_err(
545 format!(
546 "tried to insert a \
547 new key into {:?}",
548 node
549 ),
550 self.index,
551 ));
552 }
553 }
554 c => {
555 // Anything else is unexpected since we just finished
556 // parsing a key.
557 if self.strict {
558 break Err(super::Error::parse_err(
559 format!("Unexpected character: {:?} found when parsing", c),
560 self.index,
561 ));
562 } else {
563 let _ = self.next();
564 }
565 }
566 }
567 } else {
568 // The string has ended, so the value is empty.
569 node.insert_ord_seq_value(key, Cow::Borrowed(""));
570 break Ok(());
571 }
572 };
573 // We have finished parsing this level, so go back up a level.
574 self.depth += 1;
575 res
576 }
577
578 /// The `(key,value)` pair is determined to be corresponding to an
579 /// unordered sequence.
580 /// This must be the final level of nesting, so assume we have a value
581 fn parse_seq_value(&mut self, node: &mut Level<'a>) -> Result<()> {
582 self.state = ParsingState::Key;
583 let res = match self.peek() {
584 Some(x) => {
585 match *x {
586 b'=' => {
587 // Key is finished, parse up until the '&' as the value
588 self.clear_acc();
589 self.state = ParsingState::Value;
590 for _ in self.take_while(|b| *b != &b'&') {}
591 let value = self.collect_str()?;
592 node.insert_seq_value(value);
593 Ok(())
594 }
595 b'&' => {
596 // key value is empty
597 node.insert_seq_value(Cow::Borrowed(""));
598 Ok(())
599 }
600 _ => Err(super::Error::parse_err(
601 "non-indexed sequence of \
602 structs not supported",
603 self.index,
604 )),
605 }
606 }
607 None => {
608 // The string has ended, so the value is empty.
609 node.insert_seq_value(Cow::Borrowed(""));
610 Ok(())
611 }
612 };
613 // We have finished parsing this level, so go back up a level.
614 self.depth += 1;
615 res
616 }
617}