serde_qs/de/
parse.rs

1use crate::utils::{replace_space, QS_ENCODE_SET};
2
3use super::*;
4
5use percent_encoding::percent_encode;
6use serde::de;
7
8use std::borrow::Cow;
9use std::iter::Iterator;
10use std::slice::Iter;
11use std::str;
12
13macro_rules! tu {
14    ($x:expr) => {
15        match $x {
16            Some(x) => *x,
17            None => return Err(de::Error::custom("query string ended before expected")),
18        }
19    };
20}
21
22impl<'a> Level<'a> {
23    /// If this `Level` value is indeed a map, then attempt to insert
24    /// `value` for key `key`.
25    /// Returns error if `self` is not a map, or already has an entry for that
26    /// key.
27    fn insert_map_value(&mut self, key: Cow<'a, str>, value: Cow<'a, str>) {
28        if let Level::Nested(ref mut map) = *self {
29            match map.entry(key) {
30                Entry::Occupied(mut o) => {
31                    let key = o.key();
32                    let error = if key.contains('[') {
33                        let newkey = percent_encode(key.as_bytes(), QS_ENCODE_SET)
34                            .map(replace_space)
35                            .collect::<String>();
36                        format!("Multiple values for one key: \"{}\"\nInvalid field contains an encoded bracket -- did you mean to use non-strict mode?\n  https://docs.rs/serde_qs/latest/serde_qs/#strict-vs-non-strict-modes", newkey)
37                    } else {
38                        format!("Multiple values for one key: \"{}\"", key)
39                    };
40                    // Throw away old result; map is now invalid anyway.
41                    let _ = o.insert(Level::Invalid(error));
42                }
43                Entry::Vacant(vm) => {
44                    // Map is empty, result is None
45                    let _ = vm.insert(Level::Flat(value));
46                }
47            }
48        } else if let Level::Uninitialised = *self {
49            let mut map = BTreeMap::default();
50            let _ = map.insert(key, Level::Flat(value));
51            *self = Level::Nested(map);
52        } else {
53            *self = Level::Invalid(
54                "Attempted to insert map value into \
55                 non-map structure"
56                    .to_string(),
57            );
58        }
59    }
60
61    /// If this `Level` value is indeed a seq, then push a new value
62    fn insert_ord_seq_value(&mut self, key: usize, value: Cow<'a, str>) {
63        if let Level::OrderedSeq(ref mut map) = *self {
64            match map.entry(key) {
65                Entry::Occupied(mut o) => {
66                    // Throw away old result; map is now invalid anyway.
67                    let _ = o.insert(Level::Invalid("Multiple values for one key".to_string()));
68                }
69                Entry::Vacant(vm) => {
70                    // Map is empty, result is None
71                    let _ = vm.insert(Level::Flat(value));
72                }
73            }
74        } else if let Level::Uninitialised = *self {
75            // To reach here, self is either an OrderedSeq or nothing.
76            let mut map = BTreeMap::default();
77            let _ = map.insert(key, Level::Flat(value));
78            *self = Level::OrderedSeq(map);
79        } else {
80            *self = Level::Invalid(
81                "Attempted to insert seq value into \
82                 non-seq structure"
83                    .to_string(),
84            );
85        }
86    }
87
88    /// If this `Level` value is indeed a seq, then attempt to insert
89    /// `value` for key `key`.
90    /// Returns error if `self` is not a seq, or already has an entry for that
91    /// key.
92    fn insert_seq_value(&mut self, value: Cow<'a, str>) {
93        // Reached the end of the key string
94        if let Level::Sequence(ref mut seq) = *self {
95            seq.push(Level::Flat(value));
96        } else if let Level::Uninitialised = *self {
97            let seq = vec![Level::Flat(value)];
98            *self = Level::Sequence(seq);
99        } else {
100            *self = Level::Invalid(
101                "Attempted to insert seq value into \
102                 non-seq structure"
103                    .to_string(),
104            );
105        }
106    }
107}
108
109/// The `Parser` struct is a stateful querystring parser.
110/// It iterates over a slice of bytes, with a range to track the current
111/// start/end points of a value.
112/// The parser additionally supports peeking values, which allows them to be
113/// re-used (precisely once, unlike with `Peekable` from `std::iter`).
114pub struct Parser<'a> {
115    inner: &'a [u8],
116    iter: Iter<'a, u8>,
117    index: usize,
118    acc: (usize, usize),
119    peeked: Option<&'a u8>,
120    depth: usize, // stores the current depth, for use in bounded-depth parsing
121    strict: bool,
122    state: ParsingState,
123}
124
125/// The parsing logic varies slightly based on whether it is a key or a value
126/// (determines how encoded brackets are parse in non-strict mode)
127/// This tracks the state.
128enum ParsingState {
129    Init,
130    Key,
131    Value,
132}
133
134impl<'a> Iterator for Parser<'a> {
135    type Item = &'a u8;
136    #[inline]
137    fn next(&mut self) -> Option<Self::Item> {
138        let preparse_brackets = match self.state {
139            ParsingState::Value => false,
140            _ => !self.strict,
141        };
142        if preparse_brackets {
143            // in non-strict mode, we will happily decode any bracket
144            match self.peeked.take() {
145                Some(v) => Some(v),
146                None => {
147                    self.index += 1;
148                    self.acc.1 += 1;
149                    match self.iter.next() {
150                        Some(v) if v == &b'%' && self.iter.len() >= 2 => {
151                            match &self.iter.as_slice()[..2] {
152                                b"5B" => {
153                                    // skip the next two characters
154                                    let _ = self.iter.next();
155                                    let _ = self.iter.next();
156                                    self.index += 2;
157                                    Some(&b'[')
158                                }
159                                b"5D" => {
160                                    // skip the next two characters
161                                    let _ = self.iter.next();
162                                    let _ = self.iter.next();
163                                    self.index += 2;
164                                    Some(&b']')
165                                }
166                                _ => Some(v),
167                            }
168                        }
169                        Some(v) => Some(v),
170                        None => None,
171                    }
172                }
173            }
174        } else {
175            match self.peeked.take() {
176                Some(v) => Some(v),
177                None => {
178                    self.index += 1;
179                    self.acc.1 += 1;
180                    self.iter.next()
181                }
182            }
183        }
184    }
185}
186
187impl<'a> Parser<'a> {
188    #[inline]
189    fn peek(&mut self) -> Option<<Self as Iterator>::Item> {
190        if self.peeked.is_some() {
191            self.peeked
192        } else if let Some(x) = self.next() {
193            self.peeked = Some(x);
194            Some(x)
195        } else {
196            None
197        }
198    }
199}
200
201/// Replace b'+' with b' '
202/// Copied from [`form_urlencoded`](https://github.com/servo/rust-url/blob/380be29859adb859e861c2d765897c22ec878e01/src/form_urlencoded.rs#L125).
203fn replace_plus(input: &[u8]) -> Cow<[u8]> {
204    match input.iter().position(|&b| b == b'+') {
205        None => Cow::Borrowed(input),
206        Some(first_position) => {
207            let mut replaced = input.to_owned();
208            replaced[first_position] = b' ';
209            for byte in &mut replaced[first_position + 1..] {
210                if *byte == b'+' {
211                    *byte = b' ';
212                }
213            }
214
215            Cow::Owned(replaced)
216        }
217    }
218}
219
220impl<'a> Parser<'a> {
221    pub fn new(encoded: &'a [u8], depth: usize, strict: bool) -> Self {
222        Parser {
223            inner: encoded,
224            iter: encoded.iter(),
225            acc: (0, 0),
226            index: 0,
227            peeked: None,
228            depth,
229            strict,
230            state: ParsingState::Init,
231        }
232    }
233
234    /// Resets the accumulator range by setting `(start, end)` to `(end, end)`.
235    fn clear_acc(&mut self) {
236        self.acc = (self.index, self.index);
237    }
238
239    /// Extracts a string from the internal byte slice from the range tracked by
240    /// the parser.
241    /// Avoids allocations when neither percent encoded, nor `'+'` values are
242    /// present.
243    fn collect_str(&mut self) -> Result<Cow<'a, str>> {
244        let replaced = replace_plus(&self.inner[self.acc.0..self.acc.1 - 1]);
245        let decoder = percent_encoding::percent_decode(&replaced);
246
247        let maybe_decoded = if self.strict {
248            decoder.decode_utf8()?
249        } else {
250            decoder.decode_utf8_lossy()
251        };
252
253        let ret: Result<Cow<'a, str>> = match maybe_decoded {
254            Cow::Borrowed(_) => {
255                match replaced {
256                    Cow::Borrowed(_) => {
257                        // In this case, neither method made replacements, so we
258                        // reuse the original bytes
259                        let res = str::from_utf8(&self.inner[self.acc.0..self.acc.1 - 1])?;
260                        Ok(Cow::Borrowed(res))
261                    }
262                    Cow::Owned(owned) => {
263                        let res = String::from_utf8(owned)?;
264                        Ok(Cow::Owned(res))
265                    }
266                }
267            }
268            Cow::Owned(owned) => Ok(Cow::Owned(owned)),
269        };
270        self.clear_acc();
271        ret.map_err(Error::from)
272    }
273
274    /// In some ways the main way to use a `Parser`, this runs the parsing step
275    /// and outputs a simple `Deserializer` over the parsed map.
276    pub(crate) fn as_deserializer(&mut self) -> Result<QsDeserializer<'a>> {
277        let map = BTreeMap::default();
278        let mut root = Level::Nested(map);
279
280        // Parses all top level nodes into the `root` map.
281        while self.parse(&mut root)? {}
282        let iter = match root {
283            Level::Nested(map) => map.into_iter(),
284            _ => BTreeMap::default().into_iter(),
285        };
286        Ok(QsDeserializer { iter, value: None })
287    }
288
289    /// This is the top level parsing function. It checks the first character to
290    /// decide the type of key (nested, sequence, etc.) and to call the
291    /// approprate parsing function.
292    ///
293    /// Returns `Ok(false)` when there is no more string to parse.
294    fn parse(&mut self, node: &mut Level<'a>) -> Result<bool> {
295        // First character determines parsing type
296        if self.depth == 0 {
297            // Hit the maximum depth level, so parse everything as a key
298            let key = self.parse_key(b'=', false)?;
299            self.parse_map_value(key, node)?;
300            return Ok(true);
301        }
302        match self.next() {
303            Some(x) => {
304                match *x {
305                    b'[' => {
306                        loop {
307                            self.clear_acc();
308                            // Only peek at the next value to determine the key type.
309                            match tu!(self.peek()) {
310                                // key is of the form "[..=", not really allowed.
311                                b'[' => {
312                                    // If we're in strict mode, error, otherwise just ignore it.
313                                    if self.strict {
314                                        return Err(super::Error::parse_err("found another opening bracket before the closed bracket", self.index));
315                                    } else {
316                                        let _ = self.next();
317                                    }
318                                }
319                                // key is simply "[]", so treat as a seq.
320                                b']' => {
321                                    // throw away the bracket
322                                    let _ = self.next();
323                                    self.clear_acc();
324                                    self.parse_seq_value(node)?;
325                                    return Ok(true);
326                                }
327                                // First character is an integer, attempt to parse it as an integer key
328                                b'0'..=b'9' => {
329                                    let key = self.parse_key(b']', true)?;
330                                    let key = key.parse().map_err(Error::from)?;
331                                    self.parse_ord_seq_value(key, node)?;
332                                    return Ok(true);
333                                }
334                                // Key is "[a..=" so parse up to the closing "]"
335                                0x20..=0x2f | 0x3a..=0x5a | 0x5c | 0x5e..=0x7e => {
336                                    let key = self.parse_key(b']', true)?;
337                                    self.parse_map_value(key, node)?;
338                                    return Ok(true);
339                                }
340                                c => {
341                                    if self.strict {
342                                        return Err(super::Error::parse_err(
343                                            format!(
344                                                "unexpected character: {}",
345                                                String::from_utf8_lossy(&[c])
346                                            ),
347                                            self.index,
348                                        ));
349                                    } else {
350                                        let _ = self.next();
351                                    }
352                                }
353                            }
354                        }
355                    }
356                    // Skip empty byte sequences (e.g. leading `&`, trailing `&`, `&&`, ...)
357                    b'&' => {
358                        self.clear_acc();
359                        Ok(true)
360                    }
361                    // This means the key should be a root key
362                    // of the form "abc" or "abc[..=]"
363                    // We do actually allow integer keys here since they cannot
364                    // be confused with sequences
365                    _ => {
366                        let key = { self.parse_key(b'[', false)? };
367                        // Root keys are _always_ map values
368                        self.parse_map_value(key, node)?;
369                        Ok(true)
370                    }
371                }
372            }
373            // Ran out of characters to parse
374            None => Ok(false),
375        }
376    }
377
378    /// The iterator is currently pointing at a key, so parse up until the
379    /// `end_on` value. This will either be `'['` when the key is the root key,
380    /// or `']'` when the key is a nested key. In the former case, `'='` will
381    /// also finish the key parsing.
382    ///
383    /// The `consume` flag determines whether the end character should be
384    /// returned to the buffer to be peeked. This is important when
385    /// parsing keys like `abc[def][ghi]` since the `'['` character is
386    /// needed to for the next iteration of `parse`.
387    fn parse_key(&mut self, end_on: u8, consume: bool) -> Result<Cow<'a, str>> {
388        self.state = ParsingState::Key;
389        loop {
390            if let Some(x) = self.next() {
391                match *x {
392                    c if c == end_on => {
393                        // Add this character back to the buffer for peek.
394                        if !consume {
395                            self.peeked = Some(x);
396                        }
397                        return self.collect_str();
398                    }
399                    b'=' => {
400                        // Allow the '=' byte only when parsing keys within []
401                        if end_on != b']' {
402                            // Otherwise, we have reached the end of the key
403                            // Add this character back to the buffer for peek.
404                            self.peeked = Some(x);
405                            return self.collect_str();
406                        }
407
408                        // otherwise do nothing, so '=' is accumulated
409                    }
410                    b'&' => {
411                        // important to keep the `&` character so we know the
412                        // key-value is of the form `key&..=` (i.e. no value)
413                        self.peeked = Some(&b'&');
414                        return self.collect_str();
415                    }
416                    _ => {
417                        // for any other character
418                        // do nothing, keep adding to key
419                    }
420                }
421            } else {
422                // no more string to parse
423                return self.collect_str();
424            }
425        }
426    }
427
428    /// The `(key,value)` pair is determined to be corresponding to a map entry,
429    /// so parse it as such. The first part of the `key` has been parsed.
430    fn parse_map_value(&mut self, key: Cow<'a, str>, node: &mut Level<'a>) -> Result<()> {
431        self.state = ParsingState::Key;
432        let res = loop {
433            if let Some(x) = self.peek() {
434                match *x {
435                    b'=' => {
436                        // Key is finished, parse up until the '&' as the value
437                        self.clear_acc();
438                        self.state = ParsingState::Value;
439                        for _ in self.take_while(|b| *b != &b'&') {}
440                        let value: Cow<'a, str> = self.collect_str()?;
441                        node.insert_map_value(key, value);
442                        break Ok(());
443                    }
444                    b'&' => {
445                        // No value
446                        node.insert_map_value(key, Cow::Borrowed(""));
447                        break Ok(());
448                    }
449                    b'[' => {
450                        // The key continues to another level of nested.
451                        // Add a new unitialised level for this node and continue.
452                        if let Level::Uninitialised = *node {
453                            *node = Level::Nested(BTreeMap::default());
454                        }
455                        if let Level::Nested(ref mut map) = *node {
456                            // By parsing we drop down another level
457                            self.depth -= 1;
458                            // Either take the existing entry, or add a new
459                            // unitialised level
460                            // Use this new node to keep parsing
461                            let _ = self.parse(map.entry(key).or_insert(Level::Uninitialised))?;
462                            break Ok(());
463                        } else {
464                            // We expected to parse into a map here.
465                            break Err(super::Error::parse_err(
466                                format!(
467                                    "tried to insert a \
468                                     new key into {:?}",
469                                    node
470                                ),
471                                self.index,
472                            ));
473                        }
474                    }
475                    c => {
476                        // Anything else is unexpected since we just finished
477                        // parsing a key.
478                        if self.strict {
479                            break Err(super::Error::parse_err(
480                                format!(
481                                    "Unexpected character: '{}' found when parsing",
482                                    String::from_utf8_lossy(&[c])
483                                ),
484                                self.index,
485                            ));
486                        } else {
487                            let _ = self.next();
488                        }
489                    }
490                }
491            } else {
492                // The string has ended, so the value is empty.
493                node.insert_map_value(key, Cow::Borrowed(""));
494                break Ok(());
495            }
496        };
497        // We have finished parsing this level, so go back up a level.
498        self.depth += 1;
499        res
500    }
501
502    /// The `(key,value)` pair is determined to be corresponding to an
503    /// ordered sequence.
504    /// Basically the same as the above, but we insert into `OrderedSeq`
505    /// Can potentially be merged?
506    fn parse_ord_seq_value(&mut self, key: usize, node: &mut Level<'a>) -> Result<()> {
507        self.state = ParsingState::Key;
508        let res = loop {
509            if let Some(x) = self.peek() {
510                match *x {
511                    b'=' => {
512                        // Key is finished, parse up until the '&' as the value
513                        self.clear_acc();
514                        self.state = ParsingState::Value;
515                        for _ in self.take_while(|b| *b != &b'&') {}
516                        let value = self.collect_str()?;
517                        // Reached the end of the key string
518                        node.insert_ord_seq_value(key, value);
519                        break Ok(());
520                    }
521                    b'&' => {
522                        // No value
523                        node.insert_ord_seq_value(key, Cow::Borrowed(""));
524                        break Ok(());
525                    }
526                    b'[' => {
527                        // The key continues to another level of nested.
528                        // Add a new unitialised level for this node and continue.
529                        if let Level::Uninitialised = *node {
530                            *node = Level::OrderedSeq(BTreeMap::default());
531                        }
532                        if let Level::OrderedSeq(ref mut map) = *node {
533                            // By parsing we drop down another level
534                            self.depth -= 1;
535                            let _ = self.parse(
536                                // Either take the existing entry, or add a new
537                                // unitialised level
538                                // Use this new node to keep parsing
539                                map.entry(key).or_insert(Level::Uninitialised),
540                            )?;
541                            break Ok(());
542                        } else {
543                            // We expected to parse into a seq here.
544                            break Err(super::Error::parse_err(
545                                format!(
546                                    "tried to insert a \
547                                     new key into {:?}",
548                                    node
549                                ),
550                                self.index,
551                            ));
552                        }
553                    }
554                    c => {
555                        // Anything else is unexpected since we just finished
556                        // parsing a key.
557                        if self.strict {
558                            break Err(super::Error::parse_err(
559                                format!("Unexpected character: {:?} found when parsing", c),
560                                self.index,
561                            ));
562                        } else {
563                            let _ = self.next();
564                        }
565                    }
566                }
567            } else {
568                // The string has ended, so the value is empty.
569                node.insert_ord_seq_value(key, Cow::Borrowed(""));
570                break Ok(());
571            }
572        };
573        // We have finished parsing this level, so go back up a level.
574        self.depth += 1;
575        res
576    }
577
578    /// The `(key,value)` pair is determined to be corresponding to an
579    /// unordered sequence.
580    /// This must be the final level of nesting, so assume we have a value
581    fn parse_seq_value(&mut self, node: &mut Level<'a>) -> Result<()> {
582        self.state = ParsingState::Key;
583        let res = match self.peek() {
584            Some(x) => {
585                match *x {
586                    b'=' => {
587                        // Key is finished, parse up until the '&' as the value
588                        self.clear_acc();
589                        self.state = ParsingState::Value;
590                        for _ in self.take_while(|b| *b != &b'&') {}
591                        let value = self.collect_str()?;
592                        node.insert_seq_value(value);
593                        Ok(())
594                    }
595                    b'&' => {
596                        // key value is empty
597                        node.insert_seq_value(Cow::Borrowed(""));
598                        Ok(())
599                    }
600                    _ => Err(super::Error::parse_err(
601                        "non-indexed sequence of \
602                         structs not supported",
603                        self.index,
604                    )),
605                }
606            }
607            None => {
608                // The string has ended, so the value is empty.
609                node.insert_seq_value(Cow::Borrowed(""));
610                Ok(())
611            }
612        };
613        // We have finished parsing this level, so go back up a level.
614        self.depth += 1;
615        res
616    }
617}