nom8/lib.rs
1//! # nom, eating data byte by byte
2//!
3//! **NOTE:** This is an unofficial, short-lived fork. I'm nominating nom8 to be nom v8.
4//!
5//! nom is a parser combinator library, supporting:
6//! - String (`&str`), byte (`&[u8]`), and [custom input types][crate::input]
7//! - [Streaming parsing][crate::input::Streaming]
8//! - Zero copy parsing
9//!
10//! ## Example
11//!
12//! ```rust
13//! use nom8::prelude::*;
14//! use nom8::bytes::{tag, take_while_m_n};
15//!
16//! #[derive(Debug,PartialEq)]
17//! pub struct Color {
18//! pub red: u8,
19//! pub green: u8,
20//! pub blue: u8,
21//! }
22//!
23//! fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
24//! u8::from_str_radix(input, 16)
25//! }
26//!
27//! fn is_hex_digit(c: char) -> bool {
28//! c.is_digit(16)
29//! }
30//!
31//! fn hex_primary(input: &str) -> IResult<&str, u8> {
32//! take_while_m_n(2, 2, is_hex_digit).map_res(from_hex).parse(input)
33//! }
34//!
35//! fn hex_color(input: &str) -> IResult<&str, Color> {
36//! let (input, _) = tag("#")(input)?;
37//! let (input, (red, green, blue)) = (hex_primary, hex_primary, hex_primary).parse(input)?;
38//!
39//! Ok((input, Color { red, green, blue }))
40//! }
41//!
42//! fn main() {
43//! let result = hex_color("#2F14DF").finish();
44//! assert_eq!(result, Ok(Color {
45//! red: 47,
46//! green: 20,
47//! blue: 223,
48//! }));
49//! }
50//! ```
51//!
52//! The code is available on [Github](https://github.com/Geal/nom)
53//!
54//! There are a few [guides](https://github.com/Geal/nom/tree/main/doc) with more details
55//! about [how to write parsers][_tutorial],
56//! or the [error management system][error].
57//! You can also check out the [_cookbook] module that contains examples of common patterns.
58//!
59//! **Looking for a specific combinator? Read the
60//! ["choose a combinator" guide][combinator]**
61//!
62//! If you are upgrading to nom 5.0, please read the
63//! [migration document](https://github.com/Geal/nom/blob/main/doc/upgrading_to_nom_5.md).
64//!
65//! ## Parser combinators
66//!
67//! Parser combinators are an approach to parsers that is very different from
68//! software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and
69//! [yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar
70//! in a separate syntax and generating the corresponding code, you use very small
71//! functions with very specific purposes, like "take 5 bytes", or "recognize the
72//! word 'HTTP'", and assemble them in meaningful patterns like "recognize
73//! 'HTTP', then a space, then a version".
74//! The resulting code is small, and looks like the grammar you would have
75//! written with other parser approaches.
76//!
77//! This gives us a few advantages:
78//!
79//! - The parsers are small and easy to write
80//! - The parser components are easy to reuse (if they're general enough, please add them to nom!)
81//! - The parser components are easy to test separately (unit tests and property-based tests)
82//! - The parser combination code looks close to the grammar you would have written
83//! - You can build partial parsers, specific to the data you need at the moment, and ignore the rest
84//!
85//! Here is an example of one such parser, to recognize text between parentheses:
86//!
87//! ```rust
88//! use nom8::{
89//! IResult,
90//! sequence::delimited,
91//! bytes::take_till1
92//! };
93//!
94//! fn parens(input: &str) -> IResult<&str, &str> {
95//! delimited('(', take_till1(")"), ')')(input)
96//! }
97//! ```
98//!
99//! It defines a function named `parens` which will recognize a sequence of the
100//! character `(`, the longest string slice not containing `)`, then the character
101//! `)`, and will return the string slice in the middle.
102//!
103//! Here is another parser, written without using nom's combinators this time:
104//!
105//! ```rust
106//! use nom8::{IResult, Err, Needed};
107//!
108//! # fn main() {
109//! fn take4(i: &[u8]) -> IResult<&[u8], &[u8]>{
110//! if i.len() < 4 {
111//! Err(Err::Incomplete(Needed::new(4)))
112//! } else {
113//! Ok((&i[4..], &i[0..4]))
114//! }
115//! }
116//! # }
117//! ```
118//!
119//! This function takes a byte array as input, and tries to consume 4 bytes.
120//! Writing all the parsers manually, like this, is dangerous, despite Rust's
121//! safety features. There are still a lot of mistakes one can make. That's why
122//! nom provides a list of functions to help in developing parsers.
123//!
124//! With functions, you would write it like this:
125//!
126//! ```rust
127//! use nom8::{IResult, bytes::take, input::Streaming};
128//! fn take4(input: Streaming<&str>) -> IResult<Streaming<&str>, &str> {
129//! take(4u8)(input)
130//! }
131//! ```
132//!
133//! A parser in nom is a function which, for an input type `I`, an output type `O`
134//! and an optional error type `E`, will have the following signature:
135//!
136//! ```rust,compile_fail
137//! fn parser(input: I) -> IResult<I, O, E>;
138//! ```
139//!
140//! Or like this, if you don't want to specify a custom error type (it will be `error::Error<I>` by default):
141//!
142//! ```rust,compile_fail
143//! fn parser(input: I) -> IResult<I, O>;
144//! ```
145//!
146//! `IResult` is an alias for the `Result` type:
147//!
148//! ```rust
149//! use nom8::{Needed, error::Error};
150//!
151//! type IResult<I, O, E = Error<I>> = Result<(I, O), Err<E>>;
152//!
153//! enum Err<E> {
154//! Incomplete(Needed),
155//! Error(E),
156//! Failure(E),
157//! }
158//! ```
159//!
160//! It can have the following values:
161//!
162//! - A correct result `Ok((I,O))` with the first element being the remainder of the input (not parsed yet), and the second the output value;
163//! - An error `Err(Err::Error(c))` with `c` an error that can be built from the input position and a parser specific error
164//! - An error `Err(Err::Incomplete(Needed))` indicating that more input is necessary. `Needed` can indicate how much data is needed
165//! - An error `Err(Err::Failure(c))`. It works like the `Error` case, except it indicates an unrecoverable error: We cannot backtrack and test another parser
166//!
167//! Please refer to the ["choose a combinator" guide][combinator] for an exhaustive list of parsers.
168//! See also the rest of the documentation [here](https://github.com/Geal/nom/blob/main/doc).
169//!
170//! ## Making new parsers with function combinators
171//!
172//! nom is based on functions that generate parsers, with a signature like
173//! this: `(arguments) -> impl Fn(Input) -> IResult<Input, Output, Error>`.
174//! The arguments of a combinator can be direct values (like `take` which uses
175//! a number of bytes or characters as argument) or even other parsers (like
176//! `delimited` which takes as argument 3 parsers, and returns the result of
177//! the second one if all are successful).
178//!
179//! Here are some examples:
180//!
181//! ```rust
182//! use nom8::IResult;
183//! use nom8::bytes::{tag, take};
184//! fn abcd_parser(i: &str) -> IResult<&str, &str> {
185//! tag("abcd")(i) // will consume bytes if the input begins with "abcd"
186//! }
187//!
188//! fn take_10(i: &[u8]) -> IResult<&[u8], &[u8]> {
189//! take(10u8)(i) // will consume and return 10 bytes of input
190//! }
191//! ```
192//!
193//! ## Combining parsers
194//!
195//! There are higher level patterns, like the **`alt`** combinator, which
196//! provides a choice between multiple parsers. If one branch fails, it tries
197//! the next, and returns the result of the first parser that succeeds:
198//!
199//! ```rust
200//! use nom8::IResult;
201//! use nom8::branch::alt;
202//! use nom8::bytes::tag;
203//!
204//! let mut alt_tags = alt((tag("abcd"), tag("efgh")));
205//!
206//! assert_eq!(alt_tags(&b"abcdxxx"[..]), Ok((&b"xxx"[..], &b"abcd"[..])));
207//! assert_eq!(alt_tags(&b"efghxxx"[..]), Ok((&b"xxx"[..], &b"efgh"[..])));
208//! assert_eq!(alt_tags(&b"ijklxxx"[..]), Err(nom8::Err::Error((&b"ijklxxx"[..], nom8::error::ErrorKind::Tag))));
209//! ```
210//!
211//! The **`opt`** combinator makes a parser optional. If the child parser returns
212//! an error, **`opt`** will still succeed and return None:
213//!
214//! ```rust
215//! use nom8::{IResult, combinator::opt, bytes::tag};
216//! fn abcd_opt(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
217//! opt(tag("abcd"))(i)
218//! }
219//!
220//! assert_eq!(abcd_opt(&b"abcdxxx"[..]), Ok((&b"xxx"[..], Some(&b"abcd"[..]))));
221//! assert_eq!(abcd_opt(&b"efghxxx"[..]), Ok((&b"efghxxx"[..], None)));
222//! ```
223//!
224//! **`many0`** applies a parser 0 or more times, and returns a vector of the aggregated results:
225//!
226//! ```rust
227//! # #[cfg(feature = "alloc")]
228//! # fn main() {
229//! use nom8::{IResult, multi::many0, bytes::tag};
230//! use std::str;
231//!
232//! fn multi(i: &str) -> IResult<&str, Vec<&str>> {
233//! many0(tag("abcd"))(i)
234//! }
235//!
236//! let a = "abcdef";
237//! let b = "abcdabcdef";
238//! let c = "azerty";
239//! assert_eq!(multi(a), Ok(("ef", vec!["abcd"])));
240//! assert_eq!(multi(b), Ok(("ef", vec!["abcd", "abcd"])));
241//! assert_eq!(multi(c), Ok(("azerty", Vec::new())));
242//! # }
243//! # #[cfg(not(feature = "alloc"))]
244//! # fn main() {}
245//! ```
246//!
247//! Here are some basic combinators available:
248//!
249//! - **`opt`**: Will make the parser optional (if it returns the `O` type, the new parser returns `Option<O>`)
250//! - **`many0`**: Will apply the parser 0 or more times (if it returns the `O` type, the new parser returns `Vec<O>`)
251//! - **`many1`**: Will apply the parser 1 or more times
252//!
253//! There are more complex (and more useful) parsers like tuples, which are
254//! used to apply a series of parsers and then assemble their results.
255//!
256//! Example with tuples:
257//!
258//! ```rust
259//! # fn main() {
260//! use nom8::prelude::*;
261//! use nom8::{
262//! error::ErrorKind, Needed,
263//! number::be_u16,
264//! bytes::{tag, take},
265//! input::Streaming,
266//! };
267//!
268//! let mut tpl = (be_u16, take(3u8), tag("fg"));
269//!
270//! assert_eq!(
271//! tpl.parse(Streaming(&b"abcdefgh"[..])),
272//! Ok((
273//! Streaming(&b"h"[..]),
274//! (0x6162u16, &b"cde"[..], &b"fg"[..])
275//! ))
276//! );
277//! assert_eq!(tpl.parse(Streaming(&b"abcde"[..])), Err(nom8::Err::Incomplete(Needed::new(2))));
278//! let input = &b"abcdejk"[..];
279//! assert_eq!(tpl.parse(Streaming(input)), Err(nom8::Err::Error((Streaming(&input[5..]), ErrorKind::Tag))));
280//! # }
281//! ```
282//!
283//! But you can also use a sequence of combinators written in imperative style,
284//! thanks to the `?` operator:
285//!
286//! ```rust
287//! # fn main() {
288//! use nom8::{IResult, bytes::tag};
289//!
290//! #[derive(Debug, PartialEq)]
291//! struct A {
292//! a: u8,
293//! b: u8
294//! }
295//!
296//! fn ret_int1(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,1)) }
297//! fn ret_int2(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,2)) }
298//!
299//! fn f(i: &[u8]) -> IResult<&[u8], A> {
300//! // if successful, the parser returns `Ok((remaining_input, output_value))` that we can destructure
301//! let (i, _) = tag("abcd")(i)?;
302//! let (i, a) = ret_int1(i)?;
303//! let (i, _) = tag("efgh")(i)?;
304//! let (i, b) = ret_int2(i)?;
305//!
306//! Ok((i, A { a, b }))
307//! }
308//!
309//! let r = f(b"abcdefghX");
310//! assert_eq!(r, Ok((&b"X"[..], A{a: 1, b: 2})));
311//! # }
312//! ```
313#![cfg_attr(docsrs, feature(doc_auto_cfg))]
314#![cfg_attr(docsrs, feature(doc_cfg))]
315#![cfg_attr(docsrs, feature(extended_key_value_attributes))]
316#![cfg_attr(not(feature = "std"), no_std)]
317#![cfg_attr(feature = "cargo-clippy", allow(clippy::doc_markdown))]
318#![deny(missing_docs)]
319#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
320#[cfg(feature = "alloc")]
321#[macro_use]
322extern crate alloc;
323#[cfg(doctest)]
324extern crate doc_comment;
325
326#[cfg(doctest)]
327doc_comment::doctest!("../README.md");
328
/// Facade module re-exporting everything the crate needs from `std` or `core`/`alloc`.
/// `serde` takes the same approach, although its facade is not public.
#[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
pub mod lib {
    /// `std` facade used when the `std` feature is enabled: re-exports straight from `std`,
    /// so `std` and `core`/`alloc` are interchangeable for the rest of the crate.
    #[cfg(feature = "std")]
    #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
    pub mod std {
        #[doc(hidden)]
        pub use std::{alloc, borrow, boxed, collections, hash, string, vec};

        #[doc(hidden)]
        pub use std::{cmp, convert, fmt, iter, mem, ops, option, result, slice, str};

        /// internal reproduction of std prelude
        #[doc(hidden)]
        pub mod prelude {
            pub use std::prelude as v1;
        }
    }

    /// `std` facade used when the `std` feature is disabled: re-exports from `core`, plus the
    /// `alloc` crate when the `alloc` feature is enabled.
    #[cfg(not(feature = "std"))]
    #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))]
    pub mod std {
        #[doc(hidden)]
        pub use core::{cmp, convert, fmt, iter, mem, ops, option, result, slice, str};

        // `borrow` comes from `alloc` when it is available, otherwise from `core`.
        #[cfg(feature = "alloc")]
        #[doc(hidden)]
        pub use alloc::{borrow, boxed, string, vec};

        #[cfg(not(feature = "alloc"))]
        #[doc(hidden)]
        pub use core::borrow;

        /// internal reproduction of std prelude
        #[doc(hidden)]
        pub mod prelude {
            pub use core::prelude as v1;
        }
    }
}
374
375pub use self::parser::*;
376
377#[macro_use]
378mod macros;
379#[macro_use]
380pub mod error;
381
382pub mod branch;
383pub mod combinator;
384pub mod input;
385pub mod multi;
386mod parser;
387pub mod sequence;
388
389pub mod bits;
390pub mod bytes;
391
392pub mod character;
393
394mod str;
395
396pub mod number;
397
398#[cfg(feature = "unstable-doc")]
399pub mod _cookbook;
400#[cfg(feature = "unstable-doc")]
401pub mod _tutorial;
402
403/// Core concepts available for glob import
404///
405/// Including
406/// - [`FinishIResult`]
407/// - [`Parser`]
408///
409/// ## Example
410///
411/// ```rust
412/// use nom8::prelude::*;
413///
414/// fn parse_data(input: &str) -> IResult<&str, u64> {
415/// // ...
416/// # nom8::character::u64(input)
417/// }
418///
419/// fn main() {
420/// let result = parse_data.parse("100").finish();
421/// assert_eq!(result, Ok(100));
422/// }
423/// ```
424pub mod prelude {
425 pub use crate::input::InputIsStreaming as _;
426 pub use crate::FinishIResult as _;
427 pub use crate::IResult;
428 pub use crate::IntoOutputIResult as _;
429 pub use crate::Parser as _;
430}