//! # nom, eating data byte by byte
//!
//! nom is a parser combinator library with a focus on safe parsing,
//! streaming patterns, and as much as possible zero copy.
//!
//! ## Example
//!
//! ```rust
//! use nom::{
//!   IResult,
//!   bytes::complete::{tag, take_while_m_n},
//!   combinator::map_res,
//!   sequence::tuple};
//!
//! #[derive(Debug,PartialEq)]
//! pub struct Color {
//!   pub red: u8,
//!   pub green: u8,
//!   pub blue: u8,
//! }
//!
//! fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
//!   u8::from_str_radix(input, 16)
//! }
//!
//! fn is_hex_digit(c: char) -> bool {
//!   c.is_digit(16)
//! }
//!
//! fn hex_primary(input: &str) -> IResult<&str, u8> {
//!   map_res(
//!     take_while_m_n(2, 2, is_hex_digit),
//!     from_hex
//!   )(input)
//! }
//!
//! fn hex_color(input: &str) -> IResult<&str, Color> {
//!   let (input, _) = tag("#")(input)?;
//!   let (input, (red, green, blue)) = tuple((hex_primary, hex_primary, hex_primary))(input)?;
//!
//!   Ok((input, Color { red, green, blue }))
//! }
//!
//! fn main() {
//!   assert_eq!(hex_color("#2F14DF"), Ok(("", Color {
//!     red: 47,
//!     green: 20,
//!     blue: 223,
//!   })));
//! }
//! ```
//!
//! The code is available on [Github](https://github.com/Geal/nom)
//!
//! There are a few [guides](https://github.com/Geal/nom/tree/main/doc) with more details
//! about [how to write parsers](https://github.com/Geal/nom/blob/main/doc/making_a_new_parser_from_scratch.md),
//! or the [error management system](https://github.com/Geal/nom/blob/main/doc/error_management.md).
//! You can also check out the [recipes] module that contains examples of common patterns.
//!
//! **Looking for a specific combinator? Read the
//! ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md)**
//!
//!
//! If you are upgrading to nom 5.0, please read the
//! [migration document](https://github.com/Geal/nom/blob/main/doc/upgrading_to_nom_5.md).
//!
//! ## Parser combinators
//!
//! Parser combinators are an approach to parsers that is very different from
//! software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and
//! [yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar
//! in a separate syntax and generating the corresponding code, you use very small
//! functions with very specific purposes, like "take 5 bytes", or "recognize the
//! word 'HTTP'", and assemble them in meaningful patterns like "recognize
//! 'HTTP', then a space, then a version".
//! The resulting code is small, and looks like the grammar you would have
//! written with other parser approaches.
//!
//! This gives us a few advantages:
//!
//! - The parsers are small and easy to write
//! - The parser components are easy to reuse (if they're general enough, please add them to nom!)
//! - The parser components are easy to test separately (unit tests and property-based tests)
//! - The parser combination code looks close to the grammar you would have written
//! - You can build partial parsers, specific to the data you need at the moment, and ignore the rest
//!
//! Here is an example of one such parser, to recognize text between parentheses:
//!
//! ```rust
//! use nom::{
//!   IResult,
//!   sequence::delimited,
//!   // see the "streaming/complete" paragraph lower for an explanation of these submodules
//!   character::complete::char,
//!   bytes::complete::is_not
//! };
//!
//! fn parens(input: &str) -> IResult<&str, &str> {
//!   delimited(char('('), is_not(")"), char(')'))(input)
//! }
//! ```
//!
//! It defines a function named `parens` which will recognize a sequence of the
//! character `(`, the longest byte array not containing `)`, then the character
//! `)`, and will return the byte array in the middle.
//!
//! Here is another parser, written without using nom's combinators this time:
//!
//! ```rust
//! use nom::{IResult, Err, Needed};
//!
//! # fn main() {
//! fn take4(i: &[u8]) -> IResult<&[u8], &[u8]>{
//!   if i.len() < 4 {
//!     Err(Err::Incomplete(Needed::new(4)))
//!   } else {
//!     Ok((&i[4..], &i[0..4]))
//!   }
//! }
//! # }
//! ```
//!
//! This function takes a byte array as input, and tries to consume 4 bytes.
//! Writing all the parsers manually, like this, is dangerous, despite Rust's
//! safety features. There are still a lot of mistakes one can make. That's why
//! nom provides a list of functions to help in developing parsers.
//!
//! With functions, you would write it like this:
//!
//! ```rust
//! use nom::{IResult, bytes::streaming::take};
//! fn take4(input: &str) -> IResult<&str, &str> {
//!   take(4u8)(input)
//! }
//! ```
//!
//! A parser in nom is a function which, for an input type `I`, an output type `O`
//! and an optional error type `E`, will have the following signature:
//!
//! ```rust,compile_fail
//! fn parser(input: I) -> IResult<I, O, E>;
//! ```
//!
//! Or like this, if you don't want to specify a custom error type (it will be `(I, ErrorKind)` by default):
//!
//! ```rust,compile_fail
//! fn parser(input: I) -> IResult<I, O>;
//! ```
//!
//! `IResult` is an alias for the `Result` type:
//!
//! ```rust
//! use nom::{Needed, error::Error};
//!
//! type IResult<I, O, E = Error<I>> = Result<(I, O), Err<E>>;
//!
//! enum Err<E> {
//!   Incomplete(Needed),
//!   Error(E),
//!   Failure(E),
//! }
//! ```
//!
//! It can have the following values:
//!
//! - A correct result `Ok((I,O))` with the first element being the remaining of the input (not parsed yet), and the second the output value;
//! - An error `Err(Err::Error(c))` with `c` an error that can be built from the input position and a parser specific error
//! - An error `Err(Err::Incomplete(Needed))` indicating that more input is necessary. `Needed` can indicate how much data is needed
//! - An error `Err(Err::Failure(c))`. It works like the `Error` case, except it indicates an unrecoverable error: We cannot backtrack and test another parser
//!
//! Please refer to the ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md) for an exhaustive list of parsers.
//! See also the rest of the documentation [here](https://github.com/Geal/nom/blob/main/doc).
//!
//! ## Making new parsers with function combinators
//!
//! nom is based on functions that generate parsers, with a signature like
//! this: `(arguments) -> impl Fn(Input) -> IResult<Input, Output, Error>`.
//! The arguments of a combinator can be direct values (like `take` which uses
//! a number of bytes or character as argument) or even other parsers (like
//! `delimited` which takes as argument 3 parsers, and returns the result of
//! the second one if all are successful).
//!
//! Here are some examples:
//!
//! ```rust
//! use nom::IResult;
//! use nom::bytes::complete::{tag, take};
//! fn abcd_parser(i: &str) -> IResult<&str, &str> {
//!   tag("abcd")(i) // will consume bytes if the input begins with "abcd"
//! }
//!
//! fn take_10(i: &[u8]) -> IResult<&[u8], &[u8]> {
//!   take(10u8)(i) // will consume and return 10 bytes of input
//! }
//! ```
//!
//! ## Combining parsers
//!
//! There are higher level patterns, like the **`alt`** combinator, which
//! provides a choice between multiple parsers.
//! If one branch fails, it tries
//! the next, and returns the result of the first parser that succeeds:
//!
//! ```rust
//! use nom::IResult;
//! use nom::branch::alt;
//! use nom::bytes::complete::tag;
//!
//! let mut alt_tags = alt((tag("abcd"), tag("efgh")));
//!
//! assert_eq!(alt_tags(&b"abcdxxx"[..]), Ok((&b"xxx"[..], &b"abcd"[..])));
//! assert_eq!(alt_tags(&b"efghxxx"[..]), Ok((&b"xxx"[..], &b"efgh"[..])));
//! assert_eq!(alt_tags(&b"ijklxxx"[..]), Err(nom::Err::Error((&b"ijklxxx"[..], nom::error::ErrorKind::Tag))));
//! ```
//!
//! The **`opt`** combinator makes a parser optional. If the child parser returns
//! an error, **`opt`** will still succeed and return None:
//!
//! ```rust
//! use nom::{IResult, combinator::opt, bytes::complete::tag};
//! fn abcd_opt(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
//!   opt(tag("abcd"))(i)
//! }
//!
//! assert_eq!(abcd_opt(&b"abcdxxx"[..]), Ok((&b"xxx"[..], Some(&b"abcd"[..]))));
//! assert_eq!(abcd_opt(&b"efghxxx"[..]), Ok((&b"efghxxx"[..], None)));
//! ```
//!
//! **`many0`** applies a parser 0 or more times, and returns a vector of the aggregated results:
//!
//! ```rust
//! # #[cfg(feature = "alloc")]
//! # fn main() {
//! use nom::{IResult, multi::many0, bytes::complete::tag};
//! use std::str;
//!
//! fn multi(i: &str) -> IResult<&str, Vec<&str>> {
//!   many0(tag("abcd"))(i)
//! }
//!
//! let a = "abcdef";
//! let b = "abcdabcdef";
//! let c = "azerty";
//! assert_eq!(multi(a), Ok(("ef", vec!["abcd"])));
//! assert_eq!(multi(b), Ok(("ef", vec!["abcd", "abcd"])));
//! assert_eq!(multi(c), Ok(("azerty", Vec::new())));
//! # }
//! # #[cfg(not(feature = "alloc"))]
//! # fn main() {}
//! ```
//!
//! Here are some basic combinators available:
//!
//! - **`opt`**: Will make the parser optional (if it returns the `O` type, the new parser returns `Option<O>`)
//! - **`many0`**: Will apply the parser 0 or more times (if it returns the `O` type, the new parser returns `Vec<O>`)
//! - **`many1`**: Will apply the parser 1 or more times
//!
//! There are more complex (and more useful) parsers like `tuple`, which is
//! used to apply a series of parsers then assemble their results.
//!
//! Example with `tuple`:
//!
//! ```rust
//! # fn main() {
//! use nom::{error::ErrorKind, Needed,
//!   number::streaming::be_u16,
//!   bytes::streaming::{tag, take},
//!   sequence::tuple};
//!
//! let mut tpl = tuple((be_u16, take(3u8), tag("fg")));
//!
//! assert_eq!(
//!   tpl(&b"abcdefgh"[..]),
//!   Ok((
//!     &b"h"[..],
//!     (0x6162u16, &b"cde"[..], &b"fg"[..])
//!   ))
//! );
//! assert_eq!(tpl(&b"abcde"[..]), Err(nom::Err::Incomplete(Needed::new(2))));
//! let input = &b"abcdejk"[..];
//! assert_eq!(tpl(input), Err(nom::Err::Error((&input[5..], ErrorKind::Tag))));
//! # }
//! ```
//!
//! But you can also use a sequence of combinators written in imperative style,
//! thanks to the `?` operator:
//!
//! ```rust
//! # fn main() {
//! use nom::{IResult, bytes::complete::tag};
//!
//! #[derive(Debug, PartialEq)]
//! struct A {
//!   a: u8,
//!   b: u8
//! }
//!
//! fn ret_int1(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,1)) }
//! fn ret_int2(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,2)) }
//!
//! fn f(i: &[u8]) -> IResult<&[u8], A> {
//!   // if successful, the parser returns `Ok((remaining_input, output_value))` that we can destructure
//!   let (i, _) = tag("abcd")(i)?;
//!   let (i, a) = ret_int1(i)?;
//!   let (i, _) = tag("efgh")(i)?;
//!   let (i, b) = ret_int2(i)?;
//!
//!   Ok((i, A { a, b }))
//! }
//!
//! let r = f(b"abcdefghX");
//! assert_eq!(r, Ok((&b"X"[..], A{a: 1, b: 2})));
//! # }
//! ```
//!
//! ## Streaming / Complete
//!
//! Some of nom's modules have `streaming` or `complete` submodules. They hold
//! different variants of the same combinators.
//!
//! A streaming parser assumes that we might not have all of the input data.
//! This can happen with some network protocol or large file parsers, where the
//! input buffer can be full and need to be resized or refilled.
//!
//! A complete parser assumes that we already have all of the input data.
//! This will be the common case with small files that can be read entirely to
//! memory.
//!
//! Here is how it works in practice:
//!
//! ```rust
//! use nom::{IResult, Err, Needed, error::{Error, ErrorKind}, bytes, character};
//!
//! fn take_streaming(i: &[u8]) -> IResult<&[u8], &[u8]> {
//!   bytes::streaming::take(4u8)(i)
//! }
//!
//! fn take_complete(i: &[u8]) -> IResult<&[u8], &[u8]> {
//!   bytes::complete::take(4u8)(i)
//! }
//!
//! // both parsers will take 4 bytes as expected
//! assert_eq!(take_streaming(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
//! assert_eq!(take_complete(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..])));
//!
//! // if the input is smaller than 4 bytes, the streaming parser
//! // will return `Incomplete` to indicate that we need more data
//! assert_eq!(take_streaming(&b"abc"[..]), Err(Err::Incomplete(Needed::new(1))));
//!
//! // but the complete parser will return an error
//! assert_eq!(take_complete(&b"abc"[..]), Err(Err::Error(Error::new(&b"abc"[..], ErrorKind::Eof))));
//!
//! // the alpha0 function recognizes 0 or more alphabetic characters
//! fn alpha0_streaming(i: &str) -> IResult<&str, &str> {
//!   character::streaming::alpha0(i)
//! }
//!
fn alpha0_complete(i: &str) -> IResult<&str, &str> { 357 //! character::complete::alpha0(i) 358 //! } 359 //! 360 //! // if there's a clear limit to the recognized characters, both parsers work the same way 361 //! assert_eq!(alpha0_streaming("abcd;"), Ok((";", "abcd"))); 362 //! assert_eq!(alpha0_complete("abcd;"), Ok((";", "abcd"))); 363 //! 364 //! // but when there's no limit, the streaming version returns `Incomplete`, because it cannot 365 //! // know if more input data should be recognized. The whole input could be "abcd;", or 366 //! // "abcde;" 367 //! assert_eq!(alpha0_streaming("abcd"), Err(Err::Incomplete(Needed::new(1)))); 368 //! 369 //! // while the complete version knows that all of the data is there 370 //! assert_eq!(alpha0_complete("abcd"), Ok(("", "abcd"))); 371 //! ``` 372 //! **Going further:** Read the [guides](https://github.com/Geal/nom/tree/main/doc), 373 //! check out the [recipes]! 374 #![cfg_attr(not(feature = "std"), no_std)] 375 #![cfg_attr(feature = "cargo-clippy", allow(clippy::doc_markdown))] 376 #![cfg_attr(feature = "docsrs", feature(doc_cfg))] 377 #![cfg_attr(feature = "docsrs", feature(extended_key_value_attributes))] 378 #![deny(missing_docs)] 379 #[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] 380 #[cfg(feature = "alloc")] 381 #[macro_use] 382 extern crate alloc; 383 #[cfg(doctest)] 384 extern crate doc_comment; 385 386 #[cfg(doctest)] 387 doc_comment::doctest!("../README.md"); 388 389 /// Lib module to re-export everything needed from `std` or `core`/`alloc`. This is how `serde` does 390 /// it, albeit there it is not public. 391 #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))] 392 pub mod lib { 393 /// `std` facade allowing `std`/`core` to be interchangeable. 
Reexports `alloc` crate optionally, 394 /// as well as `core` or `std` 395 #[cfg(not(feature = "std"))] 396 #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))] 397 /// internal std exports for no_std compatibility 398 pub mod std { 399 #[doc(hidden)] 400 #[cfg(not(feature = "alloc"))] 401 pub use core::borrow; 402 403 #[cfg(feature = "alloc")] 404 #[doc(hidden)] 405 pub use alloc::{borrow, boxed, string, vec}; 406 407 #[doc(hidden)] 408 pub use core::{cmp, convert, fmt, iter, mem, ops, option, result, slice, str}; 409 410 /// internal reproduction of std prelude 411 #[doc(hidden)] 412 pub mod prelude { 413 pub use core::prelude as v1; 414 } 415 } 416 417 #[cfg(feature = "std")] 418 #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))] 419 /// internal std exports for no_std compatibility 420 pub mod std { 421 #[doc(hidden)] 422 pub use std::{ 423 alloc, borrow, boxed, cmp, collections, convert, fmt, hash, iter, mem, ops, option, result, 424 slice, str, string, vec, 425 }; 426 427 /// internal reproduction of std prelude 428 #[doc(hidden)] 429 pub mod prelude { 430 pub use std::prelude as v1; 431 } 432 } 433 } 434 435 pub use self::bits::*; 436 pub use self::internal::*; 437 pub use self::traits::*; 438 439 pub use self::str::*; 440 441 #[macro_use] 442 mod macros; 443 #[macro_use] 444 pub mod error; 445 446 pub mod branch; 447 pub mod combinator; 448 mod internal; 449 pub mod multi; 450 pub mod sequence; 451 mod traits; 452 453 pub mod bits; 454 pub mod bytes; 455 456 pub mod character; 457 458 mod str; 459 460 pub mod number; 461 462 #[cfg(feature = "docsrs")] 463 #[cfg_attr(feature = "docsrs", cfg_attr(feature = "docsrs", doc = include_str!("../doc/nom_recipes.md")))] 464 pub mod recipes {} 465