1 //! # Chapter 2: Tokens and Tags 2 //! 3 //! The simplest *useful* parser you can write is one which matches tokens. 4 //! 5 //! ## Tokens 6 //! 7 //! [`Stream`] provides some core operations to help with parsing. For example, to process a 8 //! single token, you can do: 9 //! ```rust 10 //! # use winnow::Parser; 11 //! # use winnow::PResult; 12 //! use winnow::stream::Stream; 13 //! use winnow::error::ParserError; 14 //! use winnow::error::ErrorKind; 15 //! use winnow::error::ErrMode; 16 //! 17 //! fn parse_prefix(input: &mut &str) -> PResult<char> { 18 //! let c = input.next_token().ok_or_else(|| { 19 //! ErrMode::from_error_kind(input, ErrorKind::Token) 20 //! })?; 21 //! if c != '0' { 22 //! return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)); 23 //! } 24 //! Ok(c) 25 //! } 26 //! 27 //! fn main() { 28 //! let mut input = "0x1a2b Hello"; 29 //! 30 //! let output = parse_prefix.parse_next(&mut input).unwrap(); 31 //! 32 //! assert_eq!(input, "x1a2b Hello"); 33 //! assert_eq!(output, '0'); 34 //! 35 //! assert!(parse_prefix.parse_next(&mut "d").is_err()); 36 //! } 37 //! ``` 38 //! 39 //! This extraction of a token is encapsulated in the [`any`] parser: 40 //! ```rust 41 //! # use winnow::PResult; 42 //! # use winnow::error::ParserError; 43 //! # use winnow::error::ErrorKind; 44 //! # use winnow::error::ErrMode; 45 //! use winnow::Parser; 46 //! use winnow::token::any; 47 //! 48 //! fn parse_prefix(input: &mut &str) -> PResult<char> { 49 //! let c = any 50 //! .parse_next(input)?; 51 //! if c != '0' { 52 //! return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)); 53 //! } 54 //! Ok(c) 55 //! } 56 //! # 57 //! # fn main() { 58 //! # let mut input = "0x1a2b Hello"; 59 //! # 60 //! # let output = parse_prefix.parse_next(&mut input).unwrap(); 61 //! # 62 //! # assert_eq!(input, "x1a2b Hello"); 63 //! # assert_eq!(output, '0'); 64 //! # 65 //! # assert!(parse_prefix.parse_next(&mut "d").is_err()); 66 //! # } 67 //! ``` 68 //! 69 //! 
Using the higher level [`any`] parser opens `parse_prefix` to the helpers on the [`Parser`] trait, 70 //! like [`Parser::verify`] which fails a parse if a condition isn't met, like our check above: 71 //! ```rust 72 //! # use winnow::PResult; 73 //! use winnow::Parser; 74 //! use winnow::token::any; 75 //! 76 //! fn parse_prefix(input: &mut &str) -> PResult<char> { 77 //! let c = any 78 //! .verify(|c| *c == '0') 79 //! .parse_next(input)?; 80 //! Ok(c) 81 //! } 82 //! # 83 //! # fn main() { 84 //! # let mut input = "0x1a2b Hello"; 85 //! # 86 //! # let output = parse_prefix.parse_next(&mut input).unwrap(); 87 //! # 88 //! # assert_eq!(input, "x1a2b Hello"); 89 //! # assert_eq!(output, '0'); 90 //! # 91 //! # assert!(parse_prefix.parse_next(&mut "d").is_err()); 92 //! # } 93 //! ``` 94 //! 95 //! Matching a single token literal is common enough that [`Parser`] is implemented for 96 //! the `char` type, encapsulating both [`any`] and [`Parser::verify`]: 97 //! ```rust 98 //! # use winnow::PResult; 99 //! use winnow::Parser; 100 //! 101 //! fn parse_prefix(input: &mut &str) -> PResult<char> { 102 //! let c = '0'.parse_next(input)?; 103 //! Ok(c) 104 //! } 105 //! # 106 //! # fn main() { 107 //! # let mut input = "0x1a2b Hello"; 108 //! # 109 //! # let output = parse_prefix.parse_next(&mut input).unwrap(); 110 //! # 111 //! # assert_eq!(input, "x1a2b Hello"); 112 //! # assert_eq!(output, '0'); 113 //! # 114 //! # assert!(parse_prefix.parse_next(&mut "d").is_err()); 115 //! # } 116 //! ``` 117 //! 118 //! ## Tags 119 //! 120 //! [`Stream`] also supports processing slices of tokens: 121 //! ```rust 122 //! # use winnow::Parser; 123 //! # use winnow::PResult; 124 //! use winnow::stream::Stream; 125 //! use winnow::error::ParserError; 126 //! use winnow::error::ErrorKind; 127 //! use winnow::error::ErrMode; 128 //! 129 //! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> { 130 //! let expected = "0x"; 131 //! if input.len() < expected.len() { 132 //! 
return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); 133 //! } 134 //! let actual = input.next_slice(expected.len()); 135 //! if actual != expected { 136 //! return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)); 137 //! } 138 //! Ok(actual) 139 //! } 140 //! 141 //! fn main() { 142 //! let mut input = "0x1a2b Hello"; 143 //! 144 //! let output = parse_prefix.parse_next(&mut input).unwrap(); 145 //! assert_eq!(input, "1a2b Hello"); 146 //! assert_eq!(output, "0x"); 147 //! 148 //! assert!(parse_prefix.parse_next(&mut "0o123").is_err()); 149 //! } 150 //! ``` 151 //! 152 //! Matching the input position against a string literal is encapsulated in the [`literal`] parser: 153 //! ```rust 154 //! # use winnow::PResult; 155 //! # use winnow::Parser; 156 //! use winnow::token::literal; 157 //! 158 //! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> { 159 //! let expected = "0x"; 160 //! let actual = literal(expected).parse_next(input)?; 161 //! Ok(actual) 162 //! } 163 //! # 164 //! # fn main() { 165 //! # let mut input = "0x1a2b Hello"; 166 //! # 167 //! # let output = parse_prefix.parse_next(&mut input).unwrap(); 168 //! # assert_eq!(input, "1a2b Hello"); 169 //! # assert_eq!(output, "0x"); 170 //! # 171 //! # assert!(parse_prefix.parse_next(&mut "0o123").is_err()); 172 //! # } 173 //! ``` 174 //! 175 //! Like for a single token, matching a string literal is common enough that [`Parser`] is implemented for the `&str` type: 176 //! ```rust 177 //! # use winnow::PResult; 178 //! use winnow::Parser; 179 //! 180 //! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> { 181 //! let actual = "0x".parse_next(input)?; 182 //! Ok(actual) 183 //! } 184 //! # 185 //! # fn main() { 186 //! # let mut input = "0x1a2b Hello"; 187 //! # 188 //! # let output = parse_prefix.parse_next(&mut input).unwrap(); 189 //! # assert_eq!(input, "1a2b Hello"); 190 //! # assert_eq!(output, "0x"); 191 //! # 192 //! 
# assert!(parse_prefix.parse_next(&mut "0o123").is_err()); 193 //! # } 194 //! ``` 195 //! 196 //! See [`token`] for additional individual and token-slice parsers. 197 //! 198 //! ## Character Classes 199 //! 200 //! Selecting a single `char` or a [`literal`] is fairly limited. Sometimes, you will want to select one of several 201 //! `char`s of a specific class, like digits. For this, we use the [`one_of`] parser: 202 //! 203 //! ```rust 204 //! # use winnow::Parser; 205 //! # use winnow::PResult; 206 //! use winnow::token::one_of; 207 //! 208 //! fn parse_digits(input: &mut &str) -> PResult<char> { 209 //! one_of(('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input) 210 //! } 211 //! 212 //! fn main() { 213 //! let mut input = "1a2b Hello"; 214 //! 215 //! let output = parse_digits.parse_next(&mut input).unwrap(); 216 //! assert_eq!(input, "a2b Hello"); 217 //! assert_eq!(output, '1'); 218 //! 219 //! assert!(parse_digits.parse_next(&mut "Z").is_err()); 220 //! } 221 //! ``` 222 //! 223 //! > **Aside:** [`one_of`] might look straightforward, a function returning a value that implements `Parser`. 224 //! > Let's look at it more closely as it's used above (resolving all generic parameters): 225 //! > ```rust 226 //! > # use winnow::prelude::*; 227 //! > # use winnow::error::InputError; 228 //! > pub fn one_of<'i>( 229 //! > list: &'static [char] 230 //! > ) -> impl Parser<&'i str, char, InputError<&'i str>> { 231 //! > // ... 232 //! > # winnow::token::one_of(list) 233 //! > } 234 //! > ``` 235 //! > If you have not programmed in a language where functions are values, the type signature of the 236 //! > [`one_of`] function might be a surprise. 237 //! > The function [`one_of`] *returns a function*. The function it returns is a 238 //! > `Parser`, taking a `&str` and returning a `PResult`. This is a common pattern in winnow for 239 //! > configurable or stateful parsers. 240 //! 241 //! 
Some character classes are common enough that a named parser is provided, like: 242 //! - [`line_ending`][crate::ascii::line_ending]: Recognizes an end of line (both `\n` and `\r\n`) 243 //! - [`newline`][crate::ascii::newline]: Matches a newline character `\n` 244 //! - [`tab`][crate::ascii::tab]: Matches a tab character `\t` 245 //! 246 //! You can then capture sequences of these characters with parsers like [`take_while`]. 247 //! ```rust 248 //! # use winnow::Parser; 249 //! # use winnow::PResult; 250 //! use winnow::token::take_while; 251 //! 252 //! fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { 253 //! take_while(1.., ('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input) 254 //! } 255 //! 256 //! fn main() { 257 //! let mut input = "1a2b Hello"; 258 //! 259 //! let output = parse_digits.parse_next(&mut input).unwrap(); 260 //! assert_eq!(input, " Hello"); 261 //! assert_eq!(output, "1a2b"); 262 //! 263 //! assert!(parse_digits.parse_next(&mut "Z").is_err()); 264 //! } 265 //! ``` 266 //! 267 //! We could simplify this further by using one of the built-in character classes, [`hex_digit1`]: 268 //! ```rust 269 //! # use winnow::Parser; 270 //! # use winnow::PResult; 271 //! use winnow::ascii::hex_digit1; 272 //! 273 //! fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { 274 //! hex_digit1.parse_next(input) 275 //! } 276 //! 277 //! fn main() { 278 //! let mut input = "1a2b Hello"; 279 //! 280 //! let output = parse_digits.parse_next(&mut input).unwrap(); 281 //! assert_eq!(input, " Hello"); 282 //! assert_eq!(output, "1a2b"); 283 //! 284 //! assert!(parse_digits.parse_next(&mut "Z").is_err()); 285 //! } 286 //! ``` 287 //! 288 //! See [`ascii`] for more text-based parsers. 
289 290 #![allow(unused_imports)] 291 use crate::ascii; 292 use crate::ascii::hex_digit1; 293 use crate::stream::ContainsToken; 294 use crate::stream::Stream; 295 use crate::token; 296 use crate::token::any; 297 use crate::token::literal; 298 use crate::token::one_of; 299 use crate::token::take_while; 300 use crate::Parser; 301 use std::ops::RangeInclusive; 302 303 pub use super::chapter_1 as previous; 304 pub use super::chapter_3 as next; 305 pub use crate::_tutorial as table_of_contents; 306