1 //! # Chapter 2: Tokens and Tags 2 //! 3 //! The simplest *useful* parser you can write is one which matches tokens. 4 //! 5 //! ## Tokens 6 //! 7 //! [`Stream`] provides some core operations to help with parsing. For example, to process a 8 //! single token, you can do: 9 //! ```rust 10 //! # use winnow::Parser; 11 //! # use winnow::PResult; 12 //! use winnow::stream::Stream; 13 //! use winnow::error::ParserError; 14 //! use winnow::error::ErrorKind; 15 //! use winnow::error::ErrMode; 16 //! 17 //! fn parse_prefix(input: &mut &str) -> PResult<char> { 18 //! let c = input.next_token().ok_or_else(|| { 19 //! ErrMode::from_error_kind(input, ErrorKind::Token) 20 //! })?; 21 //! if c != '0' { 22 //! return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)); 23 //! } 24 //! Ok(c) 25 //! } 26 //! 27 //! fn main() { 28 //! let mut input = "0x1a2b Hello"; 29 //! 30 //! let output = parse_prefix.parse_next(&mut input).unwrap(); 31 //! 32 //! assert_eq!(input, "x1a2b Hello"); 33 //! assert_eq!(output, '0'); 34 //! 35 //! assert!(parse_prefix.parse_next(&mut "d").is_err()); 36 //! } 37 //! ``` 38 //! 39 //! [`any`] and [`Parser::verify`] are [`Parser`] building blocks on top of [`Stream`]: 40 //! ```rust 41 //! # use winnow::PResult; 42 //! use winnow::Parser; 43 //! use winnow::token::any; 44 //! 45 //! fn parse_prefix(input: &mut &str) -> PResult<char> { 46 //! any.verify(|c| *c == '0').parse_next(input) 47 //! } 48 //! # 49 //! # fn main() { 50 //! # let mut input = "0x1a2b Hello"; 51 //! # 52 //! # let output = parse_prefix.parse_next(&mut input).unwrap(); 53 //! # 54 //! # assert_eq!(input, "x1a2b Hello"); 55 //! # assert_eq!(output, '0'); 56 //! # 57 //! # assert!(parse_prefix.parse_next(&mut "d").is_err()); 58 //! # } 59 //! ``` 60 //! 61 //! Matching a single token literal is common enough that [`Parser`] is implemented for 62 //! `char`. 63 //! 64 //! ```rust 65 //! # use winnow::PResult; 66 //! use winnow::Parser; 67 //! 68 //! 
fn parse_prefix(input: &mut &str) -> PResult<char> { 69 //! '0'.parse_next(input) 70 //! } 71 //! # 72 //! # fn main() { 73 //! # let mut input = "0x1a2b Hello"; 74 //! # 75 //! # let output = parse_prefix.parse_next(&mut input).unwrap(); 76 //! # 77 //! # assert_eq!(input, "x1a2b Hello"); 78 //! # assert_eq!(output, '0'); 79 //! # 80 //! # assert!(parse_prefix.parse_next(&mut "d").is_err()); 81 //! # } 82 //! ``` 83 //! 84 //! ## Tags 85 //! 86 //! [`Stream`] also supports processing slices of tokens: 87 //! ```rust 88 //! # use winnow::Parser; 89 //! # use winnow::PResult; 90 //! use winnow::stream::Stream; 91 //! use winnow::error::ParserError; 92 //! use winnow::error::ErrorKind; 93 //! use winnow::error::ErrMode; 94 //! 95 //! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> { 96 //! let expected = "0x"; 97 //! if input.len() < expected.len() { 98 //! return Err(ErrMode::from_error_kind(input, ErrorKind::Slice)); 99 //! } 100 //! let actual = input.next_slice(expected.len()); 101 //! if actual != expected { 102 //! return Err(ErrMode::from_error_kind(input, ErrorKind::Verify)); 103 //! } 104 //! Ok(actual) 105 //! } 106 //! 107 //! fn main() { 108 //! let mut input = "0x1a2b Hello"; 109 //! 110 //! let output = parse_prefix.parse_next(&mut input).unwrap(); 111 //! assert_eq!(input, "1a2b Hello"); 112 //! assert_eq!(output, "0x"); 113 //! 114 //! assert!(parse_prefix.parse_next(&mut "0o123").is_err()); 115 //! } 116 //! ``` 117 //! 118 //! Again, matching a literal is common enough that [`Parser`] is implemented for `&str`: 119 //! ```rust 120 //! # use winnow::PResult; 121 //! use winnow::Parser; 122 //! 123 //! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> { 124 //! "0x".parse_next(input) 125 //! } 126 //! # 127 //! # fn main() { 128 //! # let mut input = "0x1a2b Hello"; 129 //! # 130 //! # let output = parse_prefix.parse_next(&mut input).unwrap(); 131 //! # assert_eq!(input, "1a2b Hello"); 132 //! 
# assert_eq!(output, "0x"); 133 //! # 134 //! # assert!(parse_prefix.parse_next(&mut "0o123").is_err()); 135 //! # } 136 //! ``` 137 //! 138 //! In `winnow`, we call this type of parser a [`tag`]. See [`token`] for additional individual 139 //! and token-slice parsers. 140 //! 141 //! ## Character Classes 142 //! 143 //! Selecting a single `char` or a [`tag`] is fairly limited. Sometimes, you will want to select one of several 144 //! `chars` of a specific class, like digits. For this, we use the [`one_of`] parser: 145 //! 146 //! ```rust 147 //! # use winnow::Parser; 148 //! # use winnow::PResult; 149 //! use winnow::token::one_of; 150 //! 151 //! fn parse_digits(input: &mut &str) -> PResult<char> { 152 //! one_of(('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input) 153 //! } 154 //! 155 //! fn main() { 156 //! let mut input = "1a2b Hello"; 157 //! 158 //! let output = parse_digits.parse_next(&mut input).unwrap(); 159 //! assert_eq!(input, "a2b Hello"); 160 //! assert_eq!(output, '1'); 161 //! 162 //! assert!(parse_digits.parse_next(&mut "Z").is_err()); 163 //! } 164 //! ``` 165 //! 166 //! > **Aside:** [`one_of`] might look straightforward, a function returning a value that implements `Parser`. 167 //! > Let's look at it more closely as it's used above (resolving all generic parameters): 168 //! > ```rust 169 //! > # use winnow::prelude::*; 170 //! > # use winnow::error::InputError; 171 //! > pub fn one_of<'i>( 172 //! > list: &'static [char] 173 //! > ) -> impl Parser<&'i str, char, InputError<&'i str>> { 174 //! > // ... 175 //! > # winnow::token::one_of(list) 176 //! > } 177 //! > ``` 178 //! > If you have not programmed in a language where functions are values, the type signature of the 179 //! > [`one_of`] function might be a surprise. 180 //! > The function [`one_of`] *returns a function*. The function it returns is a 181 //! > `Parser`, taking a `&str` and returning a `PResult`. This is a common pattern in winnow for 182 //! 
> configurable or stateful parsers. 183 //! 184 //! Some character classes are common enough that a named parser is provided, such as: 185 //! - [`line_ending`][crate::ascii::line_ending]: Recognizes an end of line (both `\n` and `\r\n`) 186 //! - [`newline`][crate::ascii::newline]: Matches a newline character `\n` 187 //! - [`tab`][crate::ascii::tab]: Matches a tab character `\t` 188 //! 189 //! You can then capture sequences of these characters with parsers like [`take_while`]. 190 //! ```rust 191 //! # use winnow::Parser; 192 //! # use winnow::PResult; 193 //! use winnow::token::take_while; 194 //! 195 //! fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { 196 //! take_while(1.., ('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input) 197 //! } 198 //! 199 //! fn main() { 200 //! let mut input = "1a2b Hello"; 201 //! 202 //! let output = parse_digits.parse_next(&mut input).unwrap(); 203 //! assert_eq!(input, " Hello"); 204 //! assert_eq!(output, "1a2b"); 205 //! 206 //! assert!(parse_digits.parse_next(&mut "Z").is_err()); 207 //! } 208 //! ``` 209 //! 210 //! We could simplify this further by using one of the built-in character classes, [`hex_digit1`]: 211 //! ```rust 212 //! # use winnow::Parser; 213 //! # use winnow::PResult; 214 //! use winnow::ascii::hex_digit1; 215 //! 216 //! fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> { 217 //! hex_digit1.parse_next(input) 218 //! } 219 //! 220 //! fn main() { 221 //! let mut input = "1a2b Hello"; 222 //! 223 //! let output = parse_digits.parse_next(&mut input).unwrap(); 224 //! assert_eq!(input, " Hello"); 225 //! assert_eq!(output, "1a2b"); 226 //! 227 //! assert!(parse_digits.parse_next(&mut "Z").is_err()); 228 //! } 229 //! ``` 230 //! 231 //! See [`ascii`] for more text-based parsers. 
232 233 #![allow(unused_imports)] 234 use crate::ascii; 235 use crate::ascii::hex_digit1; 236 use crate::stream::ContainsToken; 237 use crate::stream::Stream; 238 use crate::token; 239 use crate::token::any; 240 use crate::token::one_of; 241 use crate::token::tag; 242 use crate::token::take_while; 243 use crate::Parser; 244 use std::ops::RangeInclusive; 245 246 pub use super::chapter_1 as previous; 247 pub use super::chapter_3 as next; 248 pub use crate::_tutorial as table_of_contents; 249