• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! # Chapter 2: Tokens and Tags
2 //!
3 //! The simplest *useful* parser you can write is one which matches tokens.
4 //!
5 //! ## Tokens
6 //!
7 //! [`Stream`] provides some core operations to help with parsing. For example, to process a
8 //! single token, you can do:
9 //! ```rust
10 //! # use winnow::Parser;
11 //! # use winnow::PResult;
12 //! use winnow::stream::Stream;
13 //! use winnow::error::ParserError;
14 //! use winnow::error::ErrorKind;
15 //! use winnow::error::ErrMode;
16 //!
17 //! fn parse_prefix(input: &mut &str) -> PResult<char> {
18 //!     let c = input.next_token().ok_or_else(|| {
19 //!         ErrMode::from_error_kind(input, ErrorKind::Token)
20 //!     })?;
21 //!     if c != '0' {
22 //!         return Err(ErrMode::from_error_kind(input, ErrorKind::Verify));
23 //!     }
24 //!     Ok(c)
25 //! }
26 //!
27 //! fn main()  {
28 //!     let mut input = "0x1a2b Hello";
29 //!
30 //!     let output = parse_prefix.parse_next(&mut input).unwrap();
31 //!
32 //!     assert_eq!(input, "x1a2b Hello");
33 //!     assert_eq!(output, '0');
34 //!
35 //!     assert!(parse_prefix.parse_next(&mut "d").is_err());
36 //! }
37 //! ```
38 //!
39 //! This extraction of a token is encapsulated in the [`any`] parser:
40 //! ```rust
41 //! # use winnow::PResult;
42 //! # use winnow::error::ParserError;
43 //! # use winnow::error::ErrorKind;
44 //! # use winnow::error::ErrMode;
45 //! use winnow::Parser;
46 //! use winnow::token::any;
47 //!
48 //! fn parse_prefix(input: &mut &str) -> PResult<char> {
49 //!     let c = any
50 //!         .parse_next(input)?;
51 //!     if c != '0' {
52 //!         return Err(ErrMode::from_error_kind(input, ErrorKind::Verify));
53 //!     }
54 //!     Ok(c)
55 //! }
56 //! #
57 //! # fn main()  {
58 //! #     let mut input = "0x1a2b Hello";
59 //! #
60 //! #     let output = parse_prefix.parse_next(&mut input).unwrap();
61 //! #
62 //! #     assert_eq!(input, "x1a2b Hello");
63 //! #     assert_eq!(output, '0');
64 //! #
65 //! #     assert!(parse_prefix.parse_next(&mut "d").is_err());
66 //! # }
67 //! ```
68 //!
69 //! Using the higher-level [`any`] parser opens `parse_prefix` to the helpers on the [`Parser`] trait,
70 //! such as [`Parser::verify`], which fails a parse if a condition isn't met, like our check above:
71 //! ```rust
72 //! # use winnow::PResult;
73 //! use winnow::Parser;
74 //! use winnow::token::any;
75 //!
76 //! fn parse_prefix(input: &mut &str) -> PResult<char> {
77 //!     let c = any
78 //!         .verify(|c| *c == '0')
79 //!         .parse_next(input)?;
80 //!     Ok(c)
81 //! }
82 //! #
83 //! # fn main()  {
84 //! #     let mut input = "0x1a2b Hello";
85 //! #
86 //! #     let output = parse_prefix.parse_next(&mut input).unwrap();
87 //! #
88 //! #     assert_eq!(input, "x1a2b Hello");
89 //! #     assert_eq!(output, '0');
90 //! #
91 //! #     assert!(parse_prefix.parse_next(&mut "d").is_err());
92 //! # }
93 //! ```
94 //!
95 //! Matching a single token literal is common enough that [`Parser`] is implemented for
96 //! the `char` type, encapsulating both [`any`] and [`Parser::verify`]:
97 //! ```rust
98 //! # use winnow::PResult;
99 //! use winnow::Parser;
100 //!
101 //! fn parse_prefix(input: &mut &str) -> PResult<char> {
102 //!     let c = '0'.parse_next(input)?;
103 //!     Ok(c)
104 //! }
105 //! #
106 //! # fn main()  {
107 //! #     let mut input = "0x1a2b Hello";
108 //! #
109 //! #     let output = parse_prefix.parse_next(&mut input).unwrap();
110 //! #
111 //! #     assert_eq!(input, "x1a2b Hello");
112 //! #     assert_eq!(output, '0');
113 //! #
114 //! #     assert!(parse_prefix.parse_next(&mut "d").is_err());
115 //! # }
116 //! ```
117 //!
118 //! ## Tags
119 //!
120 //! [`Stream`] also supports processing slices of tokens:
121 //! ```rust
122 //! # use winnow::Parser;
123 //! # use winnow::PResult;
124 //! use winnow::stream::Stream;
125 //! use winnow::error::ParserError;
126 //! use winnow::error::ErrorKind;
127 //! use winnow::error::ErrMode;
128 //!
129 //! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> {
130 //!     let expected = "0x";
131 //!     if input.len() < expected.len() {
132 //!         return Err(ErrMode::from_error_kind(input, ErrorKind::Slice));
133 //!     }
134 //!     let actual = input.next_slice(expected.len());
135 //!     if actual != expected {
136 //!         return Err(ErrMode::from_error_kind(input, ErrorKind::Verify));
137 //!     }
138 //!     Ok(actual)
139 //! }
140 //!
141 //! fn main()  {
142 //!     let mut input = "0x1a2b Hello";
143 //!
144 //!     let output = parse_prefix.parse_next(&mut input).unwrap();
145 //!     assert_eq!(input, "1a2b Hello");
146 //!     assert_eq!(output, "0x");
147 //!
148 //!     assert!(parse_prefix.parse_next(&mut "0o123").is_err());
149 //! }
150 //! ```
151 //!
152 //! Matching the input position against a string literal is encapsulated in the [`literal`] parser:
153 //! ```rust
154 //! # use winnow::PResult;
155 //! # use winnow::Parser;
156 //! use winnow::token::literal;
157 //!
158 //! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> {
159 //!     let expected = "0x";
160 //!     let actual = literal(expected).parse_next(input)?;
161 //!     Ok(actual)
162 //! }
163 //! #
164 //! # fn main()  {
165 //! #     let mut input = "0x1a2b Hello";
166 //! #
167 //! #     let output = parse_prefix.parse_next(&mut input).unwrap();
168 //! #     assert_eq!(input, "1a2b Hello");
169 //! #     assert_eq!(output, "0x");
170 //! #
171 //! #     assert!(parse_prefix.parse_next(&mut "0o123").is_err());
172 //! # }
173 //! ```
174 //!
175 //! As with a single token, matching a string literal is common enough that [`Parser`] is implemented for the `&str` type:
176 //! ```rust
177 //! # use winnow::PResult;
178 //! use winnow::Parser;
179 //!
180 //! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> {
181 //!     let actual = "0x".parse_next(input)?;
182 //!     Ok(actual)
183 //! }
184 //! #
185 //! # fn main()  {
186 //! #     let mut input = "0x1a2b Hello";
187 //! #
188 //! #     let output = parse_prefix.parse_next(&mut input).unwrap();
189 //! #     assert_eq!(input, "1a2b Hello");
190 //! #     assert_eq!(output, "0x");
191 //! #
192 //! #     assert!(parse_prefix.parse_next(&mut "0o123").is_err());
193 //! # }
194 //! ```
195 //!
196 //! See [`token`] for additional individual and token-slice parsers.
197 //!
198 //! ## Character Classes
199 //!
200 //! Selecting a single `char` or a [`literal`] is fairly limited. Sometimes, you will want to select one of several
201 //! `chars` of a specific class, like digits. For this, we use the [`one_of`] parser:
202 //!
203 //! ```rust
204 //! # use winnow::Parser;
205 //! # use winnow::PResult;
206 //! use winnow::token::one_of;
207 //!
208 //! fn parse_digits(input: &mut &str) -> PResult<char> {
209 //!     one_of(('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input)
210 //! }
211 //!
212 //! fn main() {
213 //!     let mut input = "1a2b Hello";
214 //!
215 //!     let output = parse_digits.parse_next(&mut input).unwrap();
216 //!     assert_eq!(input, "a2b Hello");
217 //!     assert_eq!(output, '1');
218 //!
219 //!     assert!(parse_digits.parse_next(&mut "Z").is_err());
220 //! }
221 //! ```
222 //!
223 //! > **Aside:** [`one_of`] might look straightforward, a function returning a value that implements `Parser`.
224 //! > Let's look at it more closely as it's used above (resolving all generic parameters):
225 //! > ```rust
226 //! > # use winnow::prelude::*;
227 //! > # use winnow::error::InputError;
228 //! > pub fn one_of<'i>(
229 //! >     list: &'static [char]
230 //! > ) -> impl Parser<&'i str, char, InputError<&'i str>> {
231 //! >     // ...
232 //! > #    winnow::token::one_of(list)
233 //! > }
234 //! > ```
235 //! > If you have not programmed in a language where functions are values, the type signature of the
236 //! > [`one_of`] function might be a surprise.
237 //! > The function [`one_of`] *returns a function*. The function it returns is a
238 //! > `Parser`, taking a `&str` and returning a `PResult`. This is a common pattern in winnow for
239 //! > configurable or stateful parsers.
240 //!
241 //! Some character classes are common enough that a named parser is provided, such as:
242 //! - [`line_ending`][crate::ascii::line_ending]: Recognizes an end of line (both `\n` and `\r\n`)
243 //! - [`newline`][crate::ascii::newline]: Matches a newline character `\n`
244 //! - [`tab`][crate::ascii::tab]: Matches a tab character `\t`
245 //!
246 //! You can then capture sequences of these characters with parsers like [`take_while`].
247 //! ```rust
248 //! # use winnow::Parser;
249 //! # use winnow::PResult;
250 //! use winnow::token::take_while;
251 //!
252 //! fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> {
253 //!     take_while(1.., ('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input)
254 //! }
255 //!
256 //! fn main() {
257 //!     let mut input = "1a2b Hello";
258 //!
259 //!     let output = parse_digits.parse_next(&mut input).unwrap();
260 //!     assert_eq!(input, " Hello");
261 //!     assert_eq!(output, "1a2b");
262 //!
263 //!     assert!(parse_digits.parse_next(&mut "Z").is_err());
264 //! }
265 //! ```
266 //!
267 //! We could simplify this further by using one of the built-in character classes, [`hex_digit1`]:
268 //! ```rust
269 //! # use winnow::Parser;
270 //! # use winnow::PResult;
271 //! use winnow::ascii::hex_digit1;
272 //!
273 //! fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> {
274 //!     hex_digit1.parse_next(input)
275 //! }
276 //!
277 //! fn main() {
278 //!     let mut input = "1a2b Hello";
279 //!
280 //!     let output = parse_digits.parse_next(&mut input).unwrap();
281 //!     assert_eq!(input, " Hello");
282 //!     assert_eq!(output, "1a2b");
283 //!
284 //!     assert!(parse_digits.parse_next(&mut "Z").is_err());
285 //! }
286 //! ```
287 //!
288 //! See [`ascii`] for more text-based parsers.
289 
290 #![allow(unused_imports)]
291 use crate::ascii;
292 use crate::ascii::hex_digit1;
293 use crate::stream::ContainsToken;
294 use crate::stream::Stream;
295 use crate::token;
296 use crate::token::any;
297 use crate::token::literal;
298 use crate::token::one_of;
299 use crate::token::take_while;
300 use crate::Parser;
301 use std::ops::RangeInclusive;
302 
303 pub use super::chapter_1 as previous;
304 pub use super::chapter_3 as next;
305 pub use crate::_tutorial as table_of_contents;
306