• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! # Chapter 2: Tokens and Tags
2 //!
3 //! The simplest *useful* parser you can write is one which matches tokens.
4 //!
5 //! ## Tokens
6 //!
7 //! [`Stream`] provides some core operations to help with parsing. For example, to process a
8 //! single token, you can do:
9 //! ```rust
10 //! # use winnow::Parser;
11 //! # use winnow::PResult;
12 //! use winnow::stream::Stream;
13 //! use winnow::error::ParserError;
14 //! use winnow::error::ErrorKind;
15 //! use winnow::error::ErrMode;
16 //!
17 //! fn parse_prefix(input: &mut &str) -> PResult<char> {
18 //!     let c = input.next_token().ok_or_else(|| {
19 //!         ErrMode::from_error_kind(input, ErrorKind::Token)
20 //!     })?;
21 //!     if c != '0' {
22 //!         return Err(ErrMode::from_error_kind(input, ErrorKind::Verify));
23 //!     }
24 //!     Ok(c)
25 //! }
26 //!
27 //! fn main()  {
28 //!     let mut input = "0x1a2b Hello";
29 //!
30 //!     let output = parse_prefix.parse_next(&mut input).unwrap();
31 //!
32 //!     assert_eq!(input, "x1a2b Hello");
33 //!     assert_eq!(output, '0');
34 //!
35 //!     assert!(parse_prefix.parse_next(&mut "d").is_err());
36 //! }
37 //! ```
38 //!
39 //! [`any`] and [`Parser::verify`] are [`Parser`] building blocks on top of [`Stream`]:
40 //! ```rust
41 //! # use winnow::PResult;
42 //! use winnow::Parser;
43 //! use winnow::token::any;
44 //!
45 //! fn parse_prefix(input: &mut &str) -> PResult<char> {
46 //!     any.verify(|c| *c == '0').parse_next(input)
47 //! }
48 //! #
49 //! # fn main()  {
50 //! #     let mut input = "0x1a2b Hello";
51 //! #
52 //! #     let output = parse_prefix.parse_next(&mut input).unwrap();
53 //! #
54 //! #     assert_eq!(input, "x1a2b Hello");
55 //! #     assert_eq!(output, '0');
56 //! #
57 //! #     assert!(parse_prefix.parse_next(&mut "d").is_err());
58 //! # }
59 //! ```
60 //!
61 //! Matching a single token literal is common enough that [`Parser`] is implemented for
62 //! `char`.
63 //!
64 //! ```rust
65 //! # use winnow::PResult;
66 //! use winnow::Parser;
67 //!
68 //! fn parse_prefix(input: &mut &str) -> PResult<char> {
69 //!     '0'.parse_next(input)
70 //! }
71 //! #
72 //! # fn main()  {
73 //! #     let mut input = "0x1a2b Hello";
74 //! #
75 //! #     let output = parse_prefix.parse_next(&mut input).unwrap();
76 //! #
77 //! #     assert_eq!(input, "x1a2b Hello");
78 //! #     assert_eq!(output, '0');
79 //! #
80 //! #     assert!(parse_prefix.parse_next(&mut "d").is_err());
81 //! # }
82 //! ```
83 //!
84 //! ## Tags
85 //!
86 //! [`Stream`] also supports processing slices of tokens:
87 //! ```rust
88 //! # use winnow::Parser;
89 //! # use winnow::PResult;
90 //! use winnow::stream::Stream;
91 //! use winnow::error::ParserError;
92 //! use winnow::error::ErrorKind;
93 //! use winnow::error::ErrMode;
94 //!
95 //! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> {
96 //!     let expected = "0x";
97 //!     if input.len() < expected.len() {
98 //!         return Err(ErrMode::from_error_kind(input, ErrorKind::Slice));
99 //!     }
100 //!     let actual = input.next_slice(expected.len());
101 //!     if actual != expected {
102 //!         return Err(ErrMode::from_error_kind(input, ErrorKind::Verify));
103 //!     }
104 //!     Ok(actual)
105 //! }
106 //!
107 //! fn main()  {
108 //!     let mut input = "0x1a2b Hello";
109 //!
110 //!     let output = parse_prefix.parse_next(&mut input).unwrap();
111 //!     assert_eq!(input, "1a2b Hello");
112 //!     assert_eq!(output, "0x");
113 //!
114 //!     assert!(parse_prefix.parse_next(&mut "0o123").is_err());
115 //! }
116 //! ```
117 //!
118 //! Again, matching a literal is common enough that [`Parser`] is implemented for `&str`:
119 //! ```rust
120 //! # use winnow::PResult;
121 //! use winnow::Parser;
122 //!
123 //! fn parse_prefix<'s>(input: &mut &'s str) -> PResult<&'s str> {
124 //!     "0x".parse_next(input)
125 //! }
126 //! #
127 //! # fn main()  {
128 //! #     let mut input = "0x1a2b Hello";
129 //! #
130 //! #     let output = parse_prefix.parse_next(&mut input).unwrap();
131 //! #     assert_eq!(input, "1a2b Hello");
132 //! #     assert_eq!(output, "0x");
133 //! #
134 //! #     assert!(parse_prefix.parse_next(&mut "0o123").is_err());
135 //! # }
136 //! ```
137 //!
138 //! In `winnow`, we call this type of parser a [`tag`]. See [`token`] for additional individual
139 //! and token-slice parsers.
140 //!
141 //! ## Character Classes
142 //!
143 //! Selecting a single `char` or a [`tag`] is fairly limited. Sometimes, you will want to select one of several
144 //! `chars` of a specific class, like digits. For this, we use the [`one_of`] parser:
145 //!
146 //! ```rust
147 //! # use winnow::Parser;
148 //! # use winnow::PResult;
149 //! use winnow::token::one_of;
150 //!
151 //! fn parse_digits(input: &mut &str) -> PResult<char> {
152 //!     one_of(('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input)
153 //! }
154 //!
155 //! fn main() {
156 //!     let mut input = "1a2b Hello";
157 //!
158 //!     let output = parse_digits.parse_next(&mut input).unwrap();
159 //!     assert_eq!(input, "a2b Hello");
160 //!     assert_eq!(output, '1');
161 //!
162 //!     assert!(parse_digits.parse_next(&mut "Z").is_err());
163 //! }
164 //! ```
165 //!
166 //! > **Aside:** [`one_of`] might look straightforward, a function returning a value that implements `Parser`.
167 //! > Let's look at it more closely as it's used above (resolving all generic parameters):
168 //! > ```rust
169 //! > # use winnow::prelude::*;
170 //! > # use winnow::error::InputError;
171 //! > pub fn one_of<'i>(
172 //! >     list: &'static [char]
173 //! > ) -> impl Parser<&'i str, char, InputError<&'i str>> {
174 //! >     // ...
175 //! > #    winnow::token::one_of(list)
176 //! > }
177 //! > ```
178 //! > If you have not programmed in a language where functions are values, the type signature of the
179 //! > [`one_of`] function might be a surprise.
180 //! > The function [`one_of`] *returns a function*. The function it returns is a
181 //! > `Parser`, taking a `&str` and returning a `PResult`. This is a common pattern in winnow for
182 //! > configurable or stateful parsers.
183 //!
184 //! Some character classes are common enough that a named parser is provided, such as:
185 //! - [`line_ending`][crate::ascii::line_ending]: Recognizes an end of line (both `\n` and `\r\n`)
186 //! - [`newline`][crate::ascii::newline]: Matches a newline character `\n`
187 //! - [`tab`][crate::ascii::tab]: Matches a tab character `\t`
188 //!
189 //! You can then capture sequences of these characters with parsers like [`take_while`].
190 //! ```rust
191 //! # use winnow::Parser;
192 //! # use winnow::PResult;
193 //! use winnow::token::take_while;
194 //!
195 //! fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> {
196 //!     take_while(1.., ('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input)
197 //! }
198 //!
199 //! fn main() {
200 //!     let mut input = "1a2b Hello";
201 //!
202 //!     let output = parse_digits.parse_next(&mut input).unwrap();
203 //!     assert_eq!(input, " Hello");
204 //!     assert_eq!(output, "1a2b");
205 //!
206 //!     assert!(parse_digits.parse_next(&mut "Z").is_err());
207 //! }
208 //! ```
209 //!
210 //! We could simplify this further by using one of the built-in character classes, [`hex_digit1`]:
211 //! ```rust
212 //! # use winnow::Parser;
213 //! # use winnow::PResult;
214 //! use winnow::ascii::hex_digit1;
215 //!
216 //! fn parse_digits<'s>(input: &mut &'s str) -> PResult<&'s str> {
217 //!     hex_digit1.parse_next(input)
218 //! }
219 //!
220 //! fn main() {
221 //!     let mut input = "1a2b Hello";
222 //!
223 //!     let output = parse_digits.parse_next(&mut input).unwrap();
224 //!     assert_eq!(input, " Hello");
225 //!     assert_eq!(output, "1a2b");
226 //!
227 //!     assert!(parse_digits.parse_next(&mut "Z").is_err());
228 //! }
229 //! ```
230 //!
231 //! See [`ascii`] for more text-based parsers.
232 
233 #![allow(unused_imports)]
234 use crate::ascii;
235 use crate::ascii::hex_digit1;
236 use crate::stream::ContainsToken;
237 use crate::stream::Stream;
238 use crate::token;
239 use crate::token::any;
240 use crate::token::one_of;
241 use crate::token::tag;
242 use crate::token::take_while;
243 use crate::Parser;
244 use std::ops::RangeInclusive;
245 
246 pub use super::chapter_1 as previous;
247 pub use super::chapter_3 as next;
248 pub use crate::_tutorial as table_of_contents;
249