1 //! # Elements of Programming Languages 2 //! 3 //! These are short recipes for accomplishing common tasks. 4 //! 5 //! * [Whitespace](#whitespace) 6 //! + [Wrapper combinators that eat whitespace before and after a parser](#wrapper-combinators-that-eat-whitespace-before-and-after-a-parser) 7 //! * [Comments](#comments) 8 //! + [`// C++/EOL-style comments`](#-ceol-style-comments) 9 //! + [`/* C-style comments */`](#-c-style-comments-) 10 //! * [Identifiers](#identifiers) 11 //! + [`Rust-Style Identifiers`](#rust-style-identifiers) 12 //! * [Literal Values](#literal-values) 13 //! + [Escaped Strings](#escaped-strings) 14 //! + [Integers](#integers) 15 //! - [Hexadecimal](#hexadecimal) 16 //! - [Octal](#octal) 17 //! - [Binary](#binary) 18 //! - [Decimal](#decimal) 19 //! + [Floating Point Numbers](#floating-point-numbers) 20 //! 21 //! ## Whitespace 22 //! 23 //! 24 //! 25 //! ### Wrapper combinators that eat whitespace before and after a parser 26 //! 27 //! ```rust 28 //! use winnow::prelude::*; 29 //! use winnow::{ 30 //! error::ParserError, 31 //! combinator::delimited, 32 //! ascii::multispace0, 33 //! }; 34 //! 35 //! /// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and 36 //! /// trailing whitespace, returning the output of `inner`. 37 //! fn ws<'a, F, O, E: ParserError<&'a str>>(inner: F) -> impl Parser<&'a str, O, E> 38 //! where 39 //! F: Parser<&'a str, O, E>, 40 //! { 41 //! delimited( 42 //! multispace0, 43 //! inner, 44 //! multispace0 45 //! ) 46 //! } 47 //! ``` 48 //! 49 //! To eat only trailing whitespace, replace `delimited(...)` with `terminated(&inner, multispace0)`. 50 //! Likewise, to eat only leading whitespace, replace `delimited(...)` with `preceded(multispace0, 51 //! &inner)`. You can use your own parser instead of `multispace0` if you want to skip a different set 52 //! of lexemes. 53 //! 54 //! ## Comments 55 //! 56 //! ### `// C++/EOL-style comments` 57 //! 58 //! 
This version uses `%` to start a comment, does not consume the newline character, and returns an 59 //! output of `()`. 60 //! 61 //! ```rust 62 //! use winnow::prelude::*; 63 //! use winnow::{ 64 //! error::ParserError, 65 //! token::take_till, 66 //! }; 67 //! 68 //! pub fn peol_comment<'a, E: ParserError<&'a str>>(i: &mut &'a str) -> PResult<(), E> 69 //! { 70 //! ('%', take_till(1.., ['\n', '\r'])) 71 //! .void() // Output is thrown away. 72 //! .parse_next(i) 73 //! } 74 //! ``` 75 //! 76 //! ### `/* C-style comments */` 77 //! 78 //! Inline comments surrounded with sentinel literals `(*` and `*)`. This version returns an output of `()` 79 //! and does not handle nested comments. 80 //! 81 //! ```rust 82 //! use winnow::prelude::*; 83 //! use winnow::{ 84 //! error::ParserError, 85 //! token::take_until, 86 //! }; 87 //! 88 //! pub fn pinline_comment<'a, E: ParserError<&'a str>>(i: &mut &'a str) -> PResult<(), E> { 89 //! ( 90 //! "(*", 91 //! take_until(0.., "*)"), 92 //! "*)" 93 //! ) 94 //! .void() // Output is thrown away. 95 //! .parse_next(i) 96 //! } 97 //! ``` 98 //! 99 //! ## Identifiers 100 //! 101 //! ### `Rust-Style Identifiers` 102 //! 103 //! Identifiers that may start with a letter (or underscore) and may contain underscores, 104 //! letters and numbers may be parsed like this: 105 //! 106 //! ```rust 107 //! use winnow::prelude::*; 108 //! use winnow::{ 109 //! stream::AsChar, 110 //! token::take_while, 111 //! token::one_of, 112 //! }; 113 //! 114 //! pub fn identifier<'s>(input: &mut &'s str) -> PResult<&'s str> { 115 //! ( 116 //! one_of(|c: char| c.is_alpha() || c == '_'), 117 //! take_while(0.., |c: char| c.is_alphanum() || c == '_') 118 //! ) 119 //! .take() 120 //! .parse_next(input) 121 //! } 122 //! ``` 123 //! 124 //! Let's say we apply this to the identifier `hello_world123abc`. The first element of the tuple 125 //! would use [`one_of`][crate::token::one_of] which would take `h`. The tuple ensures that 126 //! 
`ello_world123abc` will be piped to the next [`take_while`][crate::token::take_while] parser, 127 //! which takes every remaining character. However, the tuple returns a tuple of the results 128 //! of its sub-parsers. The [`take`][crate::Parser::take] parser produces a `&str` of the 129 //! input text that was parsed, which in this case is the entire `&str` `hello_world123abc`. 130 //! 131 //! ## Literal Values 132 //! 133 //! ### Escaped Strings 134 //! 135 //! ```rust 136 #![doc = include_str!("../../examples/string/parser.rs")] 137 //! ``` 138 //! 139 //! See also [`take_escaped`] and [`escaped_transform`]. 140 //! 141 //! ### Integers 142 //! 143 //! The following recipes all return string slices rather than integer values. How to obtain an 144 //! integer value instead is demonstrated for hexadecimal integers. The others are similar. 145 //! 146 //! The parsers allow the grouping character `_`, which allows one to group the digits by byte, for 147 //! example: `0xA4_3F_11_28`. If you prefer to exclude the `_` character, the lambda to convert from a 148 //! string slice to an integer value is slightly simpler. You can also strip the `_` from the string 149 //! slice that is returned, which is demonstrated in the second hexadecimal number parser. 150 //! 151 //! #### Hexadecimal 152 //! 153 //! The parser outputs the string slice of the digits without the leading `0x`/`0X`. 154 //! 155 //! ```rust 156 //! use winnow::prelude::*; 157 //! use winnow::{ 158 //! combinator::alt, 159 //! combinator::{repeat}, 160 //! combinator::{preceded, terminated}, 161 //! token::one_of, 162 //! }; 163 //! 164 //! fn hexadecimal<'s>(input: &mut &'s str) -> PResult<&'s str> { // <'a, E: ParserError<&'a str>> 165 //! preceded( 166 //! alt(("0x", "0X")), 167 //! repeat(1.., 168 //! terminated(one_of(('0'..='9', 'a'..='f', 'A'..='F')), repeat(0.., '_').map(|()| ())) 169 //! ).map(|()| ()).take() 170 //! ).parse_next(input) 171 //! } 172 //! ``` 173 //! 174 //! 
If you want it to return the integer value instead, use map: 175 //! 176 //! ```rust 177 //! use winnow::prelude::*; 178 //! use winnow::{ 179 //! combinator::alt, 180 //! combinator::{repeat}, 181 //! combinator::{preceded, terminated}, 182 //! token::one_of, 183 //! }; 184 //! 185 //! fn hexadecimal_value(input: &mut &str) -> PResult<i64> { 186 //! preceded( 187 //! alt(("0x", "0X")), 188 //! repeat(1.., 189 //! terminated(one_of(('0'..='9', 'a'..='f', 'A'..='F')), repeat(0.., '_').map(|()| ())) 190 //! ).map(|()| ()).take() 191 //! ).try_map( 192 //! |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 16) 193 //! ).parse_next(input) 194 //! } 195 //! ``` 196 //! 197 //! See also [`hex_uint`] 198 //! 199 //! #### Octal 200 //! 201 //! ```rust 202 //! use winnow::prelude::*; 203 //! use winnow::{ 204 //! combinator::alt, 205 //! combinator::{repeat}, 206 //! combinator::{preceded, terminated}, 207 //! token::one_of, 208 //! }; 209 //! 210 //! fn octal<'s>(input: &mut &'s str) -> PResult<&'s str> { 211 //! preceded( 212 //! alt(("0o", "0O")), 213 //! repeat(1.., 214 //! terminated(one_of('0'..='7'), repeat(0.., '_').map(|()| ())) 215 //! ).map(|()| ()).take() 216 //! ).parse_next(input) 217 //! } 218 //! ``` 219 //! 220 //! #### Binary 221 //! 222 //! ```rust 223 //! use winnow::prelude::*; 224 //! use winnow::{ 225 //! combinator::alt, 226 //! combinator::{repeat}, 227 //! combinator::{preceded, terminated}, 228 //! token::one_of, 229 //! }; 230 //! 231 //! fn binary<'s>(input: &mut &'s str) -> PResult<&'s str> { 232 //! preceded( 233 //! alt(("0b", "0B")), 234 //! repeat(1.., 235 //! terminated(one_of('0'..='1'), repeat(0.., '_').map(|()| ())) 236 //! ).map(|()| ()).take() 237 //! ).parse_next(input) 238 //! } 239 //! ``` 240 //! 241 //! #### Decimal 242 //! 243 //! ```rust 244 //! use winnow::prelude::*; 245 //! use winnow::{ 246 //! combinator::{repeat}, 247 //! combinator::terminated, 248 //! token::one_of, 249 //! }; 250 //! 251 //! 
fn decimal<'s>(input: &mut &'s str) -> PResult<&'s str> { 252 //! repeat(1.., 253 //! terminated(one_of('0'..='9'), repeat(0.., '_').map(|()| ())) 254 //! ).map(|()| ()) 255 //! .take() 256 //! .parse_next(input) 257 //! } 258 //! ``` 259 //! 260 //! See also [`dec_uint`] and [`dec_int`] 261 //! 262 //! ### Floating Point Numbers 263 //! 264 //! The following is adapted from [the Python parser by Valentin Lorentz](https://github.com/ProgVal/rust-python-parser/blob/master/src/numbers.rs). 265 //! 266 //! ```rust 267 //! use winnow::prelude::*; 268 //! use winnow::{ 269 //! combinator::alt, 270 //! combinator::{repeat}, 271 //! combinator::opt, 272 //! combinator::{preceded, terminated}, 273 //! token::one_of, 274 //! }; 275 //! 276 //! fn float<'s>(input: &mut &'s str) -> PResult<&'s str> { 277 //! alt(( 278 //! // Case one: .42 279 //! ( 280 //! '.', 281 //! decimal, 282 //! opt(( 283 //! one_of(['e', 'E']), 284 //! opt(one_of(['+', '-'])), 285 //! decimal 286 //! )) 287 //! ).take() 288 //! , // Case two: 42e42 and 42.42e42 289 //! ( 290 //! decimal, 291 //! opt(preceded( 292 //! '.', 293 //! decimal, 294 //! )), 295 //! one_of(['e', 'E']), 296 //! opt(one_of(['+', '-'])), 297 //! decimal 298 //! ).take() 299 //! , // Case three: 42. and 42.42 300 //! ( 301 //! decimal, 302 //! '.', 303 //! opt(decimal) 304 //! ).take() 305 //! )).parse_next(input) 306 //! } 307 //! 308 //! fn decimal<'s>(input: &mut &'s str) -> PResult<&'s str> { 309 //! repeat(1.., 310 //! terminated(one_of('0'..='9'), repeat(0.., '_').map(|()| ())) 311 //! ). 312 //! map(|()| ()) 313 //! .take() 314 //! .parse_next(input) 315 //! } 316 //! ``` 317 //! 318 //! See also [`float`] 319 320 #![allow(unused_imports)] 321 use crate::ascii::dec_int; 322 use crate::ascii::dec_uint; 323 use crate::ascii::escaped_transform; 324 use crate::ascii::float; 325 use crate::ascii::hex_uint; 326 use crate::ascii::take_escaped; 327