1 use std::collections::HashMap;
2 use std::str;
3
4 use winnow::prelude::*;
5 use winnow::{
6 ascii::float,
7 combinator::alt,
8 combinator::cut_err,
9 combinator::{delimited, preceded, separated_pair, terminated},
10 combinator::{repeat, separated},
11 error::{AddContext, ParserError, StrContext},
12 token::{any, none_of, take, take_while},
13 };
14
15 use crate::json::JsonValue;
16
/// Input type consumed by every parser in this module: a borrowed string slice.
pub(crate) type Stream<'i> = &'i str;
18
19 /// The root element of a JSON parser is any value
20 ///
21 /// A parser has the following signature:
22 /// `&mut Stream -> PResult<Output, ContextError>`, with `PResult` defined as:
23 /// `type PResult<O, E = (I, ErrorKind)> = Result<O, Err<E>>;`
24 ///
25 /// most of the times you can ignore the error type and use the default (but this
26 /// examples shows custom error types later on!)
27 ///
28 /// Here we use `&str` as input type, but parsers can be generic over
29 /// the input type, work directly with `&[u8]`, or any other type that
30 /// implements the required traits.
json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>31 pub(crate) fn json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
32 input: &mut Stream<'i>,
33 ) -> PResult<JsonValue, E> {
34 delimited(ws, json_value, ws).parse_next(input)
35 }
36
37 /// `alt` is a combinator that tries multiple parsers one by one, until
38 /// one of them succeeds
json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>39 fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
40 input: &mut Stream<'i>,
41 ) -> PResult<JsonValue, E> {
42 // `alt` combines the each value parser. It returns the result of the first
43 // successful parser, or an error
44 alt((
45 null.value(JsonValue::Null),
46 boolean.map(JsonValue::Boolean),
47 string.map(JsonValue::Str),
48 float.map(JsonValue::Num),
49 array.map(JsonValue::Array),
50 object.map(JsonValue::Object),
51 ))
52 .parse_next(input)
53 }
54
55 /// `literal(string)` generates a parser that takes the argument string.
56 ///
57 /// This also shows returning a sub-slice of the original input
null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>58 fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
59 // This is a parser that returns `"null"` if it sees the string "null", and
60 // an error otherwise
61 "null".parse_next(input)
62 }
63
64 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
65 /// success.
boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>66 fn boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
67 // This is a parser that returns `true` if it sees the string "true", and
68 // an error otherwise
69 let parse_true = "true".value(true);
70
71 // This is a parser that returns `false` if it sees the string "false", and
72 // an error otherwise
73 let parse_false = "false".value(false);
74
75 alt((parse_true, parse_false)).parse_next(input)
76 }
77
78 /// This parser gathers all `char`s up into a `String`with a parse to take the double quote
79 /// character, before the string (using `preceded`) and after the string (using `terminated`).
string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<String, E>80 fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
81 input: &mut Stream<'i>,
82 ) -> PResult<String, E> {
83 preceded(
84 '\"',
85 // `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to
86 // combinators like `alt` that they should not try other parsers. We were in the
87 // right branch (since we found the `"` character) but encountered an error when
88 // parsing the string
89 cut_err(terminated(
90 repeat(0.., character).fold(String::new, |mut string, c| {
91 string.push(c);
92 string
93 }),
94 '\"',
95 )),
96 )
97 // `context` lets you add a static string to errors to provide more information in the
98 // error chain (to indicate which parser had an error)
99 .context(StrContext::Expected("string".into()))
100 .parse_next(input)
101 }
102
103 /// You can mix the above declarative parsing with an imperative style to handle more unique cases,
104 /// like escaping
character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>105 fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
106 let c = none_of('\"').parse_next(input)?;
107 if c == '\\' {
108 alt((
109 any.verify_map(|c| {
110 Some(match c {
111 '"' | '\\' | '/' => c,
112 'b' => '\x08',
113 'f' => '\x0C',
114 'n' => '\n',
115 'r' => '\r',
116 't' => '\t',
117 _ => return None,
118 })
119 }),
120 preceded('u', unicode_escape),
121 ))
122 .parse_next(input)
123 } else {
124 Ok(c)
125 }
126 }
127
unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>128 fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
129 alt((
130 // Not a surrogate
131 u16_hex
132 .verify(|cp| !(0xD800..0xE000).contains(cp))
133 .map(|cp| cp as u32),
134 // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
135 separated_pair(u16_hex, "\\u", u16_hex)
136 .verify(|(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low))
137 .map(|(high, low)| {
138 let high_ten = (high as u32) - 0xD800;
139 let low_ten = (low as u32) - 0xDC00;
140 (high_ten << 10) + low_ten + 0x10000
141 }),
142 ))
143 .verify_map(
144 // Could be probably replaced with .unwrap() or _unchecked due to the verify checks
145 std::char::from_u32,
146 )
147 .parse_next(input)
148 }
149
u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E>150 fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> {
151 take(4usize)
152 .verify_map(|s| u16::from_str_radix(s, 16).ok())
153 .parse_next(input)
154 }
155
156 /// Some combinators, like `separated` or `repeat`, will call a parser repeatedly,
157 /// accumulating results in a `Vec`, until it encounters an error.
158 /// If you want more control on the parser application, check out the `iterator`
159 /// combinator (cf `examples/iterator.rs`)
array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<Vec<JsonValue>, E>160 fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
161 input: &mut Stream<'i>,
162 ) -> PResult<Vec<JsonValue>, E> {
163 preceded(
164 ('[', ws),
165 cut_err(terminated(
166 separated(0.., json_value, (ws, ',', ws)),
167 (ws, ']'),
168 )),
169 )
170 .context(StrContext::Expected("array".into()))
171 .parse_next(input)
172 }
173
object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<HashMap<String, JsonValue>, E>174 fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
175 input: &mut Stream<'i>,
176 ) -> PResult<HashMap<String, JsonValue>, E> {
177 preceded(
178 ('{', ws),
179 cut_err(terminated(
180 separated(0.., key_value, (ws, ',', ws)),
181 (ws, '}'),
182 )),
183 )
184 .context(StrContext::Expected("object".into()))
185 .parse_next(input)
186 }
187
key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<(String, JsonValue), E>188 fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
189 input: &mut Stream<'i>,
190 ) -> PResult<(String, JsonValue), E> {
191 separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input)
192 }
193
194 /// Parser combinators are constructed from the bottom up:
195 /// first we write parsers for the smallest elements (here a space character),
196 /// then we'll combine them in larger parsers
ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>197 fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
198 // Combinators like `take_while` return a function. That function is the
199 // parser,to which we can pass the input
200 take_while(0.., WS).parse_next(input)
201 }
202
203 const WS: &[char] = &[' ', '\t', '\r', '\n'];
204
/// Unit tests for the string parser and for whole-document parsing.
#[cfg(test)]
mod test {
    #[allow(clippy::useless_attribute)]
    #[allow(unused_imports)] // it's dead for benches
    use super::*;

    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // it's dead for benches
    type Error = winnow::error::ContextError;

    /// Covers plain strings, every escape form, surrogate pairs, and a set of
    /// malformed inputs that must be rejected.
    #[test]
    fn json_string() {
        assert_eq!(string::<Error>.parse_peek("\"\""), Ok(("", "".to_owned())));
        assert_eq!(
            string::<Error>.parse_peek("\"abc\""),
            Ok(("", "abc".to_owned()))
        );
        assert_eq!(
            string::<Error>
                .parse_peek("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""),
            // One em dash comes from the JSON escape, the other is a literal in the input.
            Ok((
                "",
                "abc\"\\/\x08\x0C\n\r\t\x01\u{2014}\u{2014}def".to_owned()
            )),
        );
        // The surrogate pair D83D/DE10 decodes to U+1F610 (a single emoji),
        // not to an empty string.
        assert_eq!(
            string::<Error>.parse_peek("\"\\uD83D\\uDE10\""),
            Ok(("", "\u{1F610}".to_owned()))
        );

        // Unterminated strings.
        assert!(string::<Error>.parse_peek("\"").is_err());
        assert!(string::<Error>.parse_peek("\"abc").is_err());
        assert!(string::<Error>.parse_peek("\"\\\"").is_err());
        // Too few hex digits.
        assert!(string::<Error>.parse_peek("\"\\u123\"").is_err());
        // Lone or mismatched surrogates.
        assert!(string::<Error>.parse_peek("\"\\uD800\"").is_err());
        assert!(string::<Error>.parse_peek("\"\\uD800\\uD800\"").is_err());
        assert!(string::<Error>.parse_peek("\"\\uDC00\"").is_err());
    }

    /// A compact object with a number and a string member.
    #[test]
    fn json_object() {
        use JsonValue::{Num, Object, Str};

        let input = r#"{"a":42,"b":"x"}"#;

        let expected = Object(
            vec![
                ("a".to_owned(), Num(42.0)),
                ("b".to_owned(), Str("x".to_owned())),
            ]
            .into_iter()
            .collect(),
        );

        assert_eq!(json::<Error>.parse_peek(input), Ok(("", expected)));
    }

    /// A compact array with a number and a string element.
    #[test]
    fn json_array() {
        use JsonValue::{Array, Num, Str};

        let input = r#"[42,"x"]"#;

        let expected = Array(vec![Num(42.0), Str("x".to_owned())]);

        assert_eq!(json::<Error>.parse_peek(input), Ok(("", expected)));
    }

    /// Whitespace is accepted around the document and around every token,
    /// while whitespace inside string values is preserved.
    #[test]
    fn json_whitespace() {
        use JsonValue::{Array, Boolean, Null, Num, Object, Str};

        let input = r#"
  {
    "null" : null,
    "true" :true ,
    "false": false ,
    "number" : 123e4 ,
    "string" : " abc 123 " ,
    "array" : [ false , 1 , "two" ] ,
    "object" : { "a" : 1.0 , "b" : "c" } ,
    "empty_array" : [ ] ,
    "empty_object" : { }
  }
  "#;

        assert_eq!(
            json::<Error>.parse_peek(input),
            Ok((
                "",
                Object(
                    vec![
                        ("null".to_owned(), Null),
                        ("true".to_owned(), Boolean(true)),
                        ("false".to_owned(), Boolean(false)),
                        ("number".to_owned(), Num(123e4)),
                        ("string".to_owned(), Str(" abc 123 ".to_owned())),
                        (
                            "array".to_owned(),
                            Array(vec![Boolean(false), Num(1.0), Str("two".to_owned())])
                        ),
                        (
                            "object".to_owned(),
                            Object(
                                vec![
                                    ("a".to_owned(), Num(1.0)),
                                    ("b".to_owned(), Str("c".to_owned())),
                                ]
                                .into_iter()
                                .collect()
                            )
                        ),
                        ("empty_array".to_owned(), Array(vec![]),),
                        ("empty_object".to_owned(), Object(HashMap::new()),),
                    ]
                    .into_iter()
                    .collect()
                )
            ))
        );
    }
}
324