1 use std::collections::HashMap;
2 use std::str;
3
4 use winnow::prelude::*;
5 use winnow::{
6 ascii::float,
7 combinator::alt,
8 combinator::cut_err,
9 combinator::{delimited, preceded, separated_pair, terminated},
10 combinator::{repeat, separated},
11 error::{AddContext, ParserError, StrContext},
12 stream::Partial,
13 token::{any, none_of, rest, take, take_while},
14 };
15
16 use crate::json::JsonValue;
17
/// Input type consumed by every parser in this file: a string slice wrapped
/// in winnow's `Partial` stream type.
pub(crate) type Stream<'i> = Partial<&'i str>;
19
20 /// The root element of a JSON parser is any value
21 ///
22 /// A parser has the following signature:
23 /// `&mut Stream -> PResult<Output, ContextError>`, with `PResult` defined as:
24 /// `type PResult<O, E = ErrorKind> = Result<O, ErrMode<E>>;`
25 ///
26 /// most of the times you can ignore the error type and use the default (but this
27 /// examples shows custom error types later on!)
28 ///
29 /// Here we use `&str` as input type, but parsers can be generic over
30 /// the input type, work directly with `&[u8]`, or any other type that
31 /// implements the required traits.
json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>32 pub(crate) fn json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
33 input: &mut Stream<'i>,
34 ) -> PResult<JsonValue, E> {
35 delimited(ws, json_value, ws_or_eof).parse_next(input)
36 }
37
38 /// `alt` is a combinator that tries multiple parsers one by one, until
39 /// one of them succeeds
json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>40 fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
41 input: &mut Stream<'i>,
42 ) -> PResult<JsonValue, E> {
43 // `alt` combines the each value parser. It returns the result of the first
44 // successful parser, or an error
45 alt((
46 null.value(JsonValue::Null),
47 boolean.map(JsonValue::Boolean),
48 string.map(JsonValue::Str),
49 float.map(JsonValue::Num),
50 array.map(JsonValue::Array),
51 object.map(JsonValue::Object),
52 ))
53 .parse_next(input)
54 }
55
56 /// `literal(string)` generates a parser that takes the argument string.
57 ///
58 /// This also shows returning a sub-slice of the original input
null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>59 fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
60 // This is a parser that returns `"null"` if it sees the string "null", and
61 // an error otherwise
62 "null".parse_next(input)
63 }
64
65 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
66 /// success.
boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>67 fn boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
68 // This is a parser that returns `true` if it sees the string "true", and
69 // an error otherwise
70 let parse_true = "true".value(true);
71
72 // This is a parser that returns `false` if it sees the string "false", and
73 // an error otherwise
74 let parse_false = "false".value(false);
75
76 alt((parse_true, parse_false)).parse_next(input)
77 }
78
79 /// This parser gathers all `char`s up into a `String`with a parse to take the double quote
80 /// character, before the string (using `preceded`) and after the string (using `terminated`).
string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<String, E>81 fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
82 input: &mut Stream<'i>,
83 ) -> PResult<String, E> {
84 preceded(
85 '\"',
86 // `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to
87 // combinators like `alt` that they should not try other parsers. We were in the
88 // right branch (since we found the `"` character) but encountered an error when
89 // parsing the string
90 cut_err(terminated(
91 repeat(0.., character).fold(String::new, |mut string, c| {
92 string.push(c);
93 string
94 }),
95 '\"',
96 )),
97 )
98 // `context` lets you add a static string to errors to provide more information in the
99 // error chain (to indicate which parser had an error)
100 .context(StrContext::Expected("string".into()))
101 .parse_next(input)
102 }
103
104 /// You can mix the above declarative parsing with an imperative style to handle more unique cases,
105 /// like escaping
character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>106 fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
107 let c = none_of('\"').parse_next(input)?;
108 if c == '\\' {
109 alt((
110 any.verify_map(|c| {
111 Some(match c {
112 '"' | '\\' | '/' => c,
113 'b' => '\x08',
114 'f' => '\x0C',
115 'n' => '\n',
116 'r' => '\r',
117 't' => '\t',
118 _ => return None,
119 })
120 }),
121 preceded('u', unicode_escape),
122 ))
123 .parse_next(input)
124 } else {
125 Ok(c)
126 }
127 }
128
unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>129 fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
130 alt((
131 // Not a surrogate
132 u16_hex
133 .verify(|cp| !(0xD800..0xE000).contains(cp))
134 .map(|cp| cp as u32),
135 // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
136 separated_pair(u16_hex, "\\u", u16_hex)
137 .verify(|(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low))
138 .map(|(high, low)| {
139 let high_ten = (high as u32) - 0xD800;
140 let low_ten = (low as u32) - 0xDC00;
141 (high_ten << 10) + low_ten + 0x10000
142 }),
143 ))
144 .verify_map(
145 // Could be probably replaced with .unwrap() or _unchecked due to the verify checks
146 std::char::from_u32,
147 )
148 .parse_next(input)
149 }
150
u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E>151 fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> {
152 take(4usize)
153 .verify_map(|s| u16::from_str_radix(s, 16).ok())
154 .parse_next(input)
155 }
156
157 /// Some combinators, like `separated` or `repeat`, will call a parser repeatedly,
158 /// accumulating results in a `Vec`, until it encounters an error.
159 /// If you want more control on the parser application, check out the `iterator`
160 /// combinator (cf `examples/iterator.rs`)
array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<Vec<JsonValue>, E>161 fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
162 input: &mut Stream<'i>,
163 ) -> PResult<Vec<JsonValue>, E> {
164 preceded(
165 ('[', ws),
166 cut_err(terminated(
167 separated(0.., json_value, (ws, ',', ws)),
168 (ws, ']'),
169 )),
170 )
171 .context(StrContext::Expected("array".into()))
172 .parse_next(input)
173 }
174
object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<HashMap<String, JsonValue>, E>175 fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
176 input: &mut Stream<'i>,
177 ) -> PResult<HashMap<String, JsonValue>, E> {
178 preceded(
179 ('{', ws),
180 cut_err(terminated(
181 separated(0.., key_value, (ws, ',', ws)),
182 (ws, '}'),
183 )),
184 )
185 .context(StrContext::Expected("object".into()))
186 .parse_next(input)
187 }
188
key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<(String, JsonValue), E>189 fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
190 input: &mut Stream<'i>,
191 ) -> PResult<(String, JsonValue), E> {
192 separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input)
193 }
194
195 /// Parser combinators are constructed from the bottom up:
196 /// first we write parsers for the smallest elements (here a space character),
197 /// then we'll combine them in larger parsers
ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>198 fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
199 // Combinators like `take_while` return a function. That function is the
200 // parser,to which we can pass the input
201 take_while(0.., WS).parse_next(input)
202 }
203
ws_or_eof<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>204 fn ws_or_eof<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
205 rest.verify(|s: &str| s.chars().all(|c| WS.contains(&c)))
206 .parse_next(input)
207 }
208
/// Characters treated as whitespace by `ws` and `ws_or_eof`: space, tab,
/// carriage return and line feed.
const WS: &[char] = &[' ', '\t', '\r', '\n'];
210
/// Unit tests for the partial-input JSON parser.
#[cfg(test)]
mod test {
    #[allow(clippy::useless_attribute)]
    #[allow(unused_imports)] // it's dead for benches
    use super::*;

    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // it's dead for benches
    type Error = winnow::error::ContextError;

    /// Strings: empty, plain, every escape form, surrogate pairs, and a set
    /// of malformed or truncated inputs that must be rejected.
    #[test]
    fn json_string() {
        assert_eq!(
            string::<Error>.parse_peek(Partial::new("\"\"")),
            Ok((Partial::new(""), "".to_owned()))
        );
        assert_eq!(
            string::<Error>.parse_peek(Partial::new("\"abc\"")),
            Ok((Partial::new(""), "abc".to_owned()))
        );
        assert_eq!(
            string::<Error>.parse_peek(Partial::new(
                "\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""
            )),
            Ok((
                Partial::new(""),
                "abc\"\\/\x08\x0C\n\r\t\x01——def".to_owned()
            )),
        );
        // The surrogate pair D83D/DE10 encodes U+1F610. The expected value is
        // written as an escape so it survives lossy text encodings.
        assert_eq!(
            string::<Error>.parse_peek(Partial::new("\"\\uD83D\\uDE10\"")),
            Ok((Partial::new(""), "\u{1F610}".to_owned()))
        );

        // Unterminated strings.
        assert!(string::<Error>.parse_peek(Partial::new("\"")).is_err());
        assert!(string::<Error>.parse_peek(Partial::new("\"abc")).is_err());
        assert!(string::<Error>.parse_peek(Partial::new("\"\\\"")).is_err());
        // Too few hex digits in a unicode escape.
        assert!(string::<Error>
            .parse_peek(Partial::new("\"\\u123\""))
            .is_err());
        // Lone or mismatched surrogates.
        assert!(string::<Error>
            .parse_peek(Partial::new("\"\\uD800\""))
            .is_err());
        assert!(string::<Error>
            .parse_peek(Partial::new("\"\\uD800\\uD800\""))
            .is_err());
        assert!(string::<Error>
            .parse_peek(Partial::new("\"\\uDC00\""))
            .is_err());
    }

    /// A compact object with a number member and a string member.
    #[test]
    fn json_object() {
        use JsonValue::{Num, Object, Str};

        let input = r#"{"a":42,"b":"x"}"#;

        let expected = Object(
            vec![
                ("a".to_owned(), Num(42.0)),
                ("b".to_owned(), Str("x".to_owned())),
            ]
            .into_iter()
            .collect(),
        );

        assert_eq!(
            json::<Error>.parse_peek(Partial::new(input)),
            Ok((Partial::new(""), expected))
        );
    }

    /// A compact array holding a number and a string.
    #[test]
    fn json_array() {
        use JsonValue::{Array, Num, Str};

        let input = r#"[42,"x"]"#;

        let expected = Array(vec![Num(42.0), Str("x".to_owned())]);

        assert_eq!(
            json::<Error>.parse_peek(Partial::new(input)),
            Ok((Partial::new(""), expected))
        );
    }

    /// Whitespace is accepted around every token, including inside empty
    /// arrays and objects and around the whole document.
    #[test]
    fn json_whitespace() {
        use JsonValue::{Array, Boolean, Null, Num, Object, Str};

        let input = r#"
    {
        "null" : null,
        "true" :true ,
        "false": false ,
        "number" : 123e4 ,
        "string" : " abc 123 " ,
        "array" : [ false , 1 , "two" ] ,
        "object" : { "a" : 1.0 , "b" : "c" } ,
        "empty_array" : [ ] ,
        "empty_object" : { }
    }
    "#;

        assert_eq!(
            json::<Error>.parse_peek(Partial::new(input)),
            Ok((
                Partial::new(""),
                Object(
                    vec![
                        ("null".to_owned(), Null),
                        ("true".to_owned(), Boolean(true)),
                        ("false".to_owned(), Boolean(false)),
                        ("number".to_owned(), Num(123e4)),
                        ("string".to_owned(), Str(" abc 123 ".to_owned())),
                        (
                            "array".to_owned(),
                            Array(vec![Boolean(false), Num(1.0), Str("two".to_owned())])
                        ),
                        (
                            "object".to_owned(),
                            Object(
                                vec![
                                    ("a".to_owned(), Num(1.0)),
                                    ("b".to_owned(), Str("c".to_owned())),
                                ]
                                .into_iter()
                                .collect()
                            )
                        ),
                        ("empty_array".to_owned(), Array(vec![]),),
                        ("empty_object".to_owned(), Object(HashMap::new()),),
                    ]
                    .into_iter()
                    .collect()
                )
            ))
        );
    }
}
351