1 use std::collections::HashMap;
2 use std::str;
3
4 use winnow::prelude::*;
5 use winnow::{
6 ascii::float,
7 combinator::empty,
8 combinator::fail,
9 combinator::peek,
10 combinator::{alt, dispatch},
11 combinator::{delimited, preceded, separated_pair, terminated},
12 combinator::{repeat, separated},
13 error::{AddContext, ParserError, StrContext},
14 token::{any, none_of, take, take_while},
15 };
16
17 use crate::json::JsonValue;
18
19 pub(crate) type Stream<'i> = &'i str;
20
21 /// The root element of a JSON parser is any value
22 ///
23 /// A parser has the following signature:
24 /// `&mut Stream -> PResult<Output ContextError>`, with `PResult` defined as:
25 /// `type PResult<O, E = ErrorKind> = Result<O, ErrMode<E>>;`
26 ///
27 /// most of the times you can ignore the error type and use the default (but this
28 /// examples shows custom error types later on!)
29 ///
30 /// Here we use `&str` as input type, but parsers can be generic over
31 /// the input type, work directly with `&[u8]`, or any other type that
32 /// implements the required traits.
json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>33 pub(crate) fn json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
34 input: &mut Stream<'i>,
35 ) -> PResult<JsonValue, E> {
36 delimited(ws, json_value, ws).parse_next(input)
37 }
38
39 /// `alt` is a combinator that tries multiple parsers one by one, until
40 /// one of them succeeds
json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>41 fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
42 input: &mut Stream<'i>,
43 ) -> PResult<JsonValue, E> {
44 // `dispatch` gives you `match`-like behavior compared to `alt` successively trying different
45 // implementations.
46 dispatch!(peek(any);
47 'n' => null.value(JsonValue::Null),
48 't' => true_.map(JsonValue::Boolean),
49 'f' => false_.map(JsonValue::Boolean),
50 '"' => string.map(JsonValue::Str),
51 '+' => float.map(JsonValue::Num),
52 '-' => float.map(JsonValue::Num),
53 '0'..='9' => float.map(JsonValue::Num),
54 '[' => array.map(JsonValue::Array),
55 '{' => object.map(JsonValue::Object),
56 _ => fail,
57 )
58 .parse_next(input)
59 }
60
61 /// `literal(string)` generates a parser that takes the argument string.
62 ///
63 /// This also shows returning a sub-slice of the original input
null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>64 fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
65 // This is a parser that returns `"null"` if it sees the string "null", and
66 // an error otherwise
67 "null".parse_next(input)
68 }
69
70 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
71 /// success.
true_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>72 fn true_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
73 // This is a parser that returns `true` if it sees the string "true", and
74 // an error otherwise
75 "true".value(true).parse_next(input)
76 }
77
78 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
79 /// success.
false_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>80 fn false_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
81 // This is a parser that returns `false` if it sees the string "false", and
82 // an error otherwise
83 "false".value(false).parse_next(input)
84 }
85
86 /// This parser gathers all `char`s up into a `String`with a parse to take the double quote
87 /// character, before the string (using `preceded`) and after the string (using `terminated`).
string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<String, E>88 fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
89 input: &mut Stream<'i>,
90 ) -> PResult<String, E> {
91 preceded(
92 '\"',
93 terminated(
94 repeat(0.., character).fold(String::new, |mut string, c| {
95 string.push(c);
96 string
97 }),
98 '\"',
99 ),
100 )
101 // `context` lets you add a static string to errors to provide more information in the
102 // error chain (to indicate which parser had an error)
103 .context(StrContext::Expected("string".into()))
104 .parse_next(input)
105 }
106
107 /// You can mix the above declarative parsing with an imperative style to handle more unique cases,
108 /// like escaping
character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>109 fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
110 let c = none_of('\"').parse_next(input)?;
111 if c == '\\' {
112 dispatch!(any;
113 '"' => empty.value('"'),
114 '\\' => empty.value('\\'),
115 '/' => empty.value('/'),
116 'b' => empty.value('\x08'),
117 'f' => empty.value('\x0C'),
118 'n' => empty.value('\n'),
119 'r' => empty.value('\r'),
120 't' => empty.value('\t'),
121 'u' => unicode_escape,
122 _ => fail,
123 )
124 .parse_next(input)
125 } else {
126 Ok(c)
127 }
128 }
129
unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>130 fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
131 alt((
132 // Not a surrogate
133 u16_hex
134 .verify(|cp| !(0xD800..0xE000).contains(cp))
135 .map(|cp| cp as u32),
136 // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
137 separated_pair(u16_hex, "\\u", u16_hex)
138 .verify(|(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low))
139 .map(|(high, low)| {
140 let high_ten = (high as u32) - 0xD800;
141 let low_ten = (low as u32) - 0xDC00;
142 (high_ten << 10) + low_ten + 0x10000
143 }),
144 ))
145 .verify_map(
146 // Could be probably replaced with .unwrap() or _unchecked due to the verify checks
147 std::char::from_u32,
148 )
149 .parse_next(input)
150 }
151
u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E>152 fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> {
153 take(4usize)
154 .verify_map(|s| u16::from_str_radix(s, 16).ok())
155 .parse_next(input)
156 }
157
158 /// Some combinators, like `separated` or `repeat`, will call a parser repeatedly,
159 /// accumulating results in a `Vec`, until it encounters an error.
160 /// If you want more control on the parser application, check out the `iterator`
161 /// combinator (cf `examples/iterator.rs`)
array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<Vec<JsonValue>, E>162 fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
163 input: &mut Stream<'i>,
164 ) -> PResult<Vec<JsonValue>, E> {
165 preceded(
166 ('[', ws),
167 terminated(separated(0.., json_value, (ws, ',', ws)), (ws, ']')),
168 )
169 .context(StrContext::Expected("array".into()))
170 .parse_next(input)
171 }
172
object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<HashMap<String, JsonValue>, E>173 fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
174 input: &mut Stream<'i>,
175 ) -> PResult<HashMap<String, JsonValue>, E> {
176 preceded(
177 ('{', ws),
178 terminated(separated(0.., key_value, (ws, ',', ws)), (ws, '}')),
179 )
180 .context(StrContext::Expected("object".into()))
181 .parse_next(input)
182 }
183
key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<(String, JsonValue), E>184 fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
185 input: &mut Stream<'i>,
186 ) -> PResult<(String, JsonValue), E> {
187 separated_pair(string, (ws, ':', ws), json_value).parse_next(input)
188 }
189
190 /// Parser combinators are constructed from the bottom up:
191 /// first we write parsers for the smallest elements (here a space character),
192 /// then we'll combine them in larger parsers
ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>193 fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
194 // Combinators like `take_while` return a function. That function is the
195 // parser,to which we can pass the input
196 take_while(0.., WS).parse_next(input)
197 }
198
199 const WS: &[char] = &[' ', '\t', '\r', '\n'];
200
#[cfg(test)]
mod test {
    #[allow(clippy::useless_attribute)]
    #[allow(unused_imports)] // its dead for benches
    use super::*;

    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // its dead for benches
    type Error = winnow::error::ContextError;

    /// String literals: plain text, every escape form, surrogate pairs, and
    /// a set of malformed inputs that must be rejected.
    #[test]
    fn json_string() {
        assert_eq!(string::<Error>.parse_peek("\"\""), Ok(("", "".to_owned())));
        assert_eq!(
            string::<Error>.parse_peek("\"abc\""),
            Ok(("", "abc".to_owned()))
        );
        assert_eq!(
            string::<Error>
                .parse_peek("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""),
            Ok(("", "abc\"\\/\x08\x0C\n\r\t\x01——def".to_owned())),
        );
        // `\uD83D\uDE10` is a surrogate pair:
        // ((0xD83D - 0xD800) << 10) + (0xDE10 - 0xDC00) + 0x10000 = 0x1F610.
        assert_eq!(
            string::<Error>.parse_peek("\"\\uD83D\\uDE10\""),
            Ok(("", "\u{1F610}".to_owned()))
        );

        // Unterminated literals.
        assert!(string::<Error>.parse_peek("\"").is_err());
        assert!(string::<Error>.parse_peek("\"abc").is_err());
        assert!(string::<Error>.parse_peek("\"\\\"").is_err());
        // Too few hex digits.
        assert!(string::<Error>.parse_peek("\"\\u123\"").is_err());
        // Unpaired or mismatched surrogates.
        assert!(string::<Error>.parse_peek("\"\\uD800\"").is_err());
        assert!(string::<Error>.parse_peek("\"\\uD800\\uD800\"").is_err());
        assert!(string::<Error>.parse_peek("\"\\uDC00\"").is_err());
    }

    /// A compact object with a number member and a string member.
    #[test]
    fn json_object() {
        use JsonValue::{Num, Object, Str};

        let input = r#"{"a":42,"b":"x"}"#;

        let expected = Object(
            vec![
                ("a".to_owned(), Num(42.0)),
                ("b".to_owned(), Str("x".to_owned())),
            ]
            .into_iter()
            .collect(),
        );

        assert_eq!(json::<Error>.parse_peek(input), Ok(("", expected)));
    }

    /// A compact array with mixed element types.
    #[test]
    fn json_array() {
        use JsonValue::{Array, Num, Str};

        let input = r#"[42,"x"]"#;

        let expected = Array(vec![Num(42.0), Str("x".to_owned())]);

        assert_eq!(json::<Error>.parse_peek(input), Ok(("", expected)));
    }

    /// Whitespace is accepted around the document, around every separator,
    /// and inside empty containers, while whitespace inside string values is
    /// preserved.
    #[test]
    fn json_whitespace() {
        use JsonValue::{Array, Boolean, Null, Num, Object, Str};

        let input = r#"
  {
    "null" : null,
    "true"  :true ,
    "false":  false  ,
    "number" : 123e4 ,
    "string" : " abc 123 " ,
    "array" : [ false , 1 , "two" ] ,
    "object" : { "a" : 1.0 , "b" : "c" } ,
    "empty_array" : [  ] ,
    "empty_object" : {   }
  }
  "#;

        assert_eq!(
            json::<Error>.parse_peek(input),
            Ok((
                "",
                Object(
                    vec![
                        ("null".to_owned(), Null),
                        ("true".to_owned(), Boolean(true)),
                        ("false".to_owned(), Boolean(false)),
                        ("number".to_owned(), Num(123e4)),
                        ("string".to_owned(), Str(" abc 123 ".to_owned())),
                        (
                            "array".to_owned(),
                            Array(vec![Boolean(false), Num(1.0), Str("two".to_owned())])
                        ),
                        (
                            "object".to_owned(),
                            Object(
                                vec![
                                    ("a".to_owned(), Num(1.0)),
                                    ("b".to_owned(), Str("c".to_owned())),
                                ]
                                .into_iter()
                                .collect()
                            )
                        ),
                        ("empty_array".to_owned(), Array(vec![]),),
                        ("empty_object".to_owned(), Object(HashMap::new()),),
                    ]
                    .into_iter()
                    .collect()
                )
            ))
        );
    }
}
320