• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::collections::HashMap;
2 use std::str;
3 
4 use winnow::prelude::*;
5 use winnow::{
6     ascii::float,
7     combinator::empty,
8     combinator::fail,
9     combinator::peek,
10     combinator::{alt, dispatch},
11     combinator::{delimited, preceded, separated_pair, terminated},
12     combinator::{repeat, separated},
13     error::{AddContext, ParserError, StrContext},
14     token::{any, none_of, take, take_while},
15 };
16 
17 use crate::json::JsonValue;
18 
/// Input type consumed by every parser in this file: a borrowed string slice.
///
/// `'i` is the lifetime of the text being parsed; slices returned by the parsers
/// (for example from `null` or `ws`) borrow from it.
pub(crate) type Stream<'i> = &'i str;
20 
21 /// The root element of a JSON parser is any value
22 ///
23 /// A parser has the following signature:
24 /// `&mut Stream -> PResult<Output ContextError>`, with `PResult` defined as:
25 /// `type PResult<O, E = ErrorKind> = Result<O, ErrMode<E>>;`
26 ///
27 /// most of the times you can ignore the error type and use the default (but this
28 /// examples shows custom error types later on!)
29 ///
30 /// Here we use `&str` as input type, but parsers can be generic over
31 /// the input type, work directly with `&[u8]`, or any other type that
32 /// implements the required traits.
json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>33 pub(crate) fn json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
34     input: &mut Stream<'i>,
35 ) -> PResult<JsonValue, E> {
36     delimited(ws, json_value, ws).parse_next(input)
37 }
38 
39 /// `alt` is a combinator that tries multiple parsers one by one, until
40 /// one of them succeeds
json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>41 fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
42     input: &mut Stream<'i>,
43 ) -> PResult<JsonValue, E> {
44     // `dispatch` gives you `match`-like behavior compared to `alt` successively trying different
45     // implementations.
46     dispatch!(peek(any);
47         'n' => null.value(JsonValue::Null),
48         't' => true_.map(JsonValue::Boolean),
49         'f' => false_.map(JsonValue::Boolean),
50         '"' => string.map(JsonValue::Str),
51         '+' => float.map(JsonValue::Num),
52         '-' => float.map(JsonValue::Num),
53         '0'..='9' => float.map(JsonValue::Num),
54         '[' => array.map(JsonValue::Array),
55         '{' => object.map(JsonValue::Object),
56         _ => fail,
57     )
58     .parse_next(input)
59 }
60 
61 /// `literal(string)` generates a parser that takes the argument string.
62 ///
63 /// This also shows returning a sub-slice of the original input
null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>64 fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
65     // This is a parser that returns `"null"` if it sees the string "null", and
66     // an error otherwise
67     "null".parse_next(input)
68 }
69 
70 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
71 /// success.
true_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>72 fn true_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
73     // This is a parser that returns `true` if it sees the string "true", and
74     // an error otherwise
75     "true".value(true).parse_next(input)
76 }
77 
78 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
79 /// success.
false_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>80 fn false_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
81     // This is a parser that returns `false` if it sees the string "false", and
82     // an error otherwise
83     "false".value(false).parse_next(input)
84 }
85 
86 /// This parser gathers all `char`s up into a `String`with a parse to take the double quote
87 /// character, before the string (using `preceded`) and after the string (using `terminated`).
string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<String, E>88 fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
89     input: &mut Stream<'i>,
90 ) -> PResult<String, E> {
91     preceded(
92         '\"',
93         terminated(
94             repeat(0.., character).fold(String::new, |mut string, c| {
95                 string.push(c);
96                 string
97             }),
98             '\"',
99         ),
100     )
101     // `context` lets you add a static string to errors to provide more information in the
102     // error chain (to indicate which parser had an error)
103     .context(StrContext::Expected("string".into()))
104     .parse_next(input)
105 }
106 
107 /// You can mix the above declarative parsing with an imperative style to handle more unique cases,
108 /// like escaping
character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>109 fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
110     let c = none_of('\"').parse_next(input)?;
111     if c == '\\' {
112         dispatch!(any;
113           '"' => empty.value('"'),
114           '\\' => empty.value('\\'),
115           '/'  => empty.value('/'),
116           'b' => empty.value('\x08'),
117           'f' => empty.value('\x0C'),
118           'n' => empty.value('\n'),
119           'r' => empty.value('\r'),
120           't' => empty.value('\t'),
121           'u' => unicode_escape,
122           _ => fail,
123         )
124         .parse_next(input)
125     } else {
126         Ok(c)
127     }
128 }
129 
unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>130 fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
131     alt((
132         // Not a surrogate
133         u16_hex
134             .verify(|cp| !(0xD800..0xE000).contains(cp))
135             .map(|cp| cp as u32),
136         // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
137         separated_pair(u16_hex, "\\u", u16_hex)
138             .verify(|(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low))
139             .map(|(high, low)| {
140                 let high_ten = (high as u32) - 0xD800;
141                 let low_ten = (low as u32) - 0xDC00;
142                 (high_ten << 10) + low_ten + 0x10000
143             }),
144     ))
145     .verify_map(
146         // Could be probably replaced with .unwrap() or _unchecked due to the verify checks
147         std::char::from_u32,
148     )
149     .parse_next(input)
150 }
151 
u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E>152 fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> {
153     take(4usize)
154         .verify_map(|s| u16::from_str_radix(s, 16).ok())
155         .parse_next(input)
156 }
157 
158 /// Some combinators, like `separated` or `repeat`, will call a parser repeatedly,
159 /// accumulating results in a `Vec`, until it encounters an error.
160 /// If you want more control on the parser application, check out the `iterator`
161 /// combinator (cf `examples/iterator.rs`)
array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<Vec<JsonValue>, E>162 fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
163     input: &mut Stream<'i>,
164 ) -> PResult<Vec<JsonValue>, E> {
165     preceded(
166         ('[', ws),
167         terminated(separated(0.., json_value, (ws, ',', ws)), (ws, ']')),
168     )
169     .context(StrContext::Expected("array".into()))
170     .parse_next(input)
171 }
172 
object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<HashMap<String, JsonValue>, E>173 fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
174     input: &mut Stream<'i>,
175 ) -> PResult<HashMap<String, JsonValue>, E> {
176     preceded(
177         ('{', ws),
178         terminated(separated(0.., key_value, (ws, ',', ws)), (ws, '}')),
179     )
180     .context(StrContext::Expected("object".into()))
181     .parse_next(input)
182 }
183 
key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<(String, JsonValue), E>184 fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
185     input: &mut Stream<'i>,
186 ) -> PResult<(String, JsonValue), E> {
187     separated_pair(string, (ws, ':', ws), json_value).parse_next(input)
188 }
189 
190 /// Parser combinators are constructed from the bottom up:
191 /// first we write parsers for the smallest elements (here a space character),
192 /// then we'll combine them in larger parsers
ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>193 fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
194     // Combinators like `take_while` return a function. That function is the
195     // parser,to which we can pass the input
196     take_while(0.., WS).parse_next(input)
197 }
198 
199 const WS: &[char] = &[' ', '\t', '\r', '\n'];
200 
#[cfg(test)]
mod test {
    #[allow(clippy::useless_attribute)]
    #[allow(unused_imports)] // its dead for benches
    use super::*;

    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // its dead for benches
    type Error = winnow::error::ContextError;

    /// String parsing: empty and plain strings, every escape form, a surrogate pair,
    /// and a set of malformed inputs that must be rejected.
    #[test]
    fn json_string() {
        assert_eq!(string::<Error>.parse_peek("\"\""), Ok(("", "".to_owned())));
        assert_eq!(
            string::<Error>.parse_peek("\"abc\""),
            Ok(("", "abc".to_owned()))
        );
        // The input holds U+2014 twice: once as the JSON escape `\u2014` and once as a
        // literal character, so the expected text contains two em dashes.
        assert_eq!(
            string::<Error>
                .parse_peek("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""),
            Ok((
                "",
                "abc\"\\/\x08\x0C\n\r\t\x01\u{2014}\u{2014}def".to_owned()
            )),
        );
        // The surrogate pair D83D/DE10 decodes to the single code point U+1F610.
        assert_eq!(
            string::<Error>.parse_peek("\"\\uD83D\\uDE10\""),
            Ok(("", "\u{1F610}".to_owned()))
        );

        // Unterminated strings.
        assert!(string::<Error>.parse_peek("\"").is_err());
        assert!(string::<Error>.parse_peek("\"abc").is_err());
        assert!(string::<Error>.parse_peek("\"\\\"").is_err());
        // Too few hex digits in a `\u` escape.
        assert!(string::<Error>.parse_peek("\"\\u123\"").is_err());
        // Lone or mismatched surrogates.
        assert!(string::<Error>.parse_peek("\"\\uD800\"").is_err());
        assert!(string::<Error>.parse_peek("\"\\uD800\\uD800\"").is_err());
        assert!(string::<Error>.parse_peek("\"\\uDC00\"").is_err());
    }

    /// A compact object with a number member and a string member.
    #[test]
    fn json_object() {
        use JsonValue::{Num, Object, Str};

        let input = r#"{"a":42,"b":"x"}"#;

        let expected = Object(
            vec![
                ("a".to_owned(), Num(42.0)),
                ("b".to_owned(), Str("x".to_owned())),
            ]
            .into_iter()
            .collect(),
        );

        assert_eq!(json::<Error>.parse_peek(input), Ok(("", expected)));
    }

    /// A compact array with a number element and a string element.
    #[test]
    fn json_array() {
        use JsonValue::{Array, Num, Str};

        let input = r#"[42,"x"]"#;

        let expected = Array(vec![Num(42.0), Str("x".to_owned())]);

        assert_eq!(json::<Error>.parse_peek(input), Ok(("", expected)));
    }

    /// Whitespace is accepted around every token, including inside empty containers.
    #[test]
    fn json_whitespace() {
        use JsonValue::{Array, Boolean, Null, Num, Object, Str};

        let input = r#"
  {
    "null" : null,
    "true"  :true ,
    "false":  false  ,
    "number" : 123e4 ,
    "string" : " abc 123 " ,
    "array" : [ false , 1 , "two" ] ,
    "object" : { "a" : 1.0 , "b" : "c" } ,
    "empty_array" : [  ] ,
    "empty_object" : {   }
  }
  "#;

        assert_eq!(
            json::<Error>.parse_peek(input),
            Ok((
                "",
                Object(
                    vec![
                        ("null".to_owned(), Null),
                        ("true".to_owned(), Boolean(true)),
                        ("false".to_owned(), Boolean(false)),
                        ("number".to_owned(), Num(123e4)),
                        ("string".to_owned(), Str(" abc 123 ".to_owned())),
                        (
                            "array".to_owned(),
                            Array(vec![Boolean(false), Num(1.0), Str("two".to_owned())])
                        ),
                        (
                            "object".to_owned(),
                            Object(
                                vec![
                                    ("a".to_owned(), Num(1.0)),
                                    ("b".to_owned(), Str("c".to_owned())),
                                ]
                                .into_iter()
                                .collect()
                            )
                        ),
                        ("empty_array".to_owned(), Array(vec![]),),
                        ("empty_object".to_owned(), Object(HashMap::new()),),
                    ]
                    .into_iter()
                    .collect()
                )
            ))
        );
    }
}
320