• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::collections::HashMap;
2 use std::str;
3 
4 use winnow::prelude::*;
5 use winnow::{
6     ascii::float,
7     ascii::line_ending,
8     combinator::alt,
9     combinator::cut_err,
10     combinator::{delimited, preceded, separated_pair, terminated},
11     combinator::{repeat, separated},
12     error::{AddContext, ParserError, StrContext},
13     stream::Partial,
14     token::{any, none_of, take, take_while},
15 };
16 
/// In-memory representation of a single parsed JSON value.
#[derive(Clone, Debug, PartialEq)]
pub(crate) enum JsonValue {
    /// The `null` literal.
    Null,
    /// A `true` or `false` literal.
    Boolean(bool),
    /// A string, stored with its escape sequences already decoded.
    Str(String),
    /// A number; every number is stored as an `f64`.
    Num(f64),
    /// An ordered list of values.
    Array(Vec<JsonValue>),
    /// A set of key/value members; member order is not preserved.
    Object(HashMap<String, JsonValue>),
}
26 
27 /// Input type shared by every parser in this file; `Partial` is used so that parsers can report `ErrMode::Incomplete` while parsing
28 pub(crate) type Stream<'i> = Partial<&'i str>;
29 
ndjson<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<Option<JsonValue>, E>30 pub(crate) fn ndjson<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
31     input: &mut Stream<'i>,
32 ) -> PResult<Option<JsonValue>, E> {
33     alt((
34         terminated(delimited(ws, json_value, ws), line_ending).map(Some),
35         line_ending.value(None),
36     ))
37     .parse_next(input)
38 }
39 
40 // --Besides `WS`, same as a regular json parser ----------------------------
41 
42 /// `alt` is a combinator that tries multiple parsers one by one, until
43 /// one of them succeeds
json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>44 fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
45     input: &mut Stream<'i>,
46 ) -> PResult<JsonValue, E> {
47     // `alt` combines the each value parser. It returns the result of the first
48     // successful parser, or an error
49     alt((
50         null.value(JsonValue::Null),
51         boolean.map(JsonValue::Boolean),
52         string.map(JsonValue::Str),
53         float.map(JsonValue::Num),
54         array.map(JsonValue::Array),
55         object.map(JsonValue::Object),
56     ))
57     .parse_next(input)
58 }
59 
60 /// `literal(string)` generates a parser that takes the argument string.
61 ///
62 /// This also shows returning a sub-slice of the original input
null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>63 fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
64     // This is a parser that returns `"null"` if it sees the string "null", and
65     // an error otherwise
66     "null".parse_next(input)
67 }
68 
69 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
70 /// success.
boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>71 fn boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
72     // This is a parser that returns `true` if it sees the string "true", and
73     // an error otherwise
74     let parse_true = "true".value(true);
75 
76     // This is a parser that returns `false` if it sees the string "false", and
77     // an error otherwise
78     let parse_false = "false".value(false);
79 
80     alt((parse_true, parse_false)).parse_next(input)
81 }
82 
83 /// This parser gathers all `char`s up into a `String`with a parse to take the double quote
84 /// character, before the string (using `preceded`) and after the string (using `terminated`).
string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<String, E>85 fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
86     input: &mut Stream<'i>,
87 ) -> PResult<String, E> {
88     preceded(
89         '\"',
90         // `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to
91         // combinators like  `alt` that they should not try other parsers. We were in the
92         // right branch (since we found the `"` character) but encountered an error when
93         // parsing the string
94         cut_err(terminated(
95             repeat(0.., character).fold(String::new, |mut string, c| {
96                 string.push(c);
97                 string
98             }),
99             '\"',
100         )),
101     )
102     // `context` lets you add a static string to errors to provide more information in the
103     // error chain (to indicate which parser had an error)
104     .context(StrContext::Expected("string".into()))
105     .parse_next(input)
106 }
107 
108 /// You can mix the above declarative parsing with an imperative style to handle more unique cases,
109 /// like escaping
character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>110 fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
111     let c = none_of('"').parse_next(input)?;
112     if c == '\\' {
113         alt((
114             any.verify_map(|c| {
115                 Some(match c {
116                     '"' | '\\' | '/' => c,
117                     'b' => '\x08',
118                     'f' => '\x0C',
119                     'n' => '\n',
120                     'r' => '\r',
121                     't' => '\t',
122                     _ => return None,
123                 })
124             }),
125             preceded('u', unicode_escape),
126         ))
127         .parse_next(input)
128     } else {
129         Ok(c)
130     }
131 }
132 
unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>133 fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
134     alt((
135         // Not a surrogate
136         u16_hex
137             .verify(|cp| !(0xD800..0xE000).contains(cp))
138             .map(|cp| cp as u32),
139         // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
140         separated_pair(u16_hex, "\\u", u16_hex)
141             .verify(|(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low))
142             .map(|(high, low)| {
143                 let high_ten = (high as u32) - 0xD800;
144                 let low_ten = (low as u32) - 0xDC00;
145                 (high_ten << 10) + low_ten + 0x10000
146             }),
147     ))
148     .verify_map(
149         // Could be probably replaced with .unwrap() or _unchecked due to the verify checks
150         std::char::from_u32,
151     )
152     .parse_next(input)
153 }
154 
u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E>155 fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> {
156     take(4usize)
157         .verify_map(|s| u16::from_str_radix(s, 16).ok())
158         .parse_next(input)
159 }
160 
161 /// Some combinators, like `separated` or `repeat`, will call a parser repeatedly,
162 /// accumulating results in a `Vec`, until it encounters an error.
163 /// If you want more control on the parser application, check out the `iterator`
164 /// combinator (cf `examples/iterator.rs`)
array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<Vec<JsonValue>, E>165 fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
166     input: &mut Stream<'i>,
167 ) -> PResult<Vec<JsonValue>, E> {
168     preceded(
169         ('[', ws),
170         cut_err(terminated(
171             separated(0.., json_value, (ws, ',', ws)),
172             (ws, ']'),
173         )),
174     )
175     .context(StrContext::Expected("array".into()))
176     .parse_next(input)
177 }
178 
object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<HashMap<String, JsonValue>, E>179 fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
180     input: &mut Stream<'i>,
181 ) -> PResult<HashMap<String, JsonValue>, E> {
182     preceded(
183         ('{', ws),
184         cut_err(terminated(
185             separated(0.., key_value, (ws, ',', ws)),
186             (ws, '}'),
187         )),
188     )
189     .context(StrContext::Expected("object".into()))
190     .parse_next(input)
191 }
192 
key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>( input: &mut Stream<'i>, ) -> PResult<(String, JsonValue), E>193 fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, StrContext>>(
194     input: &mut Stream<'i>,
195 ) -> PResult<(String, JsonValue), E> {
196     separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input)
197 }
198 
199 /// Parser combinators are constructed from the bottom up:
200 /// first we write parsers for the smallest elements (here a space character),
201 /// then we'll combine them in larger parsers
ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>202 fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
203     // Combinators like `take_while` return a function. That function is the
204     // parser,to which we can pass the input
205     take_while(0.., WS).parse_next(input)
206 }
207 
208 const WS: &[char] = &[' ', '\t'];
209 
#[cfg(test)]
mod test {
    #[allow(clippy::useless_attribute)]
    #[allow(unused_imports)] // it's dead for benches
    use super::*;

    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // it's dead for benches
    type Error = winnow::error::ContextError;

    /// Strings: empty, plain, every escape form, a surrogate pair, and malformed inputs.
    #[test]
    fn json_string() {
        assert_eq!(
            string::<Error>.parse_peek(Partial::new("\"\"")),
            Ok((Partial::new(""), "".to_owned()))
        );
        assert_eq!(
            string::<Error>.parse_peek(Partial::new("\"abc\"")),
            Ok((Partial::new(""), "abc".to_owned()))
        );
        assert_eq!(
            string::<Error>.parse_peek(Partial::new(
                "\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""
            )),
            Ok((
                Partial::new(""),
                "abc\"\\/\x08\x0C\n\r\t\x01——def".to_owned()
            )),
        );
        // The surrogate pair D83D/DE10 decodes to U+1F610. The expectation is written as an
        // escape so it cannot be corrupted by a text-encoding round trip.
        assert_eq!(
            string::<Error>.parse_peek(Partial::new("\"\\uD83D\\uDE10\"")),
            Ok((Partial::new(""), "\u{1F610}".to_owned()))
        );

        // Unterminated strings, a truncated `\u` escape and unpaired surrogates all fail.
        assert!(string::<Error>.parse_peek(Partial::new("\"")).is_err());
        assert!(string::<Error>.parse_peek(Partial::new("\"abc")).is_err());
        assert!(string::<Error>.parse_peek(Partial::new("\"\\\"")).is_err());
        assert!(string::<Error>
            .parse_peek(Partial::new("\"\\u123\""))
            .is_err());
        assert!(string::<Error>
            .parse_peek(Partial::new("\"\\uD800\""))
            .is_err());
        assert!(string::<Error>
            .parse_peek(Partial::new("\"\\uD800\\uD800\""))
            .is_err());
        assert!(string::<Error>
            .parse_peek(Partial::new("\"\\uDC00\""))
            .is_err());
    }

    /// A compact object on a single line, terminated by a line ending.
    #[test]
    fn json_object() {
        use JsonValue::{Num, Object, Str};

        let input = r#"{"a":42,"b":"x"}
"#;

        let expected = Object(
            vec![
                ("a".to_owned(), Num(42.0)),
                ("b".to_owned(), Str("x".to_owned())),
            ]
            .into_iter()
            .collect(),
        );

        assert_eq!(
            ndjson::<Error>.parse_peek(Partial::new(input)),
            Ok((Partial::new(""), Some(expected)))
        );
    }

    /// A compact array on a single line, terminated by a line ending.
    #[test]
    fn json_array() {
        use JsonValue::{Array, Num, Str};

        let input = r#"[42,"x"]
"#;

        let expected = Array(vec![Num(42.0), Str("x".to_owned())]);

        assert_eq!(
            ndjson::<Error>.parse_peek(Partial::new(input)),
            Ok((Partial::new(""), Some(expected)))
        );
    }

    /// Padding is accepted around every value and punctuation mark within the line.
    #[test]
    fn json_whitespace() {
        use JsonValue::{Array, Boolean, Null, Num, Object, Str};

        let input = r#"  {    "null" : null,    "true"  :true ,    "false":  false  ,    "number" : 123e4 ,    "string" : " abc 123 " ,    "array" : [ false , 1 , "two" ] ,    "object" : { "a" : 1.0 , "b" : "c" } ,    "empty_array" : [  ] ,    "empty_object" : {   }  }
"#;

        assert_eq!(
            ndjson::<Error>.parse_peek(Partial::new(input)),
            Ok((
                Partial::new(""),
                Some(Object(
                    vec![
                        ("null".to_owned(), Null),
                        ("true".to_owned(), Boolean(true)),
                        ("false".to_owned(), Boolean(false)),
                        ("number".to_owned(), Num(123e4)),
                        ("string".to_owned(), Str(" abc 123 ".to_owned())),
                        (
                            "array".to_owned(),
                            Array(vec![Boolean(false), Num(1.0), Str("two".to_owned())])
                        ),
                        (
                            "object".to_owned(),
                            Object(
                                vec![
                                    ("a".to_owned(), Num(1.0)),
                                    ("b".to_owned(), Str("c".to_owned())),
                                ]
                                .into_iter()
                                .collect()
                            )
                        ),
                        ("empty_array".to_owned(), Array(vec![]),),
                        ("empty_object".to_owned(), Object(HashMap::new()),),
                    ]
                    .into_iter()
                    .collect()
                ))
            ))
        );
    }
}
341