• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #![cfg(feature = "alloc")]
2 
3 use nom::{
4   branch::alt,
5   bytes::complete::{escaped, tag, take_while},
6   character::complete::{alphanumeric1 as alphanumeric, char, one_of},
7   combinator::{cut, map, opt, value},
8   error::{context, convert_error, ContextError, ErrorKind, ParseError, VerboseError},
9   multi::separated_list0,
10   number::complete::double,
11   sequence::{delimited, preceded, separated_pair, terminated},
12   Err, IResult,
13 };
14 use std::collections::HashMap;
15 use std::str;
16 
/// In-memory representation of a parsed JSON value.
///
/// `Clone` is derived alongside `Debug` and `PartialEq` so that a parsed tree
/// can be duplicated. `Eq` and `Hash` cannot be derived because `Num` holds an
/// `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonValue {
  /// The `null` literal.
  Null,
  /// A string value, held as an owned `String`.
  Str(String),
  /// A `true` / `false` literal.
  Boolean(bool),
  /// A number, held as a double precision float.
  Num(f64),
  /// An ordered sequence of values.
  Array(Vec<JsonValue>),
  /// A mapping from string keys to values.
  Object(HashMap<String, JsonValue>),
}
26 
27 /// parser combinators are constructed from the bottom up:
28 /// first we write parsers for the smallest elements (here a space character),
29 /// then we'll combine them in larger parsers
sp<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E>30 fn sp<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
31   let chars = " \t\r\n";
32 
33   // nom combinators like `take_while` return a function. That function is the
34   // parser,to which we can pass the input
35   take_while(move |c| chars.contains(c))(i)
36 }
37 
38 /// A nom parser has the following signature:
39 /// `Input -> IResult<Input, Output, Error>`, with `IResult` defined as:
40 /// `type IResult<I, O, E = (I, ErrorKind)> = Result<(I, O), Err<E>>;`
41 ///
42 /// most of the times you can ignore the error type and use the default (but this
43 /// examples shows custom error types later on!)
44 ///
45 /// Here we use `&str` as input type, but nom parsers can be generic over
46 /// the input type, and work directly with `&[u8]` or any other type that
47 /// implements the required traits.
48 ///
49 /// Finally, we can see here that the input and output type are both `&str`
50 /// with the same lifetime tag. This means that the produced value is a subslice
51 /// of the input data. and there is no allocation needed. This is the main idea
52 /// behind nom's performance.
parse_str<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E>53 fn parse_str<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
54   escaped(alphanumeric, '\\', one_of("\"n\\"))(i)
55 }
56 
57 /// `tag(string)` generates a parser that recognizes the argument string.
58 ///
59 /// we can combine it with other functions, like `value` that takes another
60 /// parser, and if that parser returns without an error, returns a given
61 /// constant value.
62 ///
63 /// `alt` is another combinator that tries multiple parsers one by one, until
64 /// one of them succeeds
65 fn boolean<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, bool, E> {
66   // This is a parser that returns `true` if it sees the string "true", and
67   // an error otherwise
68   let parse_true = value(true, tag("true"));
69 
70   // This is a parser that returns `false` if it sees the string "false", and
71   // an error otherwise
72   let parse_false = value(false, tag("false"));
73 
74   // `alt` combines the two parsers. It returns the result of the first
75   // successful parser, or an error
76   alt((parse_true, parse_false))(input)
77 }
78 
79 fn null<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, (), E> {
80   value((), tag("null"))(input)
81 }
82 
83 /// this parser combines the previous `parse_str` parser, that recognizes the
84 /// interior of a string, with a parse to recognize the double quote character,
85 /// before the string (using `preceded`) and after the string (using `terminated`).
86 ///
87 /// `context` and `cut` are related to error management:
88 /// - `cut` transforms an `Err::Error(e)` in `Err::Failure(e)`, signaling to
89 /// combinators like  `alt` that they should not try other parsers. We were in the
90 /// right branch (since we found the `"` character) but encountered an error when
91 /// parsing the string
92 /// - `context` lets you add a static string to provide more information in the
93 /// error chain (to indicate which parser had an error)
string<'a, E: ParseError<&'a str> + ContextError<&'a str>>( i: &'a str, ) -> IResult<&'a str, &'a str, E>94 fn string<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
95   i: &'a str,
96 ) -> IResult<&'a str, &'a str, E> {
97   context(
98     "string",
99     preceded(char('\"'), cut(terminated(parse_str, char('\"')))),
100   )(i)
101 }
102 
103 /// some combinators, like `separated_list0` or `many0`, will call a parser repeatedly,
104 /// accumulating results in a `Vec`, until it encounters an error.
105 /// If you want more control on the parser application, check out the `iterator`
106 /// combinator (cf `examples/iterator.rs`)
array<'a, E: ParseError<&'a str> + ContextError<&'a str>>( i: &'a str, ) -> IResult<&'a str, Vec<JsonValue>, E>107 fn array<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
108   i: &'a str,
109 ) -> IResult<&'a str, Vec<JsonValue>, E> {
110   context(
111     "array",
112     preceded(
113       char('['),
114       cut(terminated(
115         separated_list0(preceded(sp, char(',')), json_value),
116         preceded(sp, char(']')),
117       )),
118     ),
119   )(i)
120 }
121 
key_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>( i: &'a str, ) -> IResult<&'a str, (&'a str, JsonValue), E>122 fn key_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
123   i: &'a str,
124 ) -> IResult<&'a str, (&'a str, JsonValue), E> {
125   separated_pair(
126     preceded(sp, string),
127     cut(preceded(sp, char(':'))),
128     json_value,
129   )(i)
130 }
131 
hash<'a, E: ParseError<&'a str> + ContextError<&'a str>>( i: &'a str, ) -> IResult<&'a str, HashMap<String, JsonValue>, E>132 fn hash<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
133   i: &'a str,
134 ) -> IResult<&'a str, HashMap<String, JsonValue>, E> {
135   context(
136     "map",
137     preceded(
138       char('{'),
139       cut(terminated(
140         map(
141           separated_list0(preceded(sp, char(',')), key_value),
142           |tuple_vec| {
143             tuple_vec
144               .into_iter()
145               .map(|(k, v)| (String::from(k), v))
146               .collect()
147           },
148         ),
149         preceded(sp, char('}')),
150       )),
151     ),
152   )(i)
153 }
154 
155 /// here, we apply the space parser before trying to parse a value
json_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>( i: &'a str, ) -> IResult<&'a str, JsonValue, E>156 fn json_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
157   i: &'a str,
158 ) -> IResult<&'a str, JsonValue, E> {
159   preceded(
160     sp,
161     alt((
162       map(hash, JsonValue::Object),
163       map(array, JsonValue::Array),
164       map(string, |s| JsonValue::Str(String::from(s))),
165       map(double, JsonValue::Num),
166       map(boolean, JsonValue::Boolean),
167       map(null, |_| JsonValue::Null),
168     )),
169   )(i)
170 }
171 
172 /// the root element of a JSON parser is either an object or an array
root<'a, E: ParseError<&'a str> + ContextError<&'a str>>( i: &'a str, ) -> IResult<&'a str, JsonValue, E>173 fn root<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
174   i: &'a str,
175 ) -> IResult<&'a str, JsonValue, E> {
176   delimited(
177     sp,
178     alt((
179       map(hash, JsonValue::Object),
180       map(array, JsonValue::Array),
181       map(null, |_| JsonValue::Null),
182     )),
183     opt(sp),
184   )(i)
185 }
186 
main()187 fn main() {
188   let data = "  { \"a\"\t: 42,
189   \"b\": [ \"x\", \"y\", 12 ] ,
190   \"c\": { \"hello\" : \"world\"
191   }
192   } ";
193 
194   println!(
195     "will try to parse valid JSON data:\n\n**********\n{}\n**********\n",
196     data
197   );
198 
199   // this will print:
200   // Ok(
201   //     (
202   //         "",
203   //         Object(
204   //             {
205   //                 "b": Array(
206   //                     [
207   //                         Str(
208   //                             "x",
209   //                         ),
210   //                         Str(
211   //                             "y",
212   //                         ),
213   //                         Num(
214   //                             12.0,
215   //                         ),
216   //                     ],
217   //                 ),
218   //                 "c": Object(
219   //                     {
220   //                         "hello": Str(
221   //                             "world",
222   //                         ),
223   //                     },
224   //                 ),
225   //                 "a": Num(
226   //                     42.0,
227   //                 ),
228   //             },
229   //         ),
230   //     ),
231   // )
232   println!(
233     "parsing a valid file:\n{:#?}\n",
234     root::<(&str, ErrorKind)>(data)
235   );
236 
237   let data = "  { \"a\"\t: 42,
238   \"b\": [ \"x\", \"y\", 12 ] ,
239   \"c\": { 1\"hello\" : \"world\"
240   }
241   } ";
242 
243   println!(
244     "will try to parse invalid JSON data:\n\n**********\n{}\n**********\n",
245     data
246   );
247 
248   // here we use `(Input, ErrorKind)` as error type, which is used by default
249   // if you don't specify it. It contains the position of the error and some
250   // info on which parser encountered it.
251   // It is fast and small, but does not provide much context.
252   //
253   // This will print:
254   // basic errors - `root::<(&str, ErrorKind)>(data)`:
255   // Err(
256   //   Failure(
257   //       (
258   //           "1\"hello\" : \"world\"\n  }\n  } ",
259   //           Char,
260   //       ),
261   //   ),
262   // )
263   println!(
264     "basic errors - `root::<(&str, ErrorKind)>(data)`:\n{:#?}\n",
265     root::<(&str, ErrorKind)>(data)
266   );
267 
268   // nom also provides `the `VerboseError<Input>` type, which will generate a sort
269   // of backtrace of the path through the parser, accumulating info on input positions
270   // and affected parsers.
271   //
272   // This will print:
273   //
274   // parsed verbose: Err(
275   //   Failure(
276   //       VerboseError {
277   //           errors: [
278   //               (
279   //                   "1\"hello\" : \"world\"\n  }\n  } ",
280   //                   Char(
281   //                       '}',
282   //                   ),
283   //               ),
284   //               (
285   //                   "{ 1\"hello\" : \"world\"\n  }\n  } ",
286   //                   Context(
287   //                       "map",
288   //                   ),
289   //               ),
290   //               (
291   //                   "{ \"a\"\t: 42,\n  \"b\": [ \"x\", \"y\", 12 ] ,\n  \"c\": { 1\"hello\" : \"world\"\n  }\n  } ",
292   //                   Context(
293   //                       "map",
294   //                   ),
295   //               ),
296   //           ],
297   //       },
298   //   ),
299   // )
300   println!("parsed verbose: {:#?}", root::<VerboseError<&str>>(data));
301 
302   match root::<VerboseError<&str>>(data) {
303     Err(Err::Error(e)) | Err(Err::Failure(e)) => {
304       // here we use the `convert_error` function, to transform a `VerboseError<&str>`
305       // into a printable trace.
306       //
307       // This will print:
308       // verbose errors - `root::<VerboseError>(data)`:
309       // 0: at line 2:
310       //   "c": { 1"hello" : "world"
311       //          ^
312       // expected '}', found 1
313       //
314       // 1: at line 2, in map:
315       //   "c": { 1"hello" : "world"
316       //        ^
317       //
318       // 2: at line 0, in map:
319       //   { "a" : 42,
320       //   ^
321       println!(
322         "verbose errors - `root::<VerboseError>(data)`:\n{}",
323         convert_error(data, e)
324       );
325     }
326     _ => {}
327   }
328 
329   assert!(root::<(&str, ErrorKind)>("null").is_ok());
330 }
331