1 #![cfg(feature = "alloc")]
2
3 use nom::{
4 branch::alt,
5 bytes::complete::{escaped, tag, take_while},
6 character::complete::{alphanumeric1 as alphanumeric, char, one_of},
7 combinator::{cut, map, opt, value},
8 error::{context, convert_error, ContextError, ErrorKind, ParseError, VerboseError},
9 multi::separated_list0,
10 number::complete::double,
11 sequence::{delimited, preceded, separated_pair, terminated},
12 Err, IResult,
13 };
14 use std::collections::HashMap;
15 use std::str;
16
/// In-memory representation of a parsed JSON document.
///
/// Strings and object keys are stored as owned `String`s, so a value does not
/// borrow from the text it was parsed from.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonValue {
    /// The JSON literal `null`.
    Null,
    /// A JSON string.
    Str(String),
    /// The JSON literals `true` and `false`.
    Boolean(bool),
    /// A JSON number; every number is stored as an `f64`.
    Num(f64),
    /// A JSON array, in source order.
    Array(Vec<JsonValue>),
    /// A JSON object. Stored in a `HashMap`, so the order of the members is
    /// not preserved.
    Object(HashMap<String, JsonValue>),
}
26
27 /// parser combinators are constructed from the bottom up:
28 /// first we write parsers for the smallest elements (here a space character),
29 /// then we'll combine them in larger parsers
sp<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E>30 fn sp<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
31 let chars = " \t\r\n";
32
33 // nom combinators like `take_while` return a function. That function is the
34 // parser,to which we can pass the input
35 take_while(move |c| chars.contains(c))(i)
36 }
37
38 /// A nom parser has the following signature:
39 /// `Input -> IResult<Input, Output, Error>`, with `IResult` defined as:
40 /// `type IResult<I, O, E = (I, ErrorKind)> = Result<(I, O), Err<E>>;`
41 ///
42 /// most of the times you can ignore the error type and use the default (but this
43 /// examples shows custom error types later on!)
44 ///
45 /// Here we use `&str` as input type, but nom parsers can be generic over
46 /// the input type, and work directly with `&[u8]` or any other type that
47 /// implements the required traits.
48 ///
49 /// Finally, we can see here that the input and output type are both `&str`
50 /// with the same lifetime tag. This means that the produced value is a subslice
51 /// of the input data. and there is no allocation needed. This is the main idea
52 /// behind nom's performance.
parse_str<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E>53 fn parse_str<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
54 escaped(alphanumeric, '\\', one_of("\"n\\"))(i)
55 }
56
57 /// `tag(string)` generates a parser that recognizes the argument string.
58 ///
59 /// we can combine it with other functions, like `value` that takes another
60 /// parser, and if that parser returns without an error, returns a given
61 /// constant value.
62 ///
63 /// `alt` is another combinator that tries multiple parsers one by one, until
64 /// one of them succeeds
65 fn boolean<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, bool, E> {
66 // This is a parser that returns `true` if it sees the string "true", and
67 // an error otherwise
68 let parse_true = value(true, tag("true"));
69
70 // This is a parser that returns `false` if it sees the string "false", and
71 // an error otherwise
72 let parse_false = value(false, tag("false"));
73
74 // `alt` combines the two parsers. It returns the result of the first
75 // successful parser, or an error
76 alt((parse_true, parse_false))(input)
77 }
78
79 fn null<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, (), E> {
80 value((), tag("null"))(input)
81 }
82
83 /// this parser combines the previous `parse_str` parser, that recognizes the
84 /// interior of a string, with a parse to recognize the double quote character,
85 /// before the string (using `preceded`) and after the string (using `terminated`).
86 ///
87 /// `context` and `cut` are related to error management:
88 /// - `cut` transforms an `Err::Error(e)` in `Err::Failure(e)`, signaling to
89 /// combinators like `alt` that they should not try other parsers. We were in the
90 /// right branch (since we found the `"` character) but encountered an error when
91 /// parsing the string
92 /// - `context` lets you add a static string to provide more information in the
93 /// error chain (to indicate which parser had an error)
string<'a, E: ParseError<&'a str> + ContextError<&'a str>>( i: &'a str, ) -> IResult<&'a str, &'a str, E>94 fn string<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
95 i: &'a str,
96 ) -> IResult<&'a str, &'a str, E> {
97 context(
98 "string",
99 preceded(char('\"'), cut(terminated(parse_str, char('\"')))),
100 )(i)
101 }
102
103 /// some combinators, like `separated_list0` or `many0`, will call a parser repeatedly,
104 /// accumulating results in a `Vec`, until it encounters an error.
105 /// If you want more control on the parser application, check out the `iterator`
106 /// combinator (cf `examples/iterator.rs`)
array<'a, E: ParseError<&'a str> + ContextError<&'a str>>( i: &'a str, ) -> IResult<&'a str, Vec<JsonValue>, E>107 fn array<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
108 i: &'a str,
109 ) -> IResult<&'a str, Vec<JsonValue>, E> {
110 context(
111 "array",
112 preceded(
113 char('['),
114 cut(terminated(
115 separated_list0(preceded(sp, char(',')), json_value),
116 preceded(sp, char(']')),
117 )),
118 ),
119 )(i)
120 }
121
key_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>( i: &'a str, ) -> IResult<&'a str, (&'a str, JsonValue), E>122 fn key_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
123 i: &'a str,
124 ) -> IResult<&'a str, (&'a str, JsonValue), E> {
125 separated_pair(
126 preceded(sp, string),
127 cut(preceded(sp, char(':'))),
128 json_value,
129 )(i)
130 }
131
hash<'a, E: ParseError<&'a str> + ContextError<&'a str>>( i: &'a str, ) -> IResult<&'a str, HashMap<String, JsonValue>, E>132 fn hash<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
133 i: &'a str,
134 ) -> IResult<&'a str, HashMap<String, JsonValue>, E> {
135 context(
136 "map",
137 preceded(
138 char('{'),
139 cut(terminated(
140 map(
141 separated_list0(preceded(sp, char(',')), key_value),
142 |tuple_vec| {
143 tuple_vec
144 .into_iter()
145 .map(|(k, v)| (String::from(k), v))
146 .collect()
147 },
148 ),
149 preceded(sp, char('}')),
150 )),
151 ),
152 )(i)
153 }
154
155 /// here, we apply the space parser before trying to parse a value
json_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>( i: &'a str, ) -> IResult<&'a str, JsonValue, E>156 fn json_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
157 i: &'a str,
158 ) -> IResult<&'a str, JsonValue, E> {
159 preceded(
160 sp,
161 alt((
162 map(hash, JsonValue::Object),
163 map(array, JsonValue::Array),
164 map(string, |s| JsonValue::Str(String::from(s))),
165 map(double, JsonValue::Num),
166 map(boolean, JsonValue::Boolean),
167 map(null, |_| JsonValue::Null),
168 )),
169 )(i)
170 }
171
172 /// the root element of a JSON parser is either an object or an array
root<'a, E: ParseError<&'a str> + ContextError<&'a str>>( i: &'a str, ) -> IResult<&'a str, JsonValue, E>173 fn root<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
174 i: &'a str,
175 ) -> IResult<&'a str, JsonValue, E> {
176 delimited(
177 sp,
178 alt((
179 map(hash, JsonValue::Object),
180 map(array, JsonValue::Array),
181 map(null, |_| JsonValue::Null),
182 )),
183 opt(sp),
184 )(i)
185 }
186
/// Demonstrates the parser on a valid and on an invalid document.
///
/// The valid document is parsed and printed with the default
/// `(&str, ErrorKind)` error type. The invalid document is then parsed with
/// both `(&str, ErrorKind)` and `VerboseError<&str>` to compare the level of
/// detail each error type provides, and the verbose error is finally rendered
/// as a readable trace with `convert_error`.
fn main() {
    let data = " { \"a\"\t: 42,
 \"b\": [ \"x\", \"y\", 12 ] ,
 \"c\": { \"hello\" : \"world\"
 }
 } ";

    println!(
        "will try to parse valid JSON data:\n\n**********\n{}\n**********\n",
        data
    );

    // this will print the following (objects are stored in a `HashMap`, so
    // the order of the keys may differ from one run to the next):
    // Ok(
    // (
    // "",
    // Object(
    // {
    // "b": Array(
    // [
    // Str(
    // "x",
    // ),
    // Str(
    // "y",
    // ),
    // Num(
    // 12.0,
    // ),
    // ],
    // ),
    // "c": Object(
    // {
    // "hello": Str(
    // "world",
    // ),
    // },
    // ),
    // "a": Num(
    // 42.0,
    // ),
    // },
    // ),
    // ),
    // )
    println!(
        "parsing a valid file:\n{:#?}\n",
        root::<(&str, ErrorKind)>(data)
    );

    // Same document, except that a stray `1` precedes the "hello" key, which
    // makes it invalid.
    let data = " { \"a\"\t: 42,
 \"b\": [ \"x\", \"y\", 12 ] ,
 \"c\": { 1\"hello\" : \"world\"
 }
 } ";

    println!(
        "will try to parse invalid JSON data:\n\n**********\n{}\n**********\n",
        data
    );

    // here we use `(Input, ErrorKind)` as error type, which is used by default
    // if you don't specify it. It contains the position of the error and some
    // info on which parser encountered it.
    // It is fast and small, but does not provide much context.
    //
    // This will print:
    // basic errors - `root::<(&str, ErrorKind)>(data)`:
    // Err(
    // Failure(
    // (
    // "1\"hello\" : \"world\"\n }\n } ",
    // Char,
    // ),
    // ),
    // )
    println!(
        "basic errors - `root::<(&str, ErrorKind)>(data)`:\n{:#?}\n",
        root::<(&str, ErrorKind)>(data)
    );

    // nom also provides the `VerboseError<Input>` type, which will generate a sort
    // of backtrace of the path through the parser, accumulating info on input positions
    // and affected parsers.
    //
    // This will print:
    //
    // parsed verbose: Err(
    // Failure(
    // VerboseError {
    // errors: [
    // (
    // "1\"hello\" : \"world\"\n }\n } ",
    // Char(
    // '}',
    // ),
    // ),
    // (
    // "{ 1\"hello\" : \"world\"\n }\n } ",
    // Context(
    // "map",
    // ),
    // ),
    // (
    // "{ \"a\"\t: 42,\n \"b\": [ \"x\", \"y\", 12 ] ,\n \"c\": { 1\"hello\" : \"world\"\n }\n } ",
    // Context(
    // "map",
    // ),
    // ),
    // ],
    // },
    // ),
    // )
    println!("parsed verbose: {:#?}", root::<VerboseError<&str>>(data));

    match root::<VerboseError<&str>>(data) {
        Err(Err::Error(e)) | Err(Err::Failure(e)) => {
            // here we use the `convert_error` function, to transform a `VerboseError<&str>`
            // into a printable trace.
            //
            // This will print:
            // verbose errors - `root::<VerboseError>(data)`:
            // 0: at line 2:
            // "c": { 1"hello" : "world"
            // ^
            // expected '}', found 1
            //
            // 1: at line 2, in map:
            // "c": { 1"hello" : "world"
            // ^
            //
            // 2: at line 0, in map:
            // { "a" : 42,
            // ^
            println!(
                "verbose errors - `root::<VerboseError>(data)`:\n{}",
                convert_error(data, e)
            );
        }
        // A successful parse or an `Err::Incomplete` has no error trace to print.
        _ => {}
    }

    // A bare `null` is also accepted as a root element.
    assert!(root::<(&str, ErrorKind)>("null").is_ok());
}
331