• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9 
10 #[macro_use]
11 extern crate pest;
12 
13 use std::collections::HashMap;
14 
15 use pest::error::Error;
16 use pest::iterators::{Pair, Pairs};
17 use pest::{state, ParseResult, Parser, ParserState, Span};
18 
/// Grammar rules of the hand-written JSON parser below.
///
/// Each variant can be passed to `JsonParser::parse` as the entry rule. Only
/// some of the rule functions wrap their match in `state.rule(..)`; the others
/// (`json`, `escape`, `unicode`, `hex`, `int`, `exp`) are plain helpers.
#[allow(dead_code, non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
enum Rule {
    /// Entry rule; delegates directly to `value`.
    json,
    /// `{ ... }` with zero or more comma-separated `pair`s.
    object,
    /// A `string` key, a `:`, and a `value`.
    pair,
    /// `[ ... ]` with zero or more comma-separated `value`s.
    array,
    /// Any one of string, number, object, array, bool or null.
    value,
    /// A double-quoted string, possibly containing escapes.
    string,
    /// A backslash escape inside a string.
    escape,
    /// The `u` + hex digits part of a unicode escape.
    unicode,
    /// A single hexadecimal digit.
    hex,
    /// An optionally negative number with optional fraction/exponent.
    number,
    /// The integer part of a number: `0` or a digit run starting with 1-9.
    int,
    /// The exponent part of a number.
    exp,
    /// `true` or `false`.
    bool,
    /// The literal `null`.
    null,
}
37 
/// Unit type carrying the hand-written `Parser<Rule>` implementation for JSON.
struct JsonParser;
39 
40 impl Parser<Rule> for JsonParser {
parse(rule: Rule, input: &str) -> Result<Pairs<Rule>, Error<Rule>>41     fn parse(rule: Rule, input: &str) -> Result<Pairs<Rule>, Error<Rule>> {
42         fn json(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
43             value(state)
44         }
45 
46         fn object(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
47             state.rule(Rule::object, |s| {
48                 s.sequence(|s| {
49                     s.match_string("{")
50                         .and_then(skip)
51                         .and_then(pair)
52                         .and_then(skip)
53                         .and_then(|s| {
54                             s.repeat(|s| {
55                                 s.sequence(|s| {
56                                     s.match_string(",")
57                                         .and_then(skip)
58                                         .and_then(pair)
59                                         .and_then(skip)
60                                 })
61                             })
62                         })
63                         .and_then(|s| s.match_string("}"))
64                 })
65                 .or_else(|s| {
66                     s.sequence(|s| {
67                         s.match_string("{")
68                             .and_then(skip)
69                             .and_then(|s| s.match_string("}"))
70                     })
71                 })
72             })
73         }
74 
75         fn pair(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
76             state.rule(Rule::pair, |s| {
77                 s.sequence(|s| {
78                     string(s)
79                         .and_then(skip)
80                         .and_then(|s| s.match_string(":"))
81                         .and_then(skip)
82                         .and_then(value)
83                 })
84             })
85         }
86 
87         fn array(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
88             state.rule(Rule::array, |s| {
89                 s.sequence(|s| {
90                     s.match_string("[")
91                         .and_then(skip)
92                         .and_then(value)
93                         .and_then(skip)
94                         .and_then(|s| {
95                             s.repeat(|s| {
96                                 s.sequence(|s| {
97                                     s.match_string(",")
98                                         .and_then(skip)
99                                         .and_then(value)
100                                         .and_then(skip)
101                                 })
102                             })
103                         })
104                         .and_then(|s| s.match_string("]"))
105                 })
106                 .or_else(|s| {
107                     s.sequence(|s| {
108                         s.match_string("[")
109                             .and_then(skip)
110                             .and_then(|s| s.match_string("]"))
111                     })
112                 })
113             })
114         }
115 
116         fn value(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
117             state.rule(Rule::value, |s| {
118                 string(s)
119                     .or_else(number)
120                     .or_else(object)
121                     .or_else(array)
122                     .or_else(bool)
123                     .or_else(null)
124             })
125         }
126 
127         fn string(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
128             state.rule(Rule::string, |s| {
129                 s.match_string("\"")
130                     .and_then(|s| {
131                         s.repeat(|s| {
132                             escape(s).or_else(|s| {
133                                 s.sequence(|s| {
134                                     s.lookahead(false, |s| {
135                                         s.match_string("\"").or_else(|s| s.match_string("\\"))
136                                     })
137                                     .and_then(|s| s.skip(1))
138                                 })
139                             })
140                         })
141                     })
142                     .and_then(|pos| pos.match_string("\""))
143             })
144         }
145 
146         fn escape(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
147             state.sequence(|s| {
148                 s.match_string("\\").and_then(|s| {
149                     s.match_string("\"")
150                         .or_else(|s| s.match_string("\\"))
151                         .or_else(|s| s.match_string("/"))
152                         .or_else(|s| s.match_string("b"))
153                         .or_else(|s| s.match_string("f"))
154                         .or_else(|s| s.match_string("n"))
155                         .or_else(|s| s.match_string("r"))
156                         .or_else(|s| s.match_string("t"))
157                         .or_else(unicode)
158                 })
159             })
160         }
161 
162         fn unicode(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
163             state.sequence(|s| {
164                 s.match_string("u")
165                     .and_then(hex)
166                     .and_then(hex)
167                     .and_then(hex)
168             })
169         }
170 
171         fn hex(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
172             state
173                 .match_range('0'..'9')
174                 .or_else(|s| s.match_range('a'..'f'))
175                 .or_else(|s| s.match_range('A'..'F'))
176         }
177 
178         fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
179             state.rule(Rule::number, |s| {
180                 s.sequence(|s| {
181                     s.optional(|s| s.match_string("-"))
182                         .and_then(int)
183                         .and_then(|s| {
184                             s.optional(|s| {
185                                 s.sequence(|s| {
186                                     s.match_string(".")
187                                         .and_then(|s| s.match_range('0'..'9'))
188                                         .and_then(|s| s.repeat(|s| s.match_range('0'..'9')))
189                                         .and_then(|s| s.optional(exp))
190                                         .or_else(exp)
191                                 })
192                             })
193                         })
194                 })
195             })
196         }
197 
198         fn int(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
199             state.match_string("0").or_else(|s| {
200                 s.sequence(|s| {
201                     s.match_range('1'..'9')
202                         .and_then(|s| s.repeat(|s| s.match_range('0'..'9')))
203                 })
204             })
205         }
206 
207         fn exp(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
208             state.sequence(|s| {
209                 s.match_string("E")
210                     .or_else(|s| s.match_string("e"))
211                     .and_then(|s| {
212                         s.optional(|s| s.match_string("+").or_else(|s| s.match_string("-")))
213                     })
214                     .and_then(int)
215             })
216         }
217 
218         fn bool(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
219             state.rule(Rule::bool, |s| {
220                 s.match_string("true").or_else(|s| s.match_string("false"))
221             })
222         }
223 
224         fn null(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
225             state.rule(Rule::null, |s| s.match_string("null"))
226         }
227 
228         fn skip(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
229             state.repeat(|s| {
230                 s.match_string(" ")
231                     .or_else(|s| s.match_string("\t"))
232                     .or_else(|s| s.match_string("\r"))
233                     .or_else(|s| s.match_string("\n"))
234             })
235         }
236 
237         state(input, |state| match rule {
238             Rule::json => json(state),
239             Rule::object => object(state),
240             Rule::pair => pair(state),
241             Rule::array => array(state),
242             Rule::value => value(state),
243             Rule::string => string(state),
244             Rule::escape => escape(state),
245             Rule::unicode => unicode(state),
246             Rule::hex => hex(state),
247             Rule::number => number(state),
248             Rule::int => int(state),
249             Rule::exp => exp(state),
250             Rule::bool => bool(state),
251             Rule::null => null(state),
252         })
253     }
254 }
255 
/// Minimal JSON AST built by `consume` from the parser's output.
///
/// Strings and object keys are kept as `Span`s into the input rather than
/// owned, unescaped text; the span is that of the whole `string` token.
#[derive(Debug, PartialEq)]
enum Json<'i> {
    /// The `null` literal.
    Null,
    /// `true` or `false`.
    Bool(bool),
    /// A number, parsed from the matched text into an `f64`.
    Number(f64),
    /// Span of a `string` token in the input.
    String(Span<'i>),
    /// Array elements, in source order.
    Array(Vec<Json<'i>>),
    /// Object members keyed by the span of their key string.
    Object(HashMap<Span<'i>, Json<'i>>),
}
265 
266 fn consume(pair: Pair<Rule>) -> Json {
267     fn value(pair: Pair<Rule>) -> Json {
268         let pair = pair.into_inner().next().unwrap();
269 
270         match pair.as_rule() {
271             Rule::null => Json::Null,
272             Rule::bool => match pair.as_str() {
273                 "false" => Json::Bool(false),
274                 "true" => Json::Bool(true),
275                 _ => unreachable!(),
276             },
277             Rule::number => Json::Number(pair.as_str().parse().unwrap()),
278             Rule::string => Json::String(pair.as_span()),
279             Rule::array => Json::Array(pair.into_inner().map(value).collect()),
280             Rule::object => {
281                 let pairs = pair.into_inner().map(|pos| {
282                     let mut pair = pos.into_inner();
283 
284                     let key = pair.next().unwrap().as_span();
285                     let value = value(pair.next().unwrap());
286 
287                     (key, value)
288                 });
289 
290                 Json::Object(pairs.collect())
291             }
292             _ => unreachable!(),
293         }
294     }
295 
296     value(pair)
297 }
298 
/// `Rule::null` on "null" yields a single `null` token spanning 0..4.
#[test]
fn null() {
    parses_to! {
        parser: JsonParser,
        input: "null",
        rule: Rule::null,
        tokens: [
            null(0, 4)
        ]
    };
}
310 
/// `Rule::bool` on "false" yields a single `bool` token spanning 0..5.
#[test]
fn bool() {
    parses_to! {
        parser: JsonParser,
        input: "false",
        rule: Rule::bool,
        tokens: [
            bool(0, 5)
        ]
    };
}
322 
/// A lone "0" is a complete `number` token spanning 0..1.
#[test]
fn number_zero() {
    parses_to! {
        parser: JsonParser,
        input: "0",
        rule: Rule::number,
        tokens: [
            number(0, 1)
        ]
    };
}
334 
/// A number with a fractional part is matched in full (0..7).
#[test]
fn float() {
    parses_to! {
        parser: JsonParser,
        input: "100.001",
        rule: Rule::number,
        tokens: [
            number(0, 7)
        ]
    };
}
346 
/// A fraction followed by a signed exponent is matched in full (0..12).
#[test]
fn float_with_exp() {
    parses_to! {
        parser: JsonParser,
        input: "100.001E+100",
        rule: Rule::number,
        tokens: [
            number(0, 12)
        ]
    };
}
358 
/// A leading minus sign is part of the `number` token ("-0" spans 0..2).
#[test]
fn number_minus_zero() {
    parses_to! {
        parser: JsonParser,
        input: "-0",
        rule: Rule::number,
        tokens: [
            number(0, 2)
        ]
    };
}
370 
/// A quoted string containing plain characters, a `\u0000` escape and an
/// escaped quote is matched as one `string` token spanning 0..13.
#[test]
fn string_with_escapes() {
    parses_to! {
        parser: JsonParser,
        input: "\"asd\\u0000\\\"\"",
        rule: Rule::string,
        tokens: [
            string(0, 13)
        ]
    };
}
382 
/// An empty array with inner whitespace yields an `array` token with no
/// children, spanning 0..3.
#[test]
fn array_empty() {
    parses_to! {
        parser: JsonParser,
        input: "[ ]",
        rule: Rule::array,
        tokens: [
            array(0, 3)
        ]
    };
}
394 
/// A mixed array (number, bool, null, string, nested array) yields one
/// `value` child per element, each wrapping the concrete token; separating
/// commas and whitespace produce no tokens.
#[test]
fn array() {
    parses_to! {
        parser: JsonParser,
        input: "[0.0e1, false, null, \"a\", [0]]",
        rule: Rule::array,
        tokens: [
            array(0, 30, [
                value(1,  6, [number(1, 6)]),
                value(8, 13, [bool(8, 13)]),
                value(15, 19, [null(15, 19)]),
                value(21, 24, [string(21, 24)]),
                value(26, 29, [
                    array(26, 29, [
                        value(27, 28, [number(27, 28)])
                    ])
                ])
            ])
        ]
    };
}
416 
/// An object with two members yields one `pair` child per member, each made
/// of the key `string` followed by a `value`; the second value is a nested
/// array containing an empty object and a number.
#[test]
fn object() {
    parses_to! {
        parser: JsonParser,
        input: "{\"a\" : 3, \"b\" : [{}, 3]}",
        rule: Rule::object,
        tokens: [
            object(0, 24, [
                pair(1, 8, [
                    string(1, 4),
                    value(7, 8, [number(7, 8)])
                ]),
                pair(10, 23, [
                    string(10, 13),
                    value(16, 23, [
                        array(16, 23, [
                            value(17, 19, [object(17, 19)]),
                            value(21, 22, [number(21, 22)])
                        ])
                    ])
                ])
            ])
        ]
    };
}
442 
/// Parses a small object through `Rule::json` and checks the `Json` tree that
/// `consume` builds from it.
#[test]
fn ast() {
    let input = "{\"a\": [null, true, 3.4]}";

    let ast = consume(
        JsonParser::parse(Rule::json, input)
            .unwrap()
            .next()
            .unwrap(),
    );

    // Fail loudly when the root is not an object: a bare `if let` here would
    // skip the assertion and let the test pass without checking anything.
    let pairs = match ast {
        Json::Object(pairs) => pairs,
        other => panic!("expected a JSON object, got {:?}", other),
    };

    let vals: Vec<&Json> = pairs.values().collect();

    // The input has exactly one member, so there is exactly one value.
    assert_eq!(vals.len(), 1);
    assert_eq!(
        *vals[0],
        Json::Array(vec![Json::Null, Json::Bool(true), Json::Number(3.4)])
    );
}
463