1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9
10 #[macro_use]
11 extern crate pest;
12
13 use std::collections::HashMap;
14
15 use pest::error::Error;
16 use pest::iterators::{Pair, Pairs};
17 use pest::{state, ParseResult, Parser, ParserState, Span};
18
// Grammar rules understood by the hand-written JSON parser below.
//
// Variants are spelled in lower case so they read like grammar rule names,
// hence the `non_camel_case_types` allowance. The declaration order is
// significant for the derived `Ord`/`PartialOrd` implementations, so new
// variants should be appended rather than inserted.
#[allow(dead_code)]
#[allow(non_camel_case_types)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum Rule {
    // Structural rules.
    json,
    object,
    pair,
    array,
    value,
    // String rules.
    string,
    escape,
    unicode,
    hex,
    // Number rules.
    number,
    int,
    exp,
    // Literals.
    bool,
    null,
}
37
38 struct JsonParser;
39
40 impl Parser<Rule> for JsonParser {
parse(rule: Rule, input: &str) -> Result<Pairs<Rule>, Error<Rule>>41 fn parse(rule: Rule, input: &str) -> Result<Pairs<Rule>, Error<Rule>> {
42 fn json(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
43 value(state)
44 }
45
46 fn object(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
47 state.rule(Rule::object, |s| {
48 s.sequence(|s| {
49 s.match_string("{")
50 .and_then(skip)
51 .and_then(pair)
52 .and_then(skip)
53 .and_then(|s| {
54 s.repeat(|s| {
55 s.sequence(|s| {
56 s.match_string(",")
57 .and_then(skip)
58 .and_then(pair)
59 .and_then(skip)
60 })
61 })
62 })
63 .and_then(|s| s.match_string("}"))
64 })
65 .or_else(|s| {
66 s.sequence(|s| {
67 s.match_string("{")
68 .and_then(skip)
69 .and_then(|s| s.match_string("}"))
70 })
71 })
72 })
73 }
74
75 fn pair(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
76 state.rule(Rule::pair, |s| {
77 s.sequence(|s| {
78 string(s)
79 .and_then(skip)
80 .and_then(|s| s.match_string(":"))
81 .and_then(skip)
82 .and_then(value)
83 })
84 })
85 }
86
87 fn array(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
88 state.rule(Rule::array, |s| {
89 s.sequence(|s| {
90 s.match_string("[")
91 .and_then(skip)
92 .and_then(value)
93 .and_then(skip)
94 .and_then(|s| {
95 s.repeat(|s| {
96 s.sequence(|s| {
97 s.match_string(",")
98 .and_then(skip)
99 .and_then(value)
100 .and_then(skip)
101 })
102 })
103 })
104 .and_then(|s| s.match_string("]"))
105 })
106 .or_else(|s| {
107 s.sequence(|s| {
108 s.match_string("[")
109 .and_then(skip)
110 .and_then(|s| s.match_string("]"))
111 })
112 })
113 })
114 }
115
116 fn value(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
117 state.rule(Rule::value, |s| {
118 string(s)
119 .or_else(number)
120 .or_else(object)
121 .or_else(array)
122 .or_else(bool)
123 .or_else(null)
124 })
125 }
126
127 fn string(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
128 state.rule(Rule::string, |s| {
129 s.match_string("\"")
130 .and_then(|s| {
131 s.repeat(|s| {
132 escape(s).or_else(|s| {
133 s.sequence(|s| {
134 s.lookahead(false, |s| {
135 s.match_string("\"").or_else(|s| s.match_string("\\"))
136 })
137 .and_then(|s| s.skip(1))
138 })
139 })
140 })
141 })
142 .and_then(|pos| pos.match_string("\""))
143 })
144 }
145
146 fn escape(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
147 state.sequence(|s| {
148 s.match_string("\\").and_then(|s| {
149 s.match_string("\"")
150 .or_else(|s| s.match_string("\\"))
151 .or_else(|s| s.match_string("/"))
152 .or_else(|s| s.match_string("b"))
153 .or_else(|s| s.match_string("f"))
154 .or_else(|s| s.match_string("n"))
155 .or_else(|s| s.match_string("r"))
156 .or_else(|s| s.match_string("t"))
157 .or_else(unicode)
158 })
159 })
160 }
161
162 fn unicode(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
163 state.sequence(|s| {
164 s.match_string("u")
165 .and_then(hex)
166 .and_then(hex)
167 .and_then(hex)
168 })
169 }
170
171 fn hex(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
172 state
173 .match_range('0'..'9')
174 .or_else(|s| s.match_range('a'..'f'))
175 .or_else(|s| s.match_range('A'..'F'))
176 }
177
178 fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
179 state.rule(Rule::number, |s| {
180 s.sequence(|s| {
181 s.optional(|s| s.match_string("-"))
182 .and_then(int)
183 .and_then(|s| {
184 s.optional(|s| {
185 s.sequence(|s| {
186 s.match_string(".")
187 .and_then(|s| s.match_range('0'..'9'))
188 .and_then(|s| s.repeat(|s| s.match_range('0'..'9')))
189 .and_then(|s| s.optional(exp))
190 .or_else(exp)
191 })
192 })
193 })
194 })
195 })
196 }
197
198 fn int(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
199 state.match_string("0").or_else(|s| {
200 s.sequence(|s| {
201 s.match_range('1'..'9')
202 .and_then(|s| s.repeat(|s| s.match_range('0'..'9')))
203 })
204 })
205 }
206
207 fn exp(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
208 state.sequence(|s| {
209 s.match_string("E")
210 .or_else(|s| s.match_string("e"))
211 .and_then(|s| {
212 s.optional(|s| s.match_string("+").or_else(|s| s.match_string("-")))
213 })
214 .and_then(int)
215 })
216 }
217
218 fn bool(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
219 state.rule(Rule::bool, |s| {
220 s.match_string("true").or_else(|s| s.match_string("false"))
221 })
222 }
223
224 fn null(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
225 state.rule(Rule::null, |s| s.match_string("null"))
226 }
227
228 fn skip(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
229 state.repeat(|s| {
230 s.match_string(" ")
231 .or_else(|s| s.match_string("\t"))
232 .or_else(|s| s.match_string("\r"))
233 .or_else(|s| s.match_string("\n"))
234 })
235 }
236
237 state(input, |state| match rule {
238 Rule::json => json(state),
239 Rule::object => object(state),
240 Rule::pair => pair(state),
241 Rule::array => array(state),
242 Rule::value => value(state),
243 Rule::string => string(state),
244 Rule::escape => escape(state),
245 Rule::unicode => unicode(state),
246 Rule::hex => hex(state),
247 Rule::number => number(state),
248 Rule::int => int(state),
249 Rule::exp => exp(state),
250 Rule::bool => bool(state),
251 Rule::null => null(state),
252 })
253 }
254 }
255
256 #[derive(Debug, PartialEq)]
257 enum Json<'i> {
258 Null,
259 Bool(bool),
260 Number(f64),
261 String(Span<'i>),
262 Array(Vec<Json<'i>>),
263 Object(HashMap<Span<'i>, Json<'i>>),
264 }
265
266 fn consume(pair: Pair<Rule>) -> Json {
267 fn value(pair: Pair<Rule>) -> Json {
268 let pair = pair.into_inner().next().unwrap();
269
270 match pair.as_rule() {
271 Rule::null => Json::Null,
272 Rule::bool => match pair.as_str() {
273 "false" => Json::Bool(false),
274 "true" => Json::Bool(true),
275 _ => unreachable!(),
276 },
277 Rule::number => Json::Number(pair.as_str().parse().unwrap()),
278 Rule::string => Json::String(pair.as_span()),
279 Rule::array => Json::Array(pair.into_inner().map(value).collect()),
280 Rule::object => {
281 let pairs = pair.into_inner().map(|pos| {
282 let mut pair = pos.into_inner();
283
284 let key = pair.next().unwrap().as_span();
285 let value = value(pair.next().unwrap());
286
287 (key, value)
288 });
289
290 Json::Object(pairs.collect())
291 }
292 _ => unreachable!(),
293 }
294 }
295
296 value(pair)
297 }
298
// The `null` rule consumes the whole literal and emits a single token.
#[test]
fn null() {
    let source = "null";

    parses_to! {
        parser: JsonParser,
        input: source,
        rule: Rule::null,
        tokens: [null(0, 4)]
    };
}
310
// The `bool` rule accepts the `false` literal as a single token.
#[test]
fn bool() {
    let source = "false";

    parses_to! {
        parser: JsonParser,
        input: source,
        rule: Rule::bool,
        tokens: [bool(0, 5)]
    };
}
322
// A lone zero is a complete number.
#[test]
fn number_zero() {
    let source = "0";

    parses_to! {
        parser: JsonParser,
        input: source,
        rule: Rule::number,
        tokens: [number(0, 1)]
    };
}
334
// Integer part plus fraction is matched as one number token.
#[test]
fn float() {
    let source = "100.001";

    parses_to! {
        parser: JsonParser,
        input: source,
        rule: Rule::number,
        tokens: [number(0, 7)]
    };
}
346
// A signed exponent after the fraction belongs to the same number token.
#[test]
fn float_with_exp() {
    let source = "100.001E+100";

    parses_to! {
        parser: JsonParser,
        input: source,
        rule: Rule::number,
        tokens: [number(0, 12)]
    };
}
358
// The leading minus sign is part of the number token.
#[test]
fn number_minus_zero() {
    let source = "-0";

    parses_to! {
        parser: JsonParser,
        input: source,
        rule: Rule::number,
        tokens: [number(0, 2)]
    };
}
370
// A string containing a `\u` escape and an escaped quote is one token that
// spans both surrounding quotes.
#[test]
fn string_with_escapes() {
    let source = "\"asd\\u0000\\\"\"";

    parses_to! {
        parser: JsonParser,
        input: source,
        rule: Rule::string,
        tokens: [string(0, 13)]
    };
}
382
// An array holding only whitespace has no inner tokens.
#[test]
fn array_empty() {
    let source = "[ ]";

    parses_to! {
        parser: JsonParser,
        input: source,
        rule: Rule::array,
        tokens: [array(0, 3)]
    };
}
394
// A mixed array yields one `value` token per element, each wrapping the
// concrete token; separators and whitespace are outside the element spans.
#[test]
fn array() {
    let source = "[0.0e1, false, null, \"a\", [0]]";

    parses_to! {
        parser: JsonParser,
        input: source,
        rule: Rule::array,
        tokens: [
            array(0, 30, [
                value(1, 6, [number(1, 6)]),
                value(8, 13, [bool(8, 13)]),
                value(15, 19, [null(15, 19)]),
                value(21, 24, [string(21, 24)]),
                value(26, 29, [
                    array(26, 29, [
                        value(27, 28, [number(27, 28)])
                    ])
                ])
            ])
        ]
    };
}
416
// An object yields one `pair` token per member, each holding the key string
// followed by the wrapped value; nested containers nest their tokens.
#[test]
fn object() {
    let source = "{\"a\" : 3, \"b\" : [{}, 3]}";

    parses_to! {
        parser: JsonParser,
        input: source,
        rule: Rule::object,
        tokens: [
            object(0, 24, [
                pair(1, 8, [
                    string(1, 4),
                    value(7, 8, [number(7, 8)])
                ]),
                pair(10, 23, [
                    string(10, 13),
                    value(16, 23, [
                        array(16, 23, [
                            value(17, 19, [object(17, 19)]),
                            value(21, 22, [number(21, 22)])
                        ])
                    ])
                ])
            ])
        ]
    };
}
442
// Parses a small document through `Rule::json` and checks the tree built by
// `consume`: a single-member object whose key is `"a"` and whose value is
// the expected array.
#[test]
fn ast() {
    let input = "{\"a\": [null, true, 3.4]}";

    let ast = consume(
        JsonParser::parse(Rule::json, input)
            .unwrap()
            .next()
            .unwrap(),
    );

    // Fail loudly when the root is not an object; an `if let` here would let
    // the test pass without checking anything.
    let pairs = match ast {
        Json::Object(pairs) => pairs,
        other => panic!("expected a JSON object, got {:?}", other),
    };

    assert_eq!(pairs.len(), 1);

    let (key, value) = pairs.iter().next().unwrap();

    // The key is the span of the string token, so it includes the quotes.
    assert_eq!(key.as_str(), "\"a\"");
    assert_eq!(
        *value,
        Json::Array(vec![Json::Null, Json::Bool(true), Json::Number(3.4)])
    );
}
463