1 //! In this example we build an [S-expression](https://en.wikipedia.org/wiki/S-expression)
2 //! parser and tiny [lisp](https://en.wikipedia.org/wiki/Lisp_(programming_language)) interpreter.
3 //! Lisp is a simple type of language made up of Atoms and Lists, forming easily parsable trees.
4
5 #![cfg(feature = "alloc")]
6
7 use nom::{
8 branch::alt,
9 bytes::complete::tag,
10 character::complete::{alpha1, char, digit1, multispace0, multispace1, one_of},
11 combinator::{cut, map, map_res, opt},
12 error::{context, VerboseError},
13 multi::many0,
14 sequence::{delimited, preceded, terminated, tuple},
15 IResult, Parser,
16 };
17
18 /// We start by defining the types that define the shape of data that we want.
19 /// In this case, we want something tree-like
20
21 /// Starting from the most basic, we define some built-in functions that our lisp has
22 #[derive(Debug, PartialEq, Clone, Copy)]
23 pub enum BuiltIn {
24 Plus,
25 Minus,
26 Times,
27 Divide,
28 Equal,
29 Not,
30 }
31
32 /// We now wrap this type and a few other primitives into our Atom type.
33 /// Remember from before that Atoms form one half of our language.
34
35 #[derive(Debug, PartialEq, Clone)]
36 pub enum Atom {
37 Num(i32),
38 Keyword(String),
39 Boolean(bool),
40 BuiltIn(BuiltIn),
41 }
42
43 /// The remaining half is Lists. We implement these as recursive Expressions.
44 /// For a list of numbers, we have `'(1 2 3)`, which we'll parse to:
45 /// ```
46 /// Expr::Quote(vec![Expr::Constant(Atom::Num(1)),
47 /// Expr::Constant(Atom::Num(2)),
48 /// Expr::Constant(Atom::Num(3))])
49 /// Quote takes an S-expression and prevents evaluation of it, making it a data
50 /// structure that we can deal with programmatically. Thus any valid expression
51 /// is also a valid data structure in Lisp itself.
52
53 #[derive(Debug, PartialEq, Clone)]
54 pub enum Expr {
55 Constant(Atom),
56 /// (func-name arg1 arg2)
57 Application(Box<Expr>, Vec<Expr>),
58 /// (if predicate do-this)
59 If(Box<Expr>, Box<Expr>),
60 /// (if predicate do-this otherwise-do-this)
61 IfElse(Box<Expr>, Box<Expr>, Box<Expr>),
62 /// '(3 (if (+ 3 3) 4 5) 7)
63 Quote(Vec<Expr>),
64 }
65
66 /// Continuing the trend of starting from the simplest piece and building up,
67 /// we start by creating a parser for the built-in operator functions.
parse_builtin_op<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>>68 fn parse_builtin_op<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>> {
69 // one_of matches one of the characters we give it
70 let (i, t) = one_of("+-*/=")(i)?;
71
72 // because we are matching single character tokens, we can do the matching logic
73 // on the returned value
74 Ok((
75 i,
76 match t {
77 '+' => BuiltIn::Plus,
78 '-' => BuiltIn::Minus,
79 '*' => BuiltIn::Times,
80 '/' => BuiltIn::Divide,
81 '=' => BuiltIn::Equal,
82 _ => unreachable!(),
83 },
84 ))
85 }
86
parse_builtin<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>>87 fn parse_builtin<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>> {
88 // alt gives us the result of first parser that succeeds, of the series of
89 // parsers we give it
90 alt((
91 parse_builtin_op,
92 // map lets us process the parsed output, in this case we know what we parsed,
93 // so we ignore the input and return the BuiltIn directly
94 map(tag("not"), |_| BuiltIn::Not),
95 ))(i)
96 }
97
98 /// Our boolean values are also constant, so we can do it the same way
parse_bool<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>>99 fn parse_bool<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
100 alt((
101 map(tag("#t"), |_| Atom::Boolean(true)),
102 map(tag("#f"), |_| Atom::Boolean(false)),
103 ))(i)
104 }
105
106 /// The next easiest thing to parse are keywords.
107 /// We introduce some error handling combinators: `context` for human readable errors
108 /// and `cut` to prevent back-tracking.
109 ///
110 /// Put plainly: `preceded(tag(":"), cut(alpha1))` means that once we see the `:`
111 /// character, we have to see one or more alphabetic chararcters or the input is invalid.
parse_keyword<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>>112 fn parse_keyword<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
113 map(
114 context("keyword", preceded(tag(":"), cut(alpha1))),
115 |sym_str: &str| Atom::Keyword(sym_str.to_string()),
116 )(i)
117 }
118
119 /// Next up is number parsing. We're keeping it simple here by accepting any number (> 1)
120 /// of digits but ending the program if it doesn't fit into an i32.
parse_num<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>>121 fn parse_num<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
122 alt((
123 map_res(digit1, |digit_str: &str| {
124 digit_str.parse::<i32>().map(Atom::Num)
125 }),
126 map(preceded(tag("-"), digit1), |digit_str: &str| {
127 Atom::Num(-1 * digit_str.parse::<i32>().unwrap())
128 }),
129 ))(i)
130 }
131
132 /// Now we take all these simple parsers and connect them.
133 /// We can now parse half of our language!
parse_atom<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>>134 fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
135 alt((
136 parse_num,
137 parse_bool,
138 map(parse_builtin, Atom::BuiltIn),
139 parse_keyword,
140 ))(i)
141 }
142
143 /// We then add the Expr layer on top
parse_constant<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>>144 fn parse_constant<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
145 map(parse_atom, |atom| Expr::Constant(atom))(i)
146 }
147
148 /// Before continuing, we need a helper function to parse lists.
149 /// A list starts with `(` and ends with a matching `)`.
150 /// By putting whitespace and newline parsing here, we can avoid having to worry about it
151 /// in much of the rest of the parser.
152 ///
153 /// Unlike the previous functions, this function doesn't take or consume input, instead it
154 /// takes a parsing function and returns a new parsing function.
s_exp<'a, O1, F>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O1, VerboseError<&'a str>> where F: Parser<&'a str, O1, VerboseError<&'a str>>,155 fn s_exp<'a, O1, F>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O1, VerboseError<&'a str>>
156 where
157 F: Parser<&'a str, O1, VerboseError<&'a str>>,
158 {
159 delimited(
160 char('('),
161 preceded(multispace0, inner),
162 context("closing paren", cut(preceded(multispace0, char(')')))),
163 )
164 }
165
166 /// We can now use our new combinator to define the rest of the `Expr`s.
167 ///
168 /// Starting with function application, we can see how the parser mirrors our data
169 /// definitions: our definition is `Application(Box<Expr>, Vec<Expr>)`, so we know
170 /// that we need to parse an expression and then parse 0 or more expressions, all
171 /// wrapped in an S-expression.
172 ///
173 /// `tuple` is used to sequence parsers together, so we can translate this directly
174 /// and then map over it to transform the output into an `Expr::Application`
parse_application<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>>175 fn parse_application<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
176 let application_inner = map(tuple((parse_expr, many0(parse_expr))), |(head, tail)| {
177 Expr::Application(Box::new(head), tail)
178 });
179 // finally, we wrap it in an s-expression
180 s_exp(application_inner)(i)
181 }
182
183 /// Because `Expr::If` and `Expr::IfElse` are so similar (we easily could have
184 /// defined `Expr::If` to have an `Option` for the else block), we parse both
185 /// in a single function.
186 ///
187 /// In fact, we define our parser as if `Expr::If` was defined with an Option in it,
188 /// we have the `opt` combinator which fits very nicely here.
parse_if<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>>189 fn parse_if<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
190 let if_inner = context(
191 "if expression",
192 map(
193 preceded(
194 // here to avoid ambiguity with other names starting with `if`, if we added
195 // variables to our language, we say that if must be terminated by at least
196 // one whitespace character
197 terminated(tag("if"), multispace1),
198 cut(tuple((parse_expr, parse_expr, opt(parse_expr)))),
199 ),
200 |(pred, true_branch, maybe_false_branch)| {
201 if let Some(false_branch) = maybe_false_branch {
202 Expr::IfElse(
203 Box::new(pred),
204 Box::new(true_branch),
205 Box::new(false_branch),
206 )
207 } else {
208 Expr::If(Box::new(pred), Box::new(true_branch))
209 }
210 },
211 ),
212 );
213 s_exp(if_inner)(i)
214 }
215
216 /// A quoted S-expression is list data structure.
217 ///
218 /// This example doesn't have the symbol atom, but by adding variables and changing
219 /// the definition of quote to not always be around an S-expression, we'd get them
220 /// naturally.
parse_quote<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>>221 fn parse_quote<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
222 // this should look very straight-forward after all we've done:
223 // we find the `'` (quote) character, use cut to say that we're unambiguously
224 // looking for an s-expression of 0 or more expressions, and then parse them
225 map(
226 context("quote", preceded(tag("'"), cut(s_exp(many0(parse_expr))))),
227 |exprs| Expr::Quote(exprs),
228 )(i)
229 }
230
231 /// We tie them all together again, making a top-level expression parser!
232
parse_expr<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>>233 fn parse_expr<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
234 preceded(
235 multispace0,
236 alt((parse_constant, parse_application, parse_if, parse_quote)),
237 )(i)
238 }
239
240 /// And that's it!
241 /// We can now parse our entire lisp language.
242 ///
243 /// But in order to make it a little more interesting, we can hack together
244 /// a little interpreter to take an Expr, which is really an
245 /// [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST),
246 /// and give us something back
247
248 /// To start we define a couple of helper functions
get_num_from_expr(e: Expr) -> Option<i32>249 fn get_num_from_expr(e: Expr) -> Option<i32> {
250 if let Expr::Constant(Atom::Num(n)) = e {
251 Some(n)
252 } else {
253 None
254 }
255 }
256
get_bool_from_expr(e: Expr) -> Option<bool>257 fn get_bool_from_expr(e: Expr) -> Option<bool> {
258 if let Expr::Constant(Atom::Boolean(b)) = e {
259 Some(b)
260 } else {
261 None
262 }
263 }
264
265 /// This function tries to reduce the AST.
266 /// This has to return an Expression rather than an Atom because quoted s_expressions
267 /// can't be reduced
eval_expression(e: Expr) -> Option<Expr>268 fn eval_expression(e: Expr) -> Option<Expr> {
269 match e {
270 // Constants and quoted s-expressions are our base-case
271 Expr::Constant(_) | Expr::Quote(_) => Some(e),
272 // we then recursively `eval_expression` in the context of our special forms
273 // and built-in operators
274 Expr::If(pred, true_branch) => {
275 let reduce_pred = eval_expression(*pred)?;
276 if get_bool_from_expr(reduce_pred)? {
277 eval_expression(*true_branch)
278 } else {
279 None
280 }
281 }
282 Expr::IfElse(pred, true_branch, false_branch) => {
283 let reduce_pred = eval_expression(*pred)?;
284 if get_bool_from_expr(reduce_pred)? {
285 eval_expression(*true_branch)
286 } else {
287 eval_expression(*false_branch)
288 }
289 }
290 Expr::Application(head, tail) => {
291 let reduced_head = eval_expression(*head)?;
292 let reduced_tail = tail
293 .into_iter()
294 .map(|expr| eval_expression(expr))
295 .collect::<Option<Vec<Expr>>>()?;
296 if let Expr::Constant(Atom::BuiltIn(bi)) = reduced_head {
297 Some(Expr::Constant(match bi {
298 BuiltIn::Plus => Atom::Num(
299 reduced_tail
300 .into_iter()
301 .map(get_num_from_expr)
302 .collect::<Option<Vec<i32>>>()?
303 .into_iter()
304 .sum(),
305 ),
306 BuiltIn::Times => Atom::Num(
307 reduced_tail
308 .into_iter()
309 .map(get_num_from_expr)
310 .collect::<Option<Vec<i32>>>()?
311 .into_iter()
312 .product(),
313 ),
314 BuiltIn::Equal => Atom::Boolean(
315 reduced_tail
316 .iter()
317 .zip(reduced_tail.iter().skip(1))
318 .all(|(a, b)| a == b),
319 ),
320 BuiltIn::Not => {
321 if reduced_tail.len() != 1 {
322 return None;
323 } else {
324 Atom::Boolean(!get_bool_from_expr(reduced_tail.first().cloned().unwrap())?)
325 }
326 }
327 BuiltIn::Minus => Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() {
328 let fe = get_num_from_expr(first_elem)?;
329 reduced_tail
330 .into_iter()
331 .map(get_num_from_expr)
332 .collect::<Option<Vec<i32>>>()?
333 .into_iter()
334 .skip(1)
335 .fold(fe, |a, b| a - b)
336 } else {
337 Default::default()
338 }),
339 BuiltIn::Divide => Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() {
340 let fe = get_num_from_expr(first_elem)?;
341 reduced_tail
342 .into_iter()
343 .map(get_num_from_expr)
344 .collect::<Option<Vec<i32>>>()?
345 .into_iter()
346 .skip(1)
347 .fold(fe, |a, b| a / b)
348 } else {
349 Default::default()
350 }),
351 }))
352 } else {
353 None
354 }
355 }
356 }
357 }
358
359 /// And we add one more top-level function to tie everything together, letting
360 /// us call eval on a string directly
eval_from_str(src: &str) -> Result<Expr, String>361 fn eval_from_str(src: &str) -> Result<Expr, String> {
362 parse_expr(src)
363 .map_err(|e: nom::Err<VerboseError<&str>>| format!("{:#?}", e))
364 .and_then(|(_, exp)| eval_expression(exp).ok_or("Eval failed".to_string()))
365 }
366
main()367 fn main() {
368 let expression_1 = "((if (= (+ 3 (/ 9 3))
369 (* 2 3))
370 *
371 /)
372 456 123)";
373 println!(
374 "\"{}\"\nevaled gives us: {:?}",
375 expression_1,
376 eval_from_str(expression_1)
377 );
378 }
379