• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! In this example we build an [S-expression](https://en.wikipedia.org/wiki/S-expression)
2 //! parser and tiny [lisp](https://en.wikipedia.org/wiki/Lisp_(programming_language)) interpreter.
3 //! Lisp is a simple type of language made up of Atoms and Lists, forming easily parsable trees.
4 
5 #![cfg(feature = "alloc")]
6 
7 use nom::{
8   branch::alt,
9   bytes::complete::tag,
10   character::complete::{alpha1, char, digit1, multispace0, multispace1, one_of},
11   combinator::{cut, map, map_res, opt},
12   error::{context, VerboseError},
13   multi::many0,
14   sequence::{delimited, preceded, terminated, tuple},
15   IResult, Parser,
16 };
17 
18 /// We start by defining the types that define the shape of data that we want.
19 /// In this case, we want something tree-like
20 
21 /// Starting from the most basic, we define some built-in functions that our lisp has
22 #[derive(Debug, PartialEq, Clone, Copy)]
23 pub enum BuiltIn {
24   Plus,
25   Minus,
26   Times,
27   Divide,
28   Equal,
29   Not,
30 }
31 
32 /// We now wrap this type and a few other primitives into our Atom type.
33 /// Remember from before that Atoms form one half of our language.
34 
35 #[derive(Debug, PartialEq, Clone)]
36 pub enum Atom {
37   Num(i32),
38   Keyword(String),
39   Boolean(bool),
40   BuiltIn(BuiltIn),
41 }
42 
43 /// The remaining half is Lists. We implement these as recursive Expressions.
44 /// For a list of numbers, we have `'(1 2 3)`, which we'll parse to:
45 /// ```
46 /// Expr::Quote(vec![Expr::Constant(Atom::Num(1)),
47 ///                  Expr::Constant(Atom::Num(2)),
48 ///                  Expr::Constant(Atom::Num(3))])
49 /// Quote takes an S-expression and prevents evaluation of it, making it a data
50 /// structure that we can deal with programmatically. Thus any valid expression
51 /// is also a valid data structure in Lisp itself.
52 
53 #[derive(Debug, PartialEq, Clone)]
54 pub enum Expr {
55   Constant(Atom),
56   /// (func-name arg1 arg2)
57   Application(Box<Expr>, Vec<Expr>),
58   /// (if predicate do-this)
59   If(Box<Expr>, Box<Expr>),
60   /// (if predicate do-this otherwise-do-this)
61   IfElse(Box<Expr>, Box<Expr>, Box<Expr>),
62   /// '(3 (if (+ 3 3) 4 5) 7)
63   Quote(Vec<Expr>),
64 }
65 
66 /// Continuing the trend of starting from the simplest piece and building up,
67 /// we start by creating a parser for the built-in operator functions.
parse_builtin_op<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>>68 fn parse_builtin_op<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>> {
69   // one_of matches one of the characters we give it
70   let (i, t) = one_of("+-*/=")(i)?;
71 
72   // because we are matching single character tokens, we can do the matching logic
73   // on the returned value
74   Ok((
75     i,
76     match t {
77       '+' => BuiltIn::Plus,
78       '-' => BuiltIn::Minus,
79       '*' => BuiltIn::Times,
80       '/' => BuiltIn::Divide,
81       '=' => BuiltIn::Equal,
82       _ => unreachable!(),
83     },
84   ))
85 }
86 
parse_builtin<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>>87 fn parse_builtin<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>> {
88   // alt gives us the result of first parser that succeeds, of the series of
89   // parsers we give it
90   alt((
91     parse_builtin_op,
92     // map lets us process the parsed output, in this case we know what we parsed,
93     // so we ignore the input and return the BuiltIn directly
94     map(tag("not"), |_| BuiltIn::Not),
95   ))(i)
96 }
97 
98 /// Our boolean values are also constant, so we can do it the same way
parse_bool<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>>99 fn parse_bool<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
100   alt((
101     map(tag("#t"), |_| Atom::Boolean(true)),
102     map(tag("#f"), |_| Atom::Boolean(false)),
103   ))(i)
104 }
105 
106 /// The next easiest thing to parse are keywords.
107 /// We introduce some error handling combinators: `context` for human readable errors
108 /// and `cut` to prevent back-tracking.
109 ///
110 /// Put plainly: `preceded(tag(":"), cut(alpha1))` means that once we see the `:`
111 /// character, we have to see one or more alphabetic chararcters or the input is invalid.
parse_keyword<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>>112 fn parse_keyword<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
113   map(
114     context("keyword", preceded(tag(":"), cut(alpha1))),
115     |sym_str: &str| Atom::Keyword(sym_str.to_string()),
116   )(i)
117 }
118 
119 /// Next up is number parsing. We're keeping it simple here by accepting any number (> 1)
120 /// of digits but ending the program if it doesn't fit into an i32.
parse_num<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>>121 fn parse_num<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
122   alt((
123     map_res(digit1, |digit_str: &str| {
124       digit_str.parse::<i32>().map(Atom::Num)
125     }),
126     map(preceded(tag("-"), digit1), |digit_str: &str| {
127       Atom::Num(-1 * digit_str.parse::<i32>().unwrap())
128     }),
129   ))(i)
130 }
131 
132 /// Now we take all these simple parsers and connect them.
133 /// We can now parse half of our language!
parse_atom<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>>134 fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
135   alt((
136     parse_num,
137     parse_bool,
138     map(parse_builtin, Atom::BuiltIn),
139     parse_keyword,
140   ))(i)
141 }
142 
143 /// We then add the Expr layer on top
parse_constant<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>>144 fn parse_constant<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
145   map(parse_atom, |atom| Expr::Constant(atom))(i)
146 }
147 
148 /// Before continuing, we need a helper function to parse lists.
149 /// A list starts with `(` and ends with a matching `)`.
150 /// By putting whitespace and newline parsing here, we can avoid having to worry about it
151 /// in much of the rest of the parser.
152 ///
153 /// Unlike the previous functions, this function doesn't take or consume input, instead it
154 /// takes a parsing function and returns a new parsing function.
s_exp<'a, O1, F>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O1, VerboseError<&'a str>> where F: Parser<&'a str, O1, VerboseError<&'a str>>,155 fn s_exp<'a, O1, F>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O1, VerboseError<&'a str>>
156 where
157   F: Parser<&'a str, O1, VerboseError<&'a str>>,
158 {
159   delimited(
160     char('('),
161     preceded(multispace0, inner),
162     context("closing paren", cut(preceded(multispace0, char(')')))),
163   )
164 }
165 
166 /// We can now use our new combinator to define the rest of the `Expr`s.
167 ///
168 /// Starting with function application, we can see how the parser mirrors our data
169 /// definitions: our definition is `Application(Box<Expr>, Vec<Expr>)`, so we know
170 /// that we need to parse an expression and then parse 0 or more expressions, all
171 /// wrapped in an S-expression.
172 ///
173 /// `tuple` is used to sequence parsers together, so we can translate this directly
174 /// and then map over it to transform the output into an `Expr::Application`
parse_application<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>>175 fn parse_application<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
176   let application_inner = map(tuple((parse_expr, many0(parse_expr))), |(head, tail)| {
177     Expr::Application(Box::new(head), tail)
178   });
179   // finally, we wrap it in an s-expression
180   s_exp(application_inner)(i)
181 }
182 
183 /// Because `Expr::If` and `Expr::IfElse` are so similar (we easily could have
184 /// defined `Expr::If` to have an `Option` for the else block), we parse both
185 /// in a single function.
186 ///
187 /// In fact, we define our parser as if `Expr::If` was defined with an Option in it,
188 /// we have the `opt` combinator which fits very nicely here.
parse_if<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>>189 fn parse_if<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
190   let if_inner = context(
191     "if expression",
192     map(
193       preceded(
194         // here to avoid ambiguity with other names starting with `if`, if we added
195         // variables to our language, we say that if must be terminated by at least
196         // one whitespace character
197         terminated(tag("if"), multispace1),
198         cut(tuple((parse_expr, parse_expr, opt(parse_expr)))),
199       ),
200       |(pred, true_branch, maybe_false_branch)| {
201         if let Some(false_branch) = maybe_false_branch {
202           Expr::IfElse(
203             Box::new(pred),
204             Box::new(true_branch),
205             Box::new(false_branch),
206           )
207         } else {
208           Expr::If(Box::new(pred), Box::new(true_branch))
209         }
210       },
211     ),
212   );
213   s_exp(if_inner)(i)
214 }
215 
216 /// A quoted S-expression is list data structure.
217 ///
218 /// This example doesn't have the symbol atom, but by adding variables and changing
219 /// the definition of quote to not always be around an S-expression, we'd get them
220 /// naturally.
parse_quote<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>>221 fn parse_quote<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
222   // this should look very straight-forward after all we've done:
223   // we find the `'` (quote) character, use cut to say that we're unambiguously
224   // looking for an s-expression of 0 or more expressions, and then parse them
225   map(
226     context("quote", preceded(tag("'"), cut(s_exp(many0(parse_expr))))),
227     |exprs| Expr::Quote(exprs),
228   )(i)
229 }
230 
231 /// We tie them all together again, making a top-level expression parser!
232 
parse_expr<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>>233 fn parse_expr<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
234   preceded(
235     multispace0,
236     alt((parse_constant, parse_application, parse_if, parse_quote)),
237   )(i)
238 }
239 
240 /// And that's it!
241 /// We can now parse our entire lisp language.
242 ///
243 /// But in order to make it a little more interesting, we can hack together
244 /// a little interpreter to take an Expr, which is really an
245 /// [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST),
246 /// and give us something back
247 
248 /// To start we define a couple of helper functions
get_num_from_expr(e: Expr) -> Option<i32>249 fn get_num_from_expr(e: Expr) -> Option<i32> {
250   if let Expr::Constant(Atom::Num(n)) = e {
251     Some(n)
252   } else {
253     None
254   }
255 }
256 
get_bool_from_expr(e: Expr) -> Option<bool>257 fn get_bool_from_expr(e: Expr) -> Option<bool> {
258   if let Expr::Constant(Atom::Boolean(b)) = e {
259     Some(b)
260   } else {
261     None
262   }
263 }
264 
265 /// This function tries to reduce the AST.
266 /// This has to return an Expression rather than an Atom because quoted s_expressions
267 /// can't be reduced
eval_expression(e: Expr) -> Option<Expr>268 fn eval_expression(e: Expr) -> Option<Expr> {
269   match e {
270     // Constants and quoted s-expressions are our base-case
271     Expr::Constant(_) | Expr::Quote(_) => Some(e),
272     // we then recursively `eval_expression` in the context of our special forms
273     // and built-in operators
274     Expr::If(pred, true_branch) => {
275       let reduce_pred = eval_expression(*pred)?;
276       if get_bool_from_expr(reduce_pred)? {
277         eval_expression(*true_branch)
278       } else {
279         None
280       }
281     }
282     Expr::IfElse(pred, true_branch, false_branch) => {
283       let reduce_pred = eval_expression(*pred)?;
284       if get_bool_from_expr(reduce_pred)? {
285         eval_expression(*true_branch)
286       } else {
287         eval_expression(*false_branch)
288       }
289     }
290     Expr::Application(head, tail) => {
291       let reduced_head = eval_expression(*head)?;
292       let reduced_tail = tail
293         .into_iter()
294         .map(|expr| eval_expression(expr))
295         .collect::<Option<Vec<Expr>>>()?;
296       if let Expr::Constant(Atom::BuiltIn(bi)) = reduced_head {
297         Some(Expr::Constant(match bi {
298           BuiltIn::Plus => Atom::Num(
299             reduced_tail
300               .into_iter()
301               .map(get_num_from_expr)
302               .collect::<Option<Vec<i32>>>()?
303               .into_iter()
304               .sum(),
305           ),
306           BuiltIn::Times => Atom::Num(
307             reduced_tail
308               .into_iter()
309               .map(get_num_from_expr)
310               .collect::<Option<Vec<i32>>>()?
311               .into_iter()
312               .product(),
313           ),
314           BuiltIn::Equal => Atom::Boolean(
315             reduced_tail
316               .iter()
317               .zip(reduced_tail.iter().skip(1))
318               .all(|(a, b)| a == b),
319           ),
320           BuiltIn::Not => {
321             if reduced_tail.len() != 1 {
322               return None;
323             } else {
324               Atom::Boolean(!get_bool_from_expr(reduced_tail.first().cloned().unwrap())?)
325             }
326           }
327           BuiltIn::Minus => Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() {
328             let fe = get_num_from_expr(first_elem)?;
329             reduced_tail
330               .into_iter()
331               .map(get_num_from_expr)
332               .collect::<Option<Vec<i32>>>()?
333               .into_iter()
334               .skip(1)
335               .fold(fe, |a, b| a - b)
336           } else {
337             Default::default()
338           }),
339           BuiltIn::Divide => Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() {
340             let fe = get_num_from_expr(first_elem)?;
341             reduced_tail
342               .into_iter()
343               .map(get_num_from_expr)
344               .collect::<Option<Vec<i32>>>()?
345               .into_iter()
346               .skip(1)
347               .fold(fe, |a, b| a / b)
348           } else {
349             Default::default()
350           }),
351         }))
352       } else {
353         None
354       }
355     }
356   }
357 }
358 
359 /// And we add one more top-level function to tie everything together, letting
360 /// us call eval on a string directly
eval_from_str(src: &str) -> Result<Expr, String>361 fn eval_from_str(src: &str) -> Result<Expr, String> {
362   parse_expr(src)
363     .map_err(|e: nom::Err<VerboseError<&str>>| format!("{:#?}", e))
364     .and_then(|(_, exp)| eval_expression(exp).ok_or("Eval failed".to_string()))
365 }
366 
main()367 fn main() {
368   let expression_1 = "((if (= (+ 3 (/ 9 3))
369          (* 2 3))
370      *
371      /)
372   456 123)";
373   println!(
374     "\"{}\"\nevaled gives us: {:?}",
375     expression_1,
376     eval_from_str(expression_1)
377   );
378 }
379