• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Syntax Tree library used throughout the rust-analyzer.
2 //!
3 //! Properties:
4 //!   - easy and fast incremental re-parsing
5 //!   - graceful handling of errors
6 //!   - full-fidelity representation (*any* text can be precisely represented as
7 //!     a syntax tree)
8 //!
9 //! For more information, see the [RFC]. Current implementation is inspired by
10 //! the [Swift] one.
11 //!
12 //! The most interesting modules here are `syntax_node` (which defines concrete
13 //! syntax tree) and `ast` (which defines abstract syntax tree on top of the
14 //! CST). The actual parser live in a separate `parser` crate, though the
15 //! lexer lives in this crate.
16 //!
17 //! See `api_walkthrough` test in this file for a quick API tour!
18 //!
19 //! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
20 //! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>
21 
22 #![warn(rust_2018_idioms, unused_lifetimes, semicolon_in_expressions_from_macros)]
23 
24 #[allow(unused)]
25 macro_rules! eprintln {
26     ($($tt:tt)*) => { stdx::eprintln!($($tt)*) };
27 }
28 
29 mod syntax_node;
30 mod syntax_error;
31 mod parsing;
32 mod validation;
33 mod ptr;
34 mod token_text;
35 #[cfg(test)]
36 mod tests;
37 
38 pub mod algo;
39 pub mod ast;
40 #[doc(hidden)]
41 pub mod fuzz;
42 pub mod utils;
43 pub mod ted;
44 pub mod hacks;
45 
46 use std::marker::PhantomData;
47 
48 use stdx::format_to;
49 use text_edit::Indel;
50 use triomphe::Arc;
51 
52 pub use crate::{
53     ast::{AstNode, AstToken},
54     ptr::{AstPtr, SyntaxNodePtr},
55     syntax_error::SyntaxError,
56     syntax_node::{
57         PreorderWithTokens, RustLanguage, SyntaxElement, SyntaxElementChildren, SyntaxNode,
58         SyntaxNodeChildren, SyntaxToken, SyntaxTreeBuilder,
59     },
60     token_text::TokenText,
61 };
62 pub use parser::{SyntaxKind, T};
63 pub use rowan::{
64     api::Preorder, Direction, GreenNode, NodeOrToken, SyntaxText, TextRange, TextSize,
65     TokenAtOffset, WalkEvent,
66 };
67 pub use smol_str::SmolStr;
68 
69 /// `Parse` is the result of the parsing: a syntax tree and a collection of
70 /// errors.
71 ///
72 /// Note that we always produce a syntax tree, even for completely invalid
73 /// files.
74 #[derive(Debug, PartialEq, Eq)]
75 pub struct Parse<T> {
76     green: GreenNode,
77     errors: Arc<Vec<SyntaxError>>,
78     _ty: PhantomData<fn() -> T>,
79 }
80 
81 impl<T> Clone for Parse<T> {
clone(&self) -> Parse<T>82     fn clone(&self) -> Parse<T> {
83         Parse { green: self.green.clone(), errors: self.errors.clone(), _ty: PhantomData }
84     }
85 }
86 
87 impl<T> Parse<T> {
new(green: GreenNode, errors: Vec<SyntaxError>) -> Parse<T>88     fn new(green: GreenNode, errors: Vec<SyntaxError>) -> Parse<T> {
89         Parse { green, errors: Arc::new(errors), _ty: PhantomData }
90     }
91 
syntax_node(&self) -> SyntaxNode92     pub fn syntax_node(&self) -> SyntaxNode {
93         SyntaxNode::new_root(self.green.clone())
94     }
errors(&self) -> &[SyntaxError]95     pub fn errors(&self) -> &[SyntaxError] {
96         &self.errors
97     }
98 }
99 
100 impl<T: AstNode> Parse<T> {
to_syntax(self) -> Parse<SyntaxNode>101     pub fn to_syntax(self) -> Parse<SyntaxNode> {
102         Parse { green: self.green, errors: self.errors, _ty: PhantomData }
103     }
104 
tree(&self) -> T105     pub fn tree(&self) -> T {
106         T::cast(self.syntax_node()).unwrap()
107     }
108 
ok(self) -> Result<T, Arc<Vec<SyntaxError>>>109     pub fn ok(self) -> Result<T, Arc<Vec<SyntaxError>>> {
110         if self.errors.is_empty() {
111             Ok(self.tree())
112         } else {
113             Err(self.errors)
114         }
115     }
116 }
117 
118 impl Parse<SyntaxNode> {
cast<N: AstNode>(self) -> Option<Parse<N>>119     pub fn cast<N: AstNode>(self) -> Option<Parse<N>> {
120         if N::cast(self.syntax_node()).is_some() {
121             Some(Parse { green: self.green, errors: self.errors, _ty: PhantomData })
122         } else {
123             None
124         }
125     }
126 }
127 
128 impl Parse<SourceFile> {
debug_dump(&self) -> String129     pub fn debug_dump(&self) -> String {
130         let mut buf = format!("{:#?}", self.tree().syntax());
131         for err in self.errors.iter() {
132             format_to!(buf, "error {:?}: {}\n", err.range(), err);
133         }
134         buf
135     }
136 
reparse(&self, indel: &Indel) -> Parse<SourceFile>137     pub fn reparse(&self, indel: &Indel) -> Parse<SourceFile> {
138         self.incremental_reparse(indel).unwrap_or_else(|| self.full_reparse(indel))
139     }
140 
incremental_reparse(&self, indel: &Indel) -> Option<Parse<SourceFile>>141     fn incremental_reparse(&self, indel: &Indel) -> Option<Parse<SourceFile>> {
142         // FIXME: validation errors are not handled here
143         parsing::incremental_reparse(self.tree().syntax(), indel, self.errors.to_vec()).map(
144             |(green_node, errors, _reparsed_range)| Parse {
145                 green: green_node,
146                 errors: Arc::new(errors),
147                 _ty: PhantomData,
148             },
149         )
150     }
151 
full_reparse(&self, indel: &Indel) -> Parse<SourceFile>152     fn full_reparse(&self, indel: &Indel) -> Parse<SourceFile> {
153         let mut text = self.tree().syntax().text().to_string();
154         indel.apply(&mut text);
155         SourceFile::parse(&text)
156     }
157 }
158 
159 /// `SourceFile` represents a parse tree for a single Rust file.
160 pub use crate::ast::SourceFile;
161 
162 impl SourceFile {
parse(text: &str) -> Parse<SourceFile>163     pub fn parse(text: &str) -> Parse<SourceFile> {
164         let (green, mut errors) = parsing::parse_text(text);
165         let root = SyntaxNode::new_root(green.clone());
166 
167         errors.extend(validation::validate(&root));
168 
169         assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
170         Parse { green, errors: Arc::new(errors), _ty: PhantomData }
171     }
172 }
173 
174 /// Matches a `SyntaxNode` against an `ast` type.
175 ///
176 /// # Example:
177 ///
178 /// ```ignore
179 /// match_ast! {
180 ///     match node {
181 ///         ast::CallExpr(it) => { ... },
182 ///         ast::MethodCallExpr(it) => { ... },
183 ///         ast::MacroCall(it) => { ... },
184 ///         _ => None,
185 ///     }
186 /// }
187 /// ```
188 #[macro_export]
189 macro_rules! match_ast {
190     (match $node:ident { $($tt:tt)* }) => { $crate::match_ast!(match ($node) { $($tt)* }) };
191 
192     (match ($node:expr) {
193         $( $( $path:ident )::+ ($it:pat) => $res:expr, )*
194         _ => $catch_all:expr $(,)?
195     }) => {{
196         $( if let Some($it) = $($path::)+cast($node.clone()) { $res } else )*
197         { $catch_all }
198     }};
199 }
200 
201 /// This test does not assert anything and instead just shows off the crate's
202 /// API.
203 #[test]
api_walkthrough()204 fn api_walkthrough() {
205     use ast::{HasModuleItem, HasName};
206 
207     let source_code = "
208         fn foo() {
209             1 + 1
210         }
211     ";
212     // `SourceFile` is the main entry point.
213     //
214     // The `parse` method returns a `Parse` -- a pair of syntax tree and a list
215     // of errors. That is, syntax tree is constructed even in presence of errors.
216     let parse = SourceFile::parse(source_code);
217     assert!(parse.errors().is_empty());
218 
219     // The `tree` method returns an owned syntax node of type `SourceFile`.
220     // Owned nodes are cheap: inside, they are `Rc` handles to the underling data.
221     let file: SourceFile = parse.tree();
222 
223     // `SourceFile` is the root of the syntax tree. We can iterate file's items.
224     // Let's fetch the `foo` function.
225     let mut func = None;
226     for item in file.items() {
227         match item {
228             ast::Item::Fn(f) => func = Some(f),
229             _ => unreachable!(),
230         }
231     }
232     let func: ast::Fn = func.unwrap();
233 
234     // Each AST node has a bunch of getters for children. All getters return
235     // `Option`s though, to account for incomplete code. Some getters are common
236     // for several kinds of node. In this case, a trait like `ast::NameOwner`
237     // usually exists. By convention, all ast types should be used with `ast::`
238     // qualifier.
239     let name: Option<ast::Name> = func.name();
240     let name = name.unwrap();
241     assert_eq!(name.text(), "foo");
242 
243     // Let's get the `1 + 1` expression!
244     let body: ast::BlockExpr = func.body().unwrap();
245     let stmt_list: ast::StmtList = body.stmt_list().unwrap();
246     let expr: ast::Expr = stmt_list.tail_expr().unwrap();
247 
248     // Enums are used to group related ast nodes together, and can be used for
249     // matching. However, because there are no public fields, it's possible to
250     // match only the top level enum: that is the price we pay for increased API
251     // flexibility
252     let bin_expr: &ast::BinExpr = match &expr {
253         ast::Expr::BinExpr(e) => e,
254         _ => unreachable!(),
255     };
256 
257     // Besides the "typed" AST API, there's an untyped CST one as well.
258     // To switch from AST to CST, call `.syntax()` method:
259     let expr_syntax: &SyntaxNode = expr.syntax();
260 
261     // Note how `expr` and `bin_expr` are in fact the same node underneath:
262     assert!(expr_syntax == bin_expr.syntax());
263 
264     // To go from CST to AST, `AstNode::cast` function is used:
265     let _expr: ast::Expr = match ast::Expr::cast(expr_syntax.clone()) {
266         Some(e) => e,
267         None => unreachable!(),
268     };
269 
270     // The two properties each syntax node has is a `SyntaxKind`:
271     assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);
272 
273     // And text range:
274     assert_eq!(expr_syntax.text_range(), TextRange::new(32.into(), 37.into()));
275 
276     // You can get node's text as a `SyntaxText` object, which will traverse the
277     // tree collecting token's text:
278     let text: SyntaxText = expr_syntax.text();
279     assert_eq!(text.to_string(), "1 + 1");
280 
281     // There's a bunch of traversal methods on `SyntaxNode`:
282     assert_eq!(expr_syntax.parent().as_ref(), Some(stmt_list.syntax()));
283     assert_eq!(stmt_list.syntax().first_child_or_token().map(|it| it.kind()), Some(T!['{']));
284     assert_eq!(
285         expr_syntax.next_sibling_or_token().map(|it| it.kind()),
286         Some(SyntaxKind::WHITESPACE)
287     );
288 
289     // As well as some iterator helpers:
290     let f = expr_syntax.ancestors().find_map(ast::Fn::cast);
291     assert_eq!(f, Some(func));
292     assert!(expr_syntax.siblings_with_tokens(Direction::Next).any(|it| it.kind() == T!['}']));
293     assert_eq!(
294         expr_syntax.descendants_with_tokens().count(),
295         8, // 5 tokens `1`, ` `, `+`, ` `, `!`
296            // 2 child literal expressions: `1`, `1`
297            // 1 the node itself: `1 + 1`
298     );
299 
300     // There's also a `preorder` method with a more fine-grained iteration control:
301     let mut buf = String::new();
302     let mut indent = 0;
303     for event in expr_syntax.preorder_with_tokens() {
304         match event {
305             WalkEvent::Enter(node) => {
306                 let text = match &node {
307                     NodeOrToken::Node(it) => it.text().to_string(),
308                     NodeOrToken::Token(it) => it.text().to_string(),
309                 };
310                 format_to!(buf, "{:indent$}{:?} {:?}\n", " ", text, node.kind(), indent = indent);
311                 indent += 2;
312             }
313             WalkEvent::Leave(_) => indent -= 2,
314         }
315     }
316     assert_eq!(indent, 0);
317     assert_eq!(
318         buf.trim(),
319         r#"
320 "1 + 1" BIN_EXPR
321   "1" LITERAL
322     "1" INT_NUMBER
323   " " WHITESPACE
324   "+" PLUS
325   " " WHITESPACE
326   "1" LITERAL
327     "1" INT_NUMBER
328 "#
329         .trim()
330     );
331 
332     // To recursively process the tree, there are three approaches:
333     // 1. explicitly call getter methods on AST nodes.
334     // 2. use descendants and `AstNode::cast`.
335     // 3. use descendants and `match_ast!`.
336     //
337     // Here's how the first one looks like:
338     let exprs_cast: Vec<String> = file
339         .syntax()
340         .descendants()
341         .filter_map(ast::Expr::cast)
342         .map(|expr| expr.syntax().text().to_string())
343         .collect();
344 
345     // An alternative is to use a macro.
346     let mut exprs_visit = Vec::new();
347     for node in file.syntax().descendants() {
348         match_ast! {
349             match node {
350                 ast::Expr(it) => {
351                     let res = it.syntax().text().to_string();
352                     exprs_visit.push(res);
353                 },
354                 _ => (),
355             }
356         }
357     }
358     assert_eq!(exprs_cast, exprs_visit);
359 }
360