//! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].

use rustc_hash::FxHashMap;
use stdx::{always, non_empty_vec::NonEmptyVec};
use syntax::{
    ast::{self, make::tokens::doc_comment},
    AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, SyntaxKind,
    SyntaxKind::*,
    SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, WalkEvent, T,
};

use crate::{
    to_parser_input::to_parser_input,
    tt::{
        self,
        buffer::{Cursor, TokenBuffer},
    },
    tt_iter::TtIter,
    TokenMap,
};

#[cfg(test)]
mod tests;

/// Convert the syntax node to a `TokenTree` (what a macro
/// will consume).
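///
/// A minimal usage sketch (illustration only, not compiled as a doctest;
/// assumes the `syntax` crate's `SourceFile::parse` API):
///
/// ```ignore
/// let parse = syntax::SourceFile::parse("struct S;");
/// let (subtree, token_map) = syntax_node_to_token_tree(&parse.syntax_node());
/// // `token_map` maps each allocated `tt::TokenId` back to a text range in
/// // the original node.
/// ```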
pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) {
    let (subtree, token_map, _) = syntax_node_to_token_tree_with_modifications(
        node,
        Default::default(),
        0,
        Default::default(),
        Default::default(),
    );
    (subtree, token_map)
}

/// Convert the syntax node to a `TokenTree` (what a macro will consume),
/// applying the given `replace` and `append` modifications (a replacement
/// with an empty token list effectively censors an element).
pub fn syntax_node_to_token_tree_with_modifications(
    node: &SyntaxNode,
    existing_token_map: TokenMap,
    next_id: u32,
    replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
    append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
) -> (tt::Subtree, TokenMap, u32) {
    let global_offset = node.text_range().start();
    let mut c = Converter::new(node, global_offset, existing_token_map, next_id, replace, append);
    let subtree = convert_tokens(&mut c);
    c.id_alloc.map.shrink_to_fit();
    always!(c.replace.is_empty(), "replace: {:?}", c.replace);
    always!(c.append.is_empty(), "append: {:?}", c.append);
    (subtree, c.id_alloc.map, c.id_alloc.next_id)
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct SyntheticTokenId(pub u32);

#[derive(Debug, Clone)]
pub struct SyntheticToken {
    pub kind: SyntaxKind,
    pub text: SmolStr,
    pub range: TextRange,
    pub id: SyntheticTokenId,
}

// The following items are what a `rustc` macro can be parsed into:
// link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
// * Expr(P<ast::Expr>)                     -> token_tree_to_expr
// * Pat(P<ast::Pat>)                       -> token_tree_to_pat
// * Ty(P<ast::Ty>)                         -> token_tree_to_ty
// * Stmts(SmallVec<[ast::Stmt; 1]>)        -> token_tree_to_stmts
// * Items(SmallVec<[P<ast::Item>; 1]>)     -> token_tree_to_items
//
// * TraitItems(SmallVec<[ast::TraitItem; 1]>)
// * AssocItems(SmallVec<[ast::AssocItem; 1]>)
// * ForeignItems(SmallVec<[ast::ForeignItem; 1]>)
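
/// Parse a [`tt::Subtree`] back into a syntax tree at the given entry point.
///
/// A minimal round-trip sketch (illustration only, not compiled as a
/// doctest):
///
/// ```ignore
/// let (tt, _) = syntax_node_to_token_tree(&node);
/// let (parse, _map) = token_tree_to_syntax_node(&tt, parser::TopEntryPoint::SourceFile);
/// ```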
pub fn token_tree_to_syntax_node(
    tt: &tt::Subtree,
    entry_point: parser::TopEntryPoint,
) -> (Parse<SyntaxNode>, TokenMap) {
    let buffer = match tt {
        tt::Subtree {
            delimiter: tt::Delimiter { kind: tt::DelimiterKind::Invisible, .. },
            token_trees,
        } => TokenBuffer::from_tokens(token_trees.as_slice()),
        _ => TokenBuffer::from_subtree(tt),
    };
    let parser_input = to_parser_input(&buffer);
    let parser_output = entry_point.parse(&parser_input);
    let mut tree_sink = TtTreeSink::new(buffer.begin());
    for event in parser_output.iter() {
        match event {
            parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => {
                tree_sink.token(kind, n_raw_tokens)
            }
            parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
                tree_sink.float_split(has_pseudo_dot)
            }
            parser::Step::Enter { kind } => tree_sink.start_node(kind),
            parser::Step::Exit => tree_sink.finish_node(),
            parser::Step::Error { msg } => tree_sink.error(msg.to_string()),
        }
    }
    tree_sink.finish()
}

/// Convert a string to a `TokenTree`.
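///
/// Returns `None` if the text fails to lex. A sketch (illustration only, not
/// compiled as a doctest):
///
/// ```ignore
/// let (subtree, token_map) = parse_to_token_tree("struct S;").unwrap();
/// assert!(parse_to_token_tree("\"unterminated").is_none()); // lex error
/// ```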
pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
    let lexed = parser::LexedStr::new(text);
    if lexed.errors().next().is_some() {
        return None;
    }

    let mut conv = RawConverter {
        lexed,
        pos: 0,
        id_alloc: TokenIdAlloc {
            map: Default::default(),
            global_offset: TextSize::default(),
            next_id: 0,
        },
    };

    let subtree = convert_tokens(&mut conv);
    Some((subtree, conv.id_alloc.map))
}

/// Split a token tree into separator-delimited expressions, as in
/// `$($e:expr)SEP*`.
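///
/// For example, splitting the tokens of `1 + 1, 2` on `,` should yield the
/// subtrees for `1 + 1` and `2` (a sketch of the intended behavior, not a
/// doctest):
///
/// ```ignore
/// let (tt, _) = parse_to_token_tree("1 + 1, 2").unwrap();
/// assert_eq!(parse_exprs_with_sep(&tt, ',').len(), 2);
/// ```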
pub fn parse_exprs_with_sep(tt: &tt::Subtree, sep: char) -> Vec<tt::Subtree> {
    if tt.token_trees.is_empty() {
        return Vec::new();
    }

    let mut iter = TtIter::new(tt);
    let mut res = Vec::new();

    while iter.peek_n(0).is_some() {
        let expanded = iter.expect_fragment(parser::PrefixEntryPoint::Expr);

        res.push(match expanded.value {
            None => break,
            Some(tt @ tt::TokenTree::Leaf(_)) => {
                tt::Subtree { delimiter: tt::Delimiter::unspecified(), token_trees: vec![tt] }
            }
            Some(tt::TokenTree::Subtree(tt)) => tt,
        });

        let mut fork = iter.clone();
        if fork.expect_char(sep).is_err() {
            break;
        }
        iter = fork;
    }

    if iter.peek_n(0).is_some() {
        res.push(tt::Subtree {
            delimiter: tt::Delimiter::unspecified(),
            token_trees: iter.cloned().collect(),
        });
    }

    res
}
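
// Shared driver for both converters: tokens are bumped one at a time; an
// opening delimiter pushes a new `StackEntry`, the matching closing delimiter
// pops it back into its parent subtree, and any entries left unbalanced at
// the end are flattened into plain punct tokens.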
fn convert_tokens<C: TokenConverter>(conv: &mut C) -> tt::Subtree {
    struct StackEntry {
        subtree: tt::Subtree,
        idx: usize,
        open_range: TextRange,
    }

    let entry = StackEntry {
        subtree: tt::Subtree { delimiter: tt::Delimiter::unspecified(), token_trees: vec![] },
        // never used (delimiter is invisible)
        idx: !0,
        open_range: TextRange::empty(TextSize::of('.')),
    };
    let mut stack = NonEmptyVec::new(entry);

    loop {
        let StackEntry { subtree, .. } = stack.last_mut();
        let result = &mut subtree.token_trees;
        let (token, range) = match conv.bump() {
            Some(it) => it,
            None => break,
        };
        let synth_id = token.synthetic_id(conv);

        let kind = token.kind(conv);
        if kind == COMMENT {
            // Since `convert_doc_comment` can fail, we need to peek the next id, so that we can
            // figure out which token id to use for the doc comment, if it is converted successfully.
            let next_id = conv.id_alloc().peek_next_id();
            if let Some(tokens) = conv.convert_doc_comment(&token, next_id) {
                let id = conv.id_alloc().alloc(range, synth_id);
                debug_assert_eq!(id, next_id);
                result.extend(tokens);
            }
            continue;
        }
        let tt = if kind.is_punct() && kind != UNDERSCORE {
            if synth_id.is_none() {
                assert_eq!(range.len(), TextSize::of('.'));
            }

            let expected = match subtree.delimiter.kind {
                tt::DelimiterKind::Parenthesis => Some(T![')']),
                tt::DelimiterKind::Brace => Some(T!['}']),
                tt::DelimiterKind::Bracket => Some(T![']']),
                tt::DelimiterKind::Invisible => None,
            };

            if let Some(expected) = expected {
                if kind == expected {
                    if let Some(entry) = stack.pop() {
                        conv.id_alloc().close_delim(entry.idx, Some(range));
                        stack.last_mut().subtree.token_trees.push(entry.subtree.into());
                    }
                    continue;
                }
            }

            let delim = match kind {
                T!['('] => Some(tt::DelimiterKind::Parenthesis),
                T!['{'] => Some(tt::DelimiterKind::Brace),
                T!['['] => Some(tt::DelimiterKind::Bracket),
                _ => None,
            };

            if let Some(kind) = delim {
                let (id, idx) = conv.id_alloc().open_delim(range, synth_id);
                let subtree = tt::Subtree {
                    delimiter: tt::Delimiter { open: id, close: tt::TokenId::UNSPECIFIED, kind },
                    token_trees: vec![],
                };
                stack.push(StackEntry { subtree, idx, open_range: range });
                continue;
            }

            let spacing = match conv.peek().map(|next| next.kind(conv)) {
                Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint,
                _ => tt::Spacing::Alone,
            };
            let char = match token.to_char(conv) {
                Some(c) => c,
                None => {
                    panic!("Token from lexer must be single char: token = {token:#?}");
                }
            };
            tt::Leaf::from(tt::Punct {
                char,
                spacing,
                span: conv.id_alloc().alloc(range, synth_id),
            })
            .into()
        } else {
            macro_rules! make_leaf {
                ($i:ident) => {
                    tt::$i {
                        span: conv.id_alloc().alloc(range, synth_id),
                        text: token.to_text(conv),
                    }
                    .into()
                };
            }
            let leaf: tt::Leaf = match kind {
                T![true] | T![false] => make_leaf!(Ident),
                IDENT => make_leaf!(Ident),
                UNDERSCORE => make_leaf!(Ident),
                k if k.is_keyword() => make_leaf!(Ident),
                k if k.is_literal() => make_leaf!(Literal),
                LIFETIME_IDENT => {
                    let char_unit = TextSize::of('\'');
                    let r = TextRange::at(range.start(), char_unit);
                    let apostrophe = tt::Leaf::from(tt::Punct {
                        char: '\'',
                        spacing: tt::Spacing::Joint,
                        span: conv.id_alloc().alloc(r, synth_id),
                    });
                    result.push(apostrophe.into());

                    let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
                    let ident = tt::Leaf::from(tt::Ident {
                        text: SmolStr::new(&token.to_text(conv)[1..]),
                        span: conv.id_alloc().alloc(r, synth_id),
                    });
                    result.push(ident.into());
                    continue;
                }
                _ => continue,
            };

            leaf.into()
        };
        result.push(tt);
    }

    // If we get here, we've consumed all input tokens.
    // We might have more than one subtree in the stack, if the delimiters are improperly balanced.
    // Merge them so we're left with one.
    while let Some(entry) = stack.pop() {
        let parent = stack.last_mut();

        conv.id_alloc().close_delim(entry.idx, None);
        let leaf: tt::Leaf = tt::Punct {
            span: conv.id_alloc().alloc(entry.open_range, None),
            char: match entry.subtree.delimiter.kind {
                tt::DelimiterKind::Parenthesis => '(',
                tt::DelimiterKind::Brace => '{',
                tt::DelimiterKind::Bracket => '[',
                tt::DelimiterKind::Invisible => '$',
            },
            spacing: tt::Spacing::Alone,
        }
        .into();
        parent.subtree.token_trees.push(leaf.into());
        parent.subtree.token_trees.extend(entry.subtree.token_trees);
    }

    let subtree = stack.into_last().subtree;
    if let [tt::TokenTree::Subtree(first)] = &*subtree.token_trees {
        first.clone()
    } else {
        subtree
    }
}

fn is_single_token_op(kind: SyntaxKind) -> bool {
    matches!(
        kind,
        EQ | L_ANGLE
            | R_ANGLE
            | BANG
            | AMP
            | PIPE
            | TILDE
            | AT
            | DOT
            | COMMA
            | SEMICOLON
            | COLON
            | POUND
            | DOLLAR
            | QUESTION
            | PLUS
            | MINUS
            | STAR
            | SLASH
            | PERCENT
            | CARET
            // LIFETIME_IDENT will be split into a sequence of `'` (a single quote) and an
            // identifier.
            | LIFETIME_IDENT
    )
}

/// Returns the textual content of a doc comment block as a quoted string.
/// That is, strips the leading `///` (or `/**`, etc.) and the trailing `*/`,
/// and then quotes the result, which is needed to convert it to a
/// `tt::Literal`.
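///
/// For example, `/// Hello` yields the text `" Hello"`, quotes included
/// (a sketch of the behavior, not a doctest):
///
/// ```ignore
/// // `comment` wraps the token `/// Hello`
/// assert_eq!(doc_comment_text(&comment), "\" Hello\"");
/// ```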
fn doc_comment_text(comment: &ast::Comment) -> SmolStr {
    let prefix_len = comment.prefix().len();
    let mut text = &comment.text()[prefix_len..];

    // Remove ending "*/"
    if comment.kind().shape == ast::CommentShape::Block {
        text = &text[0..text.len() - 2];
    }

    // Quote the string
    // Note that `tt::Literal` expects an escaped string
    let text = format!("\"{}\"", text.escape_debug());
    text.into()
}
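
/// Expand a doc comment token into the equivalent attribute token stream:
/// `/// comment` becomes `#[doc = " comment"]`, and `//! comment` becomes
/// `#![doc = " comment"]`.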
fn convert_doc_comment(
    token: &syntax::SyntaxToken,
    span: tt::TokenId,
) -> Option<Vec<tt::TokenTree>> {
    cov_mark::hit!(test_meta_doc_comments);
    let comment = ast::Comment::cast(token.clone())?;
    let doc = comment.kind().doc?;

    // Make `doc = "\" Comments\""`
    let meta_tkns =
        vec![mk_ident("doc", span), mk_punct('=', span), mk_doc_literal(&comment, span)];

    // Make `#![]`
    let mut token_trees = Vec::with_capacity(3);
    token_trees.push(mk_punct('#', span));
    if let ast::CommentPlacement::Inner = doc {
        token_trees.push(mk_punct('!', span));
    }
    token_trees.push(tt::TokenTree::from(tt::Subtree {
        delimiter: tt::Delimiter { open: span, close: span, kind: tt::DelimiterKind::Bracket },
        token_trees: meta_tkns,
    }));

    return Some(token_trees);

    // Helper functions
    fn mk_ident(s: &str, span: tt::TokenId) -> tt::TokenTree {
        tt::TokenTree::from(tt::Leaf::from(tt::Ident { text: s.into(), span }))
    }

    fn mk_punct(c: char, span: tt::TokenId) -> tt::TokenTree {
        tt::TokenTree::from(tt::Leaf::from(tt::Punct {
            char: c,
            spacing: tt::Spacing::Alone,
            span,
        }))
    }

    fn mk_doc_literal(comment: &ast::Comment, span: tt::TokenId) -> tt::TokenTree {
        let lit = tt::Literal { text: doc_comment_text(comment), span };

        tt::TokenTree::from(tt::Leaf::from(lit))
    }
}
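
/// Allocates fresh [`tt::TokenId`]s and records, in a [`TokenMap`], the text
/// range each id was created for.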
struct TokenIdAlloc {
    map: TokenMap,
    global_offset: TextSize,
    next_id: u32,
}

impl TokenIdAlloc {
    fn alloc(
        &mut self,
        absolute_range: TextRange,
        synthetic_id: Option<SyntheticTokenId>,
    ) -> tt::TokenId {
        let relative_range = absolute_range - self.global_offset;
        let token_id = tt::TokenId(self.next_id);
        self.next_id += 1;
        self.map.insert(token_id, relative_range);
        if let Some(id) = synthetic_id {
            self.map.insert_synthetic(token_id, id);
        }
        token_id
    }

    fn open_delim(
        &mut self,
        open_abs_range: TextRange,
        synthetic_id: Option<SyntheticTokenId>,
    ) -> (tt::TokenId, usize) {
        let token_id = tt::TokenId(self.next_id);
        self.next_id += 1;
        let idx = self.map.insert_delim(
            token_id,
            open_abs_range - self.global_offset,
            open_abs_range - self.global_offset,
        );
        if let Some(id) = synthetic_id {
            self.map.insert_synthetic(token_id, id);
        }
        (token_id, idx)
    }

    fn close_delim(&mut self, idx: usize, close_abs_range: Option<TextRange>) {
        match close_abs_range {
            None => {
                self.map.remove_delim(idx);
            }
            Some(close) => {
                self.map.update_close_delim(idx, close - self.global_offset);
            }
        }
    }

    fn peek_next_id(&self) -> tt::TokenId {
        tt::TokenId(self.next_id)
    }
}

/// A raw token (straight from the lexer) converter.
struct RawConverter<'a> {
    lexed: parser::LexedStr<'a>,
    pos: usize,
    id_alloc: TokenIdAlloc,
}
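
/// A single token as seen by a converter; `Ctx` is the converter type, which
/// resolves the token's kind, text, and (for synthetic tokens) id.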
trait SrcToken<Ctx>: std::fmt::Debug {
    fn kind(&self, ctx: &Ctx) -> SyntaxKind;

    fn to_char(&self, ctx: &Ctx) -> Option<char>;

    fn to_text(&self, ctx: &Ctx) -> SmolStr;

    fn synthetic_id(&self, ctx: &Ctx) -> Option<SyntheticTokenId>;
}
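
/// A source of tokens for [`convert_tokens`]: yields tokens together with
/// their text ranges and owns the [`TokenIdAlloc`] used to assign ids.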
trait TokenConverter: Sized {
    type Token: SrcToken<Self>;

    fn convert_doc_comment(
        &self,
        token: &Self::Token,
        span: tt::TokenId,
    ) -> Option<Vec<tt::TokenTree>>;

    fn bump(&mut self) -> Option<(Self::Token, TextRange)>;

    fn peek(&self) -> Option<Self::Token>;

    fn id_alloc(&mut self) -> &mut TokenIdAlloc;
}

impl<'a> SrcToken<RawConverter<'a>> for usize {
    fn kind(&self, ctx: &RawConverter<'a>) -> SyntaxKind {
        ctx.lexed.kind(*self)
    }

    fn to_char(&self, ctx: &RawConverter<'a>) -> Option<char> {
        ctx.lexed.text(*self).chars().next()
    }

    fn to_text(&self, ctx: &RawConverter<'_>) -> SmolStr {
        ctx.lexed.text(*self).into()
    }

    fn synthetic_id(&self, _ctx: &RawConverter<'a>) -> Option<SyntheticTokenId> {
        None
    }
}

impl<'a> TokenConverter for RawConverter<'a> {
    type Token = usize;

    fn convert_doc_comment(&self, &token: &usize, span: tt::TokenId) -> Option<Vec<tt::TokenTree>> {
        let text = self.lexed.text(token);
        convert_doc_comment(&doc_comment(text), span)
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if self.pos == self.lexed.len() {
            return None;
        }
        let token = self.pos;
        self.pos += 1;
        let range = self.lexed.text_range(token);
        let range = TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());

        Some((token, range))
    }

    fn peek(&self) -> Option<Self::Token> {
        if self.pos == self.lexed.len() {
            return None;
        }
        Some(self.pos)
    }

    fn id_alloc(&mut self) -> &mut TokenIdAlloc {
        &mut self.id_alloc
    }
}
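
/// A converter that walks an existing [`SyntaxNode`] tree in preorder,
/// optionally replacing subtrees or appending synthetic tokens at the
/// elements recorded in `replace` and `append`.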
struct Converter {
    id_alloc: TokenIdAlloc,
    current: Option<SyntaxToken>,
    current_synthetic: Vec<SyntheticToken>,
    preorder: PreorderWithTokens,
    replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
    append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
    range: TextRange,
    punct_offset: Option<(SyntaxToken, TextSize)>,
}

impl Converter {
    fn new(
        node: &SyntaxNode,
        global_offset: TextSize,
        existing_token_map: TokenMap,
        next_id: u32,
        mut replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
        mut append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
    ) -> Converter {
        let range = node.text_range();
        let mut preorder = node.preorder_with_tokens();
        let (first, synthetic) = Self::next_token(&mut preorder, &mut replace, &mut append);
        Converter {
            id_alloc: { TokenIdAlloc { map: existing_token_map, global_offset, next_id } },
            current: first,
            current_synthetic: synthetic,
            preorder,
            range,
            replace,
            append,
            punct_offset: None,
        }
    }

    fn next_token(
        preorder: &mut PreorderWithTokens,
        replace: &mut FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
        append: &mut FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
    ) -> (Option<SyntaxToken>, Vec<SyntheticToken>) {
        while let Some(ev) = preorder.next() {
            let ele = match ev {
                WalkEvent::Enter(ele) => ele,
                WalkEvent::Leave(ele) => {
                    if let Some(mut v) = append.remove(&ele) {
                        if !v.is_empty() {
                            v.reverse();
                            return (None, v);
                        }
                    }
                    continue;
                }
            };
            if let Some(mut v) = replace.remove(&ele) {
                preorder.skip_subtree();
                if !v.is_empty() {
                    v.reverse();
                    return (None, v);
                }
            }
            match ele {
                SyntaxElement::Token(t) => return (Some(t), Vec::new()),
                _ => {}
            }
        }
        (None, Vec::new())
    }
}
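
/// A token yielded by [`Converter`]: a plain syntax token, a single character
/// at a given offset within a (multi-char) punct token, or a synthetic token.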
#[derive(Debug)]
enum SynToken {
    Ordinary(SyntaxToken),
    // FIXME is this supposed to be `Punct`?
    Punch(SyntaxToken, TextSize),
    Synthetic(SyntheticToken),
}

impl SynToken {
    fn token(&self) -> Option<&SyntaxToken> {
        match self {
            SynToken::Ordinary(it) | SynToken::Punch(it, _) => Some(it),
            SynToken::Synthetic(_) => None,
        }
    }
}

impl SrcToken<Converter> for SynToken {
    fn kind(&self, ctx: &Converter) -> SyntaxKind {
        match self {
            SynToken::Ordinary(token) => token.kind(),
            SynToken::Punch(..) => SyntaxKind::from_char(self.to_char(ctx).unwrap()).unwrap(),
            SynToken::Synthetic(token) => token.kind,
        }
    }
    fn to_char(&self, _ctx: &Converter) -> Option<char> {
        match self {
            SynToken::Ordinary(_) => None,
            SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
            SynToken::Synthetic(token) if token.text.len() == 1 => token.text.chars().next(),
            SynToken::Synthetic(_) => None,
        }
    }
    fn to_text(&self, _ctx: &Converter) -> SmolStr {
        match self {
            SynToken::Ordinary(token) => token.text().into(),
            SynToken::Punch(token, _) => token.text().into(),
            SynToken::Synthetic(token) => token.text.clone(),
        }
    }

    fn synthetic_id(&self, _ctx: &Converter) -> Option<SyntheticTokenId> {
        match self {
            SynToken::Synthetic(token) => Some(token.id),
            _ => None,
        }
    }
}

impl TokenConverter for Converter {
    type Token = SynToken;
    fn convert_doc_comment(
        &self,
        token: &Self::Token,
        span: tt::TokenId,
    ) -> Option<Vec<tt::TokenTree>> {
        convert_doc_comment(token.token()?, span)
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if let Some((punct, offset)) = self.punct_offset.clone() {
            if usize::from(offset) + 1 < punct.text().len() {
                let offset = offset + TextSize::of('.');
                let range = punct.text_range();
                self.punct_offset = Some((punct.clone(), offset));
                let range = TextRange::at(range.start() + offset, TextSize::of('.'));
                return Some((SynToken::Punch(punct, offset), range));
            }
        }

        if let Some(synth_token) = self.current_synthetic.pop() {
            if self.current_synthetic.is_empty() {
                let (new_current, new_synth) =
                    Self::next_token(&mut self.preorder, &mut self.replace, &mut self.append);
                self.current = new_current;
                self.current_synthetic = new_synth;
            }
            let range = synth_token.range;
            return Some((SynToken::Synthetic(synth_token), range));
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }
        let (new_current, new_synth) =
            Self::next_token(&mut self.preorder, &mut self.replace, &mut self.append);
        self.current = new_current;
        self.current_synthetic = new_synth;
        let token = if curr.kind().is_punct() {
            self.punct_offset = Some((curr.clone(), 0.into()));
            let range = curr.text_range();
            let range = TextRange::at(range.start(), TextSize::of('.'));
            (SynToken::Punch(curr, 0.into()), range)
        } else {
            self.punct_offset = None;
            let range = curr.text_range();
            (SynToken::Ordinary(curr), range)
        };

        Some(token)
    }

    fn peek(&self) -> Option<Self::Token> {
        if let Some((punct, mut offset)) = self.punct_offset.clone() {
            offset += TextSize::of('.');
            if usize::from(offset) < punct.text().len() {
                return Some(SynToken::Punch(punct, offset));
            }
        }

        if let Some(synth_token) = self.current_synthetic.last() {
            return Some(SynToken::Synthetic(synth_token.clone()));
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        let token = if curr.kind().is_punct() {
            SynToken::Punch(curr, 0.into())
        } else {
            SynToken::Ordinary(curr)
        };
        Some(token)
    }

    fn id_alloc(&mut self) -> &mut TokenIdAlloc {
        &mut self.id_alloc
    }
}
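
/// A sink that consumes parser events and rebuilds a syntax tree, together
/// with a [`TokenMap`], from the tokens in a [`TokenBuffer`].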
struct TtTreeSink<'a> {
    buf: String,
    cursor: Cursor<'a>,
    open_delims: FxHashMap<tt::TokenId, TextSize>,
    text_pos: TextSize,
    inner: SyntaxTreeBuilder,
    token_map: TokenMap,
}

impl<'a> TtTreeSink<'a> {
    fn new(cursor: Cursor<'a>) -> Self {
        TtTreeSink {
            buf: String::new(),
            cursor,
            open_delims: FxHashMap::default(),
            text_pos: 0.into(),
            inner: SyntaxTreeBuilder::default(),
            token_map: TokenMap::default(),
        }
    }

    fn finish(mut self) -> (Parse<SyntaxNode>, TokenMap) {
        self.token_map.shrink_to_fit();
        (self.inner.finish(), self.token_map)
    }
}
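
/// The text for one side of a delimiter, e.g. `(Brace, true)` gives `"}"`;
/// invisible delimiters have no textual representation.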
fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> {
    let texts = match d {
        tt::DelimiterKind::Parenthesis => "()",
        tt::DelimiterKind::Brace => "{}",
        tt::DelimiterKind::Bracket => "[]",
        tt::DelimiterKind::Invisible => return None,
    };

    let idx = closing as usize;
    Some(&texts[idx..texts.len() - (1 - idx)])
}

impl<'a> TtTreeSink<'a> {
    /// Parses a float literal as if it were one or two name ref nodes with a
    /// dot in between. This occurs when a float literal is used as a field
    /// access.
    fn float_split(&mut self, has_pseudo_dot: bool) {
        let (text, _span) = match self.cursor.token_tree() {
            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Literal(lit), _)) => {
                (lit.text.as_str(), lit.span)
            }
            _ => unreachable!(),
        };
        match text.split_once('.') {
            Some((left, right)) => {
                assert!(!left.is_empty());
                self.inner.start_node(SyntaxKind::NAME_REF);
                self.inner.token(SyntaxKind::INT_NUMBER, left);
                self.inner.finish_node();

                // Here we move the exit up; the original exit has been deleted
                // in the process.
                self.inner.finish_node();

                self.inner.token(SyntaxKind::DOT, ".");

                if has_pseudo_dot {
                    assert!(right.is_empty(), "{left}.{right}");
                } else {
                    self.inner.start_node(SyntaxKind::NAME_REF);
                    self.inner.token(SyntaxKind::INT_NUMBER, right);
                    self.inner.finish_node();

                    // The parser creates an unbalanced start node, so we are
                    // required to close it here.
                    self.inner.finish_node();
                }
            }
            None => unreachable!(),
        }
        self.cursor = self.cursor.bump();
    }

    fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
        if kind == LIFETIME_IDENT {
            n_tokens = 2;
        }

        let mut last = self.cursor;
        for _ in 0..n_tokens {
            let tmp: u8;
            if self.cursor.eof() {
                break;
            }
            last = self.cursor;
            let text: &str = loop {
                break match self.cursor.token_tree() {
                    Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
                        // Mark the range if needed
                        let (text, id) = match leaf {
                            tt::Leaf::Ident(ident) => (ident.text.as_str(), ident.span),
                            tt::Leaf::Punct(punct) => {
                                assert!(punct.char.is_ascii());
                                tmp = punct.char as u8;
                                (
                                    std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(),
                                    punct.span,
                                )
                            }
                            tt::Leaf::Literal(lit) => (lit.text.as_str(), lit.span),
                        };
                        let range = TextRange::at(self.text_pos, TextSize::of(text));
                        self.token_map.insert(id, range);
                        self.cursor = self.cursor.bump();
                        text
                    }
                    Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
                        self.cursor = self.cursor.subtree().unwrap();
                        match delim_to_str(subtree.delimiter.kind, false) {
                            Some(it) => {
                                self.open_delims.insert(subtree.delimiter.open, self.text_pos);
                                it
                            }
                            None => continue,
                        }
                    }
                    None => {
                        let parent = self.cursor.end().unwrap();
                        self.cursor = self.cursor.bump();
                        match delim_to_str(parent.delimiter.kind, true) {
                            Some(it) => {
                                if let Some(open_delim) =
                                    self.open_delims.get(&parent.delimiter.open)
                                {
                                    let open_range = TextRange::at(*open_delim, TextSize::of('('));
                                    let close_range =
                                        TextRange::at(self.text_pos, TextSize::of('('));
                                    self.token_map.insert_delim(
                                        parent.delimiter.open,
                                        open_range,
                                        close_range,
                                    );
                                }
                                it
                            }
                            None => continue,
                        }
                    }
                };
            };
            self.buf += text;
            self.text_pos += TextSize::of(text);
        }

        self.inner.token(kind, self.buf.as_str());
        self.buf.clear();
        // Add whitespace between adjacent puncts
        let next = last.bump();
        if let (
            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(curr), _)),
            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(next), _)),
        ) = (last.token_tree(), next.token_tree())
        {
            // Note: We always assume a semicolon is the last token in other
            // parts of RA, so we don't add whitespace after it here.
            //
            // When `next` is a `Punct` of `'`, it is part of a lifetime
            // identifier, so we don't need to add whitespace either.
            if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' {
                self.inner.token(WHITESPACE, " ");
                self.text_pos += TextSize::of(' ');
            }
        }
    }
932 
start_node(&mut self, kind: SyntaxKind)933     fn start_node(&mut self, kind: SyntaxKind) {
934         self.inner.start_node(kind);
935     }
936 
finish_node(&mut self)937     fn finish_node(&mut self) {
938         self.inner.finish_node();
939     }
940 
error(&mut self, error: String)941     fn error(&mut self, error: String) {
942         self.inner.error(error, self.text_pos)
943     }
944 }
945