//! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].

use rustc_hash::FxHashMap;
use stdx::{always, non_empty_vec::NonEmptyVec};
use syntax::{
    ast::{self, make::tokens::doc_comment},
    AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, SyntaxKind,
    SyntaxKind::*,
    SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, WalkEvent, T,
};

use crate::{
    to_parser_input::to_parser_input,
    tt::{
        self,
        buffer::{Cursor, TokenBuffer},
    },
    tt_iter::TtIter,
    TokenMap,
};

#[cfg(test)]
mod tests;
/// Converts a syntax node to a `TokenTree` (what a macro
/// will consume).
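///
/// A minimal usage sketch (assuming `syntax::SourceFile::parse` as the parse
/// entry point; illustrative only, not a doctest):
///
/// ```ignore
/// let parse = syntax::SourceFile::parse("struct S;");
/// let tree = parse.tree();
/// let (subtree, token_map) = syntax_node_to_token_tree(tree.syntax());
/// // `subtree` is the flat token tree handed to macro expansion;
/// // `token_map` maps each `tt::TokenId` back to its source `TextRange`.
/// ```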
pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) {
    let (subtree, token_map, _) = syntax_node_to_token_tree_with_modifications(
        node,
        Default::default(),
        0,
        Default::default(),
        Default::default(),
    );
    (subtree, token_map)
}

/// Converts a syntax node to a `TokenTree` (what a macro will consume),
/// applying the given replacements and appended synthetic tokens along the way.
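///
/// A sketch of appending a synthetic semicolon after a node (the call shape
/// mirrors how fixup-style callers use this API; names are illustrative):
///
/// ```ignore
/// let mut append = FxHashMap::default();
/// append.insert(
///     SyntaxElement::from(node.clone()),
///     vec![SyntheticToken {
///         kind: SyntaxKind::SEMICOLON,
///         text: ";".into(),
///         range: TextRange::empty(node.text_range().end()),
///         id: SyntheticTokenId(0),
///     }],
/// );
/// let (subtree, map, next_id) = syntax_node_to_token_tree_with_modifications(
///     &node,
///     Default::default(), // existing token map
///     0,                  // next token id
///     Default::default(), // no replacements
///     append,
/// );
/// ```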
pub fn syntax_node_to_token_tree_with_modifications(
    node: &SyntaxNode,
    existing_token_map: TokenMap,
    next_id: u32,
    replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
    append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
) -> (tt::Subtree, TokenMap, u32) {
    let global_offset = node.text_range().start();
    let mut c = Converter::new(node, global_offset, existing_token_map, next_id, replace, append);
    let subtree = convert_tokens(&mut c);
    c.id_alloc.map.shrink_to_fit();
    always!(c.replace.is_empty(), "replace: {:?}", c.replace);
    always!(c.append.is_empty(), "append: {:?}", c.append);
    (subtree, c.id_alloc.map, c.id_alloc.next_id)
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct SyntheticTokenId(pub u32);

#[derive(Debug, Clone)]
pub struct SyntheticToken {
    pub kind: SyntaxKind,
    pub text: SmolStr,
    pub range: TextRange,
    pub id: SyntheticTokenId,
}

// The following items are what a `rustc` macro can be parsed into:
// link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
// * Expr(P<ast::Expr>) -> token_tree_to_expr
// * Pat(P<ast::Pat>) -> token_tree_to_pat
// * Ty(P<ast::Ty>) -> token_tree_to_ty
// * Stmts(SmallVec<[ast::Stmt; 1]>) -> token_tree_to_stmts
// * Items(SmallVec<[P<ast::Item>; 1]>) -> token_tree_to_items
//
// * TraitItems(SmallVec<[ast::TraitItem; 1]>)
// * AssocItems(SmallVec<[ast::AssocItem; 1]>)
// * ForeignItems(SmallVec<[ast::ForeignItem; 1]>

pub fn token_tree_to_syntax_node(
    tt: &tt::Subtree,
    entry_point: parser::TopEntryPoint,
) -> (Parse<SyntaxNode>, TokenMap) {
    let buffer = match tt {
        tt::Subtree {
            delimiter: tt::Delimiter { kind: tt::DelimiterKind::Invisible, .. },
            token_trees,
        } => TokenBuffer::from_tokens(token_trees.as_slice()),
        _ => TokenBuffer::from_subtree(tt),
    };
    let parser_input = to_parser_input(&buffer);
    let parser_output = entry_point.parse(&parser_input);
    let mut tree_sink = TtTreeSink::new(buffer.begin());
    for event in parser_output.iter() {
        match event {
            parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => {
                tree_sink.token(kind, n_raw_tokens)
            }
            parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
                tree_sink.float_split(has_pseudo_dot)
            }
            parser::Step::Enter { kind } => tree_sink.start_node(kind),
            parser::Step::Exit => tree_sink.finish_node(),
            parser::Step::Error { msg } => tree_sink.error(msg.to_string()),
        }
    }
    tree_sink.finish()
}

/// Convert a string to a `TokenTree`.
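///
/// Returns `None` when the text contains lexer errors. An illustrative sketch:
///
/// ```ignore
/// let (subtree, _map) = parse_to_token_tree("struct S;").unwrap();
/// assert!(parse_to_token_tree("\"unterminated").is_none()); // lex error
/// ```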
pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
    let lexed = parser::LexedStr::new(text);
    if lexed.errors().next().is_some() {
        return None;
    }

    let mut conv = RawConverter {
        lexed,
        pos: 0,
        id_alloc: TokenIdAlloc {
            map: Default::default(),
            global_offset: TextSize::default(),
            next_id: 0,
        },
    };

    let subtree = convert_tokens(&mut conv);
    Some((subtree, conv.id_alloc.map))
}

/// Splits a token tree into expressions separated by `sep`: `$($e:expr)SEP*`.
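///
/// For example (illustrative), splitting the token tree of `a, b + c, d` with
/// `sep = ','` yields three subtrees: `a`, `b + c`, and `d`.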
pub fn parse_exprs_with_sep(tt: &tt::Subtree, sep: char) -> Vec<tt::Subtree> {
    if tt.token_trees.is_empty() {
        return Vec::new();
    }

    let mut iter = TtIter::new(tt);
    let mut res = Vec::new();

    while iter.peek_n(0).is_some() {
        let expanded = iter.expect_fragment(parser::PrefixEntryPoint::Expr);

        res.push(match expanded.value {
            None => break,
            Some(tt @ tt::TokenTree::Leaf(_)) => {
                tt::Subtree { delimiter: tt::Delimiter::unspecified(), token_trees: vec![tt] }
            }
            Some(tt::TokenTree::Subtree(tt)) => tt,
        });

        let mut fork = iter.clone();
        if fork.expect_char(sep).is_err() {
            break;
        }
        iter = fork;
    }

    if iter.peek_n(0).is_some() {
        res.push(tt::Subtree {
            delimiter: tt::Delimiter::unspecified(),
            token_trees: iter.cloned().collect(),
        });
    }

    res
}

fn convert_tokens<C: TokenConverter>(conv: &mut C) -> tt::Subtree {
    struct StackEntry {
        subtree: tt::Subtree,
        idx: usize,
        open_range: TextRange,
    }

    let entry = StackEntry {
        subtree: tt::Subtree { delimiter: tt::Delimiter::unspecified(), token_trees: vec![] },
        // never used (the root subtree's delimiter kind is `Invisible`)
        idx: !0,
        open_range: TextRange::empty(TextSize::of('.')),
    };
    let mut stack = NonEmptyVec::new(entry);

    loop {
        let StackEntry { subtree, .. } = stack.last_mut();
        let result = &mut subtree.token_trees;
        let (token, range) = match conv.bump() {
            Some(it) => it,
            None => break,
        };
        let synth_id = token.synthetic_id(conv);

        let kind = token.kind(conv);
        if kind == COMMENT {
            // Since `convert_doc_comment` can fail, we need to peek the next id, so that we can
            // figure out which token id to use for the doc comment, if it is converted successfully.
            let next_id = conv.id_alloc().peek_next_id();
            if let Some(tokens) = conv.convert_doc_comment(&token, next_id) {
                let id = conv.id_alloc().alloc(range, synth_id);
                debug_assert_eq!(id, next_id);
                result.extend(tokens);
            }
            continue;
        }
        let tt = if kind.is_punct() && kind != UNDERSCORE {
            if synth_id.is_none() {
                assert_eq!(range.len(), TextSize::of('.'));
            }

            let expected = match subtree.delimiter.kind {
                tt::DelimiterKind::Parenthesis => Some(T![')']),
                tt::DelimiterKind::Brace => Some(T!['}']),
                tt::DelimiterKind::Bracket => Some(T![']']),
                tt::DelimiterKind::Invisible => None,
            };

            if let Some(expected) = expected {
                if kind == expected {
                    if let Some(entry) = stack.pop() {
                        conv.id_alloc().close_delim(entry.idx, Some(range));
                        stack.last_mut().subtree.token_trees.push(entry.subtree.into());
                    }
                    continue;
                }
            }

            let delim = match kind {
                T!['('] => Some(tt::DelimiterKind::Parenthesis),
                T!['{'] => Some(tt::DelimiterKind::Brace),
                T!['['] => Some(tt::DelimiterKind::Bracket),
                _ => None,
            };

            if let Some(kind) = delim {
                let (id, idx) = conv.id_alloc().open_delim(range, synth_id);
                let subtree = tt::Subtree {
                    delimiter: tt::Delimiter { open: id, close: tt::TokenId::UNSPECIFIED, kind },
                    token_trees: vec![],
                };
                stack.push(StackEntry { subtree, idx, open_range: range });
                continue;
            }

            let spacing = match conv.peek().map(|next| next.kind(conv)) {
                Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint,
                _ => tt::Spacing::Alone,
            };
            let char = match token.to_char(conv) {
                Some(c) => c,
                None => {
                    panic!("Token from lexer must be single char: token = {token:#?}");
                }
            };
            tt::Leaf::from(tt::Punct {
                char,
                spacing,
                span: conv.id_alloc().alloc(range, synth_id),
            })
            .into()
        } else {
            macro_rules! make_leaf {
                ($i:ident) => {
                    tt::$i {
                        span: conv.id_alloc().alloc(range, synth_id),
                        text: token.to_text(conv),
                    }
                    .into()
                };
            }
            let leaf: tt::Leaf = match kind {
                T![true] | T![false] => make_leaf!(Ident),
                IDENT => make_leaf!(Ident),
                UNDERSCORE => make_leaf!(Ident),
                k if k.is_keyword() => make_leaf!(Ident),
                k if k.is_literal() => make_leaf!(Literal),
                LIFETIME_IDENT => {
                    let char_unit = TextSize::of('\'');
                    let r = TextRange::at(range.start(), char_unit);
                    let apostrophe = tt::Leaf::from(tt::Punct {
                        char: '\'',
                        spacing: tt::Spacing::Joint,
                        span: conv.id_alloc().alloc(r, synth_id),
                    });
                    result.push(apostrophe.into());

                    let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
                    let ident = tt::Leaf::from(tt::Ident {
                        text: SmolStr::new(&token.to_text(conv)[1..]),
                        span: conv.id_alloc().alloc(r, synth_id),
                    });
                    result.push(ident.into());
                    continue;
                }
                _ => continue,
            };

            leaf.into()
        };
        result.push(tt);
    }

    // If we get here, we've consumed all input tokens.
    // We might have more than one subtree in the stack, if the delimiters are improperly balanced.
    // Merge them so we're left with one.
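    // For example (illustrative), an input like `(a` leaves the unclosed paren's
    // subtree on the stack; the loop below re-emits the `(` as a plain punct and
    // splices the subtree's children into the parent.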
    while let Some(entry) = stack.pop() {
        let parent = stack.last_mut();

        conv.id_alloc().close_delim(entry.idx, None);
        let leaf: tt::Leaf = tt::Punct {
            span: conv.id_alloc().alloc(entry.open_range, None),
            char: match entry.subtree.delimiter.kind {
                tt::DelimiterKind::Parenthesis => '(',
                tt::DelimiterKind::Brace => '{',
                tt::DelimiterKind::Bracket => '[',
                tt::DelimiterKind::Invisible => '$',
            },
            spacing: tt::Spacing::Alone,
        }
        .into();
        parent.subtree.token_trees.push(leaf.into());
        parent.subtree.token_trees.extend(entry.subtree.token_trees);
    }

    let subtree = stack.into_last().subtree;
    if let [tt::TokenTree::Subtree(first)] = &*subtree.token_trees {
        first.clone()
    } else {
        subtree
    }
}

fn is_single_token_op(kind: SyntaxKind) -> bool {
    matches!(
        kind,
        EQ | L_ANGLE
            | R_ANGLE
            | BANG
            | AMP
            | PIPE
            | TILDE
            | AT
            | DOT
            | COMMA
            | SEMICOLON
            | COLON
            | POUND
            | DOLLAR
            | QUESTION
            | PLUS
            | MINUS
            | STAR
            | SLASH
            | PERCENT
            | CARET
            // LIFETIME_IDENT will be split into a sequence of `'` (a single quote) and an
            // identifier.
            | LIFETIME_IDENT
    )
}

/// Returns the textual content of a doc comment block as a quoted string.
/// That is, it strips the leading `///` (or `/**`, etc.), strips the trailing
/// `*/` for block comments, and then quotes the string, which is needed to
/// convert it to a `tt::Literal`.
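///
/// For example (illustrative), `/// hello` yields the literal text `" hello"`
/// (note the preserved leading space and the added quotes).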
fn doc_comment_text(comment: &ast::Comment) -> SmolStr {
    let prefix_len = comment.prefix().len();
    let mut text = &comment.text()[prefix_len..];

    // Remove ending "*/"
    if comment.kind().shape == ast::CommentShape::Block {
        text = &text[0..text.len() - 2];
    }

    // Quote the string
    // Note that `tt::Literal` expects an escaped string
    let text = format!("\"{}\"", text.escape_debug());
    text.into()
}

fn convert_doc_comment(
    token: &syntax::SyntaxToken,
    span: tt::TokenId,
) -> Option<Vec<tt::TokenTree>> {
    cov_mark::hit!(test_meta_doc_comments);
    let comment = ast::Comment::cast(token.clone())?;
    let doc = comment.kind().doc?;

    // Make `doc = "\" Comments\""`
    let meta_tkns =
        vec![mk_ident("doc", span), mk_punct('=', span), mk_doc_literal(&comment, span)];

    // Make `#![]`
    let mut token_trees = Vec::with_capacity(3);
    token_trees.push(mk_punct('#', span));
    if let ast::CommentPlacement::Inner = doc {
        token_trees.push(mk_punct('!', span));
    }
    token_trees.push(tt::TokenTree::from(tt::Subtree {
        delimiter: tt::Delimiter { open: span, close: span, kind: tt::DelimiterKind::Bracket },
        token_trees: meta_tkns,
    }));

    return Some(token_trees);

    // Helper functions
    fn mk_ident(s: &str, span: tt::TokenId) -> tt::TokenTree {
        tt::TokenTree::from(tt::Leaf::from(tt::Ident { text: s.into(), span }))
    }

    fn mk_punct(c: char, span: tt::TokenId) -> tt::TokenTree {
        tt::TokenTree::from(tt::Leaf::from(tt::Punct {
            char: c,
            spacing: tt::Spacing::Alone,
            span,
        }))
    }

    fn mk_doc_literal(comment: &ast::Comment, span: tt::TokenId) -> tt::TokenTree {
        let lit = tt::Literal { text: doc_comment_text(comment), span };

        tt::TokenTree::from(tt::Leaf::from(lit))
    }
}

struct TokenIdAlloc {
    map: TokenMap,
    global_offset: TextSize,
    next_id: u32,
}

impl TokenIdAlloc {
    fn alloc(
        &mut self,
        absolute_range: TextRange,
        synthetic_id: Option<SyntheticTokenId>,
    ) -> tt::TokenId {
        let relative_range = absolute_range - self.global_offset;
        let token_id = tt::TokenId(self.next_id);
        self.next_id += 1;
        self.map.insert(token_id, relative_range);
        if let Some(id) = synthetic_id {
            self.map.insert_synthetic(token_id, id);
        }
        token_id
    }

    fn open_delim(
        &mut self,
        open_abs_range: TextRange,
        synthetic_id: Option<SyntheticTokenId>,
    ) -> (tt::TokenId, usize) {
        let token_id = tt::TokenId(self.next_id);
        self.next_id += 1;
        let idx = self.map.insert_delim(
            token_id,
            open_abs_range - self.global_offset,
            open_abs_range - self.global_offset,
        );
        if let Some(id) = synthetic_id {
            self.map.insert_synthetic(token_id, id);
        }
        (token_id, idx)
    }

    fn close_delim(&mut self, idx: usize, close_abs_range: Option<TextRange>) {
        match close_abs_range {
            None => {
                self.map.remove_delim(idx);
            }
            Some(close) => {
                self.map.update_close_delim(idx, close - self.global_offset);
            }
        }
    }

    fn peek_next_id(&self) -> tt::TokenId {
        tt::TokenId(self.next_id)
    }
}

/// A raw token (straight from the lexer) converter
struct RawConverter<'a> {
    lexed: parser::LexedStr<'a>,
    pos: usize,
    id_alloc: TokenIdAlloc,
}

trait SrcToken<Ctx>: std::fmt::Debug {
    fn kind(&self, ctx: &Ctx) -> SyntaxKind;

    fn to_char(&self, ctx: &Ctx) -> Option<char>;

    fn to_text(&self, ctx: &Ctx) -> SmolStr;

    fn synthetic_id(&self, ctx: &Ctx) -> Option<SyntheticTokenId>;
}

trait TokenConverter: Sized {
    type Token: SrcToken<Self>;

    fn convert_doc_comment(
        &self,
        token: &Self::Token,
        span: tt::TokenId,
    ) -> Option<Vec<tt::TokenTree>>;

    fn bump(&mut self) -> Option<(Self::Token, TextRange)>;

    fn peek(&self) -> Option<Self::Token>;

    fn id_alloc(&mut self) -> &mut TokenIdAlloc;
}

impl<'a> SrcToken<RawConverter<'a>> for usize {
    fn kind(&self, ctx: &RawConverter<'a>) -> SyntaxKind {
        ctx.lexed.kind(*self)
    }

    fn to_char(&self, ctx: &RawConverter<'a>) -> Option<char> {
        ctx.lexed.text(*self).chars().next()
    }

    fn to_text(&self, ctx: &RawConverter<'_>) -> SmolStr {
        ctx.lexed.text(*self).into()
    }

    fn synthetic_id(&self, _ctx: &RawConverter<'a>) -> Option<SyntheticTokenId> {
        None
    }
}

impl<'a> TokenConverter for RawConverter<'a> {
    type Token = usize;

    fn convert_doc_comment(&self, &token: &usize, span: tt::TokenId) -> Option<Vec<tt::TokenTree>> {
        let text = self.lexed.text(token);
        convert_doc_comment(&doc_comment(text), span)
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if self.pos == self.lexed.len() {
            return None;
        }
        let token = self.pos;
        self.pos += 1;
        let range = self.lexed.text_range(token);
        let range = TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());

        Some((token, range))
    }

    fn peek(&self) -> Option<Self::Token> {
        if self.pos == self.lexed.len() {
            return None;
        }
        Some(self.pos)
    }

    fn id_alloc(&mut self) -> &mut TokenIdAlloc {
        &mut self.id_alloc
    }
}

struct Converter {
    id_alloc: TokenIdAlloc,
    current: Option<SyntaxToken>,
    current_synthetic: Vec<SyntheticToken>,
    preorder: PreorderWithTokens,
    replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
    append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
    range: TextRange,
    punct_offset: Option<(SyntaxToken, TextSize)>,
}

impl Converter {
    fn new(
        node: &SyntaxNode,
        global_offset: TextSize,
        existing_token_map: TokenMap,
        next_id: u32,
        mut replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
        mut append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
    ) -> Converter {
        let range = node.text_range();
        let mut preorder = node.preorder_with_tokens();
        let (first, synthetic) = Self::next_token(&mut preorder, &mut replace, &mut append);
        Converter {
            id_alloc: { TokenIdAlloc { map: existing_token_map, global_offset, next_id } },
            current: first,
            current_synthetic: synthetic,
            preorder,
            range,
            replace,
            append,
            punct_offset: None,
        }
    }

    fn next_token(
        preorder: &mut PreorderWithTokens,
        replace: &mut FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
        append: &mut FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
    ) -> (Option<SyntaxToken>, Vec<SyntheticToken>) {
        while let Some(ev) = preorder.next() {
            let ele = match ev {
                WalkEvent::Enter(ele) => ele,
                WalkEvent::Leave(ele) => {
                    if let Some(mut v) = append.remove(&ele) {
                        if !v.is_empty() {
                            v.reverse();
                            return (None, v);
                        }
                    }
                    continue;
                }
            };
            if let Some(mut v) = replace.remove(&ele) {
                preorder.skip_subtree();
                if !v.is_empty() {
                    v.reverse();
                    return (None, v);
                }
            }
            match ele {
                SyntaxElement::Token(t) => return (Some(t), Vec::new()),
                _ => {}
            }
        }
        (None, Vec::new())
    }
}

#[derive(Debug)]
enum SynToken {
    Ordinary(SyntaxToken),
    // FIXME is this supposed to be `Punct`?
    Punch(SyntaxToken, TextSize),
    Synthetic(SyntheticToken),
}

impl SynToken {
    fn token(&self) -> Option<&SyntaxToken> {
        match self {
            SynToken::Ordinary(it) | SynToken::Punch(it, _) => Some(it),
            SynToken::Synthetic(_) => None,
        }
    }
}

impl SrcToken<Converter> for SynToken {
    fn kind(&self, ctx: &Converter) -> SyntaxKind {
        match self {
            SynToken::Ordinary(token) => token.kind(),
            SynToken::Punch(..) => SyntaxKind::from_char(self.to_char(ctx).unwrap()).unwrap(),
            SynToken::Synthetic(token) => token.kind,
        }
    }
    fn to_char(&self, _ctx: &Converter) -> Option<char> {
        match self {
            SynToken::Ordinary(_) => None,
            SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
            SynToken::Synthetic(token) if token.text.len() == 1 => token.text.chars().next(),
            SynToken::Synthetic(_) => None,
        }
    }
    fn to_text(&self, _ctx: &Converter) -> SmolStr {
        match self {
            SynToken::Ordinary(token) => token.text().into(),
            SynToken::Punch(token, _) => token.text().into(),
            SynToken::Synthetic(token) => token.text.clone(),
        }
    }

    fn synthetic_id(&self, _ctx: &Converter) -> Option<SyntheticTokenId> {
        match self {
            SynToken::Synthetic(token) => Some(token.id),
            _ => None,
        }
    }
}

impl TokenConverter for Converter {
    type Token = SynToken;
    fn convert_doc_comment(
        &self,
        token: &Self::Token,
        span: tt::TokenId,
    ) -> Option<Vec<tt::TokenTree>> {
        convert_doc_comment(token.token()?, span)
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if let Some((punct, offset)) = self.punct_offset.clone() {
            if usize::from(offset) + 1 < punct.text().len() {
                let offset = offset + TextSize::of('.');
                let range = punct.text_range();
                self.punct_offset = Some((punct.clone(), offset));
                let range = TextRange::at(range.start() + offset, TextSize::of('.'));
                return Some((SynToken::Punch(punct, offset), range));
            }
        }

        if let Some(synth_token) = self.current_synthetic.pop() {
            if self.current_synthetic.is_empty() {
                let (new_current, new_synth) =
                    Self::next_token(&mut self.preorder, &mut self.replace, &mut self.append);
                self.current = new_current;
                self.current_synthetic = new_synth;
            }
            let range = synth_token.range;
            return Some((SynToken::Synthetic(synth_token), range));
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }
        let (new_current, new_synth) =
            Self::next_token(&mut self.preorder, &mut self.replace, &mut self.append);
        self.current = new_current;
        self.current_synthetic = new_synth;
        let token = if curr.kind().is_punct() {
            self.punct_offset = Some((curr.clone(), 0.into()));
            let range = curr.text_range();
            let range = TextRange::at(range.start(), TextSize::of('.'));
            (SynToken::Punch(curr, 0.into()), range)
        } else {
            self.punct_offset = None;
            let range = curr.text_range();
            (SynToken::Ordinary(curr), range)
        };

        Some(token)
    }

    fn peek(&self) -> Option<Self::Token> {
        if let Some((punct, mut offset)) = self.punct_offset.clone() {
            offset += TextSize::of('.');
            if usize::from(offset) < punct.text().len() {
                return Some(SynToken::Punch(punct, offset));
            }
        }

        if let Some(synth_token) = self.current_synthetic.last() {
            return Some(SynToken::Synthetic(synth_token.clone()));
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        let token = if curr.kind().is_punct() {
            SynToken::Punch(curr, 0.into())
        } else {
            SynToken::Ordinary(curr)
        };
        Some(token)
    }

    fn id_alloc(&mut self) -> &mut TokenIdAlloc {
        &mut self.id_alloc
    }
}

struct TtTreeSink<'a> {
    buf: String,
    cursor: Cursor<'a>,
    open_delims: FxHashMap<tt::TokenId, TextSize>,
    text_pos: TextSize,
    inner: SyntaxTreeBuilder,
    token_map: TokenMap,
}

impl<'a> TtTreeSink<'a> {
    fn new(cursor: Cursor<'a>) -> Self {
        TtTreeSink {
            buf: String::new(),
            cursor,
            open_delims: FxHashMap::default(),
            text_pos: 0.into(),
            inner: SyntaxTreeBuilder::default(),
            token_map: TokenMap::default(),
        }
    }

    fn finish(mut self) -> (Parse<SyntaxNode>, TokenMap) {
        self.token_map.shrink_to_fit();
        (self.inner.finish(), self.token_map)
    }
}

fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> {
    let texts = match d {
        tt::DelimiterKind::Parenthesis => "()",
        tt::DelimiterKind::Brace => "{}",
        tt::DelimiterKind::Bracket => "[]",
        tt::DelimiterKind::Invisible => return None,
    };

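    // `closing as usize` is 0 for the opening char and 1 for the closing one;
    // the slice below keeps exactly that single ASCII byte of `texts`.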
    let idx = closing as usize;
    Some(&texts[idx..texts.len() - (1 - idx)])
}

impl<'a> TtTreeSink<'a> {
    /// Parses a float literal as if it were one or two name-ref nodes with a dot in between.
    /// This occurs when a float literal is used as a field access.
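    /// For example (illustrative), in `tup.1.2` the `1.2` is lexed as a single
    /// float token, and the parser asks us to re-emit it here as `1`, `.`, `2`.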
    fn float_split(&mut self, has_pseudo_dot: bool) {
        let (text, _span) = match self.cursor.token_tree() {
            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Literal(lit), _)) => {
                (lit.text.as_str(), lit.span)
            }
            _ => unreachable!(),
        };
        match text.split_once('.') {
            Some((left, right)) => {
                assert!(!left.is_empty());
                self.inner.start_node(SyntaxKind::NAME_REF);
                self.inner.token(SyntaxKind::INT_NUMBER, left);
                self.inner.finish_node();

                // here we move the exit up; the original exit has been deleted in the process
                self.inner.finish_node();

                self.inner.token(SyntaxKind::DOT, ".");

                if has_pseudo_dot {
                    assert!(right.is_empty(), "{left}.{right}");
                } else {
                    self.inner.start_node(SyntaxKind::NAME_REF);
                    self.inner.token(SyntaxKind::INT_NUMBER, right);
                    self.inner.finish_node();

                    // the parser creates an unbalanced start node, we are required to close it here
                    self.inner.finish_node();
                }
            }
            None => unreachable!(),
        }
        self.cursor = self.cursor.bump();
    }

    fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
        if kind == LIFETIME_IDENT {
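            // A lifetime was lowered into two leaves (a `'` punct plus an ident)
            // during conversion, so it consumes two input token trees here.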
            n_tokens = 2;
        }

        let mut last = self.cursor;
        for _ in 0..n_tokens {
            let tmp: u8;
            if self.cursor.eof() {
                break;
            }
            last = self.cursor;
            let text: &str = loop {
                break match self.cursor.token_tree() {
                    Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
                        // Mark the range if needed
                        let (text, id) = match leaf {
                            tt::Leaf::Ident(ident) => (ident.text.as_str(), ident.span),
                            tt::Leaf::Punct(punct) => {
                                assert!(punct.char.is_ascii());
                                tmp = punct.char as u8;
                                (
                                    std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(),
                                    punct.span,
                                )
                            }
                            tt::Leaf::Literal(lit) => (lit.text.as_str(), lit.span),
                        };
                        let range = TextRange::at(self.text_pos, TextSize::of(text));
                        self.token_map.insert(id, range);
                        self.cursor = self.cursor.bump();
                        text
                    }
                    Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
                        self.cursor = self.cursor.subtree().unwrap();
                        match delim_to_str(subtree.delimiter.kind, false) {
                            Some(it) => {
                                self.open_delims.insert(subtree.delimiter.open, self.text_pos);
                                it
                            }
                            None => continue,
                        }
                    }
                    None => {
                        let parent = self.cursor.end().unwrap();
                        self.cursor = self.cursor.bump();
                        match delim_to_str(parent.delimiter.kind, true) {
                            Some(it) => {
                                if let Some(open_delim) =
                                    self.open_delims.get(&parent.delimiter.open)
                                {
                                    let open_range = TextRange::at(*open_delim, TextSize::of('('));
                                    let close_range =
                                        TextRange::at(self.text_pos, TextSize::of('('));
                                    self.token_map.insert_delim(
                                        parent.delimiter.open,
                                        open_range,
                                        close_range,
                                    );
                                }
                                it
                            }
                            None => continue,
                        }
                    }
                };
            };
            self.buf += text;
            self.text_pos += TextSize::of(text);
        }

        self.inner.token(kind, self.buf.as_str());
        self.buf.clear();
        // Add whitespace between adjacent puncts
        let next = last.bump();
        if let (
            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(curr), _)),
            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(next), _)),
        ) = (last.token_tree(), next.token_tree())
        {
            // Note: other parts of RA assume a semicolon is the last token of
            // its expression, so we don't add whitespace after it here.
            //
            // When `next` is a `'` punct, it is part of a lifetime identifier,
            // so we don't need to add whitespace there either.
            if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' {
                self.inner.token(WHITESPACE, " ");
                self.text_pos += TextSize::of(' ');
            }
        }
    }

    fn start_node(&mut self, kind: SyntaxKind) {
        self.inner.start_node(kind);
    }

    fn finish_node(&mut self) {
        self.inner.finish_node();
    }

    fn error(&mut self, error: String) {
        self.inner.error(error, self.text_pos)
    }
}