• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
//! Shortcuts that span lexer/parser abstraction.
//!
//! The way Rust works, the parser doesn't necessarily parse text, and you might
//! tokenize text without parsing it further. So, it makes sense to keep
//! abstract token parsing and string tokenization as completely separate
//! layers.
//!
//! However, often you do parse text into syntax trees and the glue code for
//! that needs to live somewhere. Rather than putting it into the lexer or
//! parser, we use a separate shortcuts module for that.
11 
12 use std::mem;
13 
14 use crate::{
15     LexedStr, Step,
16     SyntaxKind::{self, *},
17 };
18 
19 #[derive(Debug)]
20 pub enum StrStep<'a> {
21     Token { kind: SyntaxKind, text: &'a str },
22     Enter { kind: SyntaxKind },
23     Exit,
24     Error { msg: &'a str, pos: usize },
25 }
26 
27 impl<'a> LexedStr<'a> {
to_input(&self) -> crate::Input28     pub fn to_input(&self) -> crate::Input {
29         let mut res = crate::Input::default();
30         let mut was_joint = false;
31         for i in 0..self.len() {
32             let kind = self.kind(i);
33             if kind.is_trivia() {
34                 was_joint = false
35             } else {
36                 if kind == SyntaxKind::IDENT {
37                     let token_text = self.text(i);
38                     let contextual_kw = SyntaxKind::from_contextual_keyword(token_text)
39                         .unwrap_or(SyntaxKind::IDENT);
40                     res.push_ident(contextual_kw);
41                 } else {
42                     if was_joint {
43                         res.was_joint();
44                     }
45                     res.push(kind);
46                     // Tag the token as joint if it is float with a fractional part
47                     // we use this jointness to inform the parser about what token split
48                     // event to emit when we encounter a float literal in a field access
49                     if kind == SyntaxKind::FLOAT_NUMBER && !self.text(i).ends_with('.') {
50                         res.was_joint();
51                     }
52                 }
53 
54                 was_joint = true;
55             }
56         }
57         res
58     }
59 
60     /// NB: only valid to call with Output from Reparser/TopLevelEntry.
intersperse_trivia( &self, output: &crate::Output, sink: &mut dyn FnMut(StrStep<'_>), ) -> bool61     pub fn intersperse_trivia(
62         &self,
63         output: &crate::Output,
64         sink: &mut dyn FnMut(StrStep<'_>),
65     ) -> bool {
66         let mut builder = Builder { lexed: self, pos: 0, state: State::PendingEnter, sink };
67 
68         for event in output.iter() {
69             match event {
70                 Step::Token { kind, n_input_tokens: n_raw_tokens } => {
71                     builder.token(kind, n_raw_tokens)
72                 }
73                 Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
74                     builder.float_split(has_pseudo_dot)
75                 }
76                 Step::Enter { kind } => builder.enter(kind),
77                 Step::Exit => builder.exit(),
78                 Step::Error { msg } => {
79                     let text_pos = builder.lexed.text_start(builder.pos);
80                     (builder.sink)(StrStep::Error { msg, pos: text_pos });
81                 }
82             }
83         }
84 
85         match mem::replace(&mut builder.state, State::Normal) {
86             State::PendingExit => {
87                 builder.eat_trivias();
88                 (builder.sink)(StrStep::Exit);
89             }
90             State::PendingEnter | State::Normal => unreachable!(),
91         }
92 
93         // is_eof?
94         builder.pos == builder.lexed.len()
95     }
96 }
97 
98 struct Builder<'a, 'b> {
99     lexed: &'a LexedStr<'a>,
100     pos: usize,
101     state: State,
102     sink: &'b mut dyn FnMut(StrStep<'_>),
103 }
104 
/// Bookkeeping state of a `Builder`.
enum State {
    /// Nothing has been emitted yet; the first enter is still expected.
    PendingEnter,
    /// No event is being held back.
    Normal,
    /// An exit was requested but has not been forwarded to the sink yet.
    PendingExit,
}
110 
111 impl Builder<'_, '_> {
token(&mut self, kind: SyntaxKind, n_tokens: u8)112     fn token(&mut self, kind: SyntaxKind, n_tokens: u8) {
113         match mem::replace(&mut self.state, State::Normal) {
114             State::PendingEnter => unreachable!(),
115             State::PendingExit => (self.sink)(StrStep::Exit),
116             State::Normal => (),
117         }
118         self.eat_trivias();
119         self.do_token(kind, n_tokens as usize);
120     }
121 
float_split(&mut self, has_pseudo_dot: bool)122     fn float_split(&mut self, has_pseudo_dot: bool) {
123         match mem::replace(&mut self.state, State::Normal) {
124             State::PendingEnter => unreachable!(),
125             State::PendingExit => (self.sink)(StrStep::Exit),
126             State::Normal => (),
127         }
128         self.eat_trivias();
129         self.do_float_split(has_pseudo_dot);
130     }
131 
enter(&mut self, kind: SyntaxKind)132     fn enter(&mut self, kind: SyntaxKind) {
133         match mem::replace(&mut self.state, State::Normal) {
134             State::PendingEnter => {
135                 (self.sink)(StrStep::Enter { kind });
136                 // No need to attach trivias to previous node: there is no
137                 // previous node.
138                 return;
139             }
140             State::PendingExit => (self.sink)(StrStep::Exit),
141             State::Normal => (),
142         }
143 
144         let n_trivias =
145             (self.pos..self.lexed.len()).take_while(|&it| self.lexed.kind(it).is_trivia()).count();
146         let leading_trivias = self.pos..self.pos + n_trivias;
147         let n_attached_trivias = n_attached_trivias(
148             kind,
149             leading_trivias.rev().map(|it| (self.lexed.kind(it), self.lexed.text(it))),
150         );
151         self.eat_n_trivias(n_trivias - n_attached_trivias);
152         (self.sink)(StrStep::Enter { kind });
153         self.eat_n_trivias(n_attached_trivias);
154     }
155 
exit(&mut self)156     fn exit(&mut self) {
157         match mem::replace(&mut self.state, State::PendingExit) {
158             State::PendingEnter => unreachable!(),
159             State::PendingExit => (self.sink)(StrStep::Exit),
160             State::Normal => (),
161         }
162     }
163 
eat_trivias(&mut self)164     fn eat_trivias(&mut self) {
165         while self.pos < self.lexed.len() {
166             let kind = self.lexed.kind(self.pos);
167             if !kind.is_trivia() {
168                 break;
169             }
170             self.do_token(kind, 1);
171         }
172     }
173 
eat_n_trivias(&mut self, n: usize)174     fn eat_n_trivias(&mut self, n: usize) {
175         for _ in 0..n {
176             let kind = self.lexed.kind(self.pos);
177             assert!(kind.is_trivia());
178             self.do_token(kind, 1);
179         }
180     }
181 
do_token(&mut self, kind: SyntaxKind, n_tokens: usize)182     fn do_token(&mut self, kind: SyntaxKind, n_tokens: usize) {
183         let text = &self.lexed.range_text(self.pos..self.pos + n_tokens);
184         self.pos += n_tokens;
185         (self.sink)(StrStep::Token { kind, text });
186     }
187 
do_float_split(&mut self, has_pseudo_dot: bool)188     fn do_float_split(&mut self, has_pseudo_dot: bool) {
189         let text = &self.lexed.range_text(self.pos..self.pos + 1);
190         self.pos += 1;
191         match text.split_once('.') {
192             Some((left, right)) => {
193                 assert!(!left.is_empty());
194                 (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF });
195                 (self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: left });
196                 (self.sink)(StrStep::Exit);
197 
198                 // here we move the exit up, the original exit has been deleted in process
199                 (self.sink)(StrStep::Exit);
200 
201                 (self.sink)(StrStep::Token { kind: SyntaxKind::DOT, text: "." });
202 
203                 if has_pseudo_dot {
204                     assert!(right.is_empty(), "{left}.{right}");
205                     self.state = State::Normal;
206                 } else {
207                     (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF });
208                     (self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: right });
209                     (self.sink)(StrStep::Exit);
210 
211                     // the parser creates an unbalanced start node, we are required to close it here
212                     self.state = State::PendingExit;
213                 }
214             }
215             None => unreachable!(),
216         }
217     }
218 }
219 
n_attached_trivias<'a>( kind: SyntaxKind, trivias: impl Iterator<Item = (SyntaxKind, &'a str)>, ) -> usize220 fn n_attached_trivias<'a>(
221     kind: SyntaxKind,
222     trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
223 ) -> usize {
224     match kind {
225         CONST | ENUM | FN | IMPL | MACRO_CALL | MACRO_DEF | MACRO_RULES | MODULE | RECORD_FIELD
226         | STATIC | STRUCT | TRAIT | TUPLE_FIELD | TYPE_ALIAS | UNION | USE | VARIANT => {
227             let mut res = 0;
228             let mut trivias = trivias.enumerate().peekable();
229 
230             while let Some((i, (kind, text))) = trivias.next() {
231                 match kind {
232                     WHITESPACE if text.contains("\n\n") => {
233                         // we check whether the next token is a doc-comment
234                         // and skip the whitespace in this case
235                         if let Some((COMMENT, peek_text)) = trivias.peek().map(|(_, pair)| pair) {
236                             if is_outer(peek_text) {
237                                 continue;
238                             }
239                         }
240                         break;
241                     }
242                     COMMENT => {
243                         if is_inner(text) {
244                             break;
245                         }
246                         res = i + 1;
247                     }
248                     _ => (),
249                 }
250             }
251             res
252         }
253         _ => 0,
254     }
255 }
256 
/// Returns `true` if `text` starts like an outer doc comment (`///` or `/**`).
///
/// Prefixes that merely look similar are rejected: `////` and `/***` start
/// ordinary comments, and `/**/` is an empty block comment rather than an
/// empty doc comment.
fn is_outer(text: &str) -> bool {
    let is_plain_comment =
        text.starts_with("////") || text.starts_with("/***") || text.starts_with("/**/");
    if is_plain_comment {
        return false;
    }
    text.starts_with("///") || text.starts_with("/**")
}
263 
/// Returns `true` if `text` starts like an inner doc comment (`//!` or `/*!`).
fn is_inner(text: &str) -> bool {
    const INNER_DOC_PREFIXES: [&str; 2] = ["//!", "/*!"];
    INNER_DOC_PREFIXES.iter().any(|prefix| text.starts_with(*prefix))
}
267