1 //! Shortcuts that span lexer/parser abstraction.
2 //!
//! The way Rust works, the parser doesn't necessarily parse text, and you
//! might tokenize text without parsing it further. So, it makes sense to keep
//! abstract token parsing and string tokenization as completely separate
//! layers.
7 //!
//! However, often you do parse text into syntax trees, and the glue code for
//! that needs to live somewhere. Rather than putting it into the lexer or the
//! parser, we use a separate shortcuts module for that.
11
12 use std::mem;
13
14 use crate::{
15 LexedStr, Step,
16 SyntaxKind::{self, *},
17 };
18
/// One event handed to the sink by `LexedStr::intersperse_trivia`.
///
/// The lifetime `'a` bounds the string slices carried by the event.
#[derive(Debug)]
pub enum StrStep<'a> {
    /// A token of the given kind together with the source text it covers.
    Token { kind: SyntaxKind, text: &'a str },
    /// Start of a node of the given kind.
    Enter { kind: SyntaxKind },
    /// End of a node previously started with `Enter`.
    Exit,
    /// A parser error message. `pos` is the `text_start` of the token the
    /// builder was positioned at when the error event was processed
    /// (presumably a text offset; confirm against `LexedStr::text_start`).
    Error { msg: &'a str, pos: usize },
}
26
27 impl<'a> LexedStr<'a> {
to_input(&self) -> crate::Input28 pub fn to_input(&self) -> crate::Input {
29 let mut res = crate::Input::default();
30 let mut was_joint = false;
31 for i in 0..self.len() {
32 let kind = self.kind(i);
33 if kind.is_trivia() {
34 was_joint = false
35 } else {
36 if kind == SyntaxKind::IDENT {
37 let token_text = self.text(i);
38 let contextual_kw = SyntaxKind::from_contextual_keyword(token_text)
39 .unwrap_or(SyntaxKind::IDENT);
40 res.push_ident(contextual_kw);
41 } else {
42 if was_joint {
43 res.was_joint();
44 }
45 res.push(kind);
46 // Tag the token as joint if it is float with a fractional part
47 // we use this jointness to inform the parser about what token split
48 // event to emit when we encounter a float literal in a field access
49 if kind == SyntaxKind::FLOAT_NUMBER && !self.text(i).ends_with('.') {
50 res.was_joint();
51 }
52 }
53
54 was_joint = true;
55 }
56 }
57 res
58 }
59
60 /// NB: only valid to call with Output from Reparser/TopLevelEntry.
intersperse_trivia( &self, output: &crate::Output, sink: &mut dyn FnMut(StrStep<'_>), ) -> bool61 pub fn intersperse_trivia(
62 &self,
63 output: &crate::Output,
64 sink: &mut dyn FnMut(StrStep<'_>),
65 ) -> bool {
66 let mut builder = Builder { lexed: self, pos: 0, state: State::PendingEnter, sink };
67
68 for event in output.iter() {
69 match event {
70 Step::Token { kind, n_input_tokens: n_raw_tokens } => {
71 builder.token(kind, n_raw_tokens)
72 }
73 Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
74 builder.float_split(has_pseudo_dot)
75 }
76 Step::Enter { kind } => builder.enter(kind),
77 Step::Exit => builder.exit(),
78 Step::Error { msg } => {
79 let text_pos = builder.lexed.text_start(builder.pos);
80 (builder.sink)(StrStep::Error { msg, pos: text_pos });
81 }
82 }
83 }
84
85 match mem::replace(&mut builder.state, State::Normal) {
86 State::PendingExit => {
87 builder.eat_trivias();
88 (builder.sink)(StrStep::Exit);
89 }
90 State::PendingEnter | State::Normal => unreachable!(),
91 }
92
93 // is_eof?
94 builder.pos == builder.lexed.len()
95 }
96 }
97
/// Helper used by `LexedStr::intersperse_trivia` to turn parser events into
/// `StrStep`s while walking the lexed tokens.
struct Builder<'a, 'b> {
    /// The lexed text the events refer to.
    lexed: &'a LexedStr<'a>,
    /// Index into `lexed` of the next token that has not been emitted yet.
    pos: usize,
    /// Tracks whether an enter/exit event is still outstanding.
    state: State,
    /// Receiver of the produced `StrStep`s.
    sink: &'b mut dyn FnMut(StrStep<'_>),
}
104
/// State of the `Builder` between two parser events.
enum State {
    /// Initial state: no enter event has been processed yet. The first enter
    /// is emitted without attaching any trivia.
    PendingEnter,
    /// No event is being held back.
    Normal,
    /// An exit event was received but not yet forwarded to the sink; it is
    /// emitted when the next event arrives or when the output ends.
    PendingExit,
}
110
111 impl Builder<'_, '_> {
token(&mut self, kind: SyntaxKind, n_tokens: u8)112 fn token(&mut self, kind: SyntaxKind, n_tokens: u8) {
113 match mem::replace(&mut self.state, State::Normal) {
114 State::PendingEnter => unreachable!(),
115 State::PendingExit => (self.sink)(StrStep::Exit),
116 State::Normal => (),
117 }
118 self.eat_trivias();
119 self.do_token(kind, n_tokens as usize);
120 }
121
float_split(&mut self, has_pseudo_dot: bool)122 fn float_split(&mut self, has_pseudo_dot: bool) {
123 match mem::replace(&mut self.state, State::Normal) {
124 State::PendingEnter => unreachable!(),
125 State::PendingExit => (self.sink)(StrStep::Exit),
126 State::Normal => (),
127 }
128 self.eat_trivias();
129 self.do_float_split(has_pseudo_dot);
130 }
131
enter(&mut self, kind: SyntaxKind)132 fn enter(&mut self, kind: SyntaxKind) {
133 match mem::replace(&mut self.state, State::Normal) {
134 State::PendingEnter => {
135 (self.sink)(StrStep::Enter { kind });
136 // No need to attach trivias to previous node: there is no
137 // previous node.
138 return;
139 }
140 State::PendingExit => (self.sink)(StrStep::Exit),
141 State::Normal => (),
142 }
143
144 let n_trivias =
145 (self.pos..self.lexed.len()).take_while(|&it| self.lexed.kind(it).is_trivia()).count();
146 let leading_trivias = self.pos..self.pos + n_trivias;
147 let n_attached_trivias = n_attached_trivias(
148 kind,
149 leading_trivias.rev().map(|it| (self.lexed.kind(it), self.lexed.text(it))),
150 );
151 self.eat_n_trivias(n_trivias - n_attached_trivias);
152 (self.sink)(StrStep::Enter { kind });
153 self.eat_n_trivias(n_attached_trivias);
154 }
155
exit(&mut self)156 fn exit(&mut self) {
157 match mem::replace(&mut self.state, State::PendingExit) {
158 State::PendingEnter => unreachable!(),
159 State::PendingExit => (self.sink)(StrStep::Exit),
160 State::Normal => (),
161 }
162 }
163
eat_trivias(&mut self)164 fn eat_trivias(&mut self) {
165 while self.pos < self.lexed.len() {
166 let kind = self.lexed.kind(self.pos);
167 if !kind.is_trivia() {
168 break;
169 }
170 self.do_token(kind, 1);
171 }
172 }
173
eat_n_trivias(&mut self, n: usize)174 fn eat_n_trivias(&mut self, n: usize) {
175 for _ in 0..n {
176 let kind = self.lexed.kind(self.pos);
177 assert!(kind.is_trivia());
178 self.do_token(kind, 1);
179 }
180 }
181
do_token(&mut self, kind: SyntaxKind, n_tokens: usize)182 fn do_token(&mut self, kind: SyntaxKind, n_tokens: usize) {
183 let text = &self.lexed.range_text(self.pos..self.pos + n_tokens);
184 self.pos += n_tokens;
185 (self.sink)(StrStep::Token { kind, text });
186 }
187
do_float_split(&mut self, has_pseudo_dot: bool)188 fn do_float_split(&mut self, has_pseudo_dot: bool) {
189 let text = &self.lexed.range_text(self.pos..self.pos + 1);
190 self.pos += 1;
191 match text.split_once('.') {
192 Some((left, right)) => {
193 assert!(!left.is_empty());
194 (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF });
195 (self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: left });
196 (self.sink)(StrStep::Exit);
197
198 // here we move the exit up, the original exit has been deleted in process
199 (self.sink)(StrStep::Exit);
200
201 (self.sink)(StrStep::Token { kind: SyntaxKind::DOT, text: "." });
202
203 if has_pseudo_dot {
204 assert!(right.is_empty(), "{left}.{right}");
205 self.state = State::Normal;
206 } else {
207 (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF });
208 (self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: right });
209 (self.sink)(StrStep::Exit);
210
211 // the parser creates an unbalanced start node, we are required to close it here
212 self.state = State::PendingExit;
213 }
214 }
215 None => unreachable!(),
216 }
217 }
218 }
219
n_attached_trivias<'a>( kind: SyntaxKind, trivias: impl Iterator<Item = (SyntaxKind, &'a str)>, ) -> usize220 fn n_attached_trivias<'a>(
221 kind: SyntaxKind,
222 trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
223 ) -> usize {
224 match kind {
225 CONST | ENUM | FN | IMPL | MACRO_CALL | MACRO_DEF | MACRO_RULES | MODULE | RECORD_FIELD
226 | STATIC | STRUCT | TRAIT | TUPLE_FIELD | TYPE_ALIAS | UNION | USE | VARIANT => {
227 let mut res = 0;
228 let mut trivias = trivias.enumerate().peekable();
229
230 while let Some((i, (kind, text))) = trivias.next() {
231 match kind {
232 WHITESPACE if text.contains("\n\n") => {
233 // we check whether the next token is a doc-comment
234 // and skip the whitespace in this case
235 if let Some((COMMENT, peek_text)) = trivias.peek().map(|(_, pair)| pair) {
236 if is_outer(peek_text) {
237 continue;
238 }
239 }
240 break;
241 }
242 COMMENT => {
243 if is_inner(text) {
244 break;
245 }
246 res = i + 1;
247 }
248 _ => (),
249 }
250 }
251 res
252 }
253 _ => 0,
254 }
255 }
256
/// Returns `true` if `text` starts like an outer doc comment, i.e. with
/// `///` or `/**`.
///
/// Comments starting with `////` or `/***`, as well as the empty block
/// comment `/**/`, are ordinary comments and yield `false`.
fn is_outer(text: &str) -> bool {
    // These share a prefix with doc comments but are plain comments.
    if text.starts_with("////") || text.starts_with("/***") || text.starts_with("/**/") {
        return false;
    }
    text.starts_with("///") || text.starts_with("/**")
}
263
/// Returns `true` if `text` starts like an inner doc comment, i.e. with
/// `//!` or `/*!`.
fn is_inner(text: &str) -> bool {
    ["//!", "/*!"].iter().any(|prefix| text.starts_with(*prefix))
}
267