1 //! An NFA-based parser, which is porting from rustc mbe parsing code
2 //!
3 //! See <https://github.com/rust-lang/rust/blob/70b18bc2cbac4712020019f5bf57c00905373205/compiler/rustc_expand/src/mbe/macro_parser.rs>
4 //! Here is a quick intro to how the parser works, copied from rustc:
5 //!
6 //! A 'position' is a dot in the middle of a matcher, usually represented as a
7 //! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`.
8 //!
9 //! The parser walks through the input a character at a time, maintaining a list
10 //! of threads consistent with the current position in the input string: `cur_items`.
11 //!
12 //! As it processes them, it fills up `eof_items` with threads that would be valid if
13 //! the macro invocation is now over, `bb_items` with threads that are waiting on
14 //! a Rust non-terminal like `$e:expr`, and `next_items` with threads that are waiting
15 //! on a particular token. Most of the logic concerns moving the · through the
16 //! repetitions indicated by Kleene stars. The rules for moving the · without
17 //! consuming any input are called epsilon transitions. It only advances or calls
18 //! out to the real Rust parser when no `cur_items` threads remain.
19 //!
20 //! Example:
21 //!
22 //! ```text, ignore
23 //! Start parsing a a a a b against [· a $( a )* a b].
24 //!
25 //! Remaining input: a a a a b
26 //! next: [· a $( a )* a b]
27 //!
28 //! - - - Advance over an a. - - -
29 //!
30 //! Remaining input: a a a b
31 //! cur: [a · $( a )* a b]
32 //! Descend/Skip (first item).
33 //! next: [a $( · a )* a b] [a $( a )* · a b].
34 //!
35 //! - - - Advance over an a. - - -
36 //!
37 //! Remaining input: a a b
38 //! cur: [a $( a · )* a b] [a $( a )* a · b]
39 //! Follow epsilon transition: Finish/Repeat (first item)
40 //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
41 //!
42 //! - - - Advance over an a. - - - (this looks exactly like the last step)
43 //!
44 //! Remaining input: a b
45 //! cur: [a $( a · )* a b] [a $( a )* a · b]
46 //! Follow epsilon transition: Finish/Repeat (first item)
47 //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
48 //!
49 //! - - - Advance over an a. - - - (this looks exactly like the last step)
50 //!
51 //! Remaining input: b
52 //! cur: [a $( a · )* a b] [a $( a )* a · b]
53 //! Follow epsilon transition: Finish/Repeat (first item)
54 //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
55 //!
56 //! - - - Advance over a b. - - -
57 //!
58 //! Remaining input: ''
59 //! eof: [a $( a )* a b ·]
60 //! ```
61
62 use std::rc::Rc;
63
64 use smallvec::{smallvec, SmallVec};
65 use syntax::SmolStr;
66
67 use crate::{
68 expander::{Binding, Bindings, ExpandResult, Fragment},
69 parser::{MetaVarKind, Op, RepeatKind, Separator},
70 tt,
71 tt_iter::TtIter,
72 ExpandError, MetaTemplate, ValueResult,
73 };
74
75 impl Bindings {
push_optional(&mut self, name: &SmolStr)76 fn push_optional(&mut self, name: &SmolStr) {
77 // FIXME: Do we have a better way to represent an empty token ?
78 // Insert an empty subtree for empty token
79 let tt =
80 tt::Subtree { delimiter: tt::Delimiter::unspecified(), token_trees: vec![] }.into();
81 self.inner.insert(name.clone(), Binding::Fragment(Fragment::Tokens(tt)));
82 }
83
push_empty(&mut self, name: &SmolStr)84 fn push_empty(&mut self, name: &SmolStr) {
85 self.inner.insert(name.clone(), Binding::Empty);
86 }
87
bindings(&self) -> impl Iterator<Item = &Binding>88 fn bindings(&self) -> impl Iterator<Item = &Binding> {
89 self.inner.values()
90 }
91 }
92
93 #[derive(Clone, Debug, Default, PartialEq, Eq)]
94 pub(super) struct Match {
95 pub(super) bindings: Bindings,
96 /// We currently just keep the first error and count the rest to compare matches.
97 pub(super) err: Option<ExpandError>,
98 pub(super) err_count: usize,
99 /// How many top-level token trees were left to match.
100 pub(super) unmatched_tts: usize,
101 /// The number of bound variables
102 pub(super) bound_count: usize,
103 }
104
105 impl Match {
add_err(&mut self, err: ExpandError)106 fn add_err(&mut self, err: ExpandError) {
107 let prev_err = self.err.take();
108 self.err = prev_err.or(Some(err));
109 self.err_count += 1;
110 }
111 }
112
113 /// Matching errors are added to the `Match`.
match_(pattern: &MetaTemplate, input: &tt::Subtree, is_2021: bool) -> Match114 pub(super) fn match_(pattern: &MetaTemplate, input: &tt::Subtree, is_2021: bool) -> Match {
115 let mut res = match_loop(pattern, input, is_2021);
116 res.bound_count = count(res.bindings.bindings());
117 return res;
118
119 fn count<'a>(bindings: impl Iterator<Item = &'a Binding>) -> usize {
120 bindings
121 .map(|it| match it {
122 Binding::Fragment(_) => 1,
123 Binding::Empty => 1,
124 Binding::Missing(_) => 1,
125 Binding::Nested(it) => count(it.iter()),
126 })
127 .sum()
128 }
129 }
130
131 #[derive(Debug, Clone)]
132 enum BindingKind {
133 Empty(SmolStr),
134 Optional(SmolStr),
135 Fragment(SmolStr, Fragment),
136 Missing(SmolStr, MetaVarKind),
137 Nested(usize, usize),
138 }
139
140 #[derive(Debug, Clone)]
141 struct BindingsIdx(usize, usize);
142
143 #[derive(Debug, Clone)]
144 enum LinkNode<T> {
145 Node(T),
146 Parent { idx: usize, len: usize },
147 }
148
149 #[derive(Default)]
150 struct BindingsBuilder {
151 nodes: Vec<Vec<LinkNode<Rc<BindingKind>>>>,
152 nested: Vec<Vec<LinkNode<usize>>>,
153 }
154
155 impl BindingsBuilder {
alloc(&mut self) -> BindingsIdx156 fn alloc(&mut self) -> BindingsIdx {
157 let idx = self.nodes.len();
158 self.nodes.push(Vec::new());
159 let nidx = self.nested.len();
160 self.nested.push(Vec::new());
161 BindingsIdx(idx, nidx)
162 }
163
copy(&mut self, bindings: &BindingsIdx) -> BindingsIdx164 fn copy(&mut self, bindings: &BindingsIdx) -> BindingsIdx {
165 let idx = copy_parent(bindings.0, &mut self.nodes);
166 let nidx = copy_parent(bindings.1, &mut self.nested);
167 return BindingsIdx(idx, nidx);
168
169 fn copy_parent<T>(idx: usize, target: &mut Vec<Vec<LinkNode<T>>>) -> usize
170 where
171 T: Clone,
172 {
173 let new_idx = target.len();
174 let len = target[idx].len();
175 if len < 4 {
176 target.push(target[idx].clone())
177 } else {
178 target.push(vec![LinkNode::Parent { idx, len }]);
179 }
180 new_idx
181 }
182 }
183
push_empty(&mut self, idx: &mut BindingsIdx, var: &SmolStr)184 fn push_empty(&mut self, idx: &mut BindingsIdx, var: &SmolStr) {
185 self.nodes[idx.0].push(LinkNode::Node(Rc::new(BindingKind::Empty(var.clone()))));
186 }
187
push_optional(&mut self, idx: &mut BindingsIdx, var: &SmolStr)188 fn push_optional(&mut self, idx: &mut BindingsIdx, var: &SmolStr) {
189 self.nodes[idx.0].push(LinkNode::Node(Rc::new(BindingKind::Optional(var.clone()))));
190 }
191
push_fragment(&mut self, idx: &mut BindingsIdx, var: &SmolStr, fragment: Fragment)192 fn push_fragment(&mut self, idx: &mut BindingsIdx, var: &SmolStr, fragment: Fragment) {
193 self.nodes[idx.0]
194 .push(LinkNode::Node(Rc::new(BindingKind::Fragment(var.clone(), fragment))));
195 }
196
push_missing(&mut self, idx: &mut BindingsIdx, var: &SmolStr, kind: MetaVarKind)197 fn push_missing(&mut self, idx: &mut BindingsIdx, var: &SmolStr, kind: MetaVarKind) {
198 self.nodes[idx.0].push(LinkNode::Node(Rc::new(BindingKind::Missing(var.clone(), kind))));
199 }
200
push_nested(&mut self, parent: &mut BindingsIdx, child: &BindingsIdx)201 fn push_nested(&mut self, parent: &mut BindingsIdx, child: &BindingsIdx) {
202 let BindingsIdx(idx, nidx) = self.copy(child);
203 self.nodes[parent.0].push(LinkNode::Node(Rc::new(BindingKind::Nested(idx, nidx))));
204 }
205
push_default(&mut self, idx: &mut BindingsIdx)206 fn push_default(&mut self, idx: &mut BindingsIdx) {
207 self.nested[idx.1].push(LinkNode::Node(idx.0));
208 let new_idx = self.nodes.len();
209 self.nodes.push(Vec::new());
210 idx.0 = new_idx;
211 }
212
build(self, idx: &BindingsIdx) -> Bindings213 fn build(self, idx: &BindingsIdx) -> Bindings {
214 self.build_inner(&self.nodes[idx.0])
215 }
216
build_inner(&self, link_nodes: &[LinkNode<Rc<BindingKind>>]) -> Bindings217 fn build_inner(&self, link_nodes: &[LinkNode<Rc<BindingKind>>]) -> Bindings {
218 let mut bindings = Bindings::default();
219 let mut nodes = Vec::new();
220 self.collect_nodes(link_nodes, &mut nodes);
221
222 for cmd in nodes {
223 match cmd {
224 BindingKind::Empty(name) => {
225 bindings.push_empty(name);
226 }
227 BindingKind::Optional(name) => {
228 bindings.push_optional(name);
229 }
230 BindingKind::Fragment(name, fragment) => {
231 bindings.inner.insert(name.clone(), Binding::Fragment(fragment.clone()));
232 }
233 BindingKind::Missing(name, kind) => {
234 bindings.inner.insert(name.clone(), Binding::Missing(*kind));
235 }
236 BindingKind::Nested(idx, nested_idx) => {
237 let mut nested_nodes = Vec::new();
238 self.collect_nested(*idx, *nested_idx, &mut nested_nodes);
239
240 for (idx, iter) in nested_nodes.into_iter().enumerate() {
241 for (key, value) in &iter.inner {
242 let bindings = bindings
243 .inner
244 .entry(key.clone())
245 .or_insert_with(|| Binding::Nested(Vec::new()));
246
247 if let Binding::Nested(it) = bindings {
248 // insert empty nested bindings before this one
249 while it.len() < idx {
250 it.push(Binding::Nested(Vec::new()));
251 }
252 it.push(value.clone());
253 }
254 }
255 }
256 }
257 }
258 }
259
260 bindings
261 }
262
collect_nested_ref<'a>( &'a self, id: usize, len: usize, nested_refs: &mut Vec<&'a [LinkNode<Rc<BindingKind>>]>, )263 fn collect_nested_ref<'a>(
264 &'a self,
265 id: usize,
266 len: usize,
267 nested_refs: &mut Vec<&'a [LinkNode<Rc<BindingKind>>]>,
268 ) {
269 self.nested[id].iter().take(len).for_each(|it| match it {
270 LinkNode::Node(id) => nested_refs.push(&self.nodes[*id]),
271 LinkNode::Parent { idx, len } => self.collect_nested_ref(*idx, *len, nested_refs),
272 });
273 }
274
collect_nested(&self, idx: usize, nested_idx: usize, nested: &mut Vec<Bindings>)275 fn collect_nested(&self, idx: usize, nested_idx: usize, nested: &mut Vec<Bindings>) {
276 let last = &self.nodes[idx];
277 let mut nested_refs: Vec<&[_]> = Vec::new();
278 self.nested[nested_idx].iter().for_each(|it| match *it {
279 LinkNode::Node(idx) => nested_refs.push(&self.nodes[idx]),
280 LinkNode::Parent { idx, len } => self.collect_nested_ref(idx, len, &mut nested_refs),
281 });
282 nested_refs.push(last);
283 nested.extend(nested_refs.into_iter().map(|iter| self.build_inner(iter)));
284 }
285
collect_nodes_ref<'a>(&'a self, id: usize, len: usize, nodes: &mut Vec<&'a BindingKind>)286 fn collect_nodes_ref<'a>(&'a self, id: usize, len: usize, nodes: &mut Vec<&'a BindingKind>) {
287 self.nodes[id].iter().take(len).for_each(|it| match it {
288 LinkNode::Node(it) => nodes.push(it),
289 LinkNode::Parent { idx, len } => self.collect_nodes_ref(*idx, *len, nodes),
290 });
291 }
292
collect_nodes<'a>( &'a self, link_nodes: &'a [LinkNode<Rc<BindingKind>>], nodes: &mut Vec<&'a BindingKind>, )293 fn collect_nodes<'a>(
294 &'a self,
295 link_nodes: &'a [LinkNode<Rc<BindingKind>>],
296 nodes: &mut Vec<&'a BindingKind>,
297 ) {
298 link_nodes.iter().for_each(|it| match it {
299 LinkNode::Node(it) => nodes.push(it),
300 LinkNode::Parent { idx, len } => self.collect_nodes_ref(*idx, *len, nodes),
301 });
302 }
303 }
304
305 #[derive(Debug, Clone)]
306 struct MatchState<'t> {
307 /// The position of the "dot" in this matcher
308 dot: OpDelimitedIter<'t>,
309
310 /// Token subtree stack
311 /// When matching against matchers with nested delimited submatchers (e.g., `pat ( pat ( .. )
312 /// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does
313 /// that where the bottom of the stack is the outermost matcher.
314 stack: SmallVec<[OpDelimitedIter<'t>; 4]>,
315
316 /// The "parent" matcher position if we are in a repetition. That is, the matcher position just
317 /// before we enter the repetition.
318 up: Option<Box<MatchState<'t>>>,
319
320 /// The separator if we are in a repetition.
321 sep: Option<Separator>,
322
323 /// The KleeneOp of this sequence if we are in a repetition.
324 sep_kind: Option<RepeatKind>,
325
326 /// Whether we already matched separator token.
327 sep_matched: bool,
328
329 /// Matched meta variables bindings
330 bindings: BindingsIdx,
331
332 /// Cached result of meta variable parsing
333 meta_result: Option<(TtIter<'t>, ExpandResult<Option<Fragment>>)>,
334
335 /// Is error occurred in this state, will `poised` to "parent"
336 is_error: bool,
337 }
338
339 /// Process the matcher positions of `cur_items` until it is empty. In the process, this will
340 /// produce more items in `next_items`, `eof_items`, and `bb_items`.
341 ///
342 /// For more info about the how this happens, see the module-level doc comments and the inline
343 /// comments of this function.
344 ///
345 /// # Parameters
346 ///
347 /// - `src`: the current token of the parser.
348 /// - `stack`: the "parent" frames of the token tree
349 /// - `res`: the match result to store errors
350 /// - `cur_items`: the set of current items to be processed. This should be empty by the end of a
351 /// successful execution of this function.
352 /// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in
353 /// the function `parse`.
354 /// - `eof_items`: the set of items that would be valid if this was the EOF.
355 /// - `bb_items`: the set of items that are waiting for the black-box parser.
356 /// - `error_items`: the set of items in errors, used for error-resilient parsing
357 #[inline]
match_loop_inner<'t>( src: TtIter<'t>, stack: &[TtIter<'t>], res: &mut Match, bindings_builder: &mut BindingsBuilder, cur_items: &mut SmallVec<[MatchState<'t>; 1]>, bb_items: &mut SmallVec<[MatchState<'t>; 1]>, next_items: &mut Vec<MatchState<'t>>, eof_items: &mut SmallVec<[MatchState<'t>; 1]>, error_items: &mut SmallVec<[MatchState<'t>; 1]>, is_2021: bool, )358 fn match_loop_inner<'t>(
359 src: TtIter<'t>,
360 stack: &[TtIter<'t>],
361 res: &mut Match,
362 bindings_builder: &mut BindingsBuilder,
363 cur_items: &mut SmallVec<[MatchState<'t>; 1]>,
364 bb_items: &mut SmallVec<[MatchState<'t>; 1]>,
365 next_items: &mut Vec<MatchState<'t>>,
366 eof_items: &mut SmallVec<[MatchState<'t>; 1]>,
367 error_items: &mut SmallVec<[MatchState<'t>; 1]>,
368 is_2021: bool,
369 ) {
370 macro_rules! try_push {
371 ($items: expr, $it:expr) => {
372 if $it.is_error {
373 error_items.push($it);
374 } else {
375 $items.push($it);
376 }
377 };
378 }
379
380 while let Some(mut item) = cur_items.pop() {
381 while item.dot.is_eof() {
382 match item.stack.pop() {
383 Some(frame) => {
384 item.dot = frame;
385 item.dot.next();
386 }
387 None => break,
388 }
389 }
390 let op = match item.dot.peek() {
391 None => {
392 // We are at or past the end of the matcher of `item`.
393 if let Some(up) = &item.up {
394 if !item.sep_matched {
395 // Get the `up` matcher
396 let mut new_pos = (**up).clone();
397 new_pos.bindings = bindings_builder.copy(&new_pos.bindings);
398 // Add matches from this repetition to the `matches` of `up`
399 bindings_builder.push_nested(&mut new_pos.bindings, &item.bindings);
400
401 // Move the "dot" past the repetition in `up`
402 new_pos.dot.next();
403 new_pos.is_error = new_pos.is_error || item.is_error;
404 cur_items.push(new_pos);
405 }
406
407 // Check if we need a separator.
408 if item.sep.is_some() && !item.sep_matched {
409 let sep = item.sep.as_ref().unwrap();
410 let mut fork = src.clone();
411 if fork.expect_separator(sep) {
412 // HACK: here we use `meta_result` to pass `TtIter` back to caller because
413 // it might have been advanced multiple times. `ValueResult` is
414 // insignificant.
415 item.meta_result = Some((fork, ValueResult::ok(None)));
416 item.dot.next();
417 // item.sep_parsed = Some(sep_len);
418 item.sep_matched = true;
419 try_push!(next_items, item);
420 }
421 }
422 // We don't need a separator. Move the "dot" back to the beginning of the matcher
423 // and try to match again UNLESS we are only allowed to have _one_ repetition.
424 else if item.sep_kind != Some(RepeatKind::ZeroOrOne) {
425 item.dot = item.dot.reset();
426 item.sep_matched = false;
427 bindings_builder.push_default(&mut item.bindings);
428 cur_items.push(item);
429 }
430 } else {
431 // If we are not in a repetition, then being at the end of a matcher means that we have
432 // reached the potential end of the input.
433 try_push!(eof_items, item);
434 }
435 continue;
436 }
437 Some(it) => it,
438 };
439
440 // We are in the middle of a matcher.
441 match op {
442 OpDelimited::Op(Op::Repeat { tokens, kind, separator }) => {
443 if matches!(kind, RepeatKind::ZeroOrMore | RepeatKind::ZeroOrOne) {
444 let mut new_item = item.clone();
445 new_item.bindings = bindings_builder.copy(&new_item.bindings);
446 new_item.dot.next();
447 collect_vars(
448 &mut |s| {
449 bindings_builder.push_empty(&mut new_item.bindings, &s);
450 },
451 tokens,
452 );
453 cur_items.push(new_item);
454 }
455 cur_items.push(MatchState {
456 dot: tokens.iter_delimited(None),
457 stack: Default::default(),
458 up: Some(Box::new(item)),
459 sep: separator.clone(),
460 sep_kind: Some(*kind),
461 sep_matched: false,
462 bindings: bindings_builder.alloc(),
463 meta_result: None,
464 is_error: false,
465 })
466 }
467 OpDelimited::Op(Op::Subtree { tokens, delimiter }) => {
468 if let Ok(subtree) = src.clone().expect_subtree() {
469 if subtree.delimiter.kind == delimiter.kind {
470 item.stack.push(item.dot);
471 item.dot = tokens.iter_delimited(Some(delimiter));
472 cur_items.push(item);
473 }
474 }
475 }
476 OpDelimited::Op(Op::Var { kind, name, .. }) => {
477 if let &Some(kind) = kind {
478 let mut fork = src.clone();
479 let match_res = match_meta_var(kind, &mut fork, is_2021);
480 match match_res.err {
481 None => {
482 // Some meta variables are optional (e.g. vis)
483 if match_res.value.is_some() {
484 item.meta_result = Some((fork, match_res));
485 try_push!(bb_items, item);
486 } else {
487 bindings_builder.push_optional(&mut item.bindings, name);
488 item.dot.next();
489 cur_items.push(item);
490 }
491 }
492 Some(err) => {
493 res.add_err(err);
494 match match_res.value {
495 Some(fragment) => bindings_builder.push_fragment(
496 &mut item.bindings,
497 name,
498 fragment,
499 ),
500 None => {
501 bindings_builder.push_missing(&mut item.bindings, name, kind)
502 }
503 }
504 item.is_error = true;
505 error_items.push(item);
506 }
507 }
508 }
509 }
510 OpDelimited::Op(Op::Literal(lhs)) => {
511 if let Ok(rhs) = src.clone().expect_leaf() {
512 if matches!(rhs, tt::Leaf::Literal(it) if it.text == lhs.text) {
513 item.dot.next();
514 } else {
515 res.add_err(ExpandError::UnexpectedToken);
516 item.is_error = true;
517 }
518 } else {
519 res.add_err(ExpandError::binding_error(format!("expected literal: `{lhs}`")));
520 item.is_error = true;
521 }
522 try_push!(next_items, item);
523 }
524 OpDelimited::Op(Op::Ident(lhs)) => {
525 if let Ok(rhs) = src.clone().expect_leaf() {
526 if matches!(rhs, tt::Leaf::Ident(it) if it.text == lhs.text) {
527 item.dot.next();
528 } else {
529 res.add_err(ExpandError::UnexpectedToken);
530 item.is_error = true;
531 }
532 } else {
533 res.add_err(ExpandError::binding_error(format!("expected ident: `{lhs}`")));
534 item.is_error = true;
535 }
536 try_push!(next_items, item);
537 }
538 OpDelimited::Op(Op::Punct(lhs)) => {
539 let mut fork = src.clone();
540 let error = if let Ok(rhs) = fork.expect_glued_punct() {
541 let first_is_single_quote = rhs[0].char == '\'';
542 let lhs = lhs.iter().map(|it| it.char);
543 let rhs = rhs.iter().map(|it| it.char);
544 if lhs.clone().eq(rhs) {
545 // HACK: here we use `meta_result` to pass `TtIter` back to caller because
546 // it might have been advanced multiple times. `ValueResult` is
547 // insignificant.
548 item.meta_result = Some((fork, ValueResult::ok(None)));
549 item.dot.next();
550 next_items.push(item);
551 continue;
552 }
553
554 if first_is_single_quote {
555 // If the first punct token is a single quote, that's a part of a lifetime
556 // ident, not a punct.
557 ExpandError::UnexpectedToken
558 } else {
559 let lhs: SmolStr = lhs.collect();
560 ExpandError::binding_error(format!("expected punct: `{lhs}`"))
561 }
562 } else {
563 ExpandError::UnexpectedToken
564 };
565
566 res.add_err(error);
567 item.is_error = true;
568 error_items.push(item);
569 }
570 OpDelimited::Op(Op::Ignore { .. } | Op::Index { .. } | Op::Count { .. }) => {
571 stdx::never!("metavariable expression in lhs found");
572 }
573 OpDelimited::Open => {
574 if matches!(src.peek_n(0), Some(tt::TokenTree::Subtree(..))) {
575 item.dot.next();
576 try_push!(next_items, item);
577 }
578 }
579 OpDelimited::Close => {
580 let is_delim_closed = src.peek_n(0).is_none() && !stack.is_empty();
581 if is_delim_closed {
582 item.dot.next();
583 try_push!(next_items, item);
584 }
585 }
586 }
587 }
588 }
589
match_loop(pattern: &MetaTemplate, src: &tt::Subtree, is_2021: bool) -> Match590 fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree, is_2021: bool) -> Match {
591 let mut src = TtIter::new(src);
592 let mut stack: SmallVec<[TtIter<'_>; 1]> = SmallVec::new();
593 let mut res = Match::default();
594 let mut error_recover_item = None;
595
596 let mut bindings_builder = BindingsBuilder::default();
597
598 let mut cur_items = smallvec![MatchState {
599 dot: pattern.iter_delimited(None),
600 stack: Default::default(),
601 up: None,
602 sep: None,
603 sep_kind: None,
604 sep_matched: false,
605 bindings: bindings_builder.alloc(),
606 is_error: false,
607 meta_result: None,
608 }];
609
610 let mut next_items = vec![];
611
612 loop {
613 let mut bb_items = SmallVec::new();
614 let mut eof_items = SmallVec::new();
615 let mut error_items = SmallVec::new();
616
617 stdx::always!(next_items.is_empty());
618
619 match_loop_inner(
620 src.clone(),
621 &stack,
622 &mut res,
623 &mut bindings_builder,
624 &mut cur_items,
625 &mut bb_items,
626 &mut next_items,
627 &mut eof_items,
628 &mut error_items,
629 is_2021,
630 );
631 stdx::always!(cur_items.is_empty());
632
633 if !error_items.is_empty() {
634 error_recover_item = error_items.pop().map(|it| it.bindings);
635 } else if let [state, ..] = &*eof_items {
636 error_recover_item = Some(state.bindings.clone());
637 }
638
639 // We need to do some post processing after the `match_loop_inner`.
640 // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise,
641 // either the parse is ambiguous (which should never happen) or there is a syntax error.
642 if src.peek_n(0).is_none() && stack.is_empty() {
643 if let [state] = &*eof_items {
644 // remove all errors, because it is the correct answer !
645 res = Match::default();
646 res.bindings = bindings_builder.build(&state.bindings);
647 } else {
648 // Error recovery
649 if let Some(item) = error_recover_item {
650 res.bindings = bindings_builder.build(&item);
651 }
652 res.add_err(ExpandError::UnexpectedToken);
653 }
654 return res;
655 }
656
657 // If there are no possible next positions AND we aren't waiting for the black-box parser,
658 // then there is a syntax error.
659 //
660 // Another possibility is that we need to call out to parse some rust nonterminal
661 // (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong.
662 let has_leftover_tokens = (bb_items.is_empty() && next_items.is_empty())
663 || !(bb_items.is_empty() || next_items.is_empty())
664 || bb_items.len() > 1;
665 if has_leftover_tokens {
666 res.unmatched_tts += src.len();
667 while let Some(it) = stack.pop() {
668 src = it;
669 res.unmatched_tts += src.len();
670 }
671 res.add_err(ExpandError::LeftoverTokens);
672
673 if let Some(error_recover_item) = error_recover_item {
674 res.bindings = bindings_builder.build(&error_recover_item);
675 }
676 return res;
677 }
678 // Dump all possible `next_items` into `cur_items` for the next iteration.
679 else if !next_items.is_empty() {
680 if let Some((iter, _)) = next_items[0].meta_result.take() {
681 // We've matched a possibly "glued" punct. The matched punct (hence
682 // `meta_result` also) must be the same for all items.
683 // FIXME: If there are multiple items, it's definitely redundant (and it's hacky!
684 // `meta_result` isn't supposed to be used this way).
685
686 // We already bumped, so no need to call `.next()` like in the other branch.
687 src = iter;
688 for item in next_items.iter_mut() {
689 item.meta_result = None;
690 }
691 } else {
692 match src.next() {
693 Some(tt::TokenTree::Subtree(subtree)) => {
694 stack.push(src.clone());
695 src = TtIter::new(subtree);
696 }
697 None => {
698 if let Some(iter) = stack.pop() {
699 src = iter;
700 }
701 }
702 _ => (),
703 }
704 }
705 // Now process the next token
706 cur_items.extend(next_items.drain(..));
707 }
708 // Finally, we have the case where we need to call the black-box parser to get some
709 // nonterminal.
710 else {
711 stdx::always!(bb_items.len() == 1);
712 let mut item = bb_items.pop().unwrap();
713
714 if let Some(OpDelimited::Op(Op::Var { name, .. })) = item.dot.peek() {
715 let (iter, match_res) = item.meta_result.take().unwrap();
716 match match_res.value {
717 Some(fragment) => {
718 bindings_builder.push_fragment(&mut item.bindings, name, fragment);
719 }
720 None if match_res.err.is_none() => {
721 bindings_builder.push_optional(&mut item.bindings, name);
722 }
723 None => {}
724 }
725 if let Some(err) = match_res.err {
726 res.add_err(err);
727 }
728 src = iter.clone();
729 item.dot.next();
730 } else {
731 unreachable!()
732 }
733 cur_items.push(item);
734 }
735 stdx::always!(!cur_items.is_empty());
736 }
737 }
738
match_meta_var( kind: MetaVarKind, input: &mut TtIter<'_>, is_2021: bool, ) -> ExpandResult<Option<Fragment>>739 fn match_meta_var(
740 kind: MetaVarKind,
741 input: &mut TtIter<'_>,
742 is_2021: bool,
743 ) -> ExpandResult<Option<Fragment>> {
744 let fragment = match kind {
745 MetaVarKind::Path => parser::PrefixEntryPoint::Path,
746 MetaVarKind::Ty => parser::PrefixEntryPoint::Ty,
747 MetaVarKind::Pat if is_2021 => parser::PrefixEntryPoint::PatTop,
748 MetaVarKind::Pat => parser::PrefixEntryPoint::Pat,
749 MetaVarKind::PatParam => parser::PrefixEntryPoint::Pat,
750 MetaVarKind::Stmt => parser::PrefixEntryPoint::Stmt,
751 MetaVarKind::Block => parser::PrefixEntryPoint::Block,
752 MetaVarKind::Meta => parser::PrefixEntryPoint::MetaItem,
753 MetaVarKind::Item => parser::PrefixEntryPoint::Item,
754 MetaVarKind::Vis => parser::PrefixEntryPoint::Vis,
755 MetaVarKind::Expr => {
756 // `expr` should not match underscores, let expressions, or inline const. The latter
757 // two are for [backwards compatibility][0].
758 // HACK: Macro expansion should not be done using "rollback and try another alternative".
759 // rustc [explicitly checks the next token][1].
760 // [0]: https://github.com/rust-lang/rust/issues/86730
761 // [1]: https://github.com/rust-lang/rust/blob/f0c4da499/compiler/rustc_expand/src/mbe/macro_parser.rs#L576
762 match input.peek_n(0) {
763 Some(tt::TokenTree::Leaf(tt::Leaf::Ident(it)))
764 if it.text == "_" || it.text == "let" || it.text == "const" =>
765 {
766 return ExpandResult::only_err(ExpandError::NoMatchingRule)
767 }
768 _ => {}
769 };
770 return input
771 .expect_fragment(parser::PrefixEntryPoint::Expr)
772 .map(|tt| tt.map(Fragment::Expr));
773 }
774 _ => {
775 let tt_result = match kind {
776 MetaVarKind::Ident => input
777 .expect_ident()
778 .map(|ident| tt::Leaf::from(ident.clone()).into())
779 .map_err(|()| ExpandError::binding_error("expected ident")),
780 MetaVarKind::Tt => input
781 .expect_tt()
782 .map_err(|()| ExpandError::binding_error("expected token tree")),
783 MetaVarKind::Lifetime => input
784 .expect_lifetime()
785 .map_err(|()| ExpandError::binding_error("expected lifetime")),
786 MetaVarKind::Literal => {
787 let neg = input.eat_char('-');
788 input
789 .expect_literal()
790 .map(|literal| {
791 let lit = literal.clone();
792 match neg {
793 None => lit.into(),
794 Some(neg) => tt::TokenTree::Subtree(tt::Subtree {
795 delimiter: tt::Delimiter::unspecified(),
796 token_trees: vec![neg, lit.into()],
797 }),
798 }
799 })
800 .map_err(|()| ExpandError::binding_error("expected literal"))
801 }
802 _ => Err(ExpandError::UnexpectedToken),
803 };
804 return tt_result.map(|it| Some(Fragment::Tokens(it))).into();
805 }
806 };
807 input.expect_fragment(fragment).map(|it| it.map(Fragment::Tokens))
808 }
809
collect_vars(collector_fun: &mut impl FnMut(SmolStr), pattern: &MetaTemplate)810 fn collect_vars(collector_fun: &mut impl FnMut(SmolStr), pattern: &MetaTemplate) {
811 for op in pattern.iter() {
812 match op {
813 Op::Var { name, .. } => collector_fun(name.clone()),
814 Op::Subtree { tokens, .. } => collect_vars(collector_fun, tokens),
815 Op::Repeat { tokens, .. } => collect_vars(collector_fun, tokens),
816 Op::Literal(_) | Op::Ident(_) | Op::Punct(_) => {}
817 Op::Ignore { .. } | Op::Index { .. } | Op::Count { .. } => {
818 stdx::never!("metavariable expression in lhs found");
819 }
820 }
821 }
822 }
823 impl MetaTemplate {
iter_delimited<'a>(&'a self, delimited: Option<&'a tt::Delimiter>) -> OpDelimitedIter<'a>824 fn iter_delimited<'a>(&'a self, delimited: Option<&'a tt::Delimiter>) -> OpDelimitedIter<'a> {
825 OpDelimitedIter {
826 inner: &self.0,
827 idx: 0,
828 delimited: delimited.unwrap_or(&tt::Delimiter::UNSPECIFIED),
829 }
830 }
831 }
832
833 #[derive(Debug, Clone, Copy)]
834 enum OpDelimited<'a> {
835 Op(&'a Op),
836 Open,
837 Close,
838 }
839
840 #[derive(Debug, Clone, Copy)]
841 struct OpDelimitedIter<'a> {
842 inner: &'a [Op],
843 delimited: &'a tt::Delimiter,
844 idx: usize,
845 }
846
847 impl<'a> OpDelimitedIter<'a> {
is_eof(&self) -> bool848 fn is_eof(&self) -> bool {
849 let len = self.inner.len()
850 + if self.delimited.kind != tt::DelimiterKind::Invisible { 2 } else { 0 };
851 self.idx >= len
852 }
853
peek(&self) -> Option<OpDelimited<'a>>854 fn peek(&self) -> Option<OpDelimited<'a>> {
855 match self.delimited.kind {
856 tt::DelimiterKind::Invisible => self.inner.get(self.idx).map(OpDelimited::Op),
857 _ => match self.idx {
858 0 => Some(OpDelimited::Open),
859 i if i == self.inner.len() + 1 => Some(OpDelimited::Close),
860 i => self.inner.get(i - 1).map(OpDelimited::Op),
861 },
862 }
863 }
864
reset(&self) -> Self865 fn reset(&self) -> Self {
866 Self { inner: self.inner, idx: 0, delimited: self.delimited }
867 }
868 }
869
870 impl<'a> Iterator for OpDelimitedIter<'a> {
871 type Item = OpDelimited<'a>;
872
next(&mut self) -> Option<Self::Item>873 fn next(&mut self) -> Option<Self::Item> {
874 let res = self.peek();
875 self.idx += 1;
876 res
877 }
878
size_hint(&self) -> (usize, Option<usize>)879 fn size_hint(&self) -> (usize, Option<usize>) {
880 let len = self.inner.len()
881 + if self.delimited.kind != tt::DelimiterKind::Invisible { 2 } else { 0 };
882 let remain = len.saturating_sub(self.idx);
883 (remain, Some(remain))
884 }
885 }
886
887 impl<'a> TtIter<'a> {
expect_separator(&mut self, separator: &Separator) -> bool888 fn expect_separator(&mut self, separator: &Separator) -> bool {
889 let mut fork = self.clone();
890 let ok = match separator {
891 Separator::Ident(lhs) => match fork.expect_ident_or_underscore() {
892 Ok(rhs) => rhs.text == lhs.text,
893 Err(_) => false,
894 },
895 Separator::Literal(lhs) => match fork.expect_literal() {
896 Ok(rhs) => match rhs {
897 tt::Leaf::Literal(rhs) => rhs.text == lhs.text,
898 tt::Leaf::Ident(rhs) => rhs.text == lhs.text,
899 tt::Leaf::Punct(_) => false,
900 },
901 Err(_) => false,
902 },
903 Separator::Puncts(lhs) => match fork.expect_glued_punct() {
904 Ok(rhs) => {
905 let lhs = lhs.iter().map(|it| it.char);
906 let rhs = rhs.iter().map(|it| it.char);
907 lhs.eq(rhs)
908 }
909 Err(_) => false,
910 },
911 };
912 if ok {
913 *self = fork;
914 }
915 ok
916 }
917
expect_tt(&mut self) -> Result<tt::TokenTree, ()>918 fn expect_tt(&mut self) -> Result<tt::TokenTree, ()> {
919 if let Some(tt::TokenTree::Leaf(tt::Leaf::Punct(punct))) = self.peek_n(0) {
920 if punct.char == '\'' {
921 self.expect_lifetime()
922 } else {
923 let puncts = self.expect_glued_punct()?;
924 let token_trees = puncts.into_iter().map(|p| tt::Leaf::Punct(p).into()).collect();
925 Ok(tt::TokenTree::Subtree(tt::Subtree {
926 delimiter: tt::Delimiter::unspecified(),
927 token_trees,
928 }))
929 }
930 } else {
931 self.next().ok_or(()).cloned()
932 }
933 }
934
expect_lifetime(&mut self) -> Result<tt::TokenTree, ()>935 fn expect_lifetime(&mut self) -> Result<tt::TokenTree, ()> {
936 let punct = self.expect_single_punct()?;
937 if punct.char != '\'' {
938 return Err(());
939 }
940 let ident = self.expect_ident_or_underscore()?;
941
942 Ok(tt::Subtree {
943 delimiter: tt::Delimiter::unspecified(),
944 token_trees: vec![
945 tt::Leaf::Punct(*punct).into(),
946 tt::Leaf::Ident(ident.clone()).into(),
947 ],
948 }
949 .into())
950 }
951
eat_char(&mut self, c: char) -> Option<tt::TokenTree>952 fn eat_char(&mut self, c: char) -> Option<tt::TokenTree> {
953 let mut fork = self.clone();
954 match fork.expect_char(c) {
955 Ok(_) => {
956 let tt = self.next().cloned();
957 *self = fork;
958 tt
959 }
960 Err(_) => None,
961 }
962 }
963 }
964