//! Extensions to the parsing API with niche applicability.
2 
3 use super::*;
4 
/// Extensions to the `ParseStream` API to support speculative parsing.
pub trait Speculative {
    /// Advance this parse stream to the position of a forked parse stream.
    ///
    /// This is the opposite operation to [`ParseStream::fork`]. You can fork a
    /// parse stream, perform some speculative parsing, then join the original
    /// stream to the fork to "commit" the parsing from the fork to the main
    /// stream.
    ///
    /// If you can avoid doing this, you should, as it limits the ability to
    /// generate useful errors. That said, it is often the only way to parse
    /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem
    /// is that when the fork fails to parse an `A`, it's impossible to tell
    /// whether that was because of a syntax error and the user meant to provide
    /// an `A`, or that the `A`s are finished and it's time to start parsing
    /// `B`s. Use with care.
    ///
    /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by
    /// parsing `B*` and removing the leading members of `A` from the
    /// repetition, bypassing the need to involve the downsides associated with
    /// speculative parsing.
    ///
    /// [`ParseStream::fork`]: ParseBuffer::fork
    ///
    /// # Example
    ///
    /// There has been chatter about the possibility of making the colons in the
    /// turbofish syntax like `path::to::<T>` no longer required by accepting
    /// `path::to<T>` in expression position. Specifically, according to [RFC
    /// 2544], [`PathSegment`] parsing should always try to consume a following
    /// `<` token as the start of generic arguments, and reset to the `<` if
    /// that fails (e.g. the token is acting as a less-than operator).
    ///
    /// This is the exact kind of parsing behavior which requires the "fork,
    /// try, commit" behavior that [`ParseStream::fork`] discourages. With
    /// `advance_to`, we can avoid having to parse the speculatively parsed
    /// content a second time.
    ///
    /// This change in behavior can be implemented in syn by replacing just the
    /// `Parse` implementation for `PathSegment`:
    ///
    /// ```
    /// # use syn::ext::IdentExt;
    /// use syn::parse::discouraged::Speculative;
    /// # use syn::parse::{Parse, ParseStream};
    /// # use syn::{Ident, PathArguments, Result, Token};
    ///
    /// pub struct PathSegment {
    ///     pub ident: Ident,
    ///     pub arguments: PathArguments,
    /// }
    /// #
    /// # impl<T> From<T> for PathSegment
    /// # where
    /// #     T: Into<Ident>,
    /// # {
    /// #     fn from(ident: T) -> Self {
    /// #         PathSegment {
    /// #             ident: ident.into(),
    /// #             arguments: PathArguments::None,
    /// #         }
    /// #     }
    /// # }
    ///
    /// impl Parse for PathSegment {
    ///     fn parse(input: ParseStream) -> Result<Self> {
    ///         if input.peek(Token![super])
    ///             || input.peek(Token![self])
    ///             || input.peek(Token![Self])
    ///             || input.peek(Token![crate])
    ///         {
    ///             let ident = input.call(Ident::parse_any)?;
    ///             return Ok(PathSegment::from(ident));
    ///         }
    ///
    ///         let ident = input.parse()?;
    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
    ///             return Ok(PathSegment {
    ///                 ident,
    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
    ///             });
    ///         }
    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
    ///             let fork = input.fork();
    ///             if let Ok(arguments) = fork.parse() {
    ///                 input.advance_to(&fork);
    ///                 return Ok(PathSegment {
    ///                     ident,
    ///                     arguments: PathArguments::AngleBracketed(arguments),
    ///                 });
    ///             }
    ///         }
    ///         Ok(PathSegment::from(ident))
    ///     }
    /// }
    ///
    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
    /// ```
    ///
    /// # Drawbacks
    ///
    /// The main drawback of this style of speculative parsing is in error
    /// presentation. Even if the lookahead is the "correct" parse, the error
    /// that is shown is that of the "fallback" parse. To use the same example
    /// as the turbofish above, take the following unfinished "turbofish":
    ///
    /// ```text
    /// let _ = f<&'a fn(), for<'a> serde::>();
    /// ```
    ///
    /// If this is parsed as generic arguments, we can provide the error message
    ///
    /// ```text
    /// error: expected identifier
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |                                    ^
    /// ```
    ///
    /// but if parsed using the above speculative parsing, it falls back to
    /// assuming that the `<` is a less-than when it fails to parse the generic
    /// arguments, and tries to interpret the `&'a` as the start of a labelled
    /// loop, resulting in the much less helpful error
    ///
    /// ```text
    /// error: expected `:`
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |               ^^
    /// ```
    ///
    /// This can be mitigated with various heuristics (two examples: show both
    /// forks' parse errors, or show the one that consumed more tokens), but
    /// when you control the grammar, sticking to something that can be parsed
    /// LL(3), without the LL(*) speculative parsing that this method makes
    /// possible, makes displaying reasonable errors much simpler.
    ///
    /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
    /// [`PathSegment`]: crate::PathSegment
    ///
    /// # Performance
    ///
    /// This method performs a cheap fixed amount of work that does not depend
    /// on how far apart the two streams are positioned.
    ///
    /// # Panics
    ///
    /// The forked stream in the argument of `advance_to` must have been
    /// obtained by forking `self`. Attempting to advance to any other stream
    /// will cause a panic.
    fn advance_to(&self, fork: &Self);
}
159 
160 impl<'a> Speculative for ParseBuffer<'a> {
advance_to(&self, fork: &Self)161     fn advance_to(&self, fork: &Self) {
162         if !crate::buffer::same_scope(self.cursor(), fork.cursor()) {
163             panic!("Fork was not derived from the advancing parse stream");
164         }
165 
166         let (self_unexp, self_sp) = inner_unexpected(self);
167         let (fork_unexp, fork_sp) = inner_unexpected(fork);
168         if !Rc::ptr_eq(&self_unexp, &fork_unexp) {
169             match (fork_sp, self_sp) {
170                 // Unexpected set on the fork, but not on `self`, copy it over.
171                 (Some(span), None) => {
172                     self_unexp.set(Unexpected::Some(span));
173                 }
174                 // Unexpected unset. Use chain to propagate errors from fork.
175                 (None, None) => {
176                     fork_unexp.set(Unexpected::Chain(self_unexp));
177 
178                     // Ensure toplevel 'unexpected' tokens from the fork don't
179                     // bubble up the chain by replacing the root `unexpected`
180                     // pointer, only 'unexpected' tokens from existing group
181                     // parsers should bubble.
182                     fork.unexpected
183                         .set(Some(Rc::new(Cell::new(Unexpected::None))));
184                 }
185                 // Unexpected has been set on `self`. No changes needed.
186                 (_, Some(_)) => {}
187             }
188         }
189 
190         // See comment on `cell` in the struct definition.
191         self.cell
192             .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) });
193     }
194 }
195