1 //! Extensions to the parsing API with niche applicability. 2 3 use super::*; 4 use proc_macro2::extra::DelimSpan; 5 6 /// Extensions to the `ParseStream` API to support speculative parsing. 7 pub trait Speculative { 8 /// Advance this parse stream to the position of a forked parse stream. 9 /// 10 /// This is the opposite operation to [`ParseStream::fork`]. You can fork a 11 /// parse stream, perform some speculative parsing, then join the original 12 /// stream to the fork to "commit" the parsing from the fork to the main 13 /// stream. 14 /// 15 /// If you can avoid doing this, you should, as it limits the ability to 16 /// generate useful errors. That said, it is often the only way to parse 17 /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem 18 /// is that when the fork fails to parse an `A`, it's impossible to tell 19 /// whether that was because of a syntax error and the user meant to provide 20 /// an `A`, or that the `A`s are finished and it's time to start parsing 21 /// `B`s. Use with care. 22 /// 23 /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by 24 /// parsing `B*` and removing the leading members of `A` from the 25 /// repetition, bypassing the need to involve the downsides associated with 26 /// speculative parsing. 27 /// 28 /// [`ParseStream::fork`]: ParseBuffer::fork 29 /// 30 /// # Example 31 /// 32 /// There has been chatter about the possibility of making the colons in the 33 /// turbofish syntax like `path::to::<T>` no longer required by accepting 34 /// `path::to<T>` in expression position. Specifically, according to [RFC 35 /// 2544], [`PathSegment`] parsing should always try to consume a following 36 /// `<` token as the start of generic arguments, and reset to the `<` if 37 /// that fails (e.g. the token is acting as a less-than operator). 38 /// 39 /// This is the exact kind of parsing behavior which requires the "fork, 40 /// try, commit" behavior that [`ParseStream::fork`] discourages. With 41 /// `advance_to`, we can avoid having to parse the speculatively parsed 42 /// content a second time. 43 /// 44 /// This change in behavior can be implemented in syn by replacing just the 45 /// `Parse` implementation for `PathSegment`: 46 /// 47 /// ``` 48 /// # use syn::ext::IdentExt; 49 /// use syn::parse::discouraged::Speculative; 50 /// # use syn::parse::{Parse, ParseStream}; 51 /// # use syn::{Ident, PathArguments, Result, Token}; 52 /// 53 /// pub struct PathSegment { 54 /// pub ident: Ident, 55 /// pub arguments: PathArguments, 56 /// } 57 /// # 58 /// # impl<T> From<T> for PathSegment 59 /// # where 60 /// # T: Into<Ident>, 61 /// # { 62 /// # fn from(ident: T) -> Self { 63 /// # PathSegment { 64 /// # ident: ident.into(), 65 /// # arguments: PathArguments::None, 66 /// # } 67 /// # } 68 /// # } 69 /// 70 /// impl Parse for PathSegment { 71 /// fn parse(input: ParseStream) -> Result<Self> { 72 /// if input.peek(Token![super]) 73 /// || input.peek(Token![self]) 74 /// || input.peek(Token![Self]) 75 /// || input.peek(Token![crate]) 76 /// { 77 /// let ident = input.call(Ident::parse_any)?; 78 /// return Ok(PathSegment::from(ident)); 79 /// } 80 /// 81 /// let ident = input.parse()?; 82 /// if input.peek(Token![::]) && input.peek3(Token![<]) { 83 /// return Ok(PathSegment { 84 /// ident, 85 /// arguments: PathArguments::AngleBracketed(input.parse()?), 86 /// }); 87 /// } 88 /// if input.peek(Token![<]) && !input.peek(Token![<=]) { 89 /// let fork = input.fork(); 90 /// if let Ok(arguments) = fork.parse() { 91 /// input.advance_to(&fork); 92 /// return Ok(PathSegment { 93 /// ident, 94 /// arguments: PathArguments::AngleBracketed(arguments), 95 /// }); 96 /// } 97 /// } 98 /// Ok(PathSegment::from(ident)) 99 /// } 100 /// } 101 /// 102 /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap(); 103 /// ``` 104 /// 105 /// # Drawbacks 106 /// 107 /// The main drawback of this style of speculative parsing is in error 108 /// presentation. Even if the lookahead is the "correct" parse, the error 109 /// that is shown is that of the "fallback" parse. To use the same example 110 /// as the turbofish above, take the following unfinished "turbofish": 111 /// 112 /// ```text 113 /// let _ = f<&'a fn(), for<'a> serde::>(); 114 /// ``` 115 /// 116 /// If this is parsed as generic arguments, we can provide the error message 117 /// 118 /// ```text 119 /// error: expected identifier 120 /// --> src.rs:L:C 121 /// | 122 /// L | let _ = f<&'a fn(), for<'a> serde::>(); 123 /// | ^ 124 /// ``` 125 /// 126 /// but if parsed using the above speculative parsing, it falls back to 127 /// assuming that the `<` is a less-than when it fails to parse the generic 128 /// arguments, and tries to interpret the `&'a` as the start of a labelled 129 /// loop, resulting in the much less helpful error 130 /// 131 /// ```text 132 /// error: expected `:` 133 /// --> src.rs:L:C 134 /// | 135 /// L | let _ = f<&'a fn(), for<'a> serde::>(); 136 /// | ^^ 137 /// ``` 138 /// 139 /// This can be mitigated with various heuristics (two examples: show both 140 /// forks' parse errors, or show the one that consumed more tokens), but 141 /// when you can control the grammar, sticking to something that can be 142 /// parsed LL(3) and without the LL(*) speculative parsing this makes 143 /// possible, displaying reasonable errors becomes much more simple. 144 /// 145 /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544 146 /// [`PathSegment`]: crate::PathSegment 147 /// 148 /// # Performance 149 /// 150 /// This method performs a cheap fixed amount of work that does not depend 151 /// on how far apart the two streams are positioned. 152 /// 153 /// # Panics 154 /// 155 /// The forked stream in the argument of `advance_to` must have been 156 /// obtained by forking `self`. Attempting to advance to any other stream 157 /// will cause a panic. advance_to(&self, fork: &Self)158 fn advance_to(&self, fork: &Self); 159 } 160 161 impl<'a> Speculative for ParseBuffer<'a> { advance_to(&self, fork: &Self)162 fn advance_to(&self, fork: &Self) { 163 if !crate::buffer::same_scope(self.cursor(), fork.cursor()) { 164 panic!("Fork was not derived from the advancing parse stream"); 165 } 166 167 let (self_unexp, self_sp) = inner_unexpected(self); 168 let (fork_unexp, fork_sp) = inner_unexpected(fork); 169 if !Rc::ptr_eq(&self_unexp, &fork_unexp) { 170 match (fork_sp, self_sp) { 171 // Unexpected set on the fork, but not on `self`, copy it over. 172 (Some(span), None) => { 173 self_unexp.set(Unexpected::Some(span)); 174 } 175 // Unexpected unset. Use chain to propagate errors from fork. 176 (None, None) => { 177 fork_unexp.set(Unexpected::Chain(self_unexp)); 178 179 // Ensure toplevel 'unexpected' tokens from the fork don't 180 // bubble up the chain by replacing the root `unexpected` 181 // pointer, only 'unexpected' tokens from existing group 182 // parsers should bubble. 183 fork.unexpected 184 .set(Some(Rc::new(Cell::new(Unexpected::None)))); 185 } 186 // Unexpected has been set on `self`. No changes needed. 187 (_, Some(_)) => {} 188 } 189 } 190 191 // See comment on `cell` in the struct definition. 192 self.cell 193 .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) }); 194 } 195 } 196 197 /// Extensions to the `ParseStream` API to support manipulating invisible 198 /// delimiters the same as if they were visible. 199 pub trait AnyDelimiter { 200 /// Returns the delimiter, the span of the delimiter token, and the nested 201 /// contents for further parsing. parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>202 fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>; 203 } 204 205 impl<'a> AnyDelimiter for ParseBuffer<'a> { parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>206 fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> { 207 self.step(|cursor| { 208 if let Some((content, delimiter, span, rest)) = cursor.any_group() { 209 let scope = crate::buffer::close_span_of_group(*cursor); 210 let nested = crate::parse::advance_step_cursor(cursor, content); 211 let unexpected = crate::parse::get_unexpected(self); 212 let content = crate::parse::new_parse_buffer(scope, nested, unexpected); 213 Ok(((delimiter, span, content), rest)) 214 } else { 215 Err(cursor.error("expected any delimiter")) 216 } 217 }) 218 } 219 } 220