1 //! Extensions to the parsing API with niche applicability. 2 3 use super::*; 4 5 /// Extensions to the `ParseStream` API to support speculative parsing. 6 pub trait Speculative { 7 /// Advance this parse stream to the position of a forked parse stream. 8 /// 9 /// This is the opposite operation to [`ParseStream::fork`]. You can fork a 10 /// parse stream, perform some speculative parsing, then join the original 11 /// stream to the fork to "commit" the parsing from the fork to the main 12 /// stream. 13 /// 14 /// If you can avoid doing this, you should, as it limits the ability to 15 /// generate useful errors. That said, it is often the only way to parse 16 /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem 17 /// is that when the fork fails to parse an `A`, it's impossible to tell 18 /// whether that was because of a syntax error and the user meant to provide 19 /// an `A`, or that the `A`s are finished and it's time to start parsing 20 /// `B`s. Use with care. 21 /// 22 /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by 23 /// parsing `B*` and removing the leading members of `A` from the 24 /// repetition, bypassing the need to involve the downsides associated with 25 /// speculative parsing. 26 /// 27 /// [`ParseStream::fork`]: ParseBuffer::fork 28 /// 29 /// # Example 30 /// 31 /// There has been chatter about the possibility of making the colons in the 32 /// turbofish syntax like `path::to::<T>` no longer required by accepting 33 /// `path::to<T>` in expression position. Specifically, according to [RFC 34 /// 2544], [`PathSegment`] parsing should always try to consume a following 35 /// `<` token as the start of generic arguments, and reset to the `<` if 36 /// that fails (e.g. the token is acting as a less-than operator). 37 /// 38 /// This is the exact kind of parsing behavior which requires the "fork, 39 /// try, commit" behavior that [`ParseStream::fork`] discourages. With 40 /// `advance_to`, we can avoid having to parse the speculatively parsed 41 /// content a second time. 42 /// 43 /// This change in behavior can be implemented in syn by replacing just the 44 /// `Parse` implementation for `PathSegment`: 45 /// 46 /// ``` 47 /// # use syn::ext::IdentExt; 48 /// use syn::parse::discouraged::Speculative; 49 /// # use syn::parse::{Parse, ParseStream}; 50 /// # use syn::{Ident, PathArguments, Result, Token}; 51 /// 52 /// pub struct PathSegment { 53 /// pub ident: Ident, 54 /// pub arguments: PathArguments, 55 /// } 56 /// # 57 /// # impl<T> From<T> for PathSegment 58 /// # where 59 /// # T: Into<Ident>, 60 /// # { 61 /// # fn from(ident: T) -> Self { 62 /// # PathSegment { 63 /// # ident: ident.into(), 64 /// # arguments: PathArguments::None, 65 /// # } 66 /// # } 67 /// # } 68 /// 69 /// impl Parse for PathSegment { 70 /// fn parse(input: ParseStream) -> Result<Self> { 71 /// if input.peek(Token![super]) 72 /// || input.peek(Token![self]) 73 /// || input.peek(Token![Self]) 74 /// || input.peek(Token![crate]) 75 /// { 76 /// let ident = input.call(Ident::parse_any)?; 77 /// return Ok(PathSegment::from(ident)); 78 /// } 79 /// 80 /// let ident = input.parse()?; 81 /// if input.peek(Token![::]) && input.peek3(Token![<]) { 82 /// return Ok(PathSegment { 83 /// ident, 84 /// arguments: PathArguments::AngleBracketed(input.parse()?), 85 /// }); 86 /// } 87 /// if input.peek(Token![<]) && !input.peek(Token![<=]) { 88 /// let fork = input.fork(); 89 /// if let Ok(arguments) = fork.parse() { 90 /// input.advance_to(&fork); 91 /// return Ok(PathSegment { 92 /// ident, 93 /// arguments: PathArguments::AngleBracketed(arguments), 94 /// }); 95 /// } 96 /// } 97 /// Ok(PathSegment::from(ident)) 98 /// } 99 /// } 100 /// 101 /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap(); 102 /// ``` 103 /// 104 /// # Drawbacks 105 /// 106 /// The main drawback of this style of speculative parsing is in error 107 /// presentation. Even if the lookahead is the "correct" parse, the error 108 /// that is shown is that of the "fallback" parse. To use the same example 109 /// as the turbofish above, take the following unfinished "turbofish": 110 /// 111 /// ```text 112 /// let _ = f<&'a fn(), for<'a> serde::>(); 113 /// ``` 114 /// 115 /// If this is parsed as generic arguments, we can provide the error message 116 /// 117 /// ```text 118 /// error: expected identifier 119 /// --> src.rs:L:C 120 /// | 121 /// L | let _ = f<&'a fn(), for<'a> serde::>(); 122 /// | ^ 123 /// ``` 124 /// 125 /// but if parsed using the above speculative parsing, it falls back to 126 /// assuming that the `<` is a less-than when it fails to parse the generic 127 /// arguments, and tries to interpret the `&'a` as the start of a labelled 128 /// loop, resulting in the much less helpful error 129 /// 130 /// ```text 131 /// error: expected `:` 132 /// --> src.rs:L:C 133 /// | 134 /// L | let _ = f<&'a fn(), for<'a> serde::>(); 135 /// | ^^ 136 /// ``` 137 /// 138 /// This can be mitigated with various heuristics (two examples: show both 139 /// forks' parse errors, or show the one that consumed more tokens), but 140 /// when you can control the grammar, sticking to something that can be 141 /// parsed LL(3) and without the LL(*) speculative parsing this makes 142 /// possible, displaying reasonable errors becomes much more simple. 143 /// 144 /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544 145 /// [`PathSegment`]: crate::PathSegment 146 /// 147 /// # Performance 148 /// 149 /// This method performs a cheap fixed amount of work that does not depend 150 /// on how far apart the two streams are positioned. 151 /// 152 /// # Panics 153 /// 154 /// The forked stream in the argument of `advance_to` must have been 155 /// obtained by forking `self`. Attempting to advance to any other stream 156 /// will cause a panic. advance_to(&self, fork: &Self)157 fn advance_to(&self, fork: &Self); 158 } 159 160 impl<'a> Speculative for ParseBuffer<'a> { advance_to(&self, fork: &Self)161 fn advance_to(&self, fork: &Self) { 162 if !crate::buffer::same_scope(self.cursor(), fork.cursor()) { 163 panic!("Fork was not derived from the advancing parse stream"); 164 } 165 166 let (self_unexp, self_sp) = inner_unexpected(self); 167 let (fork_unexp, fork_sp) = inner_unexpected(fork); 168 if !Rc::ptr_eq(&self_unexp, &fork_unexp) { 169 match (fork_sp, self_sp) { 170 // Unexpected set on the fork, but not on `self`, copy it over. 171 (Some(span), None) => { 172 self_unexp.set(Unexpected::Some(span)); 173 } 174 // Unexpected unset. Use chain to propagate errors from fork. 175 (None, None) => { 176 fork_unexp.set(Unexpected::Chain(self_unexp)); 177 178 // Ensure toplevel 'unexpected' tokens from the fork don't 179 // bubble up the chain by replacing the root `unexpected` 180 // pointer, only 'unexpected' tokens from existing group 181 // parsers should bubble. 182 fork.unexpected 183 .set(Some(Rc::new(Cell::new(Unexpected::None)))); 184 } 185 // Unexpected has been set on `self`. No changes needed. 186 (_, Some(_)) => {} 187 } 188 } 189 190 // See comment on `cell` in the struct definition. 191 self.cell 192 .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) }); 193 } 194 } 195