• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! A stably addressed token buffer supporting efficient traversal based on a
2 //! cheaply copyable cursor.
3 //!
4 //! *This module is available only if Syn is built with the `"parsing"` feature.*
5 
6 // This module is heavily commented as it contains most of the unsafe code in
7 // Syn, and caution should be used when editing it. The public-facing interface
8 // is 100% safe but the implementation is fragile internally.
9 
10 #[cfg(all(
11     not(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "wasi"))),
12     feature = "proc-macro"
13 ))]
14 use crate::proc_macro as pm;
15 use crate::Lifetime;
16 use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
17 use std::cmp::Ordering;
18 use std::marker::PhantomData;
19 
20 /// Internal type which is used instead of `TokenTree` to represent a token tree
21 /// within a `TokenBuffer`.
22 enum Entry {
23     // Mimicking types from proc-macro.
24     // Group entries contain the offset to the matching End entry.
25     Group(Group, usize),
26     Ident(Ident),
27     Punct(Punct),
28     Literal(Literal),
29     // End entries contain the offset (negative) to the start of the buffer.
30     End(isize),
31 }
32 
33 /// A buffer that can be efficiently traversed multiple times, unlike
34 /// `TokenStream` which requires a deep copy in order to traverse more than
35 /// once.
36 ///
37 /// *This type is available only if Syn is built with the `"parsing"` feature.*
38 pub struct TokenBuffer {
39     // NOTE: Do not implement clone on this - while the current design could be
40     // cloned, other designs which could be desirable may not be cloneable.
41     entries: Box<[Entry]>,
42 }
43 
44 impl TokenBuffer {
recursive_new(entries: &mut Vec<Entry>, stream: TokenStream)45     fn recursive_new(entries: &mut Vec<Entry>, stream: TokenStream) {
46         for tt in stream {
47             match tt {
48                 TokenTree::Ident(ident) => entries.push(Entry::Ident(ident)),
49                 TokenTree::Punct(punct) => entries.push(Entry::Punct(punct)),
50                 TokenTree::Literal(literal) => entries.push(Entry::Literal(literal)),
51                 TokenTree::Group(group) => {
52                     let group_start_index = entries.len();
53                     entries.push(Entry::End(0)); // we replace this below
54                     Self::recursive_new(entries, group.stream());
55                     let group_end_index = entries.len();
56                     entries.push(Entry::End(-(group_end_index as isize)));
57                     let group_end_offset = group_end_index - group_start_index;
58                     entries[group_start_index] = Entry::Group(group, group_end_offset);
59                 }
60             }
61         }
62     }
63 
64     /// Creates a `TokenBuffer` containing all the tokens from the input
65     /// `proc_macro::TokenStream`.
66     ///
67     /// *This method is available only if Syn is built with both the `"parsing"` and
68     /// `"proc-macro"` features.*
69     #[cfg(all(
70         not(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "wasi"))),
71         feature = "proc-macro"
72     ))]
new(stream: pm::TokenStream) -> Self73     pub fn new(stream: pm::TokenStream) -> Self {
74         Self::new2(stream.into())
75     }
76 
77     /// Creates a `TokenBuffer` containing all the tokens from the input
78     /// `proc_macro2::TokenStream`.
new2(stream: TokenStream) -> Self79     pub fn new2(stream: TokenStream) -> Self {
80         let mut entries = Vec::new();
81         Self::recursive_new(&mut entries, stream);
82         entries.push(Entry::End(-(entries.len() as isize)));
83         Self {
84             entries: entries.into_boxed_slice(),
85         }
86     }
87 
88     /// Creates a cursor referencing the first token in the buffer and able to
89     /// traverse until the end of the buffer.
begin(&self) -> Cursor90     pub fn begin(&self) -> Cursor {
91         let ptr = self.entries.as_ptr();
92         unsafe { Cursor::create(ptr, ptr.add(self.entries.len() - 1)) }
93     }
94 }
95 
96 /// A cheaply copyable cursor into a `TokenBuffer`.
97 ///
98 /// This cursor holds a shared reference into the immutable data which is used
99 /// internally to represent a `TokenStream`, and can be efficiently manipulated
100 /// and copied around.
101 ///
102 /// An empty `Cursor` can be created directly, or one may create a `TokenBuffer`
103 /// object and get a cursor to its first token with `begin()`.
104 ///
105 /// Two cursors are equal if they have the same location in the same input
106 /// stream, and have the same scope.
107 ///
108 /// *This type is available only if Syn is built with the `"parsing"` feature.*
109 pub struct Cursor<'a> {
110     // The current entry which the `Cursor` is pointing at.
111     ptr: *const Entry,
112     // This is the only `Entry::End` object which this cursor is allowed to
113     // point at. All other `End` objects are skipped over in `Cursor::create`.
114     scope: *const Entry,
115     // Cursor is covariant in 'a. This field ensures that our pointers are still
116     // valid.
117     marker: PhantomData<&'a Entry>,
118 }
119 
120 impl<'a> Cursor<'a> {
121     /// Creates a cursor referencing a static empty TokenStream.
empty() -> Self122     pub fn empty() -> Self {
123         // It's safe in this situation for us to put an `Entry` object in global
124         // storage, despite it not actually being safe to send across threads
125         // (`Ident` is a reference into a thread-local table). This is because
126         // this entry never includes a `Ident` object.
127         //
128         // This wrapper struct allows us to break the rules and put a `Sync`
129         // object in global storage.
130         struct UnsafeSyncEntry(Entry);
131         unsafe impl Sync for UnsafeSyncEntry {}
132         static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0));
133 
134         Cursor {
135             ptr: &EMPTY_ENTRY.0,
136             scope: &EMPTY_ENTRY.0,
137             marker: PhantomData,
138         }
139     }
140 
141     /// This create method intelligently exits non-explicitly-entered
142     /// `None`-delimited scopes when the cursor reaches the end of them,
143     /// allowing for them to be treated transparently.
create(mut ptr: *const Entry, scope: *const Entry) -> Self144     unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
145         // NOTE: If we're looking at a `End`, we want to advance the cursor
146         // past it, unless `ptr == scope`, which means that we're at the edge of
147         // our cursor's scope. We should only have `ptr != scope` at the exit
148         // from None-delimited groups entered with `ignore_none`.
149         while let Entry::End(_) = *ptr {
150             if ptr == scope {
151                 break;
152             }
153             ptr = ptr.add(1);
154         }
155 
156         Cursor {
157             ptr,
158             scope,
159             marker: PhantomData,
160         }
161     }
162 
163     /// Get the current entry.
entry(self) -> &'a Entry164     fn entry(self) -> &'a Entry {
165         unsafe { &*self.ptr }
166     }
167 
168     /// Bump the cursor to point at the next token after the current one. This
169     /// is undefined behavior if the cursor is currently looking at an
170     /// `Entry::End`.
171     ///
172     /// If the cursor is looking at an `Entry::Group`, the bumped cursor will
173     /// point at the first token in the group (with the same scope end).
bump_ignore_group(self) -> Cursor<'a>174     unsafe fn bump_ignore_group(self) -> Cursor<'a> {
175         Cursor::create(self.ptr.offset(1), self.scope)
176     }
177 
178     /// While the cursor is looking at a `None`-delimited group, move it to look
179     /// at the first token inside instead. If the group is empty, this will move
180     /// the cursor past the `None`-delimited group.
181     ///
182     /// WARNING: This mutates its argument.
ignore_none(&mut self)183     fn ignore_none(&mut self) {
184         while let Entry::Group(group, _) = self.entry() {
185             if group.delimiter() == Delimiter::None {
186                 unsafe { *self = self.bump_ignore_group() };
187             } else {
188                 break;
189             }
190         }
191     }
192 
193     /// Checks whether the cursor is currently pointing at the end of its valid
194     /// scope.
eof(self) -> bool195     pub fn eof(self) -> bool {
196         // We're at eof if we're at the end of our scope.
197         self.ptr == self.scope
198     }
199 
200     /// If the cursor is pointing at a `Group` with the given delimiter, returns
201     /// a cursor into that group and one pointing to the next `TokenTree`.
group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, Span, Cursor<'a>)>202     pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, Span, Cursor<'a>)> {
203         // If we're not trying to enter a none-delimited group, we want to
204         // ignore them. We have to make sure to _not_ ignore them when we want
205         // to enter them, of course. For obvious reasons.
206         if delim != Delimiter::None {
207             self.ignore_none();
208         }
209 
210         if let Entry::Group(group, end_offset) = self.entry() {
211             if group.delimiter() == delim {
212                 let end_of_group = unsafe { self.ptr.add(*end_offset) };
213                 let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
214                 let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
215                 return Some((inside_of_group, group.span(), after_group));
216             }
217         }
218 
219         None
220     }
221 
222     /// If the cursor is pointing at a `Ident`, returns it along with a cursor
223     /// pointing at the next `TokenTree`.
ident(mut self) -> Option<(Ident, Cursor<'a>)>224     pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> {
225         self.ignore_none();
226         match self.entry() {
227             Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump_ignore_group() })),
228             _ => None,
229         }
230     }
231 
232     /// If the cursor is pointing at a `Punct`, returns it along with a cursor
233     /// pointing at the next `TokenTree`.
punct(mut self) -> Option<(Punct, Cursor<'a>)>234     pub fn punct(mut self) -> Option<(Punct, Cursor<'a>)> {
235         self.ignore_none();
236         match self.entry() {
237             Entry::Punct(punct) if punct.as_char() != '\'' => {
238                 Some((punct.clone(), unsafe { self.bump_ignore_group() }))
239             }
240             _ => None,
241         }
242     }
243 
244     /// If the cursor is pointing at a `Literal`, return it along with a cursor
245     /// pointing at the next `TokenTree`.
literal(mut self) -> Option<(Literal, Cursor<'a>)>246     pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> {
247         self.ignore_none();
248         match self.entry() {
249             Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump_ignore_group() })),
250             _ => None,
251         }
252     }
253 
254     /// If the cursor is pointing at a `Lifetime`, returns it along with a
255     /// cursor pointing at the next `TokenTree`.
lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)>256     pub fn lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)> {
257         self.ignore_none();
258         match self.entry() {
259             Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
260                 let next = unsafe { self.bump_ignore_group() };
261                 let (ident, rest) = next.ident()?;
262                 let lifetime = Lifetime {
263                     apostrophe: punct.span(),
264                     ident,
265                 };
266                 Some((lifetime, rest))
267             }
268             _ => None,
269         }
270     }
271 
272     /// Copies all remaining tokens visible from this cursor into a
273     /// `TokenStream`.
token_stream(self) -> TokenStream274     pub fn token_stream(self) -> TokenStream {
275         let mut tts = Vec::new();
276         let mut cursor = self;
277         while let Some((tt, rest)) = cursor.token_tree() {
278             tts.push(tt);
279             cursor = rest;
280         }
281         tts.into_iter().collect()
282     }
283 
284     /// If the cursor is pointing at a `TokenTree`, returns it along with a
285     /// cursor pointing at the next `TokenTree`.
286     ///
287     /// Returns `None` if the cursor has reached the end of its stream.
288     ///
289     /// This method does not treat `None`-delimited groups as transparent, and
290     /// will return a `Group(None, ..)` if the cursor is looking at one.
token_tree(self) -> Option<(TokenTree, Cursor<'a>)>291     pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> {
292         let (tree, len) = match self.entry() {
293             Entry::Group(group, end_offset) => (group.clone().into(), *end_offset),
294             Entry::Literal(literal) => (literal.clone().into(), 1),
295             Entry::Ident(ident) => (ident.clone().into(), 1),
296             Entry::Punct(punct) => (punct.clone().into(), 1),
297             Entry::End(_) => return None,
298         };
299 
300         let rest = unsafe { Cursor::create(self.ptr.add(len), self.scope) };
301         Some((tree, rest))
302     }
303 
304     /// Returns the `Span` of the current token, or `Span::call_site()` if this
305     /// cursor points to eof.
span(self) -> Span306     pub fn span(self) -> Span {
307         match self.entry() {
308             Entry::Group(group, _) => group.span(),
309             Entry::Literal(literal) => literal.span(),
310             Entry::Ident(ident) => ident.span(),
311             Entry::Punct(punct) => punct.span(),
312             Entry::End(_) => Span::call_site(),
313         }
314     }
315 
316     /// Skip over the next token without cloning it. Returns `None` if this
317     /// cursor points to eof.
318     ///
319     /// This method treats `'lifetimes` as a single token.
skip(self) -> Option<Cursor<'a>>320     pub(crate) fn skip(self) -> Option<Cursor<'a>> {
321         let len = match self.entry() {
322             Entry::End(_) => return None,
323 
324             // Treat lifetimes as a single tt for the purposes of 'skip'.
325             Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
326                 match unsafe { &*self.ptr.add(1) } {
327                     Entry::Ident(_) => 2,
328                     _ => 1,
329                 }
330             }
331 
332             Entry::Group(_, end_offset) => *end_offset,
333             _ => 1,
334         };
335 
336         Some(unsafe { Cursor::create(self.ptr.add(len), self.scope) })
337     }
338 }
339 
340 impl<'a> Copy for Cursor<'a> {}
341 
342 impl<'a> Clone for Cursor<'a> {
clone(&self) -> Self343     fn clone(&self) -> Self {
344         *self
345     }
346 }
347 
348 impl<'a> Eq for Cursor<'a> {}
349 
350 impl<'a> PartialEq for Cursor<'a> {
eq(&self, other: &Self) -> bool351     fn eq(&self, other: &Self) -> bool {
352         self.ptr == other.ptr
353     }
354 }
355 
356 impl<'a> PartialOrd for Cursor<'a> {
partial_cmp(&self, other: &Self) -> Option<Ordering>357     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
358         if same_buffer(*self, *other) {
359             Some(self.ptr.cmp(&other.ptr))
360         } else {
361             None
362         }
363     }
364 }
365 
same_scope(a: Cursor, b: Cursor) -> bool366 pub(crate) fn same_scope(a: Cursor, b: Cursor) -> bool {
367     a.scope == b.scope
368 }
369 
same_buffer(a: Cursor, b: Cursor) -> bool370 pub(crate) fn same_buffer(a: Cursor, b: Cursor) -> bool {
371     unsafe {
372         match (&*a.scope, &*b.scope) {
373             (Entry::End(a_offset), Entry::End(b_offset)) => {
374                 a.scope.offset(*a_offset) == b.scope.offset(*b_offset)
375             }
376             _ => unreachable!(),
377         }
378     }
379 }
380 
381 #[cfg(any(feature = "full", feature = "derive"))]
cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering382 pub(crate) fn cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering {
383     a.ptr.cmp(&b.ptr)
384 }
385 
open_span_of_group(cursor: Cursor) -> Span386 pub(crate) fn open_span_of_group(cursor: Cursor) -> Span {
387     match cursor.entry() {
388         Entry::Group(group, _) => group.span_open(),
389         _ => cursor.span(),
390     }
391 }
392 
close_span_of_group(cursor: Cursor) -> Span393 pub(crate) fn close_span_of_group(cursor: Cursor) -> Span {
394     match cursor.entry() {
395         Entry::Group(group, _) => group.span_close(),
396         _ => cursor.span(),
397     }
398 }
399