• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use crate::fallback::{
2     is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream,
3     TokenStreamBuilder,
4 };
5 use crate::{Delimiter, Punct, Spacing, TokenTree};
6 use core::char;
7 use core::str::{Bytes, CharIndices, Chars};
8 
/// A position within the source text being lexed.
///
/// The type is `Copy`, so a parser can keep the cursor it started from and
/// fall back to it when a speculative parse is rejected.
#[derive(Clone, Copy, PartialEq, Eq)]
pub(crate) struct Cursor<'a> {
    /// The part of the input that has not been consumed yet.
    pub rest: &'a str,
    /// Position of `rest`, counted in characters rather than bytes;
    /// `advance` adds the number of characters it skips over.
    #[cfg(span_locations)]
    pub off: u32,
}
15 
16 impl<'a> Cursor<'a> {
advance(&self, bytes: usize) -> Cursor<'a>17     pub fn advance(&self, bytes: usize) -> Cursor<'a> {
18         let (_front, rest) = self.rest.split_at(bytes);
19         Cursor {
20             rest,
21             #[cfg(span_locations)]
22             off: self.off + _front.chars().count() as u32,
23         }
24     }
25 
starts_with(&self, s: &str) -> bool26     pub fn starts_with(&self, s: &str) -> bool {
27         self.rest.starts_with(s)
28     }
29 
is_empty(&self) -> bool30     fn is_empty(&self) -> bool {
31         self.rest.is_empty()
32     }
33 
len(&self) -> usize34     fn len(&self) -> usize {
35         self.rest.len()
36     }
37 
as_bytes(&self) -> &'a [u8]38     fn as_bytes(&self) -> &'a [u8] {
39         self.rest.as_bytes()
40     }
41 
bytes(&self) -> Bytes<'a>42     fn bytes(&self) -> Bytes<'a> {
43         self.rest.bytes()
44     }
45 
chars(&self) -> Chars<'a>46     fn chars(&self) -> Chars<'a> {
47         self.rest.chars()
48     }
49 
char_indices(&self) -> CharIndices<'a>50     fn char_indices(&self) -> CharIndices<'a> {
51         self.rest.char_indices()
52     }
53 
parse(&self, tag: &str) -> Result<Cursor<'a>, Reject>54     fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
55         if self.starts_with(tag) {
56             Ok(self.advance(tag.len()))
57         } else {
58             Err(Reject)
59         }
60     }
61 }
62 
63 pub(crate) struct Reject;
64 type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
65 
skip_whitespace(input: Cursor) -> Cursor66 fn skip_whitespace(input: Cursor) -> Cursor {
67     let mut s = input;
68 
69     while !s.is_empty() {
70         let byte = s.as_bytes()[0];
71         if byte == b'/' {
72             if s.starts_with("//")
73                 && (!s.starts_with("///") || s.starts_with("////"))
74                 && !s.starts_with("//!")
75             {
76                 let (cursor, _) = take_until_newline_or_eof(s);
77                 s = cursor;
78                 continue;
79             } else if s.starts_with("/**/") {
80                 s = s.advance(4);
81                 continue;
82             } else if s.starts_with("/*")
83                 && (!s.starts_with("/**") || s.starts_with("/***"))
84                 && !s.starts_with("/*!")
85             {
86                 match block_comment(s) {
87                     Ok((rest, _)) => {
88                         s = rest;
89                         continue;
90                     }
91                     Err(Reject) => return s,
92                 }
93             }
94         }
95         match byte {
96             b' ' | 0x09..=0x0d => {
97                 s = s.advance(1);
98                 continue;
99             }
100             b if b <= 0x7f => {}
101             _ => {
102                 let ch = s.chars().next().unwrap();
103                 if is_whitespace(ch) {
104                     s = s.advance(ch.len_utf8());
105                     continue;
106                 }
107             }
108         }
109         return s;
110     }
111     s
112 }
113 
block_comment(input: Cursor) -> PResult<&str>114 fn block_comment(input: Cursor) -> PResult<&str> {
115     if !input.starts_with("/*") {
116         return Err(Reject);
117     }
118 
119     let mut depth = 0usize;
120     let bytes = input.as_bytes();
121     let mut i = 0usize;
122     let upper = bytes.len() - 1;
123 
124     while i < upper {
125         if bytes[i] == b'/' && bytes[i + 1] == b'*' {
126             depth += 1;
127             i += 1; // eat '*'
128         } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
129             depth -= 1;
130             if depth == 0 {
131                 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
132             }
133             i += 1; // eat '/'
134         }
135         i += 1;
136     }
137 
138     Err(Reject)
139 }
140 
/// Reports whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        _ => ch.is_whitespace(),
    }
}
145 
word_break(input: Cursor) -> Result<Cursor, Reject>146 fn word_break(input: Cursor) -> Result<Cursor, Reject> {
147     match input.chars().next() {
148         Some(ch) if is_ident_continue(ch) => Err(Reject),
149         Some(_) | None => Ok(input),
150     }
151 }
152 
token_stream(mut input: Cursor) -> Result<TokenStream, LexError>153 pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
154     let mut trees = TokenStreamBuilder::new();
155     let mut stack = Vec::new();
156 
157     loop {
158         input = skip_whitespace(input);
159 
160         if let Ok((rest, ())) = doc_comment(input, &mut trees) {
161             input = rest;
162             continue;
163         }
164 
165         #[cfg(span_locations)]
166         let lo = input.off;
167 
168         let first = match input.bytes().next() {
169             Some(first) => first,
170             None => match stack.last() {
171                 None => return Ok(trees.build()),
172                 #[cfg(span_locations)]
173                 Some((lo, _frame)) => {
174                     return Err(LexError {
175                         span: Span { lo: *lo, hi: *lo },
176                     })
177                 }
178                 #[cfg(not(span_locations))]
179                 Some(_frame) => return Err(LexError { span: Span {} }),
180             },
181         };
182 
183         if let Some(open_delimiter) = match first {
184             b'(' => Some(Delimiter::Parenthesis),
185             b'[' => Some(Delimiter::Bracket),
186             b'{' => Some(Delimiter::Brace),
187             _ => None,
188         } {
189             input = input.advance(1);
190             let frame = (open_delimiter, trees);
191             #[cfg(span_locations)]
192             let frame = (lo, frame);
193             stack.push(frame);
194             trees = TokenStreamBuilder::new();
195         } else if let Some(close_delimiter) = match first {
196             b')' => Some(Delimiter::Parenthesis),
197             b']' => Some(Delimiter::Bracket),
198             b'}' => Some(Delimiter::Brace),
199             _ => None,
200         } {
201             let frame = match stack.pop() {
202                 Some(frame) => frame,
203                 None => return Err(lex_error(input)),
204             };
205             #[cfg(span_locations)]
206             let (lo, frame) = frame;
207             let (open_delimiter, outer) = frame;
208             if open_delimiter != close_delimiter {
209                 return Err(lex_error(input));
210             }
211             input = input.advance(1);
212             let mut g = Group::new(open_delimiter, trees.build());
213             g.set_span(Span {
214                 #[cfg(span_locations)]
215                 lo,
216                 #[cfg(span_locations)]
217                 hi: input.off,
218             });
219             trees = outer;
220             trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
221         } else {
222             let (rest, mut tt) = match leaf_token(input) {
223                 Ok((rest, tt)) => (rest, tt),
224                 Err(Reject) => return Err(lex_error(input)),
225             };
226             tt.set_span(crate::Span::_new_fallback(Span {
227                 #[cfg(span_locations)]
228                 lo,
229                 #[cfg(span_locations)]
230                 hi: rest.off,
231             }));
232             trees.push_token_from_parser(tt);
233             input = rest;
234         }
235     }
236 }
237 
lex_error(cursor: Cursor) -> LexError238 fn lex_error(cursor: Cursor) -> LexError {
239     #[cfg(not(span_locations))]
240     let _ = cursor;
241     LexError {
242         span: Span {
243             #[cfg(span_locations)]
244             lo: cursor.off,
245             #[cfg(span_locations)]
246             hi: cursor.off,
247         },
248     }
249 }
250 
leaf_token(input: Cursor) -> PResult<TokenTree>251 fn leaf_token(input: Cursor) -> PResult<TokenTree> {
252     if let Ok((input, l)) = literal(input) {
253         // must be parsed before ident
254         Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
255     } else if let Ok((input, p)) = punct(input) {
256         Ok((input, TokenTree::Punct(p)))
257     } else if let Ok((input, i)) = ident(input) {
258         Ok((input, TokenTree::Ident(i)))
259     } else {
260         Err(Reject)
261     }
262 }
263 
ident(input: Cursor) -> PResult<crate::Ident>264 fn ident(input: Cursor) -> PResult<crate::Ident> {
265     if ["r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#"]
266         .iter()
267         .any(|prefix| input.starts_with(prefix))
268     {
269         Err(Reject)
270     } else {
271         ident_any(input)
272     }
273 }
274 
ident_any(input: Cursor) -> PResult<crate::Ident>275 fn ident_any(input: Cursor) -> PResult<crate::Ident> {
276     let raw = input.starts_with("r#");
277     let rest = input.advance((raw as usize) << 1);
278 
279     let (rest, sym) = ident_not_raw(rest)?;
280 
281     if !raw {
282         let ident = crate::Ident::new(sym, crate::Span::call_site());
283         return Ok((rest, ident));
284     }
285 
286     match sym {
287         "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
288         _ => {}
289     }
290 
291     let ident = crate::Ident::_new_raw(sym, crate::Span::call_site());
292     Ok((rest, ident))
293 }
294 
ident_not_raw(input: Cursor) -> PResult<&str>295 fn ident_not_raw(input: Cursor) -> PResult<&str> {
296     let mut chars = input.char_indices();
297 
298     match chars.next() {
299         Some((_, ch)) if is_ident_start(ch) => {}
300         _ => return Err(Reject),
301     }
302 
303     let mut end = input.len();
304     for (i, ch) in chars {
305         if !is_ident_continue(ch) {
306             end = i;
307             break;
308         }
309     }
310 
311     Ok((input.advance(end), &input.rest[..end]))
312 }
313 
literal(input: Cursor) -> PResult<Literal>314 pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
315     let rest = literal_nocapture(input)?;
316     let end = input.len() - rest.len();
317     Ok((rest, Literal::_new(input.rest[..end].to_string())))
318 }
319 
literal_nocapture(input: Cursor) -> Result<Cursor, Reject>320 fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
321     if let Ok(ok) = string(input) {
322         Ok(ok)
323     } else if let Ok(ok) = byte_string(input) {
324         Ok(ok)
325     } else if let Ok(ok) = byte(input) {
326         Ok(ok)
327     } else if let Ok(ok) = character(input) {
328         Ok(ok)
329     } else if let Ok(ok) = float(input) {
330         Ok(ok)
331     } else if let Ok(ok) = int(input) {
332         Ok(ok)
333     } else {
334         Err(Reject)
335     }
336 }
337 
literal_suffix(input: Cursor) -> Cursor338 fn literal_suffix(input: Cursor) -> Cursor {
339     match ident_not_raw(input) {
340         Ok((input, _)) => input,
341         Err(Reject) => input,
342     }
343 }
344 
string(input: Cursor) -> Result<Cursor, Reject>345 fn string(input: Cursor) -> Result<Cursor, Reject> {
346     if let Ok(input) = input.parse("\"") {
347         cooked_string(input)
348     } else if let Ok(input) = input.parse("r") {
349         raw_string(input)
350     } else {
351         Err(Reject)
352     }
353 }
354 
cooked_string(input: Cursor) -> Result<Cursor, Reject>355 fn cooked_string(input: Cursor) -> Result<Cursor, Reject> {
356     let mut chars = input.char_indices().peekable();
357 
358     while let Some((i, ch)) = chars.next() {
359         match ch {
360             '"' => {
361                 let input = input.advance(i + 1);
362                 return Ok(literal_suffix(input));
363             }
364             '\r' => match chars.next() {
365                 Some((_, '\n')) => {}
366                 _ => break,
367             },
368             '\\' => match chars.next() {
369                 Some((_, 'x')) => {
370                     if !backslash_x_char(&mut chars) {
371                         break;
372                     }
373                 }
374                 Some((_, 'n')) | Some((_, 'r')) | Some((_, 't')) | Some((_, '\\'))
375                 | Some((_, '\'')) | Some((_, '"')) | Some((_, '0')) => {}
376                 Some((_, 'u')) => {
377                     if !backslash_u(&mut chars) {
378                         break;
379                     }
380                 }
381                 Some((_, ch @ '\n')) | Some((_, ch @ '\r')) => {
382                     let mut last = ch;
383                     loop {
384                         if last == '\r' && chars.next().map_or(true, |(_, ch)| ch != '\n') {
385                             return Err(Reject);
386                         }
387                         match chars.peek() {
388                             Some((_, ch)) if ch.is_whitespace() => {
389                                 last = *ch;
390                                 chars.next();
391                             }
392                             _ => break,
393                         }
394                     }
395                 }
396                 _ => break,
397             },
398             _ch => {}
399         }
400     }
401     Err(Reject)
402 }
403 
byte_string(input: Cursor) -> Result<Cursor, Reject>404 fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
405     if let Ok(input) = input.parse("b\"") {
406         cooked_byte_string(input)
407     } else if let Ok(input) = input.parse("br") {
408         raw_string(input)
409     } else {
410         Err(Reject)
411     }
412 }
413 
cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject>414 fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
415     let mut bytes = input.bytes().enumerate();
416     while let Some((offset, b)) = bytes.next() {
417         match b {
418             b'"' => {
419                 let input = input.advance(offset + 1);
420                 return Ok(literal_suffix(input));
421             }
422             b'\r' => match bytes.next() {
423                 Some((_, b'\n')) => {}
424                 _ => break,
425             },
426             b'\\' => match bytes.next() {
427                 Some((_, b'x')) => {
428                     if !backslash_x_byte(&mut bytes) {
429                         break;
430                     }
431                 }
432                 Some((_, b'n')) | Some((_, b'r')) | Some((_, b't')) | Some((_, b'\\'))
433                 | Some((_, b'0')) | Some((_, b'\'')) | Some((_, b'"')) => {}
434                 Some((newline, b @ b'\n')) | Some((newline, b @ b'\r')) => {
435                     let mut last = b as char;
436                     let rest = input.advance(newline + 1);
437                     let mut chars = rest.char_indices();
438                     loop {
439                         if last == '\r' && chars.next().map_or(true, |(_, ch)| ch != '\n') {
440                             return Err(Reject);
441                         }
442                         match chars.next() {
443                             Some((_, ch)) if ch.is_whitespace() => last = ch,
444                             Some((offset, _)) => {
445                                 input = rest.advance(offset);
446                                 bytes = input.bytes().enumerate();
447                                 break;
448                             }
449                             None => return Err(Reject),
450                         }
451                     }
452                 }
453                 _ => break,
454             },
455             b if b < 0x80 => {}
456             _ => break,
457         }
458     }
459     Err(Reject)
460 }
461 
raw_string(input: Cursor) -> Result<Cursor, Reject>462 fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
463     let mut chars = input.char_indices();
464     let mut n = 0;
465     for (i, ch) in &mut chars {
466         match ch {
467             '"' => {
468                 n = i;
469                 break;
470             }
471             '#' => {}
472             _ => return Err(Reject),
473         }
474     }
475     if n > 255 {
476         // https://github.com/rust-lang/rust/pull/95251
477         return Err(Reject);
478     }
479     while let Some((i, ch)) = chars.next() {
480         match ch {
481             '"' if input.rest[i + 1..].starts_with(&input.rest[..n]) => {
482                 let rest = input.advance(i + 1 + n);
483                 return Ok(literal_suffix(rest));
484             }
485             '\r' => match chars.next() {
486                 Some((_, '\n')) => {}
487                 _ => break,
488             },
489             _ => {}
490         }
491     }
492     Err(Reject)
493 }
494 
byte(input: Cursor) -> Result<Cursor, Reject>495 fn byte(input: Cursor) -> Result<Cursor, Reject> {
496     let input = input.parse("b'")?;
497     let mut bytes = input.bytes().enumerate();
498     let ok = match bytes.next().map(|(_, b)| b) {
499         Some(b'\\') => match bytes.next().map(|(_, b)| b) {
500             Some(b'x') => backslash_x_byte(&mut bytes),
501             Some(b'n') | Some(b'r') | Some(b't') | Some(b'\\') | Some(b'0') | Some(b'\'')
502             | Some(b'"') => true,
503             _ => false,
504         },
505         b => b.is_some(),
506     };
507     if !ok {
508         return Err(Reject);
509     }
510     let (offset, _) = bytes.next().ok_or(Reject)?;
511     if !input.chars().as_str().is_char_boundary(offset) {
512         return Err(Reject);
513     }
514     let input = input.advance(offset).parse("'")?;
515     Ok(literal_suffix(input))
516 }
517 
character(input: Cursor) -> Result<Cursor, Reject>518 fn character(input: Cursor) -> Result<Cursor, Reject> {
519     let input = input.parse("'")?;
520     let mut chars = input.char_indices();
521     let ok = match chars.next().map(|(_, ch)| ch) {
522         Some('\\') => match chars.next().map(|(_, ch)| ch) {
523             Some('x') => backslash_x_char(&mut chars),
524             Some('u') => backslash_u(&mut chars),
525             Some('n') | Some('r') | Some('t') | Some('\\') | Some('0') | Some('\'') | Some('"') => {
526                 true
527             }
528             _ => false,
529         },
530         ch => ch.is_some(),
531     };
532     if !ok {
533         return Err(Reject);
534     }
535     let (idx, _) = chars.next().ok_or(Reject)?;
536     let input = input.advance(idx).parse("'")?;
537     Ok(literal_suffix(input))
538 }
539 
540 macro_rules! next_ch {
541     ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
542         match $chars.next() {
543             Some((_, ch)) => match ch {
544                 $pat $(| $rest)* => ch,
545                 _ => return false,
546             },
547             None => return false,
548         }
549     };
550 }
551 
backslash_x_char<I>(chars: &mut I) -> bool where I: Iterator<Item = (usize, char)>,552 fn backslash_x_char<I>(chars: &mut I) -> bool
553 where
554     I: Iterator<Item = (usize, char)>,
555 {
556     next_ch!(chars @ '0'..='7');
557     next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
558     true
559 }
560 
backslash_x_byte<I>(chars: &mut I) -> bool where I: Iterator<Item = (usize, u8)>,561 fn backslash_x_byte<I>(chars: &mut I) -> bool
562 where
563     I: Iterator<Item = (usize, u8)>,
564 {
565     next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
566     next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
567     true
568 }
569 
backslash_u<I>(chars: &mut I) -> bool where I: Iterator<Item = (usize, char)>,570 fn backslash_u<I>(chars: &mut I) -> bool
571 where
572     I: Iterator<Item = (usize, char)>,
573 {
574     next_ch!(chars @ '{');
575     let mut value = 0;
576     let mut len = 0;
577     for (_, ch) in chars {
578         let digit = match ch {
579             '0'..='9' => ch as u8 - b'0',
580             'a'..='f' => 10 + ch as u8 - b'a',
581             'A'..='F' => 10 + ch as u8 - b'A',
582             '_' if len > 0 => continue,
583             '}' if len > 0 => return char::from_u32(value).is_some(),
584             _ => return false,
585         };
586         if len == 6 {
587             return false;
588         }
589         value *= 0x10;
590         value += u32::from(digit);
591         len += 1;
592     }
593     false
594 }
595 
float(input: Cursor) -> Result<Cursor, Reject>596 fn float(input: Cursor) -> Result<Cursor, Reject> {
597     let mut rest = float_digits(input)?;
598     if let Some(ch) = rest.chars().next() {
599         if is_ident_start(ch) {
600             rest = ident_not_raw(rest)?.0;
601         }
602     }
603     word_break(rest)
604 }
605 
float_digits(input: Cursor) -> Result<Cursor, Reject>606 fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
607     let mut chars = input.chars().peekable();
608     match chars.next() {
609         Some(ch) if ch >= '0' && ch <= '9' => {}
610         _ => return Err(Reject),
611     }
612 
613     let mut len = 1;
614     let mut has_dot = false;
615     let mut has_exp = false;
616     while let Some(&ch) = chars.peek() {
617         match ch {
618             '0'..='9' | '_' => {
619                 chars.next();
620                 len += 1;
621             }
622             '.' => {
623                 if has_dot {
624                     break;
625                 }
626                 chars.next();
627                 if chars
628                     .peek()
629                     .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
630                 {
631                     return Err(Reject);
632                 }
633                 len += 1;
634                 has_dot = true;
635             }
636             'e' | 'E' => {
637                 chars.next();
638                 len += 1;
639                 has_exp = true;
640                 break;
641             }
642             _ => break,
643         }
644     }
645 
646     if !(has_dot || has_exp) {
647         return Err(Reject);
648     }
649 
650     if has_exp {
651         let token_before_exp = if has_dot {
652             Ok(input.advance(len - 1))
653         } else {
654             Err(Reject)
655         };
656         let mut has_sign = false;
657         let mut has_exp_value = false;
658         while let Some(&ch) = chars.peek() {
659             match ch {
660                 '+' | '-' => {
661                     if has_exp_value {
662                         break;
663                     }
664                     if has_sign {
665                         return token_before_exp;
666                     }
667                     chars.next();
668                     len += 1;
669                     has_sign = true;
670                 }
671                 '0'..='9' => {
672                     chars.next();
673                     len += 1;
674                     has_exp_value = true;
675                 }
676                 '_' => {
677                     chars.next();
678                     len += 1;
679                 }
680                 _ => break,
681             }
682         }
683         if !has_exp_value {
684             return token_before_exp;
685         }
686     }
687 
688     Ok(input.advance(len))
689 }
690 
int(input: Cursor) -> Result<Cursor, Reject>691 fn int(input: Cursor) -> Result<Cursor, Reject> {
692     let mut rest = digits(input)?;
693     if let Some(ch) = rest.chars().next() {
694         if is_ident_start(ch) {
695             rest = ident_not_raw(rest)?.0;
696         }
697     }
698     word_break(rest)
699 }
700 
digits(mut input: Cursor) -> Result<Cursor, Reject>701 fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
702     let base = if input.starts_with("0x") {
703         input = input.advance(2);
704         16
705     } else if input.starts_with("0o") {
706         input = input.advance(2);
707         8
708     } else if input.starts_with("0b") {
709         input = input.advance(2);
710         2
711     } else {
712         10
713     };
714 
715     let mut len = 0;
716     let mut empty = true;
717     for b in input.bytes() {
718         match b {
719             b'0'..=b'9' => {
720                 let digit = (b - b'0') as u64;
721                 if digit >= base {
722                     return Err(Reject);
723                 }
724             }
725             b'a'..=b'f' => {
726                 let digit = 10 + (b - b'a') as u64;
727                 if digit >= base {
728                     break;
729                 }
730             }
731             b'A'..=b'F' => {
732                 let digit = 10 + (b - b'A') as u64;
733                 if digit >= base {
734                     break;
735                 }
736             }
737             b'_' => {
738                 if empty && base == 10 {
739                     return Err(Reject);
740                 }
741                 len += 1;
742                 continue;
743             }
744             _ => break,
745         };
746         len += 1;
747         empty = false;
748     }
749     if empty {
750         Err(Reject)
751     } else {
752         Ok(input.advance(len))
753     }
754 }
755 
punct(input: Cursor) -> PResult<Punct>756 fn punct(input: Cursor) -> PResult<Punct> {
757     let (rest, ch) = punct_char(input)?;
758     if ch == '\'' {
759         if ident_any(rest)?.0.starts_with("'") {
760             Err(Reject)
761         } else {
762             Ok((rest, Punct::new('\'', Spacing::Joint)))
763         }
764     } else {
765         let kind = match punct_char(rest) {
766             Ok(_) => Spacing::Joint,
767             Err(Reject) => Spacing::Alone,
768         };
769         Ok((rest, Punct::new(ch, kind)))
770     }
771 }
772 
punct_char(input: Cursor) -> PResult<char>773 fn punct_char(input: Cursor) -> PResult<char> {
774     if input.starts_with("//") || input.starts_with("/*") {
775         // Do not accept `/` of a comment as a punct.
776         return Err(Reject);
777     }
778 
779     let mut chars = input.chars();
780     let first = match chars.next() {
781         Some(ch) => ch,
782         None => {
783             return Err(Reject);
784         }
785     };
786     let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
787     if recognized.contains(first) {
788         Ok((input.advance(first.len_utf8()), first))
789     } else {
790         Err(Reject)
791     }
792 }
793 
doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()>794 fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
795     #[cfg(span_locations)]
796     let lo = input.off;
797     let (rest, (comment, inner)) = doc_comment_contents(input)?;
798     let span = crate::Span::_new_fallback(Span {
799         #[cfg(span_locations)]
800         lo,
801         #[cfg(span_locations)]
802         hi: rest.off,
803     });
804 
805     let mut scan_for_bare_cr = comment;
806     while let Some(cr) = scan_for_bare_cr.find('\r') {
807         let rest = &scan_for_bare_cr[cr + 1..];
808         if !rest.starts_with('\n') {
809             return Err(Reject);
810         }
811         scan_for_bare_cr = rest;
812     }
813 
814     let mut pound = Punct::new('#', Spacing::Alone);
815     pound.set_span(span);
816     trees.push_token_from_parser(TokenTree::Punct(pound));
817 
818     if inner {
819         let mut bang = Punct::new('!', Spacing::Alone);
820         bang.set_span(span);
821         trees.push_token_from_parser(TokenTree::Punct(bang));
822     }
823 
824     let doc_ident = crate::Ident::new("doc", span);
825     let mut equal = Punct::new('=', Spacing::Alone);
826     equal.set_span(span);
827     let mut literal = crate::Literal::string(comment);
828     literal.set_span(span);
829     let mut bracketed = TokenStreamBuilder::with_capacity(3);
830     bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
831     bracketed.push_token_from_parser(TokenTree::Punct(equal));
832     bracketed.push_token_from_parser(TokenTree::Literal(literal));
833     let group = Group::new(Delimiter::Bracket, bracketed.build());
834     let mut group = crate::Group::_new_fallback(group);
835     group.set_span(span);
836     trees.push_token_from_parser(TokenTree::Group(group));
837 
838     Ok((rest, ()))
839 }
840 
doc_comment_contents(input: Cursor) -> PResult<(&str, bool)>841 fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
842     if input.starts_with("//!") {
843         let input = input.advance(3);
844         let (input, s) = take_until_newline_or_eof(input);
845         Ok((input, (s, true)))
846     } else if input.starts_with("/*!") {
847         let (input, s) = block_comment(input)?;
848         Ok((input, (&s[3..s.len() - 2], true)))
849     } else if input.starts_with("///") {
850         let input = input.advance(3);
851         if input.starts_with("/") {
852             return Err(Reject);
853         }
854         let (input, s) = take_until_newline_or_eof(input);
855         Ok((input, (s, false)))
856     } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
857         let (input, s) = block_comment(input)?;
858         Ok((input, (&s[3..s.len() - 2], false)))
859     } else {
860         Err(Reject)
861     }
862 }
863 
take_until_newline_or_eof(input: Cursor) -> (Cursor, &str)864 fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
865     let chars = input.char_indices();
866 
867     for (i, ch) in chars {
868         if ch == '\n' {
869             return (input.advance(i), &input.rest[..i]);
870         } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
871             return (input.advance(i + 1), &input.rest[..i]);
872         }
873     }
874 
875     (input.advance(input.len()), input.rest)
876 }
877