• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use crate::fallback::{
2     self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream,
3     TokenStreamBuilder,
4 };
5 use crate::{Delimiter, Punct, Spacing, TokenTree};
6 use core::char;
7 use core::str::{Bytes, CharIndices, Chars};
8 
9 #[derive(Copy, Clone, Eq, PartialEq)]
10 pub(crate) struct Cursor<'a> {
11     pub(crate) rest: &'a str,
12     #[cfg(span_locations)]
13     pub(crate) off: u32,
14 }
15 
16 impl<'a> Cursor<'a> {
advance(&self, bytes: usize) -> Cursor<'a>17     pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> {
18         let (_front, rest) = self.rest.split_at(bytes);
19         Cursor {
20             rest,
21             #[cfg(span_locations)]
22             off: self.off + _front.chars().count() as u32,
23         }
24     }
25 
starts_with(&self, s: &str) -> bool26     pub(crate) fn starts_with(&self, s: &str) -> bool {
27         self.rest.starts_with(s)
28     }
29 
starts_with_char(&self, ch: char) -> bool30     pub(crate) fn starts_with_char(&self, ch: char) -> bool {
31         self.rest.starts_with(ch)
32     }
33 
starts_with_fn<Pattern>(&self, f: Pattern) -> bool where Pattern: FnMut(char) -> bool,34     pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
35     where
36         Pattern: FnMut(char) -> bool,
37     {
38         self.rest.starts_with(f)
39     }
40 
is_empty(&self) -> bool41     pub(crate) fn is_empty(&self) -> bool {
42         self.rest.is_empty()
43     }
44 
len(&self) -> usize45     fn len(&self) -> usize {
46         self.rest.len()
47     }
48 
as_bytes(&self) -> &'a [u8]49     fn as_bytes(&self) -> &'a [u8] {
50         self.rest.as_bytes()
51     }
52 
bytes(&self) -> Bytes<'a>53     fn bytes(&self) -> Bytes<'a> {
54         self.rest.bytes()
55     }
56 
chars(&self) -> Chars<'a>57     fn chars(&self) -> Chars<'a> {
58         self.rest.chars()
59     }
60 
char_indices(&self) -> CharIndices<'a>61     fn char_indices(&self) -> CharIndices<'a> {
62         self.rest.char_indices()
63     }
64 
parse(&self, tag: &str) -> Result<Cursor<'a>, Reject>65     fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
66         if self.starts_with(tag) {
67             Ok(self.advance(tag.len()))
68         } else {
69             Err(Reject)
70         }
71     }
72 }
73 
/// Marker error returned by the parsers in this file when the input does not
/// match what they expect.
pub(crate) struct Reject;
/// Result of a parser: on success, a cursor paired with the parsed output `O`;
/// on failure, `Reject`.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
76 
skip_whitespace(input: Cursor) -> Cursor77 fn skip_whitespace(input: Cursor) -> Cursor {
78     let mut s = input;
79 
80     while !s.is_empty() {
81         let byte = s.as_bytes()[0];
82         if byte == b'/' {
83             if s.starts_with("//")
84                 && (!s.starts_with("///") || s.starts_with("////"))
85                 && !s.starts_with("//!")
86             {
87                 let (cursor, _) = take_until_newline_or_eof(s);
88                 s = cursor;
89                 continue;
90             } else if s.starts_with("/**/") {
91                 s = s.advance(4);
92                 continue;
93             } else if s.starts_with("/*")
94                 && (!s.starts_with("/**") || s.starts_with("/***"))
95                 && !s.starts_with("/*!")
96             {
97                 match block_comment(s) {
98                     Ok((rest, _)) => {
99                         s = rest;
100                         continue;
101                     }
102                     Err(Reject) => return s,
103                 }
104             }
105         }
106         match byte {
107             b' ' | 0x09..=0x0d => {
108                 s = s.advance(1);
109                 continue;
110             }
111             b if b.is_ascii() => {}
112             _ => {
113                 let ch = s.chars().next().unwrap();
114                 if is_whitespace(ch) {
115                     s = s.advance(ch.len_utf8());
116                     continue;
117                 }
118             }
119         }
120         return s;
121     }
122     s
123 }
124 
block_comment(input: Cursor) -> PResult<&str>125 fn block_comment(input: Cursor) -> PResult<&str> {
126     if !input.starts_with("/*") {
127         return Err(Reject);
128     }
129 
130     let mut depth = 0usize;
131     let bytes = input.as_bytes();
132     let mut i = 0usize;
133     let upper = bytes.len() - 1;
134 
135     while i < upper {
136         if bytes[i] == b'/' && bytes[i + 1] == b'*' {
137             depth += 1;
138             i += 1; // eat '*'
139         } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
140             depth -= 1;
141             if depth == 0 {
142                 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
143             }
144             i += 1; // eat '/'
145         }
146         i += 1;
147     }
148 
149     Err(Reject)
150 }
151 
/// Whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        _ => ch.is_whitespace(),
    }
}
156 
word_break(input: Cursor) -> Result<Cursor, Reject>157 fn word_break(input: Cursor) -> Result<Cursor, Reject> {
158     match input.chars().next() {
159         Some(ch) if is_ident_continue(ch) => Err(Reject),
160         Some(_) | None => Ok(input),
161     }
162 }
163 
// Rustc's representation of a macro expansion error in expression position or
// type position.
//
// `token_stream` does not treat the leading `(` of this text as an opening
// delimiter, and `leaf_token` lexes the whole text as a single literal.
const ERROR: &str = "(/*ERROR*/)";
167 
token_stream(mut input: Cursor) -> Result<TokenStream, LexError>168 pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
169     let mut trees = TokenStreamBuilder::new();
170     let mut stack = Vec::new();
171 
172     loop {
173         input = skip_whitespace(input);
174 
175         if let Ok((rest, ())) = doc_comment(input, &mut trees) {
176             input = rest;
177             continue;
178         }
179 
180         #[cfg(span_locations)]
181         let lo = input.off;
182 
183         let first = match input.bytes().next() {
184             Some(first) => first,
185             None => match stack.last() {
186                 None => return Ok(trees.build()),
187                 #[cfg(span_locations)]
188                 Some((lo, _frame)) => {
189                     return Err(LexError {
190                         span: Span { lo: *lo, hi: *lo },
191                     })
192                 }
193                 #[cfg(not(span_locations))]
194                 Some(_frame) => return Err(LexError { span: Span {} }),
195             },
196         };
197 
198         if let Some(open_delimiter) = match first {
199             b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
200             b'[' => Some(Delimiter::Bracket),
201             b'{' => Some(Delimiter::Brace),
202             _ => None,
203         } {
204             input = input.advance(1);
205             let frame = (open_delimiter, trees);
206             #[cfg(span_locations)]
207             let frame = (lo, frame);
208             stack.push(frame);
209             trees = TokenStreamBuilder::new();
210         } else if let Some(close_delimiter) = match first {
211             b')' => Some(Delimiter::Parenthesis),
212             b']' => Some(Delimiter::Bracket),
213             b'}' => Some(Delimiter::Brace),
214             _ => None,
215         } {
216             let frame = match stack.pop() {
217                 Some(frame) => frame,
218                 None => return Err(lex_error(input)),
219             };
220             #[cfg(span_locations)]
221             let (lo, frame) = frame;
222             let (open_delimiter, outer) = frame;
223             if open_delimiter != close_delimiter {
224                 return Err(lex_error(input));
225             }
226             input = input.advance(1);
227             let mut g = Group::new(open_delimiter, trees.build());
228             g.set_span(Span {
229                 #[cfg(span_locations)]
230                 lo,
231                 #[cfg(span_locations)]
232                 hi: input.off,
233             });
234             trees = outer;
235             trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
236         } else {
237             let (rest, mut tt) = match leaf_token(input) {
238                 Ok((rest, tt)) => (rest, tt),
239                 Err(Reject) => return Err(lex_error(input)),
240             };
241             tt.set_span(crate::Span::_new_fallback(Span {
242                 #[cfg(span_locations)]
243                 lo,
244                 #[cfg(span_locations)]
245                 hi: rest.off,
246             }));
247             trees.push_token_from_parser(tt);
248             input = rest;
249         }
250     }
251 }
252 
lex_error(cursor: Cursor) -> LexError253 fn lex_error(cursor: Cursor) -> LexError {
254     #[cfg(not(span_locations))]
255     let _ = cursor;
256     LexError {
257         span: Span {
258             #[cfg(span_locations)]
259             lo: cursor.off,
260             #[cfg(span_locations)]
261             hi: cursor.off,
262         },
263     }
264 }
265 
leaf_token(input: Cursor) -> PResult<TokenTree>266 fn leaf_token(input: Cursor) -> PResult<TokenTree> {
267     if let Ok((input, l)) = literal(input) {
268         // must be parsed before ident
269         Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
270     } else if let Ok((input, p)) = punct(input) {
271         Ok((input, TokenTree::Punct(p)))
272     } else if let Ok((input, i)) = ident(input) {
273         Ok((input, TokenTree::Ident(i)))
274     } else if input.starts_with(ERROR) {
275         let rest = input.advance(ERROR.len());
276         let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
277         Ok((rest, TokenTree::Literal(repr)))
278     } else {
279         Err(Reject)
280     }
281 }
282 
ident(input: Cursor) -> PResult<crate::Ident>283 fn ident(input: Cursor) -> PResult<crate::Ident> {
284     if [
285         "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
286     ]
287     .iter()
288     .any(|prefix| input.starts_with(prefix))
289     {
290         Err(Reject)
291     } else {
292         ident_any(input)
293     }
294 }
295 
ident_any(input: Cursor) -> PResult<crate::Ident>296 fn ident_any(input: Cursor) -> PResult<crate::Ident> {
297     let raw = input.starts_with("r#");
298     let rest = input.advance((raw as usize) << 1);
299 
300     let (rest, sym) = ident_not_raw(rest)?;
301 
302     if !raw {
303         let ident =
304             crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site()));
305         return Ok((rest, ident));
306     }
307 
308     match sym {
309         "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
310         _ => {}
311     }
312 
313     let ident =
314         crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site()));
315     Ok((rest, ident))
316 }
317 
ident_not_raw(input: Cursor) -> PResult<&str>318 fn ident_not_raw(input: Cursor) -> PResult<&str> {
319     let mut chars = input.char_indices();
320 
321     match chars.next() {
322         Some((_, ch)) if is_ident_start(ch) => {}
323         _ => return Err(Reject),
324     }
325 
326     let mut end = input.len();
327     for (i, ch) in chars {
328         if !is_ident_continue(ch) {
329             end = i;
330             break;
331         }
332     }
333 
334     Ok((input.advance(end), &input.rest[..end]))
335 }
336 
literal(input: Cursor) -> PResult<Literal>337 pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
338     let rest = literal_nocapture(input)?;
339     let end = input.len() - rest.len();
340     Ok((rest, Literal::_new(input.rest[..end].to_string())))
341 }
342 
literal_nocapture(input: Cursor) -> Result<Cursor, Reject>343 fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
344     if let Ok(ok) = string(input) {
345         Ok(ok)
346     } else if let Ok(ok) = byte_string(input) {
347         Ok(ok)
348     } else if let Ok(ok) = c_string(input) {
349         Ok(ok)
350     } else if let Ok(ok) = byte(input) {
351         Ok(ok)
352     } else if let Ok(ok) = character(input) {
353         Ok(ok)
354     } else if let Ok(ok) = float(input) {
355         Ok(ok)
356     } else if let Ok(ok) = int(input) {
357         Ok(ok)
358     } else {
359         Err(Reject)
360     }
361 }
362 
literal_suffix(input: Cursor) -> Cursor363 fn literal_suffix(input: Cursor) -> Cursor {
364     match ident_not_raw(input) {
365         Ok((input, _)) => input,
366         Err(Reject) => input,
367     }
368 }
369 
string(input: Cursor) -> Result<Cursor, Reject>370 fn string(input: Cursor) -> Result<Cursor, Reject> {
371     if let Ok(input) = input.parse("\"") {
372         cooked_string(input)
373     } else if let Ok(input) = input.parse("r") {
374         raw_string(input)
375     } else {
376         Err(Reject)
377     }
378 }
379 
cooked_string(mut input: Cursor) -> Result<Cursor, Reject>380 fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
381     let mut chars = input.char_indices();
382 
383     while let Some((i, ch)) = chars.next() {
384         match ch {
385             '"' => {
386                 let input = input.advance(i + 1);
387                 return Ok(literal_suffix(input));
388             }
389             '\r' => match chars.next() {
390                 Some((_, '\n')) => {}
391                 _ => break,
392             },
393             '\\' => match chars.next() {
394                 Some((_, 'x')) => {
395                     backslash_x_char(&mut chars)?;
396                 }
397                 Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
398                 Some((_, 'u')) => {
399                     backslash_u(&mut chars)?;
400                 }
401                 Some((newline, ch @ ('\n' | '\r'))) => {
402                     input = input.advance(newline + 1);
403                     trailing_backslash(&mut input, ch as u8)?;
404                     chars = input.char_indices();
405                 }
406                 _ => break,
407             },
408             _ch => {}
409         }
410     }
411     Err(Reject)
412 }
413 
raw_string(input: Cursor) -> Result<Cursor, Reject>414 fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
415     let (input, delimiter) = delimiter_of_raw_string(input)?;
416     let mut bytes = input.bytes().enumerate();
417     while let Some((i, byte)) = bytes.next() {
418         match byte {
419             b'"' if input.rest[i + 1..].starts_with(delimiter) => {
420                 let rest = input.advance(i + 1 + delimiter.len());
421                 return Ok(literal_suffix(rest));
422             }
423             b'\r' => match bytes.next() {
424                 Some((_, b'\n')) => {}
425                 _ => break,
426             },
427             _ => {}
428         }
429     }
430     Err(Reject)
431 }
432 
byte_string(input: Cursor) -> Result<Cursor, Reject>433 fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
434     if let Ok(input) = input.parse("b\"") {
435         cooked_byte_string(input)
436     } else if let Ok(input) = input.parse("br") {
437         raw_byte_string(input)
438     } else {
439         Err(Reject)
440     }
441 }
442 
cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject>443 fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
444     let mut bytes = input.bytes().enumerate();
445     while let Some((offset, b)) = bytes.next() {
446         match b {
447             b'"' => {
448                 let input = input.advance(offset + 1);
449                 return Ok(literal_suffix(input));
450             }
451             b'\r' => match bytes.next() {
452                 Some((_, b'\n')) => {}
453                 _ => break,
454             },
455             b'\\' => match bytes.next() {
456                 Some((_, b'x')) => {
457                     backslash_x_byte(&mut bytes)?;
458                 }
459                 Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
460                 Some((newline, b @ (b'\n' | b'\r'))) => {
461                     input = input.advance(newline + 1);
462                     trailing_backslash(&mut input, b)?;
463                     bytes = input.bytes().enumerate();
464                 }
465                 _ => break,
466             },
467             b if b.is_ascii() => {}
468             _ => break,
469         }
470     }
471     Err(Reject)
472 }
473 
delimiter_of_raw_string(input: Cursor) -> PResult<&str>474 fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
475     for (i, byte) in input.bytes().enumerate() {
476         match byte {
477             b'"' => {
478                 if i > 255 {
479                     // https://github.com/rust-lang/rust/pull/95251
480                     return Err(Reject);
481                 }
482                 return Ok((input.advance(i + 1), &input.rest[..i]));
483             }
484             b'#' => {}
485             _ => break,
486         }
487     }
488     Err(Reject)
489 }
490 
raw_byte_string(input: Cursor) -> Result<Cursor, Reject>491 fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
492     let (input, delimiter) = delimiter_of_raw_string(input)?;
493     let mut bytes = input.bytes().enumerate();
494     while let Some((i, byte)) = bytes.next() {
495         match byte {
496             b'"' if input.rest[i + 1..].starts_with(delimiter) => {
497                 let rest = input.advance(i + 1 + delimiter.len());
498                 return Ok(literal_suffix(rest));
499             }
500             b'\r' => match bytes.next() {
501                 Some((_, b'\n')) => {}
502                 _ => break,
503             },
504             other => {
505                 if !other.is_ascii() {
506                     break;
507                 }
508             }
509         }
510     }
511     Err(Reject)
512 }
513 
c_string(input: Cursor) -> Result<Cursor, Reject>514 fn c_string(input: Cursor) -> Result<Cursor, Reject> {
515     if let Ok(input) = input.parse("c\"") {
516         cooked_c_string(input)
517     } else if let Ok(input) = input.parse("cr") {
518         raw_c_string(input)
519     } else {
520         Err(Reject)
521     }
522 }
523 
raw_c_string(input: Cursor) -> Result<Cursor, Reject>524 fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
525     let (input, delimiter) = delimiter_of_raw_string(input)?;
526     let mut bytes = input.bytes().enumerate();
527     while let Some((i, byte)) = bytes.next() {
528         match byte {
529             b'"' if input.rest[i + 1..].starts_with(delimiter) => {
530                 let rest = input.advance(i + 1 + delimiter.len());
531                 return Ok(literal_suffix(rest));
532             }
533             b'\r' => match bytes.next() {
534                 Some((_, b'\n')) => {}
535                 _ => break,
536             },
537             b'\0' => break,
538             _ => {}
539         }
540     }
541     Err(Reject)
542 }
543 
cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject>544 fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
545     let mut chars = input.char_indices();
546 
547     while let Some((i, ch)) = chars.next() {
548         match ch {
549             '"' => {
550                 let input = input.advance(i + 1);
551                 return Ok(literal_suffix(input));
552             }
553             '\r' => match chars.next() {
554                 Some((_, '\n')) => {}
555                 _ => break,
556             },
557             '\\' => match chars.next() {
558                 Some((_, 'x')) => {
559                     backslash_x_nonzero(&mut chars)?;
560                 }
561                 Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
562                 Some((_, 'u')) => {
563                     if backslash_u(&mut chars)? == '\0' {
564                         break;
565                     }
566                 }
567                 Some((newline, ch @ ('\n' | '\r'))) => {
568                     input = input.advance(newline + 1);
569                     trailing_backslash(&mut input, ch as u8)?;
570                     chars = input.char_indices();
571                 }
572                 _ => break,
573             },
574             '\0' => break,
575             _ch => {}
576         }
577     }
578     Err(Reject)
579 }
580 
byte(input: Cursor) -> Result<Cursor, Reject>581 fn byte(input: Cursor) -> Result<Cursor, Reject> {
582     let input = input.parse("b'")?;
583     let mut bytes = input.bytes().enumerate();
584     let ok = match bytes.next().map(|(_, b)| b) {
585         Some(b'\\') => match bytes.next().map(|(_, b)| b) {
586             Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
587             Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
588             _ => false,
589         },
590         b => b.is_some(),
591     };
592     if !ok {
593         return Err(Reject);
594     }
595     let (offset, _) = bytes.next().ok_or(Reject)?;
596     if !input.chars().as_str().is_char_boundary(offset) {
597         return Err(Reject);
598     }
599     let input = input.advance(offset).parse("'")?;
600     Ok(literal_suffix(input))
601 }
602 
character(input: Cursor) -> Result<Cursor, Reject>603 fn character(input: Cursor) -> Result<Cursor, Reject> {
604     let input = input.parse("'")?;
605     let mut chars = input.char_indices();
606     let ok = match chars.next().map(|(_, ch)| ch) {
607         Some('\\') => match chars.next().map(|(_, ch)| ch) {
608             Some('x') => backslash_x_char(&mut chars).is_ok(),
609             Some('u') => backslash_u(&mut chars).is_ok(),
610             Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
611             _ => false,
612         },
613         ch => ch.is_some(),
614     };
615     if !ok {
616         return Err(Reject);
617     }
618     let (idx, _) = chars.next().ok_or(Reject)?;
619     let input = input.advance(idx).parse("'")?;
620     Ok(literal_suffix(input))
621 }
622 
// Takes the next `(index, item)` pair from `$chars` and evaluates to the item
// if it matches `$pat`; otherwise returns `Err(Reject)` from the enclosing
// function (also when the iterator is exhausted).
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) if matches!(ch, $pat) => ch,
            _ => return Err(Reject),
        }
    };
}
634 
backslash_x_char<I>(chars: &mut I) -> Result<(), Reject> where I: Iterator<Item = (usize, char)>,635 fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
636 where
637     I: Iterator<Item = (usize, char)>,
638 {
639     next_ch!(chars @ '0'..='7');
640     next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
641     Ok(())
642 }
643 
backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject> where I: Iterator<Item = (usize, u8)>,644 fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
645 where
646     I: Iterator<Item = (usize, u8)>,
647 {
648     next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
649     next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
650     Ok(())
651 }
652 
backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject> where I: Iterator<Item = (usize, char)>,653 fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
654 where
655     I: Iterator<Item = (usize, char)>,
656 {
657     let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
658     let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
659     if first == '0' && second == '0' {
660         Err(Reject)
661     } else {
662         Ok(())
663     }
664 }
665 
backslash_u<I>(chars: &mut I) -> Result<char, Reject> where I: Iterator<Item = (usize, char)>,666 fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
667 where
668     I: Iterator<Item = (usize, char)>,
669 {
670     next_ch!(chars @ '{');
671     let mut value = 0;
672     let mut len = 0;
673     for (_, ch) in chars {
674         let digit = match ch {
675             '0'..='9' => ch as u8 - b'0',
676             'a'..='f' => 10 + ch as u8 - b'a',
677             'A'..='F' => 10 + ch as u8 - b'A',
678             '_' if len > 0 => continue,
679             '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
680             _ => break,
681         };
682         if len == 6 {
683             break;
684         }
685         value *= 0x10;
686         value += u32::from(digit);
687         len += 1;
688     }
689     Err(Reject)
690 }
691 
trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject>692 fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
693     let mut whitespace = input.bytes().enumerate();
694     loop {
695         if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
696             return Err(Reject);
697         }
698         match whitespace.next() {
699             Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
700                 last = b;
701             }
702             Some((offset, _)) => {
703                 *input = input.advance(offset);
704                 return Ok(());
705             }
706             None => return Err(Reject),
707         }
708     }
709 }
710 
float(input: Cursor) -> Result<Cursor, Reject>711 fn float(input: Cursor) -> Result<Cursor, Reject> {
712     let mut rest = float_digits(input)?;
713     if let Some(ch) = rest.chars().next() {
714         if is_ident_start(ch) {
715             rest = ident_not_raw(rest)?.0;
716         }
717     }
718     word_break(rest)
719 }
720 
float_digits(input: Cursor) -> Result<Cursor, Reject>721 fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
722     let mut chars = input.chars().peekable();
723     match chars.next() {
724         Some(ch) if '0' <= ch && ch <= '9' => {}
725         _ => return Err(Reject),
726     }
727 
728     let mut len = 1;
729     let mut has_dot = false;
730     let mut has_exp = false;
731     while let Some(&ch) = chars.peek() {
732         match ch {
733             '0'..='9' | '_' => {
734                 chars.next();
735                 len += 1;
736             }
737             '.' => {
738                 if has_dot {
739                     break;
740                 }
741                 chars.next();
742                 if chars
743                     .peek()
744                     .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
745                 {
746                     return Err(Reject);
747                 }
748                 len += 1;
749                 has_dot = true;
750             }
751             'e' | 'E' => {
752                 chars.next();
753                 len += 1;
754                 has_exp = true;
755                 break;
756             }
757             _ => break,
758         }
759     }
760 
761     if !(has_dot || has_exp) {
762         return Err(Reject);
763     }
764 
765     if has_exp {
766         let token_before_exp = if has_dot {
767             Ok(input.advance(len - 1))
768         } else {
769             Err(Reject)
770         };
771         let mut has_sign = false;
772         let mut has_exp_value = false;
773         while let Some(&ch) = chars.peek() {
774             match ch {
775                 '+' | '-' => {
776                     if has_exp_value {
777                         break;
778                     }
779                     if has_sign {
780                         return token_before_exp;
781                     }
782                     chars.next();
783                     len += 1;
784                     has_sign = true;
785                 }
786                 '0'..='9' => {
787                     chars.next();
788                     len += 1;
789                     has_exp_value = true;
790                 }
791                 '_' => {
792                     chars.next();
793                     len += 1;
794                 }
795                 _ => break,
796             }
797         }
798         if !has_exp_value {
799             return token_before_exp;
800         }
801     }
802 
803     Ok(input.advance(len))
804 }
805 
int(input: Cursor) -> Result<Cursor, Reject>806 fn int(input: Cursor) -> Result<Cursor, Reject> {
807     let mut rest = digits(input)?;
808     if let Some(ch) = rest.chars().next() {
809         if is_ident_start(ch) {
810             rest = ident_not_raw(rest)?.0;
811         }
812     }
813     word_break(rest)
814 }
815 
digits(mut input: Cursor) -> Result<Cursor, Reject>816 fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
817     let base = if input.starts_with("0x") {
818         input = input.advance(2);
819         16
820     } else if input.starts_with("0o") {
821         input = input.advance(2);
822         8
823     } else if input.starts_with("0b") {
824         input = input.advance(2);
825         2
826     } else {
827         10
828     };
829 
830     let mut len = 0;
831     let mut empty = true;
832     for b in input.bytes() {
833         match b {
834             b'0'..=b'9' => {
835                 let digit = (b - b'0') as u64;
836                 if digit >= base {
837                     return Err(Reject);
838                 }
839             }
840             b'a'..=b'f' => {
841                 let digit = 10 + (b - b'a') as u64;
842                 if digit >= base {
843                     break;
844                 }
845             }
846             b'A'..=b'F' => {
847                 let digit = 10 + (b - b'A') as u64;
848                 if digit >= base {
849                     break;
850                 }
851             }
852             b'_' => {
853                 if empty && base == 10 {
854                     return Err(Reject);
855                 }
856                 len += 1;
857                 continue;
858             }
859             _ => break,
860         }
861         len += 1;
862         empty = false;
863     }
864     if empty {
865         Err(Reject)
866     } else {
867         Ok(input.advance(len))
868     }
869 }
870 
punct(input: Cursor) -> PResult<Punct>871 fn punct(input: Cursor) -> PResult<Punct> {
872     let (rest, ch) = punct_char(input)?;
873     if ch == '\'' {
874         if ident_any(rest)?.0.starts_with_char('\'') {
875             Err(Reject)
876         } else {
877             Ok((rest, Punct::new('\'', Spacing::Joint)))
878         }
879     } else {
880         let kind = match punct_char(rest) {
881             Ok(_) => Spacing::Joint,
882             Err(Reject) => Spacing::Alone,
883         };
884         Ok((rest, Punct::new(ch, kind)))
885     }
886 }
887 
punct_char(input: Cursor) -> PResult<char>888 fn punct_char(input: Cursor) -> PResult<char> {
889     if input.starts_with("//") || input.starts_with("/*") {
890         // Do not accept `/` of a comment as a punct.
891         return Err(Reject);
892     }
893 
894     let mut chars = input.chars();
895     let first = match chars.next() {
896         Some(ch) => ch,
897         None => {
898             return Err(Reject);
899         }
900     };
901     let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
902     if recognized.contains(first) {
903         Ok((input.advance(first.len_utf8()), first))
904     } else {
905         Err(Reject)
906     }
907 }
908 
doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()>909 fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
910     #[cfg(span_locations)]
911     let lo = input.off;
912     let (rest, (comment, inner)) = doc_comment_contents(input)?;
913     let fallback_span = Span {
914         #[cfg(span_locations)]
915         lo,
916         #[cfg(span_locations)]
917         hi: rest.off,
918     };
919     let span = crate::Span::_new_fallback(fallback_span);
920 
921     let mut scan_for_bare_cr = comment;
922     while let Some(cr) = scan_for_bare_cr.find('\r') {
923         let rest = &scan_for_bare_cr[cr + 1..];
924         if !rest.starts_with('\n') {
925             return Err(Reject);
926         }
927         scan_for_bare_cr = rest;
928     }
929 
930     let mut pound = Punct::new('#', Spacing::Alone);
931     pound.set_span(span);
932     trees.push_token_from_parser(TokenTree::Punct(pound));
933 
934     if inner {
935         let mut bang = Punct::new('!', Spacing::Alone);
936         bang.set_span(span);
937         trees.push_token_from_parser(TokenTree::Punct(bang));
938     }
939 
940     let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span));
941     let mut equal = Punct::new('=', Spacing::Alone);
942     equal.set_span(span);
943     let mut literal = crate::Literal::_new_fallback(Literal::string(comment));
944     literal.set_span(span);
945     let mut bracketed = TokenStreamBuilder::with_capacity(3);
946     bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
947     bracketed.push_token_from_parser(TokenTree::Punct(equal));
948     bracketed.push_token_from_parser(TokenTree::Literal(literal));
949     let group = Group::new(Delimiter::Bracket, bracketed.build());
950     let mut group = crate::Group::_new_fallback(group);
951     group.set_span(span);
952     trees.push_token_from_parser(TokenTree::Group(group));
953 
954     Ok((rest, ()))
955 }
956 
doc_comment_contents(input: Cursor) -> PResult<(&str, bool)>957 fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
958     if input.starts_with("//!") {
959         let input = input.advance(3);
960         let (input, s) = take_until_newline_or_eof(input);
961         Ok((input, (s, true)))
962     } else if input.starts_with("/*!") {
963         let (input, s) = block_comment(input)?;
964         Ok((input, (&s[3..s.len() - 2], true)))
965     } else if input.starts_with("///") {
966         let input = input.advance(3);
967         if input.starts_with_char('/') {
968             return Err(Reject);
969         }
970         let (input, s) = take_until_newline_or_eof(input);
971         Ok((input, (s, false)))
972     } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
973         let (input, s) = block_comment(input)?;
974         Ok((input, (&s[3..s.len() - 2], false)))
975     } else {
976         Err(Reject)
977     }
978 }
979 
take_until_newline_or_eof(input: Cursor) -> (Cursor, &str)980 fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
981     let chars = input.char_indices();
982 
983     for (i, ch) in chars {
984         if ch == '\n' {
985             return (input.advance(i), &input.rest[..i]);
986         } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
987             return (input.advance(i + 1), &input.rest[..i]);
988         }
989     }
990 
991     (input.advance(input.len()), input.rest)
992 }
993