1 use crate::fallback::{
2 self, is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream,
3 TokenStreamBuilder,
4 };
5 use crate::{Delimiter, Punct, Spacing, TokenTree};
6 use core::char;
7 use core::str::{Bytes, CharIndices, Chars};
8
/// A position in the text being lexed.
///
/// The cursor is a cheap `Copy` view: parsers take one by value and hand back
/// a new one pointing past whatever they consumed.
#[derive(Clone, Copy, PartialEq, Eq)]
pub(crate) struct Cursor<'a> {
    /// The input that has not been consumed yet.
    pub rest: &'a str,
    /// Offset of `rest`; `advance` moves it forward by the number of `char`s
    /// consumed.
    #[cfg(span_locations)]
    pub off: u32,
}
15
16 impl<'a> Cursor<'a> {
advance(&self, bytes: usize) -> Cursor<'a>17 pub fn advance(&self, bytes: usize) -> Cursor<'a> {
18 let (_front, rest) = self.rest.split_at(bytes);
19 Cursor {
20 rest,
21 #[cfg(span_locations)]
22 off: self.off + _front.chars().count() as u32,
23 }
24 }
25
starts_with(&self, s: &str) -> bool26 pub fn starts_with(&self, s: &str) -> bool {
27 self.rest.starts_with(s)
28 }
29
starts_with_char(&self, ch: char) -> bool30 pub fn starts_with_char(&self, ch: char) -> bool {
31 self.rest.starts_with(ch)
32 }
33
starts_with_fn<Pattern>(&self, f: Pattern) -> bool where Pattern: FnMut(char) -> bool,34 pub fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
35 where
36 Pattern: FnMut(char) -> bool,
37 {
38 self.rest.starts_with(f)
39 }
40
is_empty(&self) -> bool41 pub fn is_empty(&self) -> bool {
42 self.rest.is_empty()
43 }
44
len(&self) -> usize45 fn len(&self) -> usize {
46 self.rest.len()
47 }
48
as_bytes(&self) -> &'a [u8]49 fn as_bytes(&self) -> &'a [u8] {
50 self.rest.as_bytes()
51 }
52
bytes(&self) -> Bytes<'a>53 fn bytes(&self) -> Bytes<'a> {
54 self.rest.bytes()
55 }
56
chars(&self) -> Chars<'a>57 fn chars(&self) -> Chars<'a> {
58 self.rest.chars()
59 }
60
char_indices(&self) -> CharIndices<'a>61 fn char_indices(&self) -> CharIndices<'a> {
62 self.rest.char_indices()
63 }
64
parse(&self, tag: &str) -> Result<Cursor<'a>, Reject>65 fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
66 if self.starts_with(tag) {
67 Ok(self.advance(tag.len()))
68 } else {
69 Err(Reject)
70 }
71 }
72 }
73
74 pub(crate) struct Reject;
75 type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
76
skip_whitespace(input: Cursor) -> Cursor77 fn skip_whitespace(input: Cursor) -> Cursor {
78 let mut s = input;
79
80 while !s.is_empty() {
81 let byte = s.as_bytes()[0];
82 if byte == b'/' {
83 if s.starts_with("//")
84 && (!s.starts_with("///") || s.starts_with("////"))
85 && !s.starts_with("//!")
86 {
87 let (cursor, _) = take_until_newline_or_eof(s);
88 s = cursor;
89 continue;
90 } else if s.starts_with("/**/") {
91 s = s.advance(4);
92 continue;
93 } else if s.starts_with("/*")
94 && (!s.starts_with("/**") || s.starts_with("/***"))
95 && !s.starts_with("/*!")
96 {
97 match block_comment(s) {
98 Ok((rest, _)) => {
99 s = rest;
100 continue;
101 }
102 Err(Reject) => return s,
103 }
104 }
105 }
106 match byte {
107 b' ' | 0x09..=0x0d => {
108 s = s.advance(1);
109 continue;
110 }
111 b if b.is_ascii() => {}
112 _ => {
113 let ch = s.chars().next().unwrap();
114 if is_whitespace(ch) {
115 s = s.advance(ch.len_utf8());
116 continue;
117 }
118 }
119 }
120 return s;
121 }
122 s
123 }
124
block_comment(input: Cursor) -> PResult<&str>125 fn block_comment(input: Cursor) -> PResult<&str> {
126 if !input.starts_with("/*") {
127 return Err(Reject);
128 }
129
130 let mut depth = 0usize;
131 let bytes = input.as_bytes();
132 let mut i = 0usize;
133 let upper = bytes.len() - 1;
134
135 while i < upper {
136 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
137 depth += 1;
138 i += 1; // eat '*'
139 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
140 depth -= 1;
141 if depth == 0 {
142 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
143 }
144 i += 1; // eat '/'
145 }
146 i += 1;
147 }
148
149 Err(Reject)
150 }
151
/// Whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        _ => ch.is_whitespace(),
    }
}
156
word_break(input: Cursor) -> Result<Cursor, Reject>157 fn word_break(input: Cursor) -> Result<Cursor, Reject> {
158 match input.chars().next() {
159 Some(ch) if is_ident_continue(ch) => Err(Reject),
160 Some(_) | None => Ok(input),
161 }
162 }
163
// Rustc's representation of a macro expansion error in expression position or
// type position.
//
// The lexer special-cases this exact text: `token_stream` does not treat its
// leading `(` as the start of a parenthesized group, and `leaf_token` turns
// the whole sequence into a single literal token.
const ERROR: &str = "(/*ERROR*/)";
167
token_stream(mut input: Cursor) -> Result<TokenStream, LexError>168 pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
169 let mut trees = TokenStreamBuilder::new();
170 let mut stack = Vec::new();
171
172 loop {
173 input = skip_whitespace(input);
174
175 if let Ok((rest, ())) = doc_comment(input, &mut trees) {
176 input = rest;
177 continue;
178 }
179
180 #[cfg(span_locations)]
181 let lo = input.off;
182
183 let first = match input.bytes().next() {
184 Some(first) => first,
185 None => match stack.last() {
186 None => return Ok(trees.build()),
187 #[cfg(span_locations)]
188 Some((lo, _frame)) => {
189 return Err(LexError {
190 span: Span { lo: *lo, hi: *lo },
191 })
192 }
193 #[cfg(not(span_locations))]
194 Some(_frame) => return Err(LexError { span: Span {} }),
195 },
196 };
197
198 if let Some(open_delimiter) = match first {
199 b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
200 b'[' => Some(Delimiter::Bracket),
201 b'{' => Some(Delimiter::Brace),
202 _ => None,
203 } {
204 input = input.advance(1);
205 let frame = (open_delimiter, trees);
206 #[cfg(span_locations)]
207 let frame = (lo, frame);
208 stack.push(frame);
209 trees = TokenStreamBuilder::new();
210 } else if let Some(close_delimiter) = match first {
211 b')' => Some(Delimiter::Parenthesis),
212 b']' => Some(Delimiter::Bracket),
213 b'}' => Some(Delimiter::Brace),
214 _ => None,
215 } {
216 let frame = match stack.pop() {
217 Some(frame) => frame,
218 None => return Err(lex_error(input)),
219 };
220 #[cfg(span_locations)]
221 let (lo, frame) = frame;
222 let (open_delimiter, outer) = frame;
223 if open_delimiter != close_delimiter {
224 return Err(lex_error(input));
225 }
226 input = input.advance(1);
227 let mut g = Group::new(open_delimiter, trees.build());
228 g.set_span(Span {
229 #[cfg(span_locations)]
230 lo,
231 #[cfg(span_locations)]
232 hi: input.off,
233 });
234 trees = outer;
235 trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
236 } else {
237 let (rest, mut tt) = match leaf_token(input) {
238 Ok((rest, tt)) => (rest, tt),
239 Err(Reject) => return Err(lex_error(input)),
240 };
241 tt.set_span(crate::Span::_new_fallback(Span {
242 #[cfg(span_locations)]
243 lo,
244 #[cfg(span_locations)]
245 hi: rest.off,
246 }));
247 trees.push_token_from_parser(tt);
248 input = rest;
249 }
250 }
251 }
252
lex_error(cursor: Cursor) -> LexError253 fn lex_error(cursor: Cursor) -> LexError {
254 #[cfg(not(span_locations))]
255 let _ = cursor;
256 LexError {
257 span: Span {
258 #[cfg(span_locations)]
259 lo: cursor.off,
260 #[cfg(span_locations)]
261 hi: cursor.off,
262 },
263 }
264 }
265
leaf_token(input: Cursor) -> PResult<TokenTree>266 fn leaf_token(input: Cursor) -> PResult<TokenTree> {
267 if let Ok((input, l)) = literal(input) {
268 // must be parsed before ident
269 Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
270 } else if let Ok((input, p)) = punct(input) {
271 Ok((input, TokenTree::Punct(p)))
272 } else if let Ok((input, i)) = ident(input) {
273 Ok((input, TokenTree::Ident(i)))
274 } else if input.starts_with(ERROR) {
275 let rest = input.advance(ERROR.len());
276 let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
277 Ok((rest, TokenTree::Literal(repr)))
278 } else {
279 Err(Reject)
280 }
281 }
282
ident(input: Cursor) -> PResult<crate::Ident>283 fn ident(input: Cursor) -> PResult<crate::Ident> {
284 if [
285 "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
286 ]
287 .iter()
288 .any(|prefix| input.starts_with(prefix))
289 {
290 Err(Reject)
291 } else {
292 ident_any(input)
293 }
294 }
295
ident_any(input: Cursor) -> PResult<crate::Ident>296 fn ident_any(input: Cursor) -> PResult<crate::Ident> {
297 let raw = input.starts_with("r#");
298 let rest = input.advance((raw as usize) << 1);
299
300 let (rest, sym) = ident_not_raw(rest)?;
301
302 if !raw {
303 let ident = crate::Ident::_new(crate::imp::Ident::new_unchecked(
304 sym,
305 fallback::Span::call_site(),
306 ));
307 return Ok((rest, ident));
308 }
309
310 match sym {
311 "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
312 _ => {}
313 }
314
315 let ident = crate::Ident::_new(crate::imp::Ident::new_raw_unchecked(
316 sym,
317 fallback::Span::call_site(),
318 ));
319 Ok((rest, ident))
320 }
321
ident_not_raw(input: Cursor) -> PResult<&str>322 fn ident_not_raw(input: Cursor) -> PResult<&str> {
323 let mut chars = input.char_indices();
324
325 match chars.next() {
326 Some((_, ch)) if is_ident_start(ch) => {}
327 _ => return Err(Reject),
328 }
329
330 let mut end = input.len();
331 for (i, ch) in chars {
332 if !is_ident_continue(ch) {
333 end = i;
334 break;
335 }
336 }
337
338 Ok((input.advance(end), &input.rest[..end]))
339 }
340
literal(input: Cursor) -> PResult<Literal>341 pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
342 let rest = literal_nocapture(input)?;
343 let end = input.len() - rest.len();
344 Ok((rest, Literal::_new(input.rest[..end].to_string())))
345 }
346
literal_nocapture(input: Cursor) -> Result<Cursor, Reject>347 fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
348 if let Ok(ok) = string(input) {
349 Ok(ok)
350 } else if let Ok(ok) = byte_string(input) {
351 Ok(ok)
352 } else if let Ok(ok) = c_string(input) {
353 Ok(ok)
354 } else if let Ok(ok) = byte(input) {
355 Ok(ok)
356 } else if let Ok(ok) = character(input) {
357 Ok(ok)
358 } else if let Ok(ok) = float(input) {
359 Ok(ok)
360 } else if let Ok(ok) = int(input) {
361 Ok(ok)
362 } else {
363 Err(Reject)
364 }
365 }
366
literal_suffix(input: Cursor) -> Cursor367 fn literal_suffix(input: Cursor) -> Cursor {
368 match ident_not_raw(input) {
369 Ok((input, _)) => input,
370 Err(Reject) => input,
371 }
372 }
373
string(input: Cursor) -> Result<Cursor, Reject>374 fn string(input: Cursor) -> Result<Cursor, Reject> {
375 if let Ok(input) = input.parse("\"") {
376 cooked_string(input)
377 } else if let Ok(input) = input.parse("r") {
378 raw_string(input)
379 } else {
380 Err(Reject)
381 }
382 }
383
cooked_string(mut input: Cursor) -> Result<Cursor, Reject>384 fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
385 let mut chars = input.char_indices();
386
387 while let Some((i, ch)) = chars.next() {
388 match ch {
389 '"' => {
390 let input = input.advance(i + 1);
391 return Ok(literal_suffix(input));
392 }
393 '\r' => match chars.next() {
394 Some((_, '\n')) => {}
395 _ => break,
396 },
397 '\\' => match chars.next() {
398 Some((_, 'x')) => {
399 backslash_x_char(&mut chars)?;
400 }
401 Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
402 Some((_, 'u')) => {
403 backslash_u(&mut chars)?;
404 }
405 Some((newline, ch @ ('\n' | '\r'))) => {
406 input = input.advance(newline + 1);
407 trailing_backslash(&mut input, ch as u8)?;
408 chars = input.char_indices();
409 }
410 _ => break,
411 },
412 _ch => {}
413 }
414 }
415 Err(Reject)
416 }
417
raw_string(input: Cursor) -> Result<Cursor, Reject>418 fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
419 let (input, delimiter) = delimiter_of_raw_string(input)?;
420 let mut bytes = input.bytes().enumerate();
421 while let Some((i, byte)) = bytes.next() {
422 match byte {
423 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
424 let rest = input.advance(i + 1 + delimiter.len());
425 return Ok(literal_suffix(rest));
426 }
427 b'\r' => match bytes.next() {
428 Some((_, b'\n')) => {}
429 _ => break,
430 },
431 _ => {}
432 }
433 }
434 Err(Reject)
435 }
436
byte_string(input: Cursor) -> Result<Cursor, Reject>437 fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
438 if let Ok(input) = input.parse("b\"") {
439 cooked_byte_string(input)
440 } else if let Ok(input) = input.parse("br") {
441 raw_byte_string(input)
442 } else {
443 Err(Reject)
444 }
445 }
446
cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject>447 fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
448 let mut bytes = input.bytes().enumerate();
449 while let Some((offset, b)) = bytes.next() {
450 match b {
451 b'"' => {
452 let input = input.advance(offset + 1);
453 return Ok(literal_suffix(input));
454 }
455 b'\r' => match bytes.next() {
456 Some((_, b'\n')) => {}
457 _ => break,
458 },
459 b'\\' => match bytes.next() {
460 Some((_, b'x')) => {
461 backslash_x_byte(&mut bytes)?;
462 }
463 Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
464 Some((newline, b @ (b'\n' | b'\r'))) => {
465 input = input.advance(newline + 1);
466 trailing_backslash(&mut input, b)?;
467 bytes = input.bytes().enumerate();
468 }
469 _ => break,
470 },
471 b if b.is_ascii() => {}
472 _ => break,
473 }
474 }
475 Err(Reject)
476 }
477
delimiter_of_raw_string(input: Cursor) -> PResult<&str>478 fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
479 for (i, byte) in input.bytes().enumerate() {
480 match byte {
481 b'"' => {
482 if i > 255 {
483 // https://github.com/rust-lang/rust/pull/95251
484 return Err(Reject);
485 }
486 return Ok((input.advance(i + 1), &input.rest[..i]));
487 }
488 b'#' => {}
489 _ => break,
490 }
491 }
492 Err(Reject)
493 }
494
raw_byte_string(input: Cursor) -> Result<Cursor, Reject>495 fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
496 let (input, delimiter) = delimiter_of_raw_string(input)?;
497 let mut bytes = input.bytes().enumerate();
498 while let Some((i, byte)) = bytes.next() {
499 match byte {
500 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
501 let rest = input.advance(i + 1 + delimiter.len());
502 return Ok(literal_suffix(rest));
503 }
504 b'\r' => match bytes.next() {
505 Some((_, b'\n')) => {}
506 _ => break,
507 },
508 other => {
509 if !other.is_ascii() {
510 break;
511 }
512 }
513 }
514 }
515 Err(Reject)
516 }
517
c_string(input: Cursor) -> Result<Cursor, Reject>518 fn c_string(input: Cursor) -> Result<Cursor, Reject> {
519 if let Ok(input) = input.parse("c\"") {
520 cooked_c_string(input)
521 } else if let Ok(input) = input.parse("cr") {
522 raw_c_string(input)
523 } else {
524 Err(Reject)
525 }
526 }
527
raw_c_string(input: Cursor) -> Result<Cursor, Reject>528 fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
529 let (input, delimiter) = delimiter_of_raw_string(input)?;
530 let mut bytes = input.bytes().enumerate();
531 while let Some((i, byte)) = bytes.next() {
532 match byte {
533 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
534 let rest = input.advance(i + 1 + delimiter.len());
535 return Ok(literal_suffix(rest));
536 }
537 b'\r' => match bytes.next() {
538 Some((_, b'\n')) => {}
539 _ => break,
540 },
541 b'\0' => break,
542 _ => {}
543 }
544 }
545 Err(Reject)
546 }
547
cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject>548 fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
549 let mut chars = input.char_indices();
550
551 while let Some((i, ch)) = chars.next() {
552 match ch {
553 '"' => {
554 let input = input.advance(i + 1);
555 return Ok(literal_suffix(input));
556 }
557 '\r' => match chars.next() {
558 Some((_, '\n')) => {}
559 _ => break,
560 },
561 '\\' => match chars.next() {
562 Some((_, 'x')) => {
563 backslash_x_nonzero(&mut chars)?;
564 }
565 Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
566 Some((_, 'u')) => {
567 if backslash_u(&mut chars)? == '\0' {
568 break;
569 }
570 }
571 Some((newline, ch @ ('\n' | '\r'))) => {
572 input = input.advance(newline + 1);
573 trailing_backslash(&mut input, ch as u8)?;
574 chars = input.char_indices();
575 }
576 _ => break,
577 },
578 '\0' => break,
579 _ch => {}
580 }
581 }
582 Err(Reject)
583 }
584
byte(input: Cursor) -> Result<Cursor, Reject>585 fn byte(input: Cursor) -> Result<Cursor, Reject> {
586 let input = input.parse("b'")?;
587 let mut bytes = input.bytes().enumerate();
588 let ok = match bytes.next().map(|(_, b)| b) {
589 Some(b'\\') => match bytes.next().map(|(_, b)| b) {
590 Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
591 Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
592 _ => false,
593 },
594 b => b.is_some(),
595 };
596 if !ok {
597 return Err(Reject);
598 }
599 let (offset, _) = bytes.next().ok_or(Reject)?;
600 if !input.chars().as_str().is_char_boundary(offset) {
601 return Err(Reject);
602 }
603 let input = input.advance(offset).parse("'")?;
604 Ok(literal_suffix(input))
605 }
606
character(input: Cursor) -> Result<Cursor, Reject>607 fn character(input: Cursor) -> Result<Cursor, Reject> {
608 let input = input.parse("'")?;
609 let mut chars = input.char_indices();
610 let ok = match chars.next().map(|(_, ch)| ch) {
611 Some('\\') => match chars.next().map(|(_, ch)| ch) {
612 Some('x') => backslash_x_char(&mut chars).is_ok(),
613 Some('u') => backslash_u(&mut chars).is_ok(),
614 Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
615 _ => false,
616 },
617 ch => ch.is_some(),
618 };
619 if !ok {
620 return Err(Reject);
621 }
622 let (idx, _) = chars.next().ok_or(Reject)?;
623 let input = input.advance(idx).parse("'")?;
624 Ok(literal_suffix(input))
625 }
626
// Pulls the next `(index, item)` pair from the iterator `$chars` and evaluates
// to the item if it matches `$pat`. Otherwise (including when the iterator is
// exhausted) returns `Err(Reject)` from the enclosing function.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            None => return Err(Reject),
            Some((_, ch)) => {
                if let $pat = ch {
                    ch
                } else {
                    return Err(Reject);
                }
            }
        }
    };
}
638
backslash_x_char<I>(chars: &mut I) -> Result<(), Reject> where I: Iterator<Item = (usize, char)>,639 fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
640 where
641 I: Iterator<Item = (usize, char)>,
642 {
643 next_ch!(chars @ '0'..='7');
644 next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
645 Ok(())
646 }
647
backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject> where I: Iterator<Item = (usize, u8)>,648 fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
649 where
650 I: Iterator<Item = (usize, u8)>,
651 {
652 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
653 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
654 Ok(())
655 }
656
backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject> where I: Iterator<Item = (usize, char)>,657 fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
658 where
659 I: Iterator<Item = (usize, char)>,
660 {
661 let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
662 let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
663 if first == '0' && second == '0' {
664 Err(Reject)
665 } else {
666 Ok(())
667 }
668 }
669
backslash_u<I>(chars: &mut I) -> Result<char, Reject> where I: Iterator<Item = (usize, char)>,670 fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
671 where
672 I: Iterator<Item = (usize, char)>,
673 {
674 next_ch!(chars @ '{');
675 let mut value = 0;
676 let mut len = 0;
677 for (_, ch) in chars {
678 let digit = match ch {
679 '0'..='9' => ch as u8 - b'0',
680 'a'..='f' => 10 + ch as u8 - b'a',
681 'A'..='F' => 10 + ch as u8 - b'A',
682 '_' if len > 0 => continue,
683 '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
684 _ => break,
685 };
686 if len == 6 {
687 break;
688 }
689 value *= 0x10;
690 value += u32::from(digit);
691 len += 1;
692 }
693 Err(Reject)
694 }
695
trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject>696 fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
697 let mut whitespace = input.bytes().enumerate();
698 loop {
699 if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
700 return Err(Reject);
701 }
702 match whitespace.next() {
703 Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
704 last = b;
705 }
706 Some((offset, _)) => {
707 *input = input.advance(offset);
708 return Ok(());
709 }
710 None => return Err(Reject),
711 }
712 }
713 }
714
float(input: Cursor) -> Result<Cursor, Reject>715 fn float(input: Cursor) -> Result<Cursor, Reject> {
716 let mut rest = float_digits(input)?;
717 if let Some(ch) = rest.chars().next() {
718 if is_ident_start(ch) {
719 rest = ident_not_raw(rest)?.0;
720 }
721 }
722 word_break(rest)
723 }
724
float_digits(input: Cursor) -> Result<Cursor, Reject>725 fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
726 let mut chars = input.chars().peekable();
727 match chars.next() {
728 Some(ch) if '0' <= ch && ch <= '9' => {}
729 _ => return Err(Reject),
730 }
731
732 let mut len = 1;
733 let mut has_dot = false;
734 let mut has_exp = false;
735 while let Some(&ch) = chars.peek() {
736 match ch {
737 '0'..='9' | '_' => {
738 chars.next();
739 len += 1;
740 }
741 '.' => {
742 if has_dot {
743 break;
744 }
745 chars.next();
746 if chars
747 .peek()
748 .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
749 {
750 return Err(Reject);
751 }
752 len += 1;
753 has_dot = true;
754 }
755 'e' | 'E' => {
756 chars.next();
757 len += 1;
758 has_exp = true;
759 break;
760 }
761 _ => break,
762 }
763 }
764
765 if !(has_dot || has_exp) {
766 return Err(Reject);
767 }
768
769 if has_exp {
770 let token_before_exp = if has_dot {
771 Ok(input.advance(len - 1))
772 } else {
773 Err(Reject)
774 };
775 let mut has_sign = false;
776 let mut has_exp_value = false;
777 while let Some(&ch) = chars.peek() {
778 match ch {
779 '+' | '-' => {
780 if has_exp_value {
781 break;
782 }
783 if has_sign {
784 return token_before_exp;
785 }
786 chars.next();
787 len += 1;
788 has_sign = true;
789 }
790 '0'..='9' => {
791 chars.next();
792 len += 1;
793 has_exp_value = true;
794 }
795 '_' => {
796 chars.next();
797 len += 1;
798 }
799 _ => break,
800 }
801 }
802 if !has_exp_value {
803 return token_before_exp;
804 }
805 }
806
807 Ok(input.advance(len))
808 }
809
int(input: Cursor) -> Result<Cursor, Reject>810 fn int(input: Cursor) -> Result<Cursor, Reject> {
811 let mut rest = digits(input)?;
812 if let Some(ch) = rest.chars().next() {
813 if is_ident_start(ch) {
814 rest = ident_not_raw(rest)?.0;
815 }
816 }
817 word_break(rest)
818 }
819
digits(mut input: Cursor) -> Result<Cursor, Reject>820 fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
821 let base = if input.starts_with("0x") {
822 input = input.advance(2);
823 16
824 } else if input.starts_with("0o") {
825 input = input.advance(2);
826 8
827 } else if input.starts_with("0b") {
828 input = input.advance(2);
829 2
830 } else {
831 10
832 };
833
834 let mut len = 0;
835 let mut empty = true;
836 for b in input.bytes() {
837 match b {
838 b'0'..=b'9' => {
839 let digit = (b - b'0') as u64;
840 if digit >= base {
841 return Err(Reject);
842 }
843 }
844 b'a'..=b'f' => {
845 let digit = 10 + (b - b'a') as u64;
846 if digit >= base {
847 break;
848 }
849 }
850 b'A'..=b'F' => {
851 let digit = 10 + (b - b'A') as u64;
852 if digit >= base {
853 break;
854 }
855 }
856 b'_' => {
857 if empty && base == 10 {
858 return Err(Reject);
859 }
860 len += 1;
861 continue;
862 }
863 _ => break,
864 };
865 len += 1;
866 empty = false;
867 }
868 if empty {
869 Err(Reject)
870 } else {
871 Ok(input.advance(len))
872 }
873 }
874
punct(input: Cursor) -> PResult<Punct>875 fn punct(input: Cursor) -> PResult<Punct> {
876 let (rest, ch) = punct_char(input)?;
877 if ch == '\'' {
878 if ident_any(rest)?.0.starts_with_char('\'') {
879 Err(Reject)
880 } else {
881 Ok((rest, Punct::new('\'', Spacing::Joint)))
882 }
883 } else {
884 let kind = match punct_char(rest) {
885 Ok(_) => Spacing::Joint,
886 Err(Reject) => Spacing::Alone,
887 };
888 Ok((rest, Punct::new(ch, kind)))
889 }
890 }
891
punct_char(input: Cursor) -> PResult<char>892 fn punct_char(input: Cursor) -> PResult<char> {
893 if input.starts_with("//") || input.starts_with("/*") {
894 // Do not accept `/` of a comment as a punct.
895 return Err(Reject);
896 }
897
898 let mut chars = input.chars();
899 let first = match chars.next() {
900 Some(ch) => ch,
901 None => {
902 return Err(Reject);
903 }
904 };
905 let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
906 if recognized.contains(first) {
907 Ok((input.advance(first.len_utf8()), first))
908 } else {
909 Err(Reject)
910 }
911 }
912
doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()>913 fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
914 #[cfg(span_locations)]
915 let lo = input.off;
916 let (rest, (comment, inner)) = doc_comment_contents(input)?;
917 let fallback_span = Span {
918 #[cfg(span_locations)]
919 lo,
920 #[cfg(span_locations)]
921 hi: rest.off,
922 };
923 let span = crate::Span::_new_fallback(fallback_span);
924
925 let mut scan_for_bare_cr = comment;
926 while let Some(cr) = scan_for_bare_cr.find('\r') {
927 let rest = &scan_for_bare_cr[cr + 1..];
928 if !rest.starts_with('\n') {
929 return Err(Reject);
930 }
931 scan_for_bare_cr = rest;
932 }
933
934 let mut pound = Punct::new('#', Spacing::Alone);
935 pound.set_span(span);
936 trees.push_token_from_parser(TokenTree::Punct(pound));
937
938 if inner {
939 let mut bang = Punct::new('!', Spacing::Alone);
940 bang.set_span(span);
941 trees.push_token_from_parser(TokenTree::Punct(bang));
942 }
943
944 let doc_ident = crate::Ident::_new(crate::imp::Ident::new_unchecked("doc", fallback_span));
945 let mut equal = Punct::new('=', Spacing::Alone);
946 equal.set_span(span);
947 let mut literal = crate::Literal::string(comment);
948 literal.set_span(span);
949 let mut bracketed = TokenStreamBuilder::with_capacity(3);
950 bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
951 bracketed.push_token_from_parser(TokenTree::Punct(equal));
952 bracketed.push_token_from_parser(TokenTree::Literal(literal));
953 let group = Group::new(Delimiter::Bracket, bracketed.build());
954 let mut group = crate::Group::_new_fallback(group);
955 group.set_span(span);
956 trees.push_token_from_parser(TokenTree::Group(group));
957
958 Ok((rest, ()))
959 }
960
doc_comment_contents(input: Cursor) -> PResult<(&str, bool)>961 fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
962 if input.starts_with("//!") {
963 let input = input.advance(3);
964 let (input, s) = take_until_newline_or_eof(input);
965 Ok((input, (s, true)))
966 } else if input.starts_with("/*!") {
967 let (input, s) = block_comment(input)?;
968 Ok((input, (&s[3..s.len() - 2], true)))
969 } else if input.starts_with("///") {
970 let input = input.advance(3);
971 if input.starts_with_char('/') {
972 return Err(Reject);
973 }
974 let (input, s) = take_until_newline_or_eof(input);
975 Ok((input, (s, false)))
976 } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
977 let (input, s) = block_comment(input)?;
978 Ok((input, (&s[3..s.len() - 2], false)))
979 } else {
980 Err(Reject)
981 }
982 }
983
take_until_newline_or_eof(input: Cursor) -> (Cursor, &str)984 fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
985 let chars = input.char_indices();
986
987 for (i, ch) in chars {
988 if ch == '\n' {
989 return (input.advance(i), &input.rest[..i]);
990 } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
991 return (input.advance(i + 1), &input.rest[..i]);
992 }
993 }
994
995 (input.advance(input.len()), input.rest)
996 }
997