1 use crate::fallback::{
2 is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream,
3 TokenStreamBuilder,
4 };
5 use crate::{Delimiter, Punct, Spacing, TokenTree};
6 use core::char;
7 use core::str::{Bytes, CharIndices, Chars};
8
/// Lightweight, copyable view of the input that has not been lexed yet.
///
/// Parsing functions never mutate a cursor; they hand back a new one that
/// starts further into the same underlying string.
#[derive(Clone, Copy, PartialEq, Eq)]
pub(crate) struct Cursor<'a> {
    /// The remaining, not yet consumed input.
    pub rest: &'a str,
    /// Offset of `rest`, counted in chars; `advance` adds the number of chars
    /// it skips. Only present when `span_locations` is enabled.
    #[cfg(span_locations)]
    pub off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor that starts `bytes` bytes after this one.
    ///
    /// Panics (through `str::split_at`) when `bytes` is out of range or does
    /// not fall on a char boundary.
    pub fn advance(&self, bytes: usize) -> Cursor<'a> {
        let (_skipped, remainder) = self.rest.split_at(bytes);
        Cursor {
            rest: remainder,
            #[cfg(span_locations)]
            off: self.off + _skipped.chars().count() as u32,
        }
    }

    /// Whether the remaining input begins with `s`.
    pub fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Whether all input has been consumed.
    fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Length of the remaining input in bytes.
    fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining input as raw bytes.
    fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the bytes of the remaining input.
    fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the chars of the remaining input.
    fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over `(byte offset, char)` pairs of the remaining input.
    fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }

    /// Consumes the literal prefix `tag`, or rejects if the input does not
    /// start with it.
    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
        if !self.starts_with(tag) {
            return Err(Reject);
        }
        Ok(self.advance(tag.len()))
    }
}

/// Marker error returned when the input at the cursor does not match the
/// production being attempted.
pub(crate) struct Reject;

/// Outcome of a parser: the cursor after the match plus the parsed value.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
65
skip_whitespace(input: Cursor) -> Cursor66 fn skip_whitespace(input: Cursor) -> Cursor {
67 let mut s = input;
68
69 while !s.is_empty() {
70 let byte = s.as_bytes()[0];
71 if byte == b'/' {
72 if s.starts_with("//")
73 && (!s.starts_with("///") || s.starts_with("////"))
74 && !s.starts_with("//!")
75 {
76 let (cursor, _) = take_until_newline_or_eof(s);
77 s = cursor;
78 continue;
79 } else if s.starts_with("/**/") {
80 s = s.advance(4);
81 continue;
82 } else if s.starts_with("/*")
83 && (!s.starts_with("/**") || s.starts_with("/***"))
84 && !s.starts_with("/*!")
85 {
86 match block_comment(s) {
87 Ok((rest, _)) => {
88 s = rest;
89 continue;
90 }
91 Err(Reject) => return s,
92 }
93 }
94 }
95 match byte {
96 b' ' | 0x09..=0x0d => {
97 s = s.advance(1);
98 continue;
99 }
100 b if b <= 0x7f => {}
101 _ => {
102 let ch = s.chars().next().unwrap();
103 if is_whitespace(ch) {
104 s = s.advance(ch.len_utf8());
105 continue;
106 }
107 }
108 }
109 return s;
110 }
111 s
112 }
113
block_comment(input: Cursor) -> PResult<&str>114 fn block_comment(input: Cursor) -> PResult<&str> {
115 if !input.starts_with("/*") {
116 return Err(Reject);
117 }
118
119 let mut depth = 0usize;
120 let bytes = input.as_bytes();
121 let mut i = 0usize;
122 let upper = bytes.len() - 1;
123
124 while i < upper {
125 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
126 depth += 1;
127 i += 1; // eat '*'
128 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
129 depth -= 1;
130 if depth == 0 {
131 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
132 }
133 i += 1; // eat '/'
134 }
135 i += 1;
136 }
137
138 Err(Reject)
139 }
140
/// Whitespace test used by the lexer: everything `char::is_whitespace`
/// accepts, plus the two directional marks.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        _ => ch.is_whitespace(),
    }
}
145
word_break(input: Cursor) -> Result<Cursor, Reject>146 fn word_break(input: Cursor) -> Result<Cursor, Reject> {
147 match input.chars().next() {
148 Some(ch) if is_ident_continue(ch) => Err(Reject),
149 Some(_) | None => Ok(input),
150 }
151 }
152
/// Lexes the whole input into a `TokenStream`.
///
/// Nested delimiters are handled without recursion: each opening delimiter
/// pushes the token builder of the enclosing level onto `stack` and starts a
/// fresh builder; the matching closing delimiter pops it and wraps the inner
/// tokens in a `Group`.
///
/// Returns a `LexError` when a closing delimiter has no matching opener,
/// when the delimiters differ, when the input ends with groups still open, or
/// when `leaf_token` rejects the input at the cursor.
pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
    let mut trees = TokenStreamBuilder::new();
    // One frame per currently open group: `(open_delimiter, outer_builder)`,
    // additionally paired with the opener's offset under `span_locations`.
    let mut stack = Vec::new();

    loop {
        input = skip_whitespace(input);

        // Doc comments push their `#[doc = "..."]` tokens into `trees` themselves.
        if let Ok((rest, ())) = doc_comment(input, &mut trees) {
            input = rest;
            continue;
        }

        #[cfg(span_locations)]
        let lo = input.off;

        let first = match input.bytes().next() {
            Some(first) => first,
            // End of input: fine at top level, an error inside an open group.
            None => match stack.last() {
                None => return Ok(trees.build()),
                #[cfg(span_locations)]
                Some((lo, _frame)) => {
                    // Report the error at the position of the unclosed opener.
                    return Err(LexError {
                        span: Span { lo: *lo, hi: *lo },
                    })
                }
                #[cfg(not(span_locations))]
                Some(_frame) => return Err(LexError { span: Span {} }),
            },
        };

        if let Some(open_delimiter) = match first {
            b'(' => Some(Delimiter::Parenthesis),
            b'[' => Some(Delimiter::Bracket),
            b'{' => Some(Delimiter::Brace),
            _ => None,
        } {
            input = input.advance(1);
            // Park the enclosing builder and start collecting the group's contents.
            let frame = (open_delimiter, trees);
            #[cfg(span_locations)]
            let frame = (lo, frame);
            stack.push(frame);
            trees = TokenStreamBuilder::new();
        } else if let Some(close_delimiter) = match first {
            b')' => Some(Delimiter::Parenthesis),
            b']' => Some(Delimiter::Bracket),
            b'}' => Some(Delimiter::Brace),
            _ => None,
        } {
            let frame = match stack.pop() {
                Some(frame) => frame,
                // Closing delimiter with nothing open.
                None => return Err(lex_error(input)),
            };
            // Shadows the current token's `lo` with the opener's offset.
            #[cfg(span_locations)]
            let (lo, frame) = frame;
            let (open_delimiter, outer) = frame;
            if open_delimiter != close_delimiter {
                return Err(lex_error(input));
            }
            input = input.advance(1);
            let mut g = Group::new(open_delimiter, trees.build());
            // The group's span runs from the opener to just past the closer.
            g.set_span(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: input.off,
            });
            trees = outer;
            trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
        } else {
            let (rest, mut tt) = match leaf_token(input) {
                Ok((rest, tt)) => (rest, tt),
                Err(Reject) => return Err(lex_error(input)),
            };
            tt.set_span(crate::Span::_new_fallback(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: rest.off,
            }));
            trees.push_token_from_parser(tt);
            input = rest;
        }
    }
}
237
lex_error(cursor: Cursor) -> LexError238 fn lex_error(cursor: Cursor) -> LexError {
239 #[cfg(not(span_locations))]
240 let _ = cursor;
241 LexError {
242 span: Span {
243 #[cfg(span_locations)]
244 lo: cursor.off,
245 #[cfg(span_locations)]
246 hi: cursor.off,
247 },
248 }
249 }
250
leaf_token(input: Cursor) -> PResult<TokenTree>251 fn leaf_token(input: Cursor) -> PResult<TokenTree> {
252 if let Ok((input, l)) = literal(input) {
253 // must be parsed before ident
254 Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
255 } else if let Ok((input, p)) = punct(input) {
256 Ok((input, TokenTree::Punct(p)))
257 } else if let Ok((input, i)) = ident(input) {
258 Ok((input, TokenTree::Ident(i)))
259 } else {
260 Err(Reject)
261 }
262 }
263
ident(input: Cursor) -> PResult<crate::Ident>264 fn ident(input: Cursor) -> PResult<crate::Ident> {
265 if ["r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#"]
266 .iter()
267 .any(|prefix| input.starts_with(prefix))
268 {
269 Err(Reject)
270 } else {
271 ident_any(input)
272 }
273 }
274
ident_any(input: Cursor) -> PResult<crate::Ident>275 fn ident_any(input: Cursor) -> PResult<crate::Ident> {
276 let raw = input.starts_with("r#");
277 let rest = input.advance((raw as usize) << 1);
278
279 let (rest, sym) = ident_not_raw(rest)?;
280
281 if !raw {
282 let ident = crate::Ident::new(sym, crate::Span::call_site());
283 return Ok((rest, ident));
284 }
285
286 match sym {
287 "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
288 _ => {}
289 }
290
291 let ident = crate::Ident::_new_raw(sym, crate::Span::call_site());
292 Ok((rest, ident))
293 }
294
ident_not_raw(input: Cursor) -> PResult<&str>295 fn ident_not_raw(input: Cursor) -> PResult<&str> {
296 let mut chars = input.char_indices();
297
298 match chars.next() {
299 Some((_, ch)) if is_ident_start(ch) => {}
300 _ => return Err(Reject),
301 }
302
303 let mut end = input.len();
304 for (i, ch) in chars {
305 if !is_ident_continue(ch) {
306 end = i;
307 break;
308 }
309 }
310
311 Ok((input.advance(end), &input.rest[..end]))
312 }
313
literal(input: Cursor) -> PResult<Literal>314 pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
315 let rest = literal_nocapture(input)?;
316 let end = input.len() - rest.len();
317 Ok((rest, Literal::_new(input.rest[..end].to_string())))
318 }
319
literal_nocapture(input: Cursor) -> Result<Cursor, Reject>320 fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
321 if let Ok(ok) = string(input) {
322 Ok(ok)
323 } else if let Ok(ok) = byte_string(input) {
324 Ok(ok)
325 } else if let Ok(ok) = byte(input) {
326 Ok(ok)
327 } else if let Ok(ok) = character(input) {
328 Ok(ok)
329 } else if let Ok(ok) = float(input) {
330 Ok(ok)
331 } else if let Ok(ok) = int(input) {
332 Ok(ok)
333 } else {
334 Err(Reject)
335 }
336 }
337
literal_suffix(input: Cursor) -> Cursor338 fn literal_suffix(input: Cursor) -> Cursor {
339 match ident_not_raw(input) {
340 Ok((input, _)) => input,
341 Err(Reject) => input,
342 }
343 }
344
string(input: Cursor) -> Result<Cursor, Reject>345 fn string(input: Cursor) -> Result<Cursor, Reject> {
346 if let Ok(input) = input.parse("\"") {
347 cooked_string(input)
348 } else if let Ok(input) = input.parse("r") {
349 raw_string(input)
350 } else {
351 Err(Reject)
352 }
353 }
354
/// Scans the body of a regular (escaped) string literal.
///
/// `input` starts right after the opening quote. On success returns the
/// cursor after the closing quote and any literal suffix. Rejects when the
/// string is unterminated, contains a carriage return not followed by a line
/// feed, or contains an invalid escape.
fn cooked_string(input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.char_indices().peekable();

    while let Some((i, ch)) = chars.next() {
        match ch {
            '"' => {
                // Closing quote: `i + 1` is the byte just past it.
                let input = input.advance(i + 1);
                return Ok(literal_suffix(input));
            }
            // A carriage return is only accepted as part of `\r\n`.
            '\r' => match chars.next() {
                Some((_, '\n')) => {}
                _ => break,
            },
            '\\' => match chars.next() {
                Some((_, 'x')) => {
                    if !backslash_x_char(&mut chars) {
                        break;
                    }
                }
                Some((_, 'n')) | Some((_, 'r')) | Some((_, 't')) | Some((_, '\\'))
                | Some((_, '\'')) | Some((_, '"')) | Some((_, '0')) => {}
                Some((_, 'u')) => {
                    if !backslash_u(&mut chars) {
                        break;
                    }
                }
                // Backslash at end of line: skip the line break and all
                // whitespace that follows it.
                Some((_, ch @ '\n')) | Some((_, ch @ '\r')) => {
                    let mut last = ch;
                    loop {
                        // Every `\r` seen while skipping must be followed by `\n`.
                        if last == '\r' && chars.next().map_or(true, |(_, ch)| ch != '\n') {
                            return Err(Reject);
                        }
                        // Peek so the first non-whitespace char is left for the outer loop.
                        match chars.peek() {
                            Some((_, ch)) if ch.is_whitespace() => {
                                last = *ch;
                                chars.next();
                            }
                            _ => break,
                        }
                    }
                }
                // Unknown escape, or input ended right after the backslash.
                _ => break,
            },
            _ch => {}
        }
    }
    // Reached on an invalid escape / bare CR (via `break`) or a missing closing quote.
    Err(Reject)
}
403
byte_string(input: Cursor) -> Result<Cursor, Reject>404 fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
405 if let Ok(input) = input.parse("b\"") {
406 cooked_byte_string(input)
407 } else if let Ok(input) = input.parse("br") {
408 raw_string(input)
409 } else {
410 Err(Reject)
411 }
412 }
413
cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject>414 fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
415 let mut bytes = input.bytes().enumerate();
416 while let Some((offset, b)) = bytes.next() {
417 match b {
418 b'"' => {
419 let input = input.advance(offset + 1);
420 return Ok(literal_suffix(input));
421 }
422 b'\r' => match bytes.next() {
423 Some((_, b'\n')) => {}
424 _ => break,
425 },
426 b'\\' => match bytes.next() {
427 Some((_, b'x')) => {
428 if !backslash_x_byte(&mut bytes) {
429 break;
430 }
431 }
432 Some((_, b'n')) | Some((_, b'r')) | Some((_, b't')) | Some((_, b'\\'))
433 | Some((_, b'0')) | Some((_, b'\'')) | Some((_, b'"')) => {}
434 Some((newline, b @ b'\n')) | Some((newline, b @ b'\r')) => {
435 let mut last = b as char;
436 let rest = input.advance(newline + 1);
437 let mut chars = rest.char_indices();
438 loop {
439 if last == '\r' && chars.next().map_or(true, |(_, ch)| ch != '\n') {
440 return Err(Reject);
441 }
442 match chars.next() {
443 Some((_, ch)) if ch.is_whitespace() => last = ch,
444 Some((offset, _)) => {
445 input = rest.advance(offset);
446 bytes = input.bytes().enumerate();
447 break;
448 }
449 None => return Err(Reject),
450 }
451 }
452 }
453 _ => break,
454 },
455 b if b < 0x80 => {}
456 _ => break,
457 }
458 }
459 Err(Reject)
460 }
461
raw_string(input: Cursor) -> Result<Cursor, Reject>462 fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
463 let mut chars = input.char_indices();
464 let mut n = 0;
465 for (i, ch) in &mut chars {
466 match ch {
467 '"' => {
468 n = i;
469 break;
470 }
471 '#' => {}
472 _ => return Err(Reject),
473 }
474 }
475 if n > 255 {
476 // https://github.com/rust-lang/rust/pull/95251
477 return Err(Reject);
478 }
479 while let Some((i, ch)) = chars.next() {
480 match ch {
481 '"' if input.rest[i + 1..].starts_with(&input.rest[..n]) => {
482 let rest = input.advance(i + 1 + n);
483 return Ok(literal_suffix(rest));
484 }
485 '\r' => match chars.next() {
486 Some((_, '\n')) => {}
487 _ => break,
488 },
489 _ => {}
490 }
491 }
492 Err(Reject)
493 }
494
byte(input: Cursor) -> Result<Cursor, Reject>495 fn byte(input: Cursor) -> Result<Cursor, Reject> {
496 let input = input.parse("b'")?;
497 let mut bytes = input.bytes().enumerate();
498 let ok = match bytes.next().map(|(_, b)| b) {
499 Some(b'\\') => match bytes.next().map(|(_, b)| b) {
500 Some(b'x') => backslash_x_byte(&mut bytes),
501 Some(b'n') | Some(b'r') | Some(b't') | Some(b'\\') | Some(b'0') | Some(b'\'')
502 | Some(b'"') => true,
503 _ => false,
504 },
505 b => b.is_some(),
506 };
507 if !ok {
508 return Err(Reject);
509 }
510 let (offset, _) = bytes.next().ok_or(Reject)?;
511 if !input.chars().as_str().is_char_boundary(offset) {
512 return Err(Reject);
513 }
514 let input = input.advance(offset).parse("'")?;
515 Ok(literal_suffix(input))
516 }
517
character(input: Cursor) -> Result<Cursor, Reject>518 fn character(input: Cursor) -> Result<Cursor, Reject> {
519 let input = input.parse("'")?;
520 let mut chars = input.char_indices();
521 let ok = match chars.next().map(|(_, ch)| ch) {
522 Some('\\') => match chars.next().map(|(_, ch)| ch) {
523 Some('x') => backslash_x_char(&mut chars),
524 Some('u') => backslash_u(&mut chars),
525 Some('n') | Some('r') | Some('t') | Some('\\') | Some('0') | Some('\'') | Some('"') => {
526 true
527 }
528 _ => false,
529 },
530 ch => ch.is_some(),
531 };
532 if !ok {
533 return Err(Reject);
534 }
535 let (idx, _) = chars.next().ok_or(Reject)?;
536 let input = input.advance(idx).parse("'")?;
537 Ok(literal_suffix(input))
538 }
539
// Pulls the next `(index, item)` pair from the iterator `$chars` and evaluates
// to the item when it matches one of the given patterns.
//
// On a mismatch, or when the iterator is exhausted, the macro makes the
// *enclosing function* `return false`, so it can only be used inside
// functions returning `bool`.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
        match $chars.next() {
            Some((_, ch)) => match ch {
                $pat $(| $rest)* => ch,
                _ => return false,
            },
            None => return false,
        }
    };
}
551
/// Validates the two digits of a `\x` escape in a string or char literal.
///
/// The first digit must be `0`-`7` and the second any hex digit, so the
/// escaped value is at most `0x7F`. Stops consuming at the first bad char.
fn backslash_x_char<I>(chars: &mut I) -> bool
where
    I: Iterator<Item = (usize, char)>,
{
    match chars.next() {
        Some((_, '0'..='7')) => {}
        _ => return false,
    }
    match chars.next() {
        Some((_, ch)) => ch.is_ascii_hexdigit(),
        None => false,
    }
}
560
/// Validates the two hex digits of a `\x` escape in a byte or byte string
/// literal. Stops consuming at the first byte that is not a hex digit.
fn backslash_x_byte<I>(chars: &mut I) -> bool
where
    I: Iterator<Item = (usize, u8)>,
{
    // `all` short-circuits, so a bad first digit leaves the second unread.
    (0..2).all(|_| match chars.next() {
        Some((_, b)) => b.is_ascii_hexdigit(),
        None => false,
    })
}
569
backslash_u<I>(chars: &mut I) -> bool where I: Iterator<Item = (usize, char)>,570 fn backslash_u<I>(chars: &mut I) -> bool
571 where
572 I: Iterator<Item = (usize, char)>,
573 {
574 next_ch!(chars @ '{');
575 let mut value = 0;
576 let mut len = 0;
577 for (_, ch) in chars {
578 let digit = match ch {
579 '0'..='9' => ch as u8 - b'0',
580 'a'..='f' => 10 + ch as u8 - b'a',
581 'A'..='F' => 10 + ch as u8 - b'A',
582 '_' if len > 0 => continue,
583 '}' if len > 0 => return char::from_u32(value).is_some(),
584 _ => return false,
585 };
586 if len == 6 {
587 return false;
588 }
589 value *= 0x10;
590 value += u32::from(digit);
591 len += 1;
592 }
593 false
594 }
595
float(input: Cursor) -> Result<Cursor, Reject>596 fn float(input: Cursor) -> Result<Cursor, Reject> {
597 let mut rest = float_digits(input)?;
598 if let Some(ch) = rest.chars().next() {
599 if is_ident_start(ch) {
600 rest = ident_not_raw(rest)?.0;
601 }
602 }
603 word_break(rest)
604 }
605
/// Scans the numeric part of a float literal (no suffix) and returns the
/// cursor just past it.
///
/// The text must start with a decimal digit and contain a `.` and/or an
/// exponent; otherwise it is rejected so it can be lexed as an integer.
/// `len` counts consumed chars, all of which are ASCII, so it is also the
/// byte length passed to `advance`.
fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.chars().peekable();
    match chars.next() {
        Some(ch) if ch >= '0' && ch <= '9' => {}
        _ => return Err(Reject),
    }

    let mut len = 1;
    let mut has_dot = false;
    let mut has_exp = false;
    while let Some(&ch) = chars.peek() {
        match ch {
            '0'..='9' | '_' => {
                chars.next();
                len += 1;
            }
            '.' => {
                // A second dot ends the literal.
                if has_dot {
                    break;
                }
                chars.next();
                // `1..2` (range) and `1.foo` (field/method access) are not floats.
                if chars
                    .peek()
                    .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
                {
                    return Err(Reject);
                }
                len += 1;
                has_dot = true;
            }
            'e' | 'E' => {
                chars.next();
                len += 1;
                has_exp = true;
                break;
            }
            _ => break,
        }
    }

    if !(has_dot || has_exp) {
        return Err(Reject);
    }

    if has_exp {
        // Fallback for when the exponent turns out to have no digits: the
        // literal ends just before the `e` (`len - 1`), which is only a float
        // if a dot was seen.
        let token_before_exp = if has_dot {
            Ok(input.advance(len - 1))
        } else {
            Err(Reject)
        };
        let mut has_sign = false;
        let mut has_exp_value = false;
        while let Some(&ch) = chars.peek() {
            match ch {
                '+' | '-' => {
                    // A sign after exponent digits belongs to the next token.
                    if has_exp_value {
                        break;
                    }
                    // Two signs in a row: no valid exponent here.
                    if has_sign {
                        return token_before_exp;
                    }
                    chars.next();
                    len += 1;
                    has_sign = true;
                }
                '0'..='9' => {
                    chars.next();
                    len += 1;
                    has_exp_value = true;
                }
                '_' => {
                    chars.next();
                    len += 1;
                }
                _ => break,
            }
        }
        if !has_exp_value {
            return token_before_exp;
        }
    }

    Ok(input.advance(len))
}
690
int(input: Cursor) -> Result<Cursor, Reject>691 fn int(input: Cursor) -> Result<Cursor, Reject> {
692 let mut rest = digits(input)?;
693 if let Some(ch) = rest.chars().next() {
694 if is_ident_start(ch) {
695 rest = ident_not_raw(rest)?.0;
696 }
697 }
698 word_break(rest)
699 }
700
digits(mut input: Cursor) -> Result<Cursor, Reject>701 fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
702 let base = if input.starts_with("0x") {
703 input = input.advance(2);
704 16
705 } else if input.starts_with("0o") {
706 input = input.advance(2);
707 8
708 } else if input.starts_with("0b") {
709 input = input.advance(2);
710 2
711 } else {
712 10
713 };
714
715 let mut len = 0;
716 let mut empty = true;
717 for b in input.bytes() {
718 match b {
719 b'0'..=b'9' => {
720 let digit = (b - b'0') as u64;
721 if digit >= base {
722 return Err(Reject);
723 }
724 }
725 b'a'..=b'f' => {
726 let digit = 10 + (b - b'a') as u64;
727 if digit >= base {
728 break;
729 }
730 }
731 b'A'..=b'F' => {
732 let digit = 10 + (b - b'A') as u64;
733 if digit >= base {
734 break;
735 }
736 }
737 b'_' => {
738 if empty && base == 10 {
739 return Err(Reject);
740 }
741 len += 1;
742 continue;
743 }
744 _ => break,
745 };
746 len += 1;
747 empty = false;
748 }
749 if empty {
750 Err(Reject)
751 } else {
752 Ok(input.advance(len))
753 }
754 }
755
punct(input: Cursor) -> PResult<Punct>756 fn punct(input: Cursor) -> PResult<Punct> {
757 let (rest, ch) = punct_char(input)?;
758 if ch == '\'' {
759 if ident_any(rest)?.0.starts_with("'") {
760 Err(Reject)
761 } else {
762 Ok((rest, Punct::new('\'', Spacing::Joint)))
763 }
764 } else {
765 let kind = match punct_char(rest) {
766 Ok(_) => Spacing::Joint,
767 Err(Reject) => Spacing::Alone,
768 };
769 Ok((rest, Punct::new(ch, kind)))
770 }
771 }
772
punct_char(input: Cursor) -> PResult<char>773 fn punct_char(input: Cursor) -> PResult<char> {
774 if input.starts_with("//") || input.starts_with("/*") {
775 // Do not accept `/` of a comment as a punct.
776 return Err(Reject);
777 }
778
779 let mut chars = input.chars();
780 let first = match chars.next() {
781 Some(ch) => ch,
782 None => {
783 return Err(Reject);
784 }
785 };
786 let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
787 if recognized.contains(first) {
788 Ok((input.advance(first.len_utf8()), first))
789 } else {
790 Err(Reject)
791 }
792 }
793
doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()>794 fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
795 #[cfg(span_locations)]
796 let lo = input.off;
797 let (rest, (comment, inner)) = doc_comment_contents(input)?;
798 let span = crate::Span::_new_fallback(Span {
799 #[cfg(span_locations)]
800 lo,
801 #[cfg(span_locations)]
802 hi: rest.off,
803 });
804
805 let mut scan_for_bare_cr = comment;
806 while let Some(cr) = scan_for_bare_cr.find('\r') {
807 let rest = &scan_for_bare_cr[cr + 1..];
808 if !rest.starts_with('\n') {
809 return Err(Reject);
810 }
811 scan_for_bare_cr = rest;
812 }
813
814 let mut pound = Punct::new('#', Spacing::Alone);
815 pound.set_span(span);
816 trees.push_token_from_parser(TokenTree::Punct(pound));
817
818 if inner {
819 let mut bang = Punct::new('!', Spacing::Alone);
820 bang.set_span(span);
821 trees.push_token_from_parser(TokenTree::Punct(bang));
822 }
823
824 let doc_ident = crate::Ident::new("doc", span);
825 let mut equal = Punct::new('=', Spacing::Alone);
826 equal.set_span(span);
827 let mut literal = crate::Literal::string(comment);
828 literal.set_span(span);
829 let mut bracketed = TokenStreamBuilder::with_capacity(3);
830 bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
831 bracketed.push_token_from_parser(TokenTree::Punct(equal));
832 bracketed.push_token_from_parser(TokenTree::Literal(literal));
833 let group = Group::new(Delimiter::Bracket, bracketed.build());
834 let mut group = crate::Group::_new_fallback(group);
835 group.set_span(span);
836 trees.push_token_from_parser(TokenTree::Group(group));
837
838 Ok((rest, ()))
839 }
840
doc_comment_contents(input: Cursor) -> PResult<(&str, bool)>841 fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
842 if input.starts_with("//!") {
843 let input = input.advance(3);
844 let (input, s) = take_until_newline_or_eof(input);
845 Ok((input, (s, true)))
846 } else if input.starts_with("/*!") {
847 let (input, s) = block_comment(input)?;
848 Ok((input, (&s[3..s.len() - 2], true)))
849 } else if input.starts_with("///") {
850 let input = input.advance(3);
851 if input.starts_with("/") {
852 return Err(Reject);
853 }
854 let (input, s) = take_until_newline_or_eof(input);
855 Ok((input, (s, false)))
856 } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
857 let (input, s) = block_comment(input)?;
858 Ok((input, (&s[3..s.len() - 2], false)))
859 } else {
860 Err(Reject)
861 }
862 }
863
take_until_newline_or_eof(input: Cursor) -> (Cursor, &str)864 fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
865 let chars = input.char_indices();
866
867 for (i, ch) in chars {
868 if ch == '\n' {
869 return (input.advance(i), &input.rest[..i]);
870 } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
871 return (input.advance(i + 1), &input.rest[..i]);
872 }
873 }
874
875 (input.advance(input.len()), input.rest)
876 }
877