• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::ops::RangeInclusive;
2 
3 use winnow::combinator::alt;
4 use winnow::combinator::empty;
5 use winnow::combinator::eof;
6 use winnow::combinator::fail;
7 use winnow::combinator::opt;
8 use winnow::combinator::peek;
9 use winnow::combinator::repeat;
10 use winnow::combinator::terminated;
11 use winnow::prelude::*;
12 use winnow::stream::Stream as _;
13 use winnow::token::any;
14 use winnow::token::one_of;
15 use winnow::token::take_while;
16 
17 use crate::parser::prelude::*;
18 
/// Converts `bytes` to a `&str`, skipping UTF-8 validation in release builds.
///
/// When `debug_assertions` are enabled the bytes are still validated, so a
/// broken invariant is caught quickly while testing instead of silently
/// producing an invalid `str`.
///
/// `safety_justification` explains why the caller believes `bytes` is valid
/// UTF-8; it is used as the panic message when the debug-build check fails.
///
/// # Safety
///
/// `bytes` must be valid UTF-8. In builds without `debug_assertions` this is
/// not checked, and passing invalid UTF-8 is undefined behavior.
///
/// # Panics
///
/// Panics in builds with `debug_assertions` if `bytes` is not valid UTF-8.
pub(crate) unsafe fn from_utf8_unchecked<'b>(
    bytes: &'b [u8],
    safety_justification: &'static str,
) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        std::str::from_utf8(bytes).expect(safety_justification)
    } else {
        // SAFETY: the caller guarantees `bytes` is valid UTF-8 (see `# Safety`).
        unsafe { std::str::from_utf8_unchecked(bytes) }
    }
}
32 
33 // wschar = ( %x20 /              ; Space
34 //            %x09 )              ; Horizontal tab
/// The two inline whitespace bytes: space (`0x20`) and horizontal tab (`0x09`).
pub(crate) const WSCHAR: (u8, u8) = (0x20, 0x09);
36 
37 // ws = *wschar
ws<'i>(input: &mut Input<'i>) -> PResult<&'i str>38 pub(crate) fn ws<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
39     take_while(0.., WSCHAR)
40         .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") })
41         .parse_next(input)
42 }
43 
44 // non-ascii = %x80-D7FF / %xE000-10FFFF
45 // - ASCII is 0xxxxxxx
46 // - First byte for UTF-8 is 11xxxxxx
47 // - Subsequent UTF-8 bytes are 10xxxxxx
/// Every byte with the high bit set, i.e. any byte that is part of a
/// multi-byte UTF-8 sequence (lead or continuation byte).
pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=u8::MAX;
49 
50 // non-eol = %x09 / %x20-7E / non-ascii
51 pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
52     (0x09, 0x20..=0x7E, NON_ASCII);
53 
54 // comment-start-symbol = %x23 ; #
/// The byte that opens a comment: `#` (`0x23`).
pub(crate) const COMMENT_START_SYMBOL: u8 = 0x23;
56 
57 // comment = comment-start-symbol *non-eol
comment(input: &mut Input<'_>) -> PResult<()>58 pub(crate) fn comment(input: &mut Input<'_>) -> PResult<()> {
59     (COMMENT_START_SYMBOL, take_while(0.., NON_EOL))
60         .void()
61         .parse_next(input)
62 }
63 
64 // newline = ( %x0A /              ; LF
65 //             %x0D.0A )           ; CRLF
newline(input: &mut Input<'_>) -> PResult<()>66 pub(crate) fn newline(input: &mut Input<'_>) -> PResult<()> {
67     dispatch! {any;
68         b'\n' => empty,
69         b'\r' => one_of(LF).void(),
70         _ => fail,
71     }
72     .parse_next(input)
73 }
/// Line feed (`\n`).
pub(crate) const LF: u8 = 0x0A;
/// Carriage return (`\r`).
pub(crate) const CR: u8 = 0x0D;
76 
77 // ws-newline       = *( wschar / newline )
ws_newline(input: &mut Input<'_>) -> PResult<()>78 pub(crate) fn ws_newline(input: &mut Input<'_>) -> PResult<()> {
79     repeat(
80         0..,
81         alt((newline.value(&b"\n"[..]), take_while(1.., WSCHAR))),
82     )
83     .map(|()| ())
84     .parse_next(input)
85 }
86 
87 // ws-newlines      = newline *( wschar / newline )
ws_newlines(input: &mut Input<'_>) -> PResult<()>88 pub(crate) fn ws_newlines(input: &mut Input<'_>) -> PResult<()> {
89     (newline, ws_newline).void().parse_next(input)
90 }
91 
92 // note: this rule is not present in the original grammar
93 // ws-comment-newline = *( ws-newline-nonempty / comment )
ws_comment_newline(input: &mut Input<'_>) -> PResult<()>94 pub(crate) fn ws_comment_newline(input: &mut Input<'_>) -> PResult<()> {
95     let mut start = input.checkpoint();
96     loop {
97         let _ = ws.parse_next(input)?;
98 
99         let next_token = opt(peek(any)).parse_next(input)?;
100         match next_token {
101             Some(b'#') => (comment, newline).void().parse_next(input)?,
102             Some(b'\n') => (newline).void().parse_next(input)?,
103             Some(b'\r') => (newline).void().parse_next(input)?,
104             _ => break,
105         }
106 
107         let end = input.checkpoint();
108         if start == end {
109             break;
110         }
111         start = end;
112     }
113 
114     Ok(())
115 }
116 
117 // note: this rule is not present in the original grammar
118 // line-ending = newline / eof
line_ending(input: &mut Input<'_>) -> PResult<()>119 pub(crate) fn line_ending(input: &mut Input<'_>) -> PResult<()> {
120     alt((newline.value("\n"), eof.value("")))
121         .void()
122         .parse_next(input)
123 }
124 
125 // note: this rule is not present in the original grammar
126 // line-trailing = ws [comment] skip-line-ending
line_trailing(input: &mut Input<'_>) -> PResult<std::ops::Range<usize>>127 pub(crate) fn line_trailing(input: &mut Input<'_>) -> PResult<std::ops::Range<usize>> {
128     terminated((ws, opt(comment)).span(), line_ending).parse_next(input)
129 }
130 
// Unit tests for the trivia parsers. Compiled only for test builds that have
// both the `parse` and `display` features enabled.
#[cfg(test)]
#[cfg(feature = "parse")]
#[cfg(feature = "display")]
mod test {
    use super::*;

    // Checks that `ws_comment_newline` succeeds on inputs made up only of
    // whitespace, newlines, and comments, and that the bytes it consumed are
    // exactly the whole input.
    #[test]
    fn trivia() {
        // NOTE: the whitespace and line breaks inside these raw strings are
        // the test data itself; do not re-indent or reformat them.
        let inputs = [
            "",
            r#" "#,
            r#"
"#,
            r#"
# comment

# comment2


"#,
            r#"
        "#,
            r#"# comment
# comment2


   "#,
        ];
        for input in inputs {
            // Print the fixture so a failure shows which input was being parsed.
            dbg!(input);
            // `.take()` yields the consumed bytes rather than the parser's `()` output.
            let parsed = ws_comment_newline.take().parse(new_input(input));
            assert!(parsed.is_ok(), "{:?}", parsed);
            let parsed = parsed.unwrap();
            assert_eq!(parsed, input.as_bytes());
        }
    }
}
168