• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use crate::{ParseError, err::{perr, ParseErrorKind::*}, parse::{hex_digit_value, check_suffix}};
2 
3 
4 /// Must start with `\`
unescape<E: Escapee>(input: &str, offset: usize) -> Result<(E, usize), ParseError>5 pub(crate) fn unescape<E: Escapee>(input: &str, offset: usize) -> Result<(E, usize), ParseError> {
6     let first = input.as_bytes().get(1)
7         .ok_or(perr(offset, UnterminatedEscape))?;
8     let out = match first {
9         // Quote escapes
10         b'\'' => (E::from_byte(b'\''), 2),
11         b'"' => (E::from_byte(b'"'), 2),
12 
13         // Ascii escapes
14         b'n' => (E::from_byte(b'\n'), 2),
15         b'r' => (E::from_byte(b'\r'), 2),
16         b't' => (E::from_byte(b'\t'), 2),
17         b'\\' => (E::from_byte(b'\\'), 2),
18         b'0' => (E::from_byte(b'\0'), 2),
19         b'x' => {
20             let hex_string = input.get(2..4)
21                 .ok_or(perr(offset..offset + input.len(), UnterminatedEscape))?
22                 .as_bytes();
23             let first = hex_digit_value(hex_string[0])
24                 .ok_or(perr(offset..offset + 4, InvalidXEscape))?;
25             let second = hex_digit_value(hex_string[1])
26                 .ok_or(perr(offset..offset + 4, InvalidXEscape))?;
27             let value = second + 16 * first;
28 
29             if E::SUPPORTS_UNICODE && value > 0x7F {
30                 return Err(perr(offset..offset + 4, NonAsciiXEscape));
31             }
32 
33             (E::from_byte(value), 4)
34         },
35 
36         // Unicode escape
37         b'u' => {
38             if !E::SUPPORTS_UNICODE {
39                 return Err(perr(offset..offset + 2, UnicodeEscapeInByteLiteral));
40             }
41 
42             if input.as_bytes().get(2) != Some(&b'{') {
43                 return Err(perr(offset..offset + 2, UnicodeEscapeWithoutBrace));
44             }
45 
46             let closing_pos = input.bytes().position(|b| b == b'}')
47                 .ok_or(perr(offset..offset + input.len(), UnterminatedUnicodeEscape))?;
48 
49             let inner = &input[3..closing_pos];
50             if inner.as_bytes().first() == Some(&b'_') {
51                 return Err(perr(4, InvalidStartOfUnicodeEscape));
52             }
53 
54             let mut v: u32 = 0;
55             let mut digit_count = 0;
56             for (i, b) in inner.bytes().enumerate() {
57                 if b == b'_'{
58                     continue;
59                 }
60 
61                 let digit = hex_digit_value(b)
62                     .ok_or(perr(offset + 3 + i, NonHexDigitInUnicodeEscape))?;
63 
64                 if digit_count == 6 {
65                     return Err(perr(offset + 3 + i, TooManyDigitInUnicodeEscape));
66                 }
67                 digit_count += 1;
68                 v = 16 * v + digit as u32;
69             }
70 
71             let c = std::char::from_u32(v)
72                 .ok_or(perr(offset..closing_pos + 1, InvalidUnicodeEscapeChar))?;
73 
74             (E::from_char(c), closing_pos + 1)
75         }
76 
77         _ => return Err(perr(offset..offset + 2, UnknownEscape)),
78     };
79 
80     Ok(out)
81 }
82 
/// A value that an escape sequence can be decoded into.
///
/// Within this file it is implemented for `u8` (byte literals) and `char`
/// (character and string literals). The `Into<char>` bound lets decoded
/// values be pushed onto a `String` regardless of the concrete type.
pub(crate) trait Escapee: Into<char> {
    /// Whether the literal kind may contain unicode: enables `\u{...}`
    /// escapes and non-ASCII content, and restricts `\x` escapes to ASCII.
    const SUPPORTS_UNICODE: bool;

    /// Builds the value from a full unicode scalar value.
    fn from_char(c: char) -> Self;

    /// Builds the value from a single byte.
    fn from_byte(b: u8) -> Self;
}
88 
89 impl Escapee for u8 {
90     const SUPPORTS_UNICODE: bool = false;
from_byte(b: u8) -> Self91     fn from_byte(b: u8) -> Self {
92         b
93     }
from_char(_: char) -> Self94     fn from_char(_: char) -> Self {
95         panic!("bug: `<u8 as Escapee>::from_char` was called");
96     }
97 }
98 
99 impl Escapee for char {
100     const SUPPORTS_UNICODE: bool = true;
from_byte(b: u8) -> Self101     fn from_byte(b: u8) -> Self {
102         b.into()
103     }
from_char(c: char) -> Self104     fn from_char(c: char) -> Self {
105         c
106     }
107 }
108 
/// Returns `true` for the bytes that are skipped after a string continue
/// start (an unescaped backslash followed by `\n`): space, tab, line feed
/// and carriage return.
fn is_string_continue_skipable_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\t' | b'\n' | b'\r')
}
114 
115 /// Unescapes a whole string or byte string.
116 #[inline(never)]
unescape_string<E: Escapee>( input: &str, offset: usize, ) -> Result<(Option<String>, usize), ParseError>117 pub(crate) fn unescape_string<E: Escapee>(
118     input: &str,
119     offset: usize,
120 ) -> Result<(Option<String>, usize), ParseError> {
121     let mut closing_quote_pos = None;
122     let mut i = offset;
123     let mut end_last_escape = offset;
124     let mut value = String::new();
125     while i < input.len() {
126         match input.as_bytes()[i] {
127             // Handle "string continue".
128             b'\\' if input.as_bytes().get(i + 1) == Some(&b'\n') => {
129                 value.push_str(&input[end_last_escape..i]);
130 
131                 // Find the first non-whitespace character.
132                 let end_escape = input[i + 2..].bytes()
133                     .position(|b| !is_string_continue_skipable_whitespace(b))
134                     .ok_or(perr(None, UnterminatedString))?;
135 
136                 i += 2 + end_escape;
137                 end_last_escape = i;
138             }
139             b'\\' => {
140                 let (c, len) = unescape::<E>(&input[i..input.len() - 1], i)?;
141                 value.push_str(&input[end_last_escape..i]);
142                 value.push(c.into());
143                 i += len;
144                 end_last_escape = i;
145             }
146             b'\r' => {
147                 if input.as_bytes().get(i + 1) == Some(&b'\n') {
148                     value.push_str(&input[end_last_escape..i]);
149                     value.push('\n');
150                     i += 2;
151                     end_last_escape = i;
152                 } else {
153                     return Err(perr(i, IsolatedCr))
154                 }
155             }
156             b'"' => {
157                 closing_quote_pos = Some(i);
158                 break;
159             },
160             b if !E::SUPPORTS_UNICODE && !b.is_ascii()
161                 => return Err(perr(i, NonAsciiInByteLiteral)),
162             _ => i += 1,
163         }
164     }
165 
166     let closing_quote_pos = closing_quote_pos.ok_or(perr(None, UnterminatedString))?;
167 
168     let start_suffix = closing_quote_pos + 1;
169     let suffix = &input[start_suffix..];
170     check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?;
171 
172     // `value` is only empty if there was no escape in the input string
173     // (with the special case of the input being empty). This means the
174     // string value basically equals the input, so we store `None`.
175     let value = if value.is_empty() {
176         None
177     } else {
178         // There was an escape in the string, so we need to push the
179         // remaining unescaped part of the string still.
180         value.push_str(&input[end_last_escape..closing_quote_pos]);
181         Some(value)
182     };
183 
184     Ok((value, start_suffix))
185 }
186 
187 /// Reads and checks a raw (byte) string literal, converting `\r\n` sequences to
188 /// just `\n` sequences. Returns an optional new string (if the input contained
189 /// any `\r\n`) and the number of hashes used by the literal.
190 #[inline(never)]
scan_raw_string<E: Escapee>( input: &str, offset: usize, ) -> Result<(Option<String>, u32, usize), ParseError>191 pub(crate) fn scan_raw_string<E: Escapee>(
192     input: &str,
193     offset: usize,
194 ) -> Result<(Option<String>, u32, usize), ParseError> {
195     // Raw string literal
196     let num_hashes = input[offset..].bytes().position(|b| b != b'#')
197         .ok_or(perr(None, InvalidLiteral))?;
198 
199     if input.as_bytes().get(offset + num_hashes) != Some(&b'"') {
200         return Err(perr(None, InvalidLiteral));
201     }
202     let start_inner = offset + num_hashes + 1;
203     let hashes = &input[offset..num_hashes + offset];
204 
205     let mut closing_quote_pos = None;
206     let mut i = start_inner;
207     let mut end_last_escape = start_inner;
208     let mut value = String::new();
209     while i < input.len() {
210         let b = input.as_bytes()[i];
211         if b == b'"' && input[i + 1..].starts_with(hashes) {
212             closing_quote_pos = Some(i);
213             break;
214         }
215 
216         if b == b'\r' {
217             // Convert `\r\n` into `\n`. This is currently not well documented
218             // in the Rust reference, but is done even for raw strings. That's
219             // because rustc simply converts all line endings when reading
220             // source files.
221             if input.as_bytes().get(i + 1) == Some(&b'\n') {
222                 value.push_str(&input[end_last_escape..i]);
223                 value.push('\n');
224                 i += 2;
225                 end_last_escape = i;
226                 continue;
227             } else if E::SUPPORTS_UNICODE {
228                 // If no \n follows the \r and we are scanning a raw string
229                 // (not raw byte string), we error.
230                 return Err(perr(i, IsolatedCr))
231             }
232         }
233 
234         if !E::SUPPORTS_UNICODE {
235             if !b.is_ascii() {
236                 return Err(perr(i, NonAsciiInByteLiteral));
237             }
238         }
239 
240         i += 1;
241     }
242 
243     let closing_quote_pos = closing_quote_pos.ok_or(perr(None, UnterminatedRawString))?;
244 
245     let start_suffix = closing_quote_pos + num_hashes + 1;
246     let suffix = &input[start_suffix..];
247     check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?;
248 
249     // `value` is only empty if there was no \r\n in the input string (with the
250     // special case of the input being empty). This means the string value
251     // equals the input, so we store `None`.
252     let value = if value.is_empty() {
253         None
254     } else {
255         // There was an \r\n in the string, so we need to push the remaining
256         // unescaped part of the string still.
257         value.push_str(&input[end_last_escape..closing_quote_pos]);
258         Some(value)
259     };
260 
261     Ok((value, num_hashes as u32, start_suffix))
262 }
263