1 //! Code related to parsing literals.
2
3 use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
4 use crate::token::{self, Token};
5 use rustc_lexer::unescape::{
6 byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
7 Mode,
8 };
9 use rustc_span::symbol::{kw, sym, Symbol};
10 use rustc_span::Span;
11 use std::ops::Range;
12 use std::{ascii, fmt, str};
13
14 // Escapes a string, represented as a symbol. Reuses the original symbol,
15 // avoiding interning, if no changes are required.
escape_string_symbol(symbol: Symbol) -> Symbol16 pub fn escape_string_symbol(symbol: Symbol) -> Symbol {
17 let s = symbol.as_str();
18 let escaped = s.escape_default().to_string();
19 if s == escaped { symbol } else { Symbol::intern(&escaped) }
20 }
21
22 // Escapes a char.
escape_char_symbol(ch: char) -> Symbol23 pub fn escape_char_symbol(ch: char) -> Symbol {
24 let s: String = ch.escape_default().map(Into::<char>::into).collect();
25 Symbol::intern(&s)
26 }
27
28 // Escapes a byte string.
escape_byte_str_symbol(bytes: &[u8]) -> Symbol29 pub fn escape_byte_str_symbol(bytes: &[u8]) -> Symbol {
30 let s = bytes.escape_ascii().to_string();
31 Symbol::intern(&s)
32 }
33
/// Reasons why converting a literal token into a `LitKind` can fail.
#[derive(Debug)]
pub enum LitError {
    /// Unescaping reported a fatal error, or an integer literal contains a
    /// digit that is out of range for its base; the integer case is noted in
    /// `integer_lit` as already reported by the lexer.
    LexerError,
    /// A suffix was present on a token kind for which
    /// `may_have_suffix()` returns `false`.
    InvalidSuffix,
    /// An integer literal carries a suffix that is neither a known integer
    /// type nor something starting with `f`.
    InvalidIntSuffix,
    /// A float literal carries a suffix other than `f32` or `f64`.
    InvalidFloatSuffix,
    /// A float literal was written in a base other than 10; the payload is
    /// that base.
    NonDecimalFloat(u32),
    /// The digits of an integer literal could not be parsed as a `u128`; the
    /// payload is the literal's base.
    IntTooLarge(u32),
    /// A C string literal contains a NUL; the payload is the range passed to
    /// the unescaping callback for the offending unit.
    NulInCStr(Range<usize>),
}
44
45 impl LitKind {
46 /// Converts literal token into a semantic literal.
from_token_lit(lit: token::Lit) -> Result<LitKind, LitError>47 pub fn from_token_lit(lit: token::Lit) -> Result<LitKind, LitError> {
48 let token::Lit { kind, symbol, suffix } = lit;
49 if suffix.is_some() && !kind.may_have_suffix() {
50 return Err(LitError::InvalidSuffix);
51 }
52
53 Ok(match kind {
54 token::Bool => {
55 assert!(symbol.is_bool_lit());
56 LitKind::Bool(symbol == kw::True)
57 }
58 token::Byte => {
59 return unescape_byte(symbol.as_str())
60 .map(LitKind::Byte)
61 .map_err(|_| LitError::LexerError);
62 }
63 token::Char => {
64 return unescape_char(symbol.as_str())
65 .map(LitKind::Char)
66 .map_err(|_| LitError::LexerError);
67 }
68
69 // There are some valid suffixes for integer and float literals,
70 // so all the handling is done internally.
71 token::Integer => return integer_lit(symbol, suffix),
72 token::Float => return float_lit(symbol, suffix),
73
74 token::Str => {
75 // If there are no characters requiring special treatment we can
76 // reuse the symbol from the token. Otherwise, we must generate a
77 // new symbol because the string in the LitKind is different to the
78 // string in the token.
79 let s = symbol.as_str();
80 let symbol = if s.contains(['\\', '\r']) {
81 let mut buf = String::with_capacity(s.len());
82 let mut error = Ok(());
83 // Force-inlining here is aggressive but the closure is
84 // called on every char in the string, so it can be
85 // hot in programs with many long strings.
86 unescape_literal(
87 s,
88 Mode::Str,
89 &mut #[inline(always)]
90 |_, unescaped_char| match unescaped_char {
91 Ok(c) => buf.push(c),
92 Err(err) => {
93 if err.is_fatal() {
94 error = Err(LitError::LexerError);
95 }
96 }
97 },
98 );
99 error?;
100 Symbol::intern(&buf)
101 } else {
102 symbol
103 };
104 LitKind::Str(symbol, ast::StrStyle::Cooked)
105 }
106 token::StrRaw(n) => {
107 // Ditto.
108 let s = symbol.as_str();
109 let symbol =
110 if s.contains('\r') {
111 let mut buf = String::with_capacity(s.len());
112 let mut error = Ok(());
113 unescape_literal(s, Mode::RawStr, &mut |_, unescaped_char| {
114 match unescaped_char {
115 Ok(c) => buf.push(c),
116 Err(err) => {
117 if err.is_fatal() {
118 error = Err(LitError::LexerError);
119 }
120 }
121 }
122 });
123 error?;
124 Symbol::intern(&buf)
125 } else {
126 symbol
127 };
128 LitKind::Str(symbol, ast::StrStyle::Raw(n))
129 }
130 token::ByteStr => {
131 let s = symbol.as_str();
132 let mut buf = Vec::with_capacity(s.len());
133 let mut error = Ok(());
134 unescape_literal(s, Mode::ByteStr, &mut |_, c| match c {
135 Ok(c) => buf.push(byte_from_char(c)),
136 Err(err) => {
137 if err.is_fatal() {
138 error = Err(LitError::LexerError);
139 }
140 }
141 });
142 error?;
143 LitKind::ByteStr(buf.into(), StrStyle::Cooked)
144 }
145 token::ByteStrRaw(n) => {
146 let s = symbol.as_str();
147 let bytes = if s.contains('\r') {
148 let mut buf = Vec::with_capacity(s.len());
149 let mut error = Ok(());
150 unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
151 Ok(c) => buf.push(byte_from_char(c)),
152 Err(err) => {
153 if err.is_fatal() {
154 error = Err(LitError::LexerError);
155 }
156 }
157 });
158 error?;
159 buf
160 } else {
161 symbol.to_string().into_bytes()
162 };
163
164 LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
165 }
166 token::CStr => {
167 let s = symbol.as_str();
168 let mut buf = Vec::with_capacity(s.len());
169 let mut error = Ok(());
170 unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
171 Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
172 error = Err(LitError::NulInCStr(span));
173 }
174 Ok(CStrUnit::Byte(b)) => buf.push(b),
175 Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
176 Ok(CStrUnit::Char(c)) => {
177 buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
178 }
179 Err(err) => {
180 if err.is_fatal() {
181 error = Err(LitError::LexerError);
182 }
183 }
184 });
185 error?;
186 buf.push(0);
187 LitKind::CStr(buf.into(), StrStyle::Cooked)
188 }
189 token::CStrRaw(n) => {
190 let s = symbol.as_str();
191 let mut buf = Vec::with_capacity(s.len());
192 let mut error = Ok(());
193 unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
194 Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
195 error = Err(LitError::NulInCStr(span));
196 }
197 Ok(CStrUnit::Byte(b)) => buf.push(b),
198 Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
199 Ok(CStrUnit::Char(c)) => {
200 buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
201 }
202 Err(err) => {
203 if err.is_fatal() {
204 error = Err(LitError::LexerError);
205 }
206 }
207 });
208 error?;
209 buf.push(0);
210 LitKind::CStr(buf.into(), StrStyle::Raw(n))
211 }
212 token::Err => LitKind::Err,
213 })
214 }
215 }
216
217 impl fmt::Display for LitKind {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result218 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
219 match *self {
220 LitKind::Byte(b) => {
221 let b: String = ascii::escape_default(b).map(Into::<char>::into).collect();
222 write!(f, "b'{b}'")?;
223 }
224 LitKind::Char(ch) => write!(f, "'{}'", escape_char_symbol(ch))?,
225 LitKind::Str(sym, StrStyle::Cooked) => write!(f, "\"{}\"", escape_string_symbol(sym))?,
226 LitKind::Str(sym, StrStyle::Raw(n)) => write!(
227 f,
228 "r{delim}\"{string}\"{delim}",
229 delim = "#".repeat(n as usize),
230 string = sym
231 )?,
232 LitKind::ByteStr(ref bytes, StrStyle::Cooked) => {
233 write!(f, "b\"{}\"", escape_byte_str_symbol(bytes))?
234 }
235 LitKind::ByteStr(ref bytes, StrStyle::Raw(n)) => {
236 // Unwrap because raw byte string literals can only contain ASCII.
237 let symbol = str::from_utf8(bytes).unwrap();
238 write!(
239 f,
240 "br{delim}\"{string}\"{delim}",
241 delim = "#".repeat(n as usize),
242 string = symbol
243 )?;
244 }
245 LitKind::CStr(ref bytes, StrStyle::Cooked) => {
246 write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
247 }
248 LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
249 // This can only be valid UTF-8.
250 let symbol = str::from_utf8(bytes).unwrap();
251 write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
252 }
253 LitKind::Int(n, ty) => {
254 write!(f, "{n}")?;
255 match ty {
256 ast::LitIntType::Unsigned(ty) => write!(f, "{}", ty.name())?,
257 ast::LitIntType::Signed(ty) => write!(f, "{}", ty.name())?,
258 ast::LitIntType::Unsuffixed => {}
259 }
260 }
261 LitKind::Float(symbol, ty) => {
262 write!(f, "{symbol}")?;
263 match ty {
264 ast::LitFloatType::Suffixed(ty) => write!(f, "{}", ty.name())?,
265 ast::LitFloatType::Unsuffixed => {}
266 }
267 }
268 LitKind::Bool(b) => write!(f, "{}", if b { "true" } else { "false" })?,
269 LitKind::Err => {
270 // This only shows up in places like `-Zunpretty=hir` output, so we
271 // don't bother to produce something useful.
272 write!(f, "<bad-literal>")?;
273 }
274 }
275
276 Ok(())
277 }
278 }
279
280 impl MetaItemLit {
281 /// Converts a token literal into a meta item literal.
from_token_lit(token_lit: token::Lit, span: Span) -> Result<MetaItemLit, LitError>282 pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result<MetaItemLit, LitError> {
283 Ok(MetaItemLit {
284 symbol: token_lit.symbol,
285 suffix: token_lit.suffix,
286 kind: LitKind::from_token_lit(token_lit)?,
287 span,
288 })
289 }
290
291 /// Cheaply converts a meta item literal into a token literal.
as_token_lit(&self) -> token::Lit292 pub fn as_token_lit(&self) -> token::Lit {
293 let kind = match self.kind {
294 LitKind::Bool(_) => token::Bool,
295 LitKind::Str(_, ast::StrStyle::Cooked) => token::Str,
296 LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
297 LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
298 LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
299 LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr,
300 LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n),
301 LitKind::Byte(_) => token::Byte,
302 LitKind::Char(_) => token::Char,
303 LitKind::Int(..) => token::Integer,
304 LitKind::Float(..) => token::Float,
305 LitKind::Err => token::Err,
306 };
307
308 token::Lit::new(kind, self.symbol, self.suffix)
309 }
310
311 /// Converts an arbitrary token into meta item literal.
from_token(token: &Token) -> Option<MetaItemLit>312 pub fn from_token(token: &Token) -> Option<MetaItemLit> {
313 token::Lit::from_token(token)
314 .and_then(|token_lit| MetaItemLit::from_token_lit(token_lit, token.span).ok())
315 }
316 }
317
strip_underscores(symbol: Symbol) -> Symbol318 fn strip_underscores(symbol: Symbol) -> Symbol {
319 // Do not allocate a new string unless necessary.
320 let s = symbol.as_str();
321 if s.contains('_') {
322 let mut s = s.to_string();
323 s.retain(|c| c != '_');
324 return Symbol::intern(&s);
325 }
326 symbol
327 }
328
filtered_float_lit( symbol: Symbol, suffix: Option<Symbol>, base: u32, ) -> Result<LitKind, LitError>329 fn filtered_float_lit(
330 symbol: Symbol,
331 suffix: Option<Symbol>,
332 base: u32,
333 ) -> Result<LitKind, LitError> {
334 debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base);
335 if base != 10 {
336 return Err(LitError::NonDecimalFloat(base));
337 }
338 Ok(match suffix {
339 Some(suf) => LitKind::Float(
340 symbol,
341 ast::LitFloatType::Suffixed(match suf {
342 sym::f32 => ast::FloatTy::F32,
343 sym::f64 => ast::FloatTy::F64,
344 _ => return Err(LitError::InvalidFloatSuffix),
345 }),
346 ),
347 None => LitKind::Float(symbol, ast::LitFloatType::Unsuffixed),
348 })
349 }
350
float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError>351 fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
352 debug!("float_lit: {:?}, {:?}", symbol, suffix);
353 filtered_float_lit(strip_underscores(symbol), suffix, 10)
354 }
355
integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError>356 fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
357 debug!("integer_lit: {:?}, {:?}", symbol, suffix);
358 let symbol = strip_underscores(symbol);
359 let s = symbol.as_str();
360
361 let base = match s.as_bytes() {
362 [b'0', b'x', ..] => 16,
363 [b'0', b'o', ..] => 8,
364 [b'0', b'b', ..] => 2,
365 _ => 10,
366 };
367
368 let ty = match suffix {
369 Some(suf) => match suf {
370 sym::isize => ast::LitIntType::Signed(ast::IntTy::Isize),
371 sym::i8 => ast::LitIntType::Signed(ast::IntTy::I8),
372 sym::i16 => ast::LitIntType::Signed(ast::IntTy::I16),
373 sym::i32 => ast::LitIntType::Signed(ast::IntTy::I32),
374 sym::i64 => ast::LitIntType::Signed(ast::IntTy::I64),
375 sym::i128 => ast::LitIntType::Signed(ast::IntTy::I128),
376 sym::usize => ast::LitIntType::Unsigned(ast::UintTy::Usize),
377 sym::u8 => ast::LitIntType::Unsigned(ast::UintTy::U8),
378 sym::u16 => ast::LitIntType::Unsigned(ast::UintTy::U16),
379 sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32),
380 sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64),
381 sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128),
382 // `1f64` and `2f32` etc. are valid float literals, and
383 // `fxxx` looks more like an invalid float literal than invalid integer literal.
384 _ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base),
385 _ => return Err(LitError::InvalidIntSuffix),
386 },
387 _ => ast::LitIntType::Unsuffixed,
388 };
389
390 let s = &s[if base != 10 { 2 } else { 0 }..];
391 u128::from_str_radix(s, base).map(|i| LitKind::Int(i, ty)).map_err(|_| {
392 // Small bases are lexed as if they were base 10, e.g, the string
393 // might be `0b10201`. This will cause the conversion above to fail,
394 // but these kinds of errors are already reported by the lexer.
395 let from_lexer = base < 10 && s.chars().any(|c| c.to_digit(10).is_some_and(|d| d >= base));
396 if from_lexer { LitError::LexerError } else { LitError::IntTooLarge(base) }
397 })
398 }
399