• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Code related to parsing literals.
2 
3 use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
4 use crate::token::{self, Token};
5 use rustc_lexer::unescape::{
6     byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
7     Mode,
8 };
9 use rustc_span::symbol::{kw, sym, Symbol};
10 use rustc_span::Span;
11 use std::ops::Range;
12 use std::{ascii, fmt, str};
13 
14 // Escapes a string, represented as a symbol. Reuses the original symbol,
15 // avoiding interning, if no changes are required.
escape_string_symbol(symbol: Symbol) -> Symbol16 pub fn escape_string_symbol(symbol: Symbol) -> Symbol {
17     let s = symbol.as_str();
18     let escaped = s.escape_default().to_string();
19     if s == escaped { symbol } else { Symbol::intern(&escaped) }
20 }
21 
22 // Escapes a char.
escape_char_symbol(ch: char) -> Symbol23 pub fn escape_char_symbol(ch: char) -> Symbol {
24     let s: String = ch.escape_default().map(Into::<char>::into).collect();
25     Symbol::intern(&s)
26 }
27 
28 // Escapes a byte string.
escape_byte_str_symbol(bytes: &[u8]) -> Symbol29 pub fn escape_byte_str_symbol(bytes: &[u8]) -> Symbol {
30     let s = bytes.escape_ascii().to_string();
31     Symbol::intern(&s)
32 }
33 
/// Reasons why a token literal could not be converted into a semantic literal.
#[derive(Debug)]
pub enum LitError {
    /// Unescaping reported a fatal error, or an integer literal with a base
    /// below 10 contained a decimal digit that is out of range for that base
    /// (the latter is expected to have been reported by the lexer already).
    LexerError,
    /// A suffix was present on a literal kind that may not have one.
    InvalidSuffix,
    /// An integer literal carried a suffix that is neither a known integer
    /// type nor something starting with `f`.
    InvalidIntSuffix,
    /// A float literal carried a suffix other than `f32` or `f64`.
    InvalidFloatSuffix,
    /// A float literal was written in a base other than 10; carries that base.
    NonDecimalFloat(u32),
    /// The digits of an integer literal could not be parsed as a `u128`;
    /// carries the base of the literal.
    IntTooLarge(u32),
    /// A C string literal contained a NUL; carries the range that was
    /// reported for the offending unit while unescaping.
    NulInCStr(Range<usize>),
}
44 
45 impl LitKind {
46     /// Converts literal token into a semantic literal.
from_token_lit(lit: token::Lit) -> Result<LitKind, LitError>47     pub fn from_token_lit(lit: token::Lit) -> Result<LitKind, LitError> {
48         let token::Lit { kind, symbol, suffix } = lit;
49         if suffix.is_some() && !kind.may_have_suffix() {
50             return Err(LitError::InvalidSuffix);
51         }
52 
53         Ok(match kind {
54             token::Bool => {
55                 assert!(symbol.is_bool_lit());
56                 LitKind::Bool(symbol == kw::True)
57             }
58             token::Byte => {
59                 return unescape_byte(symbol.as_str())
60                     .map(LitKind::Byte)
61                     .map_err(|_| LitError::LexerError);
62             }
63             token::Char => {
64                 return unescape_char(symbol.as_str())
65                     .map(LitKind::Char)
66                     .map_err(|_| LitError::LexerError);
67             }
68 
69             // There are some valid suffixes for integer and float literals,
70             // so all the handling is done internally.
71             token::Integer => return integer_lit(symbol, suffix),
72             token::Float => return float_lit(symbol, suffix),
73 
74             token::Str => {
75                 // If there are no characters requiring special treatment we can
76                 // reuse the symbol from the token. Otherwise, we must generate a
77                 // new symbol because the string in the LitKind is different to the
78                 // string in the token.
79                 let s = symbol.as_str();
80                 let symbol = if s.contains(['\\', '\r']) {
81                     let mut buf = String::with_capacity(s.len());
82                     let mut error = Ok(());
83                     // Force-inlining here is aggressive but the closure is
84                     // called on every char in the string, so it can be
85                     // hot in programs with many long strings.
86                     unescape_literal(
87                         s,
88                         Mode::Str,
89                         &mut #[inline(always)]
90                         |_, unescaped_char| match unescaped_char {
91                             Ok(c) => buf.push(c),
92                             Err(err) => {
93                                 if err.is_fatal() {
94                                     error = Err(LitError::LexerError);
95                                 }
96                             }
97                         },
98                     );
99                     error?;
100                     Symbol::intern(&buf)
101                 } else {
102                     symbol
103                 };
104                 LitKind::Str(symbol, ast::StrStyle::Cooked)
105             }
106             token::StrRaw(n) => {
107                 // Ditto.
108                 let s = symbol.as_str();
109                 let symbol =
110                     if s.contains('\r') {
111                         let mut buf = String::with_capacity(s.len());
112                         let mut error = Ok(());
113                         unescape_literal(s, Mode::RawStr, &mut |_, unescaped_char| {
114                             match unescaped_char {
115                                 Ok(c) => buf.push(c),
116                                 Err(err) => {
117                                     if err.is_fatal() {
118                                         error = Err(LitError::LexerError);
119                                     }
120                                 }
121                             }
122                         });
123                         error?;
124                         Symbol::intern(&buf)
125                     } else {
126                         symbol
127                     };
128                 LitKind::Str(symbol, ast::StrStyle::Raw(n))
129             }
130             token::ByteStr => {
131                 let s = symbol.as_str();
132                 let mut buf = Vec::with_capacity(s.len());
133                 let mut error = Ok(());
134                 unescape_literal(s, Mode::ByteStr, &mut |_, c| match c {
135                     Ok(c) => buf.push(byte_from_char(c)),
136                     Err(err) => {
137                         if err.is_fatal() {
138                             error = Err(LitError::LexerError);
139                         }
140                     }
141                 });
142                 error?;
143                 LitKind::ByteStr(buf.into(), StrStyle::Cooked)
144             }
145             token::ByteStrRaw(n) => {
146                 let s = symbol.as_str();
147                 let bytes = if s.contains('\r') {
148                     let mut buf = Vec::with_capacity(s.len());
149                     let mut error = Ok(());
150                     unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
151                         Ok(c) => buf.push(byte_from_char(c)),
152                         Err(err) => {
153                             if err.is_fatal() {
154                                 error = Err(LitError::LexerError);
155                             }
156                         }
157                     });
158                     error?;
159                     buf
160                 } else {
161                     symbol.to_string().into_bytes()
162                 };
163 
164                 LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
165             }
166             token::CStr => {
167                 let s = symbol.as_str();
168                 let mut buf = Vec::with_capacity(s.len());
169                 let mut error = Ok(());
170                 unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
171                     Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
172                         error = Err(LitError::NulInCStr(span));
173                     }
174                     Ok(CStrUnit::Byte(b)) => buf.push(b),
175                     Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
176                     Ok(CStrUnit::Char(c)) => {
177                         buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
178                     }
179                     Err(err) => {
180                         if err.is_fatal() {
181                             error = Err(LitError::LexerError);
182                         }
183                     }
184                 });
185                 error?;
186                 buf.push(0);
187                 LitKind::CStr(buf.into(), StrStyle::Cooked)
188             }
189             token::CStrRaw(n) => {
190                 let s = symbol.as_str();
191                 let mut buf = Vec::with_capacity(s.len());
192                 let mut error = Ok(());
193                 unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
194                     Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
195                         error = Err(LitError::NulInCStr(span));
196                     }
197                     Ok(CStrUnit::Byte(b)) => buf.push(b),
198                     Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
199                     Ok(CStrUnit::Char(c)) => {
200                         buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
201                     }
202                     Err(err) => {
203                         if err.is_fatal() {
204                             error = Err(LitError::LexerError);
205                         }
206                     }
207                 });
208                 error?;
209                 buf.push(0);
210                 LitKind::CStr(buf.into(), StrStyle::Raw(n))
211             }
212             token::Err => LitKind::Err,
213         })
214     }
215 }
216 
217 impl fmt::Display for LitKind {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result218     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
219         match *self {
220             LitKind::Byte(b) => {
221                 let b: String = ascii::escape_default(b).map(Into::<char>::into).collect();
222                 write!(f, "b'{b}'")?;
223             }
224             LitKind::Char(ch) => write!(f, "'{}'", escape_char_symbol(ch))?,
225             LitKind::Str(sym, StrStyle::Cooked) => write!(f, "\"{}\"", escape_string_symbol(sym))?,
226             LitKind::Str(sym, StrStyle::Raw(n)) => write!(
227                 f,
228                 "r{delim}\"{string}\"{delim}",
229                 delim = "#".repeat(n as usize),
230                 string = sym
231             )?,
232             LitKind::ByteStr(ref bytes, StrStyle::Cooked) => {
233                 write!(f, "b\"{}\"", escape_byte_str_symbol(bytes))?
234             }
235             LitKind::ByteStr(ref bytes, StrStyle::Raw(n)) => {
236                 // Unwrap because raw byte string literals can only contain ASCII.
237                 let symbol = str::from_utf8(bytes).unwrap();
238                 write!(
239                     f,
240                     "br{delim}\"{string}\"{delim}",
241                     delim = "#".repeat(n as usize),
242                     string = symbol
243                 )?;
244             }
245             LitKind::CStr(ref bytes, StrStyle::Cooked) => {
246                 write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
247             }
248             LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
249                 // This can only be valid UTF-8.
250                 let symbol = str::from_utf8(bytes).unwrap();
251                 write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
252             }
253             LitKind::Int(n, ty) => {
254                 write!(f, "{n}")?;
255                 match ty {
256                     ast::LitIntType::Unsigned(ty) => write!(f, "{}", ty.name())?,
257                     ast::LitIntType::Signed(ty) => write!(f, "{}", ty.name())?,
258                     ast::LitIntType::Unsuffixed => {}
259                 }
260             }
261             LitKind::Float(symbol, ty) => {
262                 write!(f, "{symbol}")?;
263                 match ty {
264                     ast::LitFloatType::Suffixed(ty) => write!(f, "{}", ty.name())?,
265                     ast::LitFloatType::Unsuffixed => {}
266                 }
267             }
268             LitKind::Bool(b) => write!(f, "{}", if b { "true" } else { "false" })?,
269             LitKind::Err => {
270                 // This only shows up in places like `-Zunpretty=hir` output, so we
271                 // don't bother to produce something useful.
272                 write!(f, "<bad-literal>")?;
273             }
274         }
275 
276         Ok(())
277     }
278 }
279 
280 impl MetaItemLit {
281     /// Converts a token literal into a meta item literal.
from_token_lit(token_lit: token::Lit, span: Span) -> Result<MetaItemLit, LitError>282     pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result<MetaItemLit, LitError> {
283         Ok(MetaItemLit {
284             symbol: token_lit.symbol,
285             suffix: token_lit.suffix,
286             kind: LitKind::from_token_lit(token_lit)?,
287             span,
288         })
289     }
290 
291     /// Cheaply converts a meta item literal into a token literal.
as_token_lit(&self) -> token::Lit292     pub fn as_token_lit(&self) -> token::Lit {
293         let kind = match self.kind {
294             LitKind::Bool(_) => token::Bool,
295             LitKind::Str(_, ast::StrStyle::Cooked) => token::Str,
296             LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
297             LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
298             LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
299             LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr,
300             LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n),
301             LitKind::Byte(_) => token::Byte,
302             LitKind::Char(_) => token::Char,
303             LitKind::Int(..) => token::Integer,
304             LitKind::Float(..) => token::Float,
305             LitKind::Err => token::Err,
306         };
307 
308         token::Lit::new(kind, self.symbol, self.suffix)
309     }
310 
311     /// Converts an arbitrary token into meta item literal.
from_token(token: &Token) -> Option<MetaItemLit>312     pub fn from_token(token: &Token) -> Option<MetaItemLit> {
313         token::Lit::from_token(token)
314             .and_then(|token_lit| MetaItemLit::from_token_lit(token_lit, token.span).ok())
315     }
316 }
317 
strip_underscores(symbol: Symbol) -> Symbol318 fn strip_underscores(symbol: Symbol) -> Symbol {
319     // Do not allocate a new string unless necessary.
320     let s = symbol.as_str();
321     if s.contains('_') {
322         let mut s = s.to_string();
323         s.retain(|c| c != '_');
324         return Symbol::intern(&s);
325     }
326     symbol
327 }
328 
filtered_float_lit( symbol: Symbol, suffix: Option<Symbol>, base: u32, ) -> Result<LitKind, LitError>329 fn filtered_float_lit(
330     symbol: Symbol,
331     suffix: Option<Symbol>,
332     base: u32,
333 ) -> Result<LitKind, LitError> {
334     debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base);
335     if base != 10 {
336         return Err(LitError::NonDecimalFloat(base));
337     }
338     Ok(match suffix {
339         Some(suf) => LitKind::Float(
340             symbol,
341             ast::LitFloatType::Suffixed(match suf {
342                 sym::f32 => ast::FloatTy::F32,
343                 sym::f64 => ast::FloatTy::F64,
344                 _ => return Err(LitError::InvalidFloatSuffix),
345             }),
346         ),
347         None => LitKind::Float(symbol, ast::LitFloatType::Unsuffixed),
348     })
349 }
350 
float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError>351 fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
352     debug!("float_lit: {:?}, {:?}", symbol, suffix);
353     filtered_float_lit(strip_underscores(symbol), suffix, 10)
354 }
355 
integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError>356 fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
357     debug!("integer_lit: {:?}, {:?}", symbol, suffix);
358     let symbol = strip_underscores(symbol);
359     let s = symbol.as_str();
360 
361     let base = match s.as_bytes() {
362         [b'0', b'x', ..] => 16,
363         [b'0', b'o', ..] => 8,
364         [b'0', b'b', ..] => 2,
365         _ => 10,
366     };
367 
368     let ty = match suffix {
369         Some(suf) => match suf {
370             sym::isize => ast::LitIntType::Signed(ast::IntTy::Isize),
371             sym::i8 => ast::LitIntType::Signed(ast::IntTy::I8),
372             sym::i16 => ast::LitIntType::Signed(ast::IntTy::I16),
373             sym::i32 => ast::LitIntType::Signed(ast::IntTy::I32),
374             sym::i64 => ast::LitIntType::Signed(ast::IntTy::I64),
375             sym::i128 => ast::LitIntType::Signed(ast::IntTy::I128),
376             sym::usize => ast::LitIntType::Unsigned(ast::UintTy::Usize),
377             sym::u8 => ast::LitIntType::Unsigned(ast::UintTy::U8),
378             sym::u16 => ast::LitIntType::Unsigned(ast::UintTy::U16),
379             sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32),
380             sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64),
381             sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128),
382             // `1f64` and `2f32` etc. are valid float literals, and
383             // `fxxx` looks more like an invalid float literal than invalid integer literal.
384             _ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base),
385             _ => return Err(LitError::InvalidIntSuffix),
386         },
387         _ => ast::LitIntType::Unsuffixed,
388     };
389 
390     let s = &s[if base != 10 { 2 } else { 0 }..];
391     u128::from_str_radix(s, base).map(|i| LitKind::Int(i, ty)).map_err(|_| {
392         // Small bases are lexed as if they were base 10, e.g, the string
393         // might be `0b10201`. This will cause the conversion above to fail,
394         // but these kinds of errors are already reported by the lexer.
395         let from_lexer = base < 10 && s.chars().any(|c| c.to_digit(10).is_some_and(|d| d >= base));
396         if from_lexer { LitError::LexerError } else { LitError::IntTooLarge(base) }
397     })
398 }
399