//! Integer literals: [`IntegerLit`], the bases ([`IntegerBase`]) and type
//! suffixes ([`IntegerType`]) they can carry, and the parsing routine.
1 use std::{fmt, str::FromStr};
2 
3 use crate::{
4     Buffer, ParseError,
5     err::{perr, ParseErrorKind::*},
6     parse::{first_byte_or_empty, hex_digit_value, check_suffix},
7 };
8 
9 
10 /// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`.
11 ///
12 /// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`),
13 /// the main part (digits and underscores), and an optional type suffix
14 /// (e.g. `u64` or `i8`). See [the reference][ref] for more information.
15 ///
16 /// Note that integer literals are always positive: the grammar does not contain
17 /// the minus sign at all. The minus sign is just the unary negate operator,
18 /// not part of the literal. Which is interesting for cases like `- 128i8`:
19 /// here, the literal itself would overflow the specified type (`i8` cannot
20 /// represent 128). That's why in rustc, the literal overflow check is
21 /// performed as a lint after parsing, not during the lexing stage. Similarly,
22 /// [`IntegerLit::parse`] does not perform an overflow check.
23 ///
24 /// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
25 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
26 #[non_exhaustive]
27 pub struct IntegerLit<B: Buffer> {
28     /// The raw literal. Grammar: `<prefix?><main part><suffix?>`.
29     raw: B,
30     /// First index of the main number part (after the base prefix).
31     start_main_part: usize,
32     /// First index not part of the main number part.
33     end_main_part: usize,
34     /// Parsed `raw[..start_main_part]`.
35     base: IntegerBase,
36 }
37 
38 impl<B: Buffer> IntegerLit<B> {
39     /// Parses the input as an integer literal. Returns an error if the input is
40     /// invalid or represents a different kind of literal.
parse(input: B) -> Result<Self, ParseError>41     pub fn parse(input: B) -> Result<Self, ParseError> {
42         match first_byte_or_empty(&input)? {
43             digit @ b'0'..=b'9' => {
44                 // TODO: simplify once RFC 2528 is stabilized
45                 let IntegerLit {
46                     start_main_part,
47                     end_main_part,
48                     base,
49                     ..
50                 } =  parse_impl(&input, digit)?;
51 
52                 Ok(Self { raw: input, start_main_part, end_main_part, base })
53             },
54             _ => Err(perr(0, DoesNotStartWithDigit)),
55         }
56     }
57 
58     /// Performs the actual string to int conversion to obtain the integer
59     /// value. The optional type suffix of the literal **is ignored by this
60     /// method**. This means `N` does not need to match the type suffix!
61     ///
62     /// Returns `None` if the literal overflows `N`.
63     ///
64     /// Hint: `u128` can represent all possible values integer literal values,
65     /// as there are no negative literals (see type docs). Thus you can, for
66     /// example, safely use `lit.value::<u128>().to_string()` to get a decimal
67     /// string. (Technically, Rust integer literals can represent arbitrarily
68     /// large numbers, but those would be rejected at a later stage by the Rust
69     /// compiler).
value<N: FromIntegerLiteral>(&self) -> Option<N>70     pub fn value<N: FromIntegerLiteral>(&self) -> Option<N> {
71         let base = N::from_small_number(self.base.value());
72 
73         let mut acc = N::from_small_number(0);
74         for digit in self.raw_main_part().bytes() {
75             if digit == b'_' {
76                 continue;
77             }
78 
79             // We don't actually need the base here: we already know this main
80             // part only contains digits valid for the specified base.
81             let digit = hex_digit_value(digit)
82                 .unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit"));
83 
84             acc = acc.checked_mul(base)?;
85             acc = acc.checked_add(N::from_small_number(digit))?;
86         }
87 
88         Some(acc)
89     }
90 
91     /// The base of this integer literal.
base(&self) -> IntegerBase92     pub fn base(&self) -> IntegerBase {
93         self.base
94     }
95 
96     /// The main part containing the digits and potentially `_`. Do not try to
97     /// parse this directly as that would ignore the base!
raw_main_part(&self) -> &str98     pub fn raw_main_part(&self) -> &str {
99         &(*self.raw)[self.start_main_part..self.end_main_part]
100     }
101 
102     /// The optional suffix. Returns `""` if the suffix is empty/does not exist.
103     ///
104     /// If you want the type, try `IntegerType::from_suffix(lit.suffix())`.
suffix(&self) -> &str105     pub fn suffix(&self) -> &str {
106         &(*self.raw)[self.end_main_part..]
107     }
108 
109     /// Returns the raw input that was passed to `parse`.
raw_input(&self) -> &str110     pub fn raw_input(&self) -> &str {
111         &self.raw
112     }
113 
114     /// Returns the raw input that was passed to `parse`, potentially owned.
into_raw_input(self) -> B115     pub fn into_raw_input(self) -> B {
116         self.raw
117     }
118 }
119 
120 impl IntegerLit<&str> {
121     /// Makes a copy of the underlying buffer and returns the owned version of
122     /// `Self`.
to_owned(&self) -> IntegerLit<String>123     pub fn to_owned(&self) -> IntegerLit<String> {
124         IntegerLit {
125             raw: self.raw.to_owned(),
126             start_main_part: self.start_main_part,
127             end_main_part: self.end_main_part,
128             base: self.base,
129         }
130     }
131 }
132 
133 impl<B: Buffer> fmt::Display for IntegerLit<B> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result134     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135         write!(f, "{}", &*self.raw)
136     }
137 }
138 
139 /// Integer literal types. *Implementation detail*.
140 ///
141 /// Implemented for all integer literal types. This trait is sealed and cannot
142 /// be implemented outside of this crate. The trait's methods are implementation
143 /// detail of this library and are not subject to semver.
144 pub trait FromIntegerLiteral: self::sealed::Sealed + Copy {
145     /// Creates itself from the given number. `n` is guaranteed to be `<= 16`.
146     #[doc(hidden)]
from_small_number(n: u8) -> Self147     fn from_small_number(n: u8) -> Self;
148 
149     #[doc(hidden)]
checked_add(self, rhs: Self) -> Option<Self>150     fn checked_add(self, rhs: Self) -> Option<Self>;
151 
152     #[doc(hidden)]
checked_mul(self, rhs: Self) -> Option<Self>153     fn checked_mul(self, rhs: Self) -> Option<Self>;
154 
155     #[doc(hidden)]
ty() -> IntegerType156     fn ty() -> IntegerType;
157 }
158 
159 macro_rules! impl_from_int_literal {
160     ($( $ty:ty => $variant:ident ,)* ) => {
161         $(
162             impl self::sealed::Sealed for $ty {}
163             impl FromIntegerLiteral for $ty {
164                 fn from_small_number(n: u8) -> Self {
165                     n as Self
166                 }
167                 fn checked_add(self, rhs: Self) -> Option<Self> {
168                     self.checked_add(rhs)
169                 }
170                 fn checked_mul(self, rhs: Self) -> Option<Self> {
171                     self.checked_mul(rhs)
172                 }
173                 fn ty() -> IntegerType {
174                     IntegerType::$variant
175                 }
176             }
177         )*
178     };
179 }
180 
181 impl_from_int_literal!(
182     u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize,
183     i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize,
184 );
185 
/// Private module holding the supertrait used to seal `FromIntegerLiteral`.
mod sealed {
    /// Marker supertrait without any items. It lives in a private module, so
    /// code outside of this crate cannot name it and therefore cannot
    /// implement traits that require it.
    pub trait Sealed {}
}
189 
190 /// Precondition: first byte of string has to be in `b'0'..=b'9'`.
191 #[inline(never)]
parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError>192 pub(crate) fn parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError> {
193     // Figure out base and strip prefix base, if it exists.
194     let (end_prefix, base) = match (first, input.as_bytes().get(1)) {
195         (b'0', Some(b'b')) => (2, IntegerBase::Binary),
196         (b'0', Some(b'o')) => (2, IntegerBase::Octal),
197         (b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal),
198 
199         // Everything else is treated as decimal. Several cases are caught
200         // by this:
201         // - "123"
202         // - "0"
203         // - "0u8"
204         // - "0r" -> this will error later
205         _ => (0, IntegerBase::Decimal),
206     };
207     let without_prefix = &input[end_prefix..];
208 
209 
210     // Scan input to find the first character that's not a valid digit.
211     let is_valid_digit = match base {
212         IntegerBase::Binary => |b| matches!(b, b'0' | b'1' | b'_'),
213         IntegerBase::Octal => |b| matches!(b, b'0'..=b'7' | b'_'),
214         IntegerBase::Decimal => |b| matches!(b, b'0'..=b'9' | b'_'),
215         IntegerBase::Hexadecimal => |b| matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_'),
216     };
217     let end_main = without_prefix.bytes()
218         .position(|b| !is_valid_digit(b))
219         .unwrap_or(without_prefix.len());
220     let (main_part, suffix) = without_prefix.split_at(end_main);
221 
222     check_suffix(suffix).map_err(|kind| {
223         // This is just to have a nicer error kind for this special case. If the
224         // suffix is invalid, it is non-empty -> unwrap ok.
225         let first = suffix.as_bytes()[0];
226         if !is_valid_digit(first) && first.is_ascii_digit() {
227             perr(end_main + end_prefix, InvalidDigit)
228         } else {
229             perr(end_main + end_prefix..input.len(), kind)
230         }
231     })?;
232     if suffix.starts_with('e') || suffix.starts_with('E') {
233         return Err(perr(end_main, IntegerSuffixStartingWithE));
234     }
235 
236     // Make sure main number part is not empty.
237     if main_part.bytes().filter(|&b| b != b'_').count() == 0 {
238         return Err(perr(end_prefix..end_prefix + end_main, NoDigits));
239     }
240 
241     Ok(IntegerLit {
242         raw: input,
243         start_main_part: end_prefix,
244         end_main_part: end_main + end_prefix,
245         base,
246     })
247 }
248 
249 
/// The bases in which an integer can be specified.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerBase {
    /// Base 2, prefix `0b`.
    Binary,
    /// Base 8, prefix `0o`.
    Octal,
    /// Base 10, no prefix.
    Decimal,
    /// Base 16, prefix `0x`.
    Hexadecimal,
}

impl IntegerBase {
    /// Returns the literal prefix that indicates this base, i.e. `"0b"`,
    /// `"0o"`, `""` and `"0x"`.
    pub fn prefix(self) -> &'static str {
        match self {
            Self::Binary => "0b",
            Self::Octal => "0o",
            Self::Decimal => "",
            Self::Hexadecimal => "0x",
        }
    }

    /// Returns the base value, i.e. 2, 8, 10 or 16.
    pub fn value(self) -> u8 {
        match self {
            Self::Binary => 2,
            Self::Octal => 8,
            Self::Decimal => 10,
            Self::Hexadecimal => 16,
        }
    }
}
281 
/// All possible integer type suffixes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum IntegerType {
    U8,
    U16,
    U32,
    U64,
    U128,
    Usize,
    I8,
    I16,
    I32,
    I64,
    I128,
    Isize,
}

impl IntegerType {
    /// Returns the type corresponding to the given suffix (e.g. `"u8"` is
    /// mapped to `Self::U8`). If the suffix is not a valid integer type,
    /// `None` is returned. The comparison is exact, so `""` and `"U8"` both
    /// yield `None`.
    pub fn from_suffix(suffix: &str) -> Option<Self> {
        match suffix {
            "u8" => Some(Self::U8),
            "u16" => Some(Self::U16),
            "u32" => Some(Self::U32),
            "u64" => Some(Self::U64),
            "u128" => Some(Self::U128),
            "usize" => Some(Self::Usize),
            "i8" => Some(Self::I8),
            "i16" => Some(Self::I16),
            "i32" => Some(Self::I32),
            "i64" => Some(Self::I64),
            "i128" => Some(Self::I128),
            "isize" => Some(Self::Isize),
            _ => None,
        }
    }

    /// Returns the suffix for this type, e.g. `"u8"` for `Self::U8`. This is
    /// the inverse of [`IntegerType::from_suffix`].
    pub fn suffix(self) -> &'static str {
        match self {
            Self::U8 => "u8",
            Self::U16 => "u16",
            Self::U32 => "u32",
            Self::U64 => "u64",
            Self::U128 => "u128",
            Self::Usize => "usize",
            Self::I8 => "i8",
            Self::I16 => "i16",
            Self::I32 => "i32",
            Self::I64 => "i64",
            Self::I128 => "i128",
            Self::Isize => "isize",
        }
    }
}

/// Parses a suffix string; equivalent to [`IntegerType::from_suffix`] with
/// `None` turned into `Err(())`.
impl FromStr for IntegerType {
    type Err = ();
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::from_suffix(s).ok_or(())
    }
}

/// Formats the type as its suffix, e.g. `u8`.
impl fmt::Display for IntegerType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegates to the `Display` impl of `str`, so formatter options such
        // as width and alignment apply to the suffix.
        self.suffix().fmt(f)
    }
}
353 
354 
355 #[cfg(test)]
356 mod tests;
357