// Source listing metadata: 357 lines, 11 KiB, Rust.
use std::{fmt, str::FromStr};

use crate::{
    Buffer, ParseError,
    err::{perr, ParseErrorKind::*},
    parse::{first_byte_or_empty, hex_digit_value, check_suffix},
};

/// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`.
|
|
///
|
|
/// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`),
|
|
/// the main part (digits and underscores), and an optional type suffix
|
|
/// (e.g. `u64` or `i8`). See [the reference][ref] for more information.
|
|
///
|
|
/// Note that integer literals are always positive: the grammar does not contain
|
|
/// the minus sign at all. The minus sign is just the unary negate operator,
|
|
/// not part of the literal. Which is interesting for cases like `- 128i8`:
|
|
/// here, the literal itself would overflow the specified type (`i8` cannot
|
|
/// represent 128). That's why in rustc, the literal overflow check is
|
|
/// performed as a lint after parsing, not during the lexing stage. Similarly,
|
|
/// [`IntegerLit::parse`] does not perform an overflow check.
|
|
///
|
|
/// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
#[non_exhaustive]
|
|
pub struct IntegerLit<B: Buffer> {
|
|
/// The raw literal. Grammar: `<prefix?><main part><suffix?>`.
|
|
raw: B,
|
|
/// First index of the main number part (after the base prefix).
|
|
start_main_part: usize,
|
|
/// First index not part of the main number part.
|
|
end_main_part: usize,
|
|
/// Parsed `raw[..start_main_part]`.
|
|
base: IntegerBase,
|
|
}
|
|
|
|
impl<B: Buffer> IntegerLit<B> {
|
|
/// Parses the input as an integer literal. Returns an error if the input is
|
|
/// invalid or represents a different kind of literal.
|
|
pub fn parse(input: B) -> Result<Self, ParseError> {
|
|
match first_byte_or_empty(&input)? {
|
|
digit @ b'0'..=b'9' => {
|
|
// TODO: simplify once RFC 2528 is stabilized
|
|
let IntegerLit {
|
|
start_main_part,
|
|
end_main_part,
|
|
base,
|
|
..
|
|
} = parse_impl(&input, digit)?;
|
|
|
|
Ok(Self { raw: input, start_main_part, end_main_part, base })
|
|
},
|
|
_ => Err(perr(0, DoesNotStartWithDigit)),
|
|
}
|
|
}
|
|
|
|
/// Performs the actual string to int conversion to obtain the integer
|
|
/// value. The optional type suffix of the literal **is ignored by this
|
|
/// method**. This means `N` does not need to match the type suffix!
|
|
///
|
|
/// Returns `None` if the literal overflows `N`.
|
|
///
|
|
/// Hint: `u128` can represent all possible values integer literal values,
|
|
/// as there are no negative literals (see type docs). Thus you can, for
|
|
/// example, safely use `lit.value::<u128>().to_string()` to get a decimal
|
|
/// string. (Technically, Rust integer literals can represent arbitrarily
|
|
/// large numbers, but those would be rejected at a later stage by the Rust
|
|
/// compiler).
|
|
pub fn value<N: FromIntegerLiteral>(&self) -> Option<N> {
|
|
let base = N::from_small_number(self.base.value());
|
|
|
|
let mut acc = N::from_small_number(0);
|
|
for digit in self.raw_main_part().bytes() {
|
|
if digit == b'_' {
|
|
continue;
|
|
}
|
|
|
|
// We don't actually need the base here: we already know this main
|
|
// part only contains digits valid for the specified base.
|
|
let digit = hex_digit_value(digit)
|
|
.unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit"));
|
|
|
|
acc = acc.checked_mul(base)?;
|
|
acc = acc.checked_add(N::from_small_number(digit))?;
|
|
}
|
|
|
|
Some(acc)
|
|
}
|
|
|
|
/// The base of this integer literal.
|
|
pub fn base(&self) -> IntegerBase {
|
|
self.base
|
|
}
|
|
|
|
/// The main part containing the digits and potentially `_`. Do not try to
|
|
/// parse this directly as that would ignore the base!
|
|
pub fn raw_main_part(&self) -> &str {
|
|
&(*self.raw)[self.start_main_part..self.end_main_part]
|
|
}
|
|
|
|
/// The optional suffix. Returns `""` if the suffix is empty/does not exist.
|
|
///
|
|
/// If you want the type, try `IntegerType::from_suffix(lit.suffix())`.
|
|
pub fn suffix(&self) -> &str {
|
|
&(*self.raw)[self.end_main_part..]
|
|
}
|
|
|
|
/// Returns the raw input that was passed to `parse`.
|
|
pub fn raw_input(&self) -> &str {
|
|
&self.raw
|
|
}
|
|
|
|
/// Returns the raw input that was passed to `parse`, potentially owned.
|
|
pub fn into_raw_input(self) -> B {
|
|
self.raw
|
|
}
|
|
}
|
|
|
|
impl IntegerLit<&str> {
|
|
/// Makes a copy of the underlying buffer and returns the owned version of
|
|
/// `Self`.
|
|
pub fn to_owned(&self) -> IntegerLit<String> {
|
|
IntegerLit {
|
|
raw: self.raw.to_owned(),
|
|
start_main_part: self.start_main_part,
|
|
end_main_part: self.end_main_part,
|
|
base: self.base,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<B: Buffer> fmt::Display for IntegerLit<B> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "{}", &*self.raw)
|
|
}
|
|
}
|
|
|
|
/// Integer literal types. *Implementation detail*.
|
|
///
|
|
/// Implemented for all integer literal types. This trait is sealed and cannot
|
|
/// be implemented outside of this crate. The trait's methods are implementation
|
|
/// detail of this library and are not subject to semver.
|
|
pub trait FromIntegerLiteral: self::sealed::Sealed + Copy {
|
|
/// Creates itself from the given number. `n` is guaranteed to be `<= 16`.
|
|
#[doc(hidden)]
|
|
fn from_small_number(n: u8) -> Self;
|
|
|
|
#[doc(hidden)]
|
|
fn checked_add(self, rhs: Self) -> Option<Self>;
|
|
|
|
#[doc(hidden)]
|
|
fn checked_mul(self, rhs: Self) -> Option<Self>;
|
|
|
|
#[doc(hidden)]
|
|
fn ty() -> IntegerType;
|
|
}
|
|
|
|
macro_rules! impl_from_int_literal {
|
|
($( $ty:ty => $variant:ident ,)* ) => {
|
|
$(
|
|
impl self::sealed::Sealed for $ty {}
|
|
impl FromIntegerLiteral for $ty {
|
|
fn from_small_number(n: u8) -> Self {
|
|
n as Self
|
|
}
|
|
fn checked_add(self, rhs: Self) -> Option<Self> {
|
|
self.checked_add(rhs)
|
|
}
|
|
fn checked_mul(self, rhs: Self) -> Option<Self> {
|
|
self.checked_mul(rhs)
|
|
}
|
|
fn ty() -> IntegerType {
|
|
IntegerType::$variant
|
|
}
|
|
}
|
|
)*
|
|
};
|
|
}
|
|
|
|
impl_from_int_literal!(
|
|
u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize,
|
|
i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize,
|
|
);
|
|
|
|
/// Private module holding the supertrait that seals `FromIntegerLiteral`:
/// since the module is not public, other crates cannot name `Sealed` and thus
/// cannot implement the trait.
mod sealed {
    pub trait Sealed {}
}

/// Precondition: first byte of string has to be in `b'0'..=b'9'`.
|
|
#[inline(never)]
|
|
pub(crate) fn parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError> {
|
|
// Figure out base and strip prefix base, if it exists.
|
|
let (end_prefix, base) = match (first, input.as_bytes().get(1)) {
|
|
(b'0', Some(b'b')) => (2, IntegerBase::Binary),
|
|
(b'0', Some(b'o')) => (2, IntegerBase::Octal),
|
|
(b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal),
|
|
|
|
// Everything else is treated as decimal. Several cases are caught
|
|
// by this:
|
|
// - "123"
|
|
// - "0"
|
|
// - "0u8"
|
|
// - "0r" -> this will error later
|
|
_ => (0, IntegerBase::Decimal),
|
|
};
|
|
let without_prefix = &input[end_prefix..];
|
|
|
|
|
|
// Scan input to find the first character that's not a valid digit.
|
|
let is_valid_digit = match base {
|
|
IntegerBase::Binary => |b| matches!(b, b'0' | b'1' | b'_'),
|
|
IntegerBase::Octal => |b| matches!(b, b'0'..=b'7' | b'_'),
|
|
IntegerBase::Decimal => |b| matches!(b, b'0'..=b'9' | b'_'),
|
|
IntegerBase::Hexadecimal => |b| matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_'),
|
|
};
|
|
let end_main = without_prefix.bytes()
|
|
.position(|b| !is_valid_digit(b))
|
|
.unwrap_or(without_prefix.len());
|
|
let (main_part, suffix) = without_prefix.split_at(end_main);
|
|
|
|
check_suffix(suffix).map_err(|kind| {
|
|
// This is just to have a nicer error kind for this special case. If the
|
|
// suffix is invalid, it is non-empty -> unwrap ok.
|
|
let first = suffix.as_bytes()[0];
|
|
if !is_valid_digit(first) && first.is_ascii_digit() {
|
|
perr(end_main + end_prefix, InvalidDigit)
|
|
} else {
|
|
perr(end_main + end_prefix..input.len(), kind)
|
|
}
|
|
})?;
|
|
if suffix.starts_with('e') || suffix.starts_with('E') {
|
|
return Err(perr(end_main, IntegerSuffixStartingWithE));
|
|
}
|
|
|
|
// Make sure main number part is not empty.
|
|
if main_part.bytes().filter(|&b| b != b'_').count() == 0 {
|
|
return Err(perr(end_prefix..end_prefix + end_main, NoDigits));
|
|
}
|
|
|
|
Ok(IntegerLit {
|
|
raw: input,
|
|
start_main_part: end_prefix,
|
|
end_main_part: end_main + end_prefix,
|
|
base,
|
|
})
|
|
}
|
|
|
|
|
|
/// The bases in which an integer can be specified.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerBase {
    /// Base 2, written with the prefix `0b`.
    Binary,
    /// Base 8, written with the prefix `0o`.
    Octal,
    /// Base 10, written without any prefix.
    Decimal,
    /// Base 16, written with the prefix `0x`.
    Hexadecimal,
}

impl IntegerBase {
    /// Returns the literal prefix that indicates this base, i.e. `"0b"`,
    /// `"0o"`, `""` and `"0x"`.
    pub fn prefix(self) -> &'static str {
        match self {
            Self::Binary => "0b",
            Self::Octal => "0o",
            Self::Decimal => "",
            Self::Hexadecimal => "0x",
        }
    }

    /// Returns the base value, i.e. 2, 8, 10 or 16.
    pub fn value(self) -> u8 {
        match self {
            Self::Binary => 2,
            Self::Octal => 8,
            Self::Decimal => 10,
            Self::Hexadecimal => 16,
        }
    }
}

/// All possible integer type suffixes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum IntegerType {
    U8,
    U16,
    U32,
    U64,
    U128,
    Usize,
    I8,
    I16,
    I32,
    I64,
    I128,
    Isize,
}

impl IntegerType {
    /// Every variant, in declaration order. Used for the reverse lookup in
    /// [`Self::from_suffix`]; must be extended whenever a variant is added.
    const ALL: [Self; 12] = [
        Self::U8,
        Self::U16,
        Self::U32,
        Self::U64,
        Self::U128,
        Self::Usize,
        Self::I8,
        Self::I16,
        Self::I32,
        Self::I64,
        Self::I128,
        Self::Isize,
    ];

    /// Returns the type corresponding to the given suffix (e.g. `"u8"` is
    /// mapped to `Self::U8`). If the suffix is not a valid integer type,
    /// `None` is returned. The comparison is exact (case sensitive).
    pub fn from_suffix(suffix: &str) -> Option<Self> {
        // Derived from `suffix()` so that the string <-> variant mapping is
        // only written down once.
        Self::ALL.iter().copied().find(|ty| ty.suffix() == suffix)
    }

    /// Returns the suffix for this type, e.g. `"u8"` for `Self::U8`.
    pub fn suffix(self) -> &'static str {
        match self {
            Self::U8 => "u8",
            Self::U16 => "u16",
            Self::U32 => "u32",
            Self::U64 => "u64",
            Self::U128 => "u128",
            Self::Usize => "usize",
            Self::I8 => "i8",
            Self::I16 => "i16",
            Self::I32 => "i32",
            Self::I64 => "i64",
            Self::I128 => "i128",
            Self::Isize => "isize",
        }
    }
}

impl FromStr for IntegerType {
|
|
type Err = ();
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
Self::from_suffix(s).ok_or(())
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for IntegerType {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
self.suffix().fmt(f)
|
|
}
|
|
}
|
|
|
|
|
|
// Unit tests live in a separate file and are only compiled for test builds.
#[cfg(test)]
mod tests;