Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

vendor/pp-rs/src/lexer.rs (vendored, new file, 628 lines)

@@ -0,0 +1,628 @@
use crate::token::{Float, Integer, Location, PreprocessorError, Punct};
use std::str::Chars;
use unicode_xid::UnicodeXID;
type CharAndLine = (char, u32);
// GLSL ES 3.20 specification section 3.10. Logical Phases of Compilation
// This iterator implements phases 4 and 5 of the logical phases of compilation:
//
// 4. Each {carriage-return, line-feed} and {line-feed, carriage return} sequence is replaced by
// a single newline. All remaining carriage-return and line-feed characters are then each
// replaced by a newline.
//
// 5. Line numbering for each character, which is equal to the number of preceding newlines plus
// one, is noted. Note this can only be subsequently changed by the #line directive and is not
// affected by the removal of newlines in phase 6 of compilation.
//
// It expects that phases 1 to 3 are already done and that valid utf8 is passed in.
#[derive(Clone)]
pub struct CharsAndLine<'a> {
inner: Chars<'a>,
line: u32,
}
impl<'a> CharsAndLine<'a> {
pub fn new(input: &'a str) -> Self {
CharsAndLine {
inner: input.chars(),
line: 1,
}
}
pub fn get_current_ptr(&self) -> *const u8 {
self.inner.as_str().as_ptr()
}
}
impl<'a> Iterator for CharsAndLine<'a> {
type Item = CharAndLine;
fn next(&mut self) -> Option<Self::Item> {
let current = self.inner.next()?;
match current {
'\n' => {
// Consume the character but see if we can grab a \r that follows
let mut peek_inner = self.inner.clone();
if peek_inner.next() == Some('\r') {
self.inner = peek_inner;
}
let res = Some(('\n', self.line));
self.line += 1;
res
}
'\r' => {
// Consume the character but see if we can grab a \n that follows
let mut peek_inner = self.inner.clone();
if peek_inner.next() == Some('\n') {
self.inner = peek_inner;
}
let res = Some(('\n', self.line));
self.line += 1;
res
}
_ => Some((current, self.line)),
}
}
}
// An iterator that adds stage 6 on top of CharsAndLine:
//
// 6. Wherever a backslash ('\') occurs immediately before a newline, both are deleted. Note that
// no whitespace is substituted, thereby allowing a single preprocessing token to span a
// newline. This operation is not recursive; any new {backslash newline} sequences generated
// are not removed.
#[derive(Clone)]
pub struct SkipBackslashNewline<'a> {
inner: CharsAndLine<'a>,
}
impl<'a> SkipBackslashNewline<'a> {
pub fn new(input: &'a str) -> Self {
SkipBackslashNewline {
inner: CharsAndLine::new(input),
}
}
pub fn get_current_ptr(&self) -> *const u8 {
self.inner.get_current_ptr()
}
}
impl<'a> Iterator for SkipBackslashNewline<'a> {
type Item = CharAndLine;
fn next(&mut self) -> Option<Self::Item> {
let mut current = self.inner.next()?;
while current.0 == '\\' {
let mut peek_inner = self.inner.clone();
if let Some(('\n', _)) = peek_inner.next() {
self.inner = peek_inner;
current = self.next()?;
} else {
return Some(current);
}
}
Some(current)
}
}
// An iterator that adds stage 7 on top of SkipBackslashNewline:
//
// 7. All comments are replaced with a single space. All (non-zero) characters and invalid UTF-8
// byte sequences are allowed within comments. '//' style comments include the initial '//'
// marker and continue up to, but not including, the terminating newline. '/*' style comments
// include both the start and end marker.
#[derive(Clone)]
pub struct ReplaceComments<'a> {
inner: SkipBackslashNewline<'a>,
}
// The lexer needs to know which whitespace came from a comment so it can report whether any
// comment was ever processed. To avoid adding state we use a sentinel value of '\r', because
// all '\r' have already been consumed and turned into '\n' by CharsAndLine.
pub const COMMENT_SENTINEL_VALUE: char = '\r';
impl<'a> ReplaceComments<'a> {
pub fn new(input: &'a str) -> Self {
ReplaceComments {
inner: SkipBackslashNewline::new(input),
}
}
pub fn get_current_ptr(&self) -> *const u8 {
self.inner.get_current_ptr()
}
}
impl<'a> Iterator for ReplaceComments<'a> {
type Item = CharAndLine;
fn next(&mut self) -> Option<Self::Item> {
let current = self.inner.next()?;
if current.0 != '/' {
debug_assert!(current.0 != COMMENT_SENTINEL_VALUE);
return Some(current);
}
let mut peek_inner = self.inner.clone();
match peek_inner.next() {
// The // case, consume until but not including the next \n
Some(('/', _)) => {
self.inner = peek_inner.clone();
while let Some((next, _)) = peek_inner.next() {
if next == '\n' {
break;
}
self.inner = peek_inner.clone();
}
Some((COMMENT_SENTINEL_VALUE, current.1))
}
// The /* case, consume until the closing */
Some(('*', _)) => {
let mut was_star = false;
while let Some((next, _)) = peek_inner.next() {
if was_star && next == '/' {
break;
}
was_star = next == '*';
}
self.inner = peek_inner;
Some((COMMENT_SENTINEL_VALUE, current.1))
}
// Not // or /*, do nothing
_ => Some(current),
}
}
}
// A lexer for GLSL tokens that also emits a couple of extra tokens that are useful to the
// preprocessor: # and newlines. It also includes metadata on each token: whether it is at the
// start of a line and whether it has leading whitespace.
// This is a helper iterator to abstract away the tracking of location data (offset, line) from
// `Lexer`. It looks like a Peekable<Iterator<char>> with `next_char` and `peek_char` but also
// allows querying the last seen/consumed lines / offset.
#[derive(Clone)]
struct LexerCharIterator<'a> {
inner: ReplaceComments<'a>,
peeked: Option<(CharAndLine, *const u8)>,
last_consumed: (CharAndLine, *const u8),
input_start: *const u8,
}
pub const NONE_CONSUMED_SENTINEL_VALUE: char = '\r';
impl<'a> LexerCharIterator<'a> {
pub fn new(input: &'a str) -> Self {
LexerCharIterator {
inner: ReplaceComments::new(input),
peeked: None,
last_consumed: ((NONE_CONSUMED_SENTINEL_VALUE, 0), input.as_bytes().as_ptr()),
input_start: input.as_bytes().as_ptr(),
}
}
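// Consume the next character: take the peeked value if there is one, otherwise pull from
// `inner`, recording the pointer to the character's start before advancing.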
fn next_char(&mut self) -> Option<char> {
self.last_consumed = match self.peeked.take() {
Some(v) => v,
None => {
let ptr = self.inner.get_current_ptr();
(self.inner.next()?, ptr)
}
};
Some(self.last_consumed.0 .0)
}
fn peek_char(&mut self) -> Option<char> {
match self.peeked {
Some(v) => Some(v.0 .0),
None => {
let ptr = self.inner.get_current_ptr();
let next = self.inner.next()?;
self.peeked = Some((next, ptr));
Some(next.0)
}
}
}
fn get_last_seen_line(&self) -> u32 {
self.peeked.unwrap_or(self.last_consumed).0 .1
}
fn get_last_seen_start_offset(&self) -> usize {
self.peeked.unwrap_or(self.last_consumed).1 as usize - self.input_start as usize
}
fn get_last_consumed_end_offset(&self) -> usize {
self.last_consumed.1 as usize - self.input_start as usize
+ self.last_consumed.0 .0.len_utf8()
}
}
// A superset of the token value returned by the preprocessor
#[derive(Clone, PartialEq, Debug)]
pub enum TokenValue {
// Preprocessor specific token values
Hash,
NewLine,
// Regular token values
Ident(String),
Integer(Integer),
Float(Float),
Punct(Punct),
}
impl From<Punct> for TokenValue {
fn from(punct: Punct) -> Self {
TokenValue::Punct(punct)
}
}
#[derive(Clone, PartialEq, Debug)]
pub struct Token {
pub value: TokenValue,
pub location: Location,
pub leading_whitespace: bool,
pub start_of_line: bool,
}
pub type LexerItem = Result<Token, (PreprocessorError, Location)>;
pub struct Lexer<'a> {
inner: LexerCharIterator<'a>,
leading_whitespace: bool,
start_of_line: bool,
had_comments: bool,
}
impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Self {
// TODO bail out on source that is too large.
Lexer {
inner: LexerCharIterator::new(input),
leading_whitespace: true,
start_of_line: true,
had_comments: false,
}
}
pub fn had_comments(&self) -> bool {
self.had_comments
}
// TODO: Make a runtime flag to toggle unicode identifier support.
// The GLSL spec only allows ASCII identifiers.
fn parse_identifier(&mut self) -> Result<TokenValue, PreprocessorError> {
let mut identifier = String::default();
if let Some(c) = self.next_char_if(|c| c.is_xid_start() || c == '_') {
identifier.push(c);
}
let rest = self.consume_chars(|c| c.is_xid_continue());
identifier.push_str(&rest);
// TODO check if identifier is larger than the limit.
Ok(TokenValue::Ident(identifier))
}
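// Consume an optional 'u'/'U' suffix; returns true when the literal is signed (i.e. when no
// unsigned suffix was present).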
fn parse_integer_signedness_suffix(&mut self) -> bool {
self.next_char_if(|c| c == 'u' || c == 'U').is_none()
}
fn parse_integer_width_suffix(&mut self) -> Result<i32, PreprocessorError> {
match self.inner.peek_char() {
Some('l') | Some('L') => Err(PreprocessorError::NotSupported64BitLiteral),
Some('s') | Some('S') => Err(PreprocessorError::NotSupported16BitLiteral),
_ => Ok(32),
}
}
fn parse_float_width_suffix(&mut self) -> Result<i32, PreprocessorError> {
match self.inner.peek_char() {
Some('l') | Some('L') => Err(PreprocessorError::NotSupported64BitLiteral),
Some('h') | Some('H') => Err(PreprocessorError::NotSupported16BitLiteral),
Some('f') | Some('F') => {
self.inner.next_char();
Ok(32)
}
_ => Ok(32),
}
}
fn next_char_if(&mut self, predicate: impl FnOnce(char) -> bool) -> Option<char> {
if let Some(c) = self.inner.peek_char() {
if predicate(c) {
return self.inner.next_char();
}
}
None
}
fn consume_chars(&mut self, filter: impl Fn(char) -> bool) -> String {
let mut result: String = Default::default();
while let Some(c) = self.next_char_if(&filter) {
result.push(c);
}
result
}
fn parse_number(&mut self, first_char: char) -> Result<TokenValue, PreprocessorError> {
let mut is_float = false;
let mut integer_radix = 10;
let mut raw: String = Default::default();
raw.push(first_char);
// Handle hexadecimal numbers that need to consume a..f in addition to digits.
if first_char == '0' {
match self.inner.peek_char() {
Some('x') | Some('X') => {
self.inner.next_char();
raw += &self.consume_chars(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F'));
integer_radix = 16;
}
// Octal numbers can also be the prefix of a float like 00009.0f, so we parse all digits
// and not just 0..7 (that parsing is done below); here we only need to remember the
// radix.
Some('0'..='9') => {
integer_radix = 8;
}
_ => {}
};
}
if first_char == '.' {
is_float = true;
} else {
// Parse any digits at the end of integers, or for the non-fractional part of floats.
raw += &self.consume_chars(|c| ('0'..='9').contains(&c));
if self.next_char_if(|c| c == '.').is_some() {
raw.push('.');
is_float = true;
}
}
// At this point either we're an integer missing only suffixes, or we're a float with
// everything up to the . consumed.
if is_float {
raw += &self.consume_chars(|c| ('0'..='9').contains(&c));
}
// Handle scientific notation with an (e|E)(+|-|)\d+ suffix when we're a float, or an
// integer that could turn into a float by adding an exponent to it (so 0x1E-1
// isn't recognized as a float).
if (is_float || integer_radix == 8 || integer_radix == 10)
&& self.next_char_if(|c| c == 'e' || c == 'E').is_some()
{
raw.push('e');
is_float = true;
match self.inner.peek_char() {
Some('+') => {
self.inner.next_char();
raw.push('+');
}
Some('-') => {
self.inner.next_char();
raw.push('-');
}
_ => {}
}
// TODO: what should we do when there is no number after the exponent?
raw += &self.consume_chars(|c| ('0'..='9').contains(&c));
}
if is_float {
// TODO: Depending on the GLSL version make it an error to not have the suffix.
let width = self.parse_float_width_suffix()?;
Ok(TokenValue::Float(Float {
value: raw
.parse::<f32>()
.map_err(|_| PreprocessorError::FloatParsingError)?,
width,
}))
} else {
let signed = self.parse_integer_signedness_suffix();
let width = self.parse_integer_width_suffix()?;
// Skip the initial 0 of hexadecimal and octal literals (for hexadecimal the 'x' was never added to raw).
if integer_radix != 10 {
raw = raw.split_off(1);
}
Ok(TokenValue::Integer(Integer {
value: u64::from_str_radix(&raw, integer_radix)
.map_err(|_err| PreprocessorError::IntegerOverflow)?,
signed,
width,
}))
}
}
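// Parse punctuation by greedily matching up to three characters against the table below, then
// rewinding to the save point and re-consuming exactly the length of the match.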
fn parse_punctuation(&mut self) -> Result<TokenValue, PreprocessorError> {
let save_point = self.inner.clone();
let char0 = self.inner.next_char().unwrap_or('\0');
let char1 = self.inner.next_char().unwrap_or('\0');
let char2 = self.inner.next_char().unwrap_or('\0');
let maybe_punct = match (char0, char1, char2) {
('<', '<', '=') => Some((Punct::LeftShiftAssign, 3)),
('<', '<', _) => Some((Punct::LeftShift, 2)),
('<', '=', _) => Some((Punct::LessEqual, 2)),
('<', _, _) => Some((Punct::LeftAngle, 1)),
('>', '>', '=') => Some((Punct::RightShiftAssign, 3)),
('>', '>', _) => Some((Punct::RightShift, 2)),
('>', '=', _) => Some((Punct::GreaterEqual, 2)),
('>', _, _) => Some((Punct::RightAngle, 1)),
('+', '+', _) => Some((Punct::Increment, 2)),
('+', '=', _) => Some((Punct::AddAssign, 2)),
('+', _, _) => Some((Punct::Plus, 1)),
('-', '-', _) => Some((Punct::Decrement, 2)),
('-', '=', _) => Some((Punct::SubAssign, 2)),
('-', _, _) => Some((Punct::Minus, 1)),
('&', '&', _) => Some((Punct::LogicalAnd, 2)),
('&', '=', _) => Some((Punct::AndAssign, 2)),
('&', _, _) => Some((Punct::Ampersand, 1)),
('|', '|', _) => Some((Punct::LogicalOr, 2)),
('|', '=', _) => Some((Punct::OrAssign, 2)),
('|', _, _) => Some((Punct::Pipe, 1)),
('^', '^', _) => Some((Punct::LogicalXor, 2)),
('^', '=', _) => Some((Punct::XorAssign, 2)),
('^', _, _) => Some((Punct::Caret, 1)),
('=', '=', _) => Some((Punct::EqualEqual, 2)),
('=', _, _) => Some((Punct::Equal, 1)),
('!', '=', _) => Some((Punct::NotEqual, 2)),
('!', _, _) => Some((Punct::Bang, 1)),
('*', '=', _) => Some((Punct::MulAssign, 2)),
('*', _, _) => Some((Punct::Star, 1)),
('/', '=', _) => Some((Punct::DivAssign, 2)),
('/', _, _) => Some((Punct::Slash, 1)),
('%', '=', _) => Some((Punct::ModAssign, 2)),
('%', _, _) => Some((Punct::Percent, 1)),
('(', _, _) => Some((Punct::LeftParen, 1)),
(')', _, _) => Some((Punct::RightParen, 1)),
('{', _, _) => Some((Punct::LeftBrace, 1)),
('}', _, _) => Some((Punct::RightBrace, 1)),
('[', _, _) => Some((Punct::LeftBracket, 1)),
(']', _, _) => Some((Punct::RightBracket, 1)),
(',', _, _) => Some((Punct::Comma, 1)),
(';', _, _) => Some((Punct::Semicolon, 1)),
(':', _, _) => Some((Punct::Colon, 1)),
('~', _, _) => Some((Punct::Tilde, 1)),
('?', _, _) => Some((Punct::Question, 1)),
// Note that Dot (".") is handled in Lexer::next since it can be
// either punctuation or the start of a floating point number.
_ => None,
};
if let Some((punct, size)) = maybe_punct {
self.inner = save_point;
for _ in 0..size {
self.inner.next_char();
}
Ok(punct.into())
} else if char0 == '#' {
self.inner = save_point;
self.inner.next_char();
Ok(TokenValue::Hash)
} else {
Err(PreprocessorError::UnexpectedCharacter)
}
}
}
impl<'a> Iterator for Lexer<'a> {
type Item = LexerItem;
fn next(&mut self) -> Option<Self::Item> {
while let Some(current_char) = self.inner.peek_char() {
let had_leading_whitespace = self.leading_whitespace;
self.leading_whitespace = false;
let mut location = Location {
line: self.inner.get_last_seen_line(),
start: self.inner.get_last_seen_start_offset() as u32,
end: 0,
};
let was_start_of_line = self.start_of_line;
self.start_of_line = false;
let value = match current_char {
' ' | '\t' | '\x0b' | '\x0c' | COMMENT_SENTINEL_VALUE => {
if current_char == COMMENT_SENTINEL_VALUE {
self.had_comments = true;
}
self.start_of_line = was_start_of_line;
self.leading_whitespace = true;
self.inner.next_char();
continue;
}
'\n' => {
self.leading_whitespace = true;
self.start_of_line = true;
self.inner.next_char();
Ok(TokenValue::NewLine)
}
c @ '0'..='9' => {
self.inner.next_char();
self.parse_number(c)
}
// Special case . as a punctuation because it can be the start of a float.
'.' => {
self.inner.next_char();
match self.inner.peek_char() {
Some('0'..='9') => self.parse_number('.'),
_ => Ok(TokenValue::Punct(Punct::Dot)),
}
}
_ => {
// TODO: see todo in `parse_identifier` for information
if current_char.is_xid_start() || current_char == '_' {
self.parse_identifier()
} else {
self.parse_punctuation()
}
}
};
location.end = self.inner.get_last_consumed_end_offset() as u32;
return Some(value.map_err(|e| (e, Default::default())).map(|t| Token {
value: t,
location,
leading_whitespace: had_leading_whitespace,
start_of_line: was_start_of_line,
}));
}
// Do the C trick of always ending with a newline so that preprocessor directives are terminated.
if !self.start_of_line {
self.start_of_line = true;
let end_offset = self.inner.get_last_consumed_end_offset() as u32;
Some(Ok(Token {
value: TokenValue::NewLine,
location: Location {
line: self.inner.get_last_seen_line(),
start: end_offset,
end: end_offset,
},
leading_whitespace: self.leading_whitespace,
start_of_line: false,
}))
} else {
None
}
}
}
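
A minimal sketch of driving this lexer from inside the crate (`dump_tokens` is a hypothetical helper, not part of the vendored file; the `lexer` module is private, see lib.rs below):

use crate::lexer::Lexer;

fn dump_tokens(input: &str) {
    for item in Lexer::new(input) {
        match item {
            // Each token carries its value plus location and whitespace metadata.
            Ok(token) => println!(
                "{:?} at bytes {}..{}",
                token.value, token.location.start, token.location.end
            ),
            // Errors carry the offending location; stop at the first one.
            Err((error, location)) => {
                println!("error {:?} on line {}", error, location.line);
                break;
            }
        }
    }
}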

vendor/pp-rs/src/lexer_tests.rs (vendored, new file, 821 lines)

@@ -0,0 +1,821 @@
use super::lexer::{
CharsAndLine, Lexer, LexerItem, ReplaceComments, SkipBackslashNewline, Token, TokenValue,
COMMENT_SENTINEL_VALUE,
};
use super::token::{Float, Integer, Location, PreprocessorError, Punct};
use std::ops::Range;
fn c(c: char, line: u32) -> Option<(char, u32)> {
Some((c, line))
}
fn l(line: u32, pos: Range<u32>) -> Location {
Location {
line,
start: pos.start,
end: pos.end,
}
}
fn unwrap_token(item: Option<LexerItem>) -> Token {
item.unwrap().unwrap()
}
fn unwrap_token_value(item: Option<LexerItem>) -> TokenValue {
unwrap_token(item).value
}
fn unwrap_error(item: Option<LexerItem>) -> PreprocessorError {
item.unwrap().unwrap_err().0
}
fn expect_lexer_end(lexer: &mut Lexer) {
assert_eq!(unwrap_token_value(lexer.next()), TokenValue::NewLine);
assert_eq!(lexer.next(), None);
}
impl From<i32> for TokenValue {
fn from(value: i32) -> Self {
TokenValue::Integer(Integer {
value: value as u64,
signed: true,
width: 32,
})
}
}
impl From<u32> for TokenValue {
fn from(value: u32) -> Self {
TokenValue::Integer(Integer {
value: value as u64,
signed: false,
width: 32,
})
}
}
impl From<f32> for TokenValue {
fn from(value: f32) -> Self {
TokenValue::Float(Float { value, width: 32 })
}
}
#[test]
fn chars_and_line() {
// Test handling of characters in a line.
let mut it = CharsAndLine::new("abc");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('b', 1));
assert_eq!(it.next(), c('c', 1));
assert_eq!(it.next(), None);
// Test handling of \n in the regular case.
let mut it = CharsAndLine::new("a\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test handling of \r in the regular case.
let mut it = CharsAndLine::new("a\rb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test handling of \n\r.
let mut it = CharsAndLine::new("a\n\rb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test handling of \r\n.
let mut it = CharsAndLine::new("a\r\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test handling of a mix of \r and \n
let mut it = CharsAndLine::new("\n\r\n\r\r\r\n");
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('\n', 2));
assert_eq!(it.next(), c('\n', 3));
assert_eq!(it.next(), c('\n', 4));
assert_eq!(it.next(), None);
// Unicode handling
let mut it = CharsAndLine::new("a→üs🦀");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('→', 1));
assert_eq!(it.next(), c('ü', 1));
assert_eq!(it.next(), c('s', 1));
assert_eq!(it.next(), c('🦀', 1));
assert_eq!(it.next(), None);
}
#[test]
fn skip_backslash_newline() {
// Test a simple case.
let mut it = SkipBackslashNewline::new("a\\\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test a double case that requires the loop in the algorithm.
let mut it = SkipBackslashNewline::new("a\\\n\\\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('b', 3));
assert_eq!(it.next(), None);
// Test a backslash on its own
let mut it = SkipBackslashNewline::new("a\\b");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\\', 1));
assert_eq!(it.next(), c('b', 1));
assert_eq!(it.next(), None);
// Test a case just before EOF
let mut it = SkipBackslashNewline::new("\\\n");
assert_eq!(it.next(), None);
}
#[test]
fn replace_comments() {
// Test a slash that's not a comment
let mut it = ReplaceComments::new("a/b");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('/', 1));
assert_eq!(it.next(), c('b', 1));
assert_eq!(it.next(), None);
// Test a slash with nothing afterwards
let mut it = ReplaceComments::new("a/");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('/', 1));
assert_eq!(it.next(), None);
// Test a single-line comment
let mut it = ReplaceComments::new("a//foo\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test a single-line comment without an ending newline
let mut it = ReplaceComments::new("//foo");
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test a single-line comment with nothing afterwards
let mut it = ReplaceComments::new("//");
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test a single-line comment with a line continuation
let mut it = ReplaceComments::new("//foo\\\na");
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test a multi-line comment
let mut it = ReplaceComments::new("a/*fo\n\no*/b");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), c('b', 3));
assert_eq!(it.next(), None);
// Test a multi-line comment, without a proper ending (only the *)
let mut it = ReplaceComments::new("a/*fo\n\no*");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test a multi-line comment, without a proper ending (nothing)
let mut it = ReplaceComments::new("a/*fo\n\no");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test that /*/ is not a complete multi-line comment
let mut it = ReplaceComments::new("a/*/b");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
}
#[test]
fn lex_whitespace() {
// Empty input gives nothing.
let mut it = Lexer::new("");
assert_eq!(it.next(), None);
// Pure whitespace gives nothing too
let mut it = Lexer::new("/**/\t //a");
assert_eq!(it.next(), None);
}
#[test]
fn lex_newline() {
let mut it = Lexer::new("\r\n\n");
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(it.next(), None);
// Check a newline is added only if the last token wasn't a newline
let mut it = Lexer::new("\r\n\n\t/**/ //");
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
expect_lexer_end(&mut it);
let mut it = Lexer::new("\r\n\n#");
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(unwrap_token_value(it.next()), TokenValue::Hash);
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(it.next(), None);
}
#[test]
fn lex_hash() {
let mut it = Lexer::new("a#b");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("a".to_string())
);
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Hash);
assert_eq!(token.location, l(1, 1..2));
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("b".to_string())
);
expect_lexer_end(&mut it);
let mut it = Lexer::new("\nvoid #");
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("void".into())
);
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Hash);
assert_eq!(token.location, l(2, 6..7));
expect_lexer_end(&mut it);
}
#[test]
fn lex_metadata() {
// Test the metadata of the first token
let mut it = Lexer::new("1");
assert_eq!(
unwrap_token(it.next()),
Token {
value: 1.into(),
location: l(1, 0..1),
leading_whitespace: true,
start_of_line: true
}
);
expect_lexer_end(&mut it);
// Test various whitespaces are recognized (or lack of)
let mut it = Lexer::new(" 1/*\n*/2\t3+\n4");
// 1 is the first token and the whitespace doesn't prevent it from being the start of the
// line
assert_eq!(
unwrap_token(it.next()),
Token {
value: 1.into(),
location: l(1, 1..2),
leading_whitespace: true,
start_of_line: true
}
);
// 2 is not at the start of the line because the \n in the /**/ doesn't count, however its
// location correctly lists the second line.
assert_eq!(
unwrap_token(it.next()),
Token {
value: 2.into(),
location: l(2, 7..8),
leading_whitespace: true,
start_of_line: false
}
);
assert_eq!(
unwrap_token(it.next()),
Token {
value: 3.into(),
location: l(2, 9..10),
leading_whitespace: true,
start_of_line: false
}
);
// + doesn't have a leading whitespace
assert_eq!(
unwrap_token(it.next()),
Token {
value: Punct::Plus.into(),
location: l(2, 10..11),
leading_whitespace: false,
start_of_line: false
}
);
// The newline is correctly tagged on the preceding line
assert_eq!(
unwrap_token(it.next()),
Token {
value: TokenValue::NewLine,
location: l(2, 11..12),
leading_whitespace: false,
start_of_line: false
}
);
// 4 is after a newline that correctly sets start_of_line
assert_eq!(
unwrap_token(it.next()),
Token {
value: 4.into(),
location: l(3, 12..13),
leading_whitespace: true,
start_of_line: true
}
);
// The final newline added by the lexer is at the correct position
assert_eq!(
unwrap_token(it.next()),
Token {
value: TokenValue::NewLine,
location: l(3, 13..13),
leading_whitespace: false,
start_of_line: false
}
);
assert_eq!(it.next(), None);
}
#[test]
fn lex_identifiers() {
// Test some basic identifier cases
let mut it = Lexer::new("foo BA_R baz0");
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Ident("foo".to_string()));
assert_eq!(token.location, l(1, 0..3),);
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Ident("BA_R".to_string()));
assert_eq!(token.location, l(1, 4..8),);
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Ident("baz0".to_string()));
assert_eq!(token.location, l(1, 9..13),);
expect_lexer_end(&mut it);
// Test _ is a valid identifier
let mut it = Lexer::new("_");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("_".to_string())
);
expect_lexer_end(&mut it);
// Test that identifiers are not split by escaped newlines
let mut it = Lexer::new("a\\\nb");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("ab".to_string())
);
expect_lexer_end(&mut it);
// Test that identifiers are split by other whitespace like /**/
let mut it = Lexer::new("a/**/b");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("a".to_string())
);
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("b".to_string())
);
expect_lexer_end(&mut it);
}
#[test]
fn lex_decimal() {
// Test some basic cases
let mut it = Lexer::new("1 0u 42 65536U");
assert_eq!(unwrap_token_value(it.next()), 1.into());
let token = unwrap_token(it.next());
assert_eq!(token.value, 0u32.into());
assert_eq!(token.location, l(1, 2..4),);
let token = unwrap_token(it.next());
assert_eq!(token.value, 42.into());
assert_eq!(token.location, l(1, 5..7),);
let token = unwrap_token(it.next());
assert_eq!(token.value, 65536u32.into());
assert_eq!(token.location, l(1, 8..14),);
expect_lexer_end(&mut it);
// Test splitting with identifiers
let mut it = Lexer::new("31ab");
assert_eq!(unwrap_token_value(it.next()), 31.into());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("ab".to_string())
);
expect_lexer_end(&mut it);
// Test splitting with whitespace
let mut it = Lexer::new("31/**/32");
assert_eq!(unwrap_token_value(it.next()), 31.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test splitting with punctuation
let mut it = Lexer::new("31+32");
assert_eq!(unwrap_token_value(it.next()), 31.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test that 2^64 produces an overflow error but that 2^64-1 correctly parses (even if it might
// produce an error down the line).
let mut it = Lexer::new("18446744073709551616");
assert_eq!(unwrap_error(it.next()), PreprocessorError::IntegerOverflow);
let mut it = Lexer::new("18446744073709551615");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Integer(Integer {
value: 18446744073709551615,
signed: true,
width: 32
})
);
expect_lexer_end(&mut it);
// Check that the 16bit or 64bit suffixes produce errors (for now).
let mut it = Lexer::new("13s");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
let mut it = Lexer::new("13S");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
let mut it = Lexer::new("13l");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
let mut it = Lexer::new("13L");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
// Check that they produce unsupported errors even if they happen with an unsigned suffix too.
let mut it = Lexer::new("13uS");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
let mut it = Lexer::new("13Ul");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
}
#[test]
fn lex_hexadecimal() {
// Test some basic cases
let mut it = Lexer::new("0x1 0X0u 0xBaFfe 0XcaFeU");
assert_eq!(unwrap_token_value(it.next()), 1.into());
assert_eq!(unwrap_token_value(it.next()), 0u32.into());
let token = unwrap_token(it.next());
assert_eq!(token.value, 0xBAFFE.into());
assert_eq!(token.location, l(1, 9..16),);
let token = unwrap_token(it.next());
assert_eq!(token.value, 0xCAFEu32.into());
assert_eq!(token.location, l(1, 17..24),);
expect_lexer_end(&mut it);
// Test with redundant zeroes
let mut it = Lexer::new("0x000 0x000000000000001");
assert_eq!(unwrap_token_value(it.next()), 0.into());
assert_eq!(unwrap_token_value(it.next()), 1.into());
expect_lexer_end(&mut it);
// Test splitting with identifiers
let mut it = Lexer::new("0x31zb");
assert_eq!(unwrap_token_value(it.next()), 0x31.into());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("zb".to_string())
);
expect_lexer_end(&mut it);
// Test splitting with whitespace
let mut it = Lexer::new("0x31/**/32");
assert_eq!(unwrap_token_value(it.next()), 0x31.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test splitting with punctuation
let mut it = Lexer::new("0x31+32");
assert_eq!(unwrap_token_value(it.next()), 0x31.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test that 2^64 produces an overflow error but that 2^64-1 correctly parses (even if it might
// produce an error down the line).
let mut it = Lexer::new("0x10000000000000000");
assert_eq!(unwrap_error(it.next()), PreprocessorError::IntegerOverflow);
let mut it = Lexer::new("0xFFFFFFFFFFFFFFFF");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Integer(Integer {
value: 18446744073709551615,
signed: true,
width: 32
})
);
expect_lexer_end(&mut it);
}
#[test]
fn lex_octal() {
// Test some basic cases
let mut it = Lexer::new("01 00u 07654 01234u");
assert_eq!(unwrap_token_value(it.next()), 1.into());
assert_eq!(unwrap_token_value(it.next()), 0u32.into());
assert_eq!(unwrap_token_value(it.next()), 4012.into());
assert_eq!(unwrap_token_value(it.next()), 668u32.into());
expect_lexer_end(&mut it);
// Test with redundant zeroes
let mut it = Lexer::new("0000 0000000000000001");
assert_eq!(unwrap_token_value(it.next()), 0.into());
assert_eq!(unwrap_token_value(it.next()), 1.into());
expect_lexer_end(&mut it);
// Test splitting with identifiers
let mut it = Lexer::new("031zb");
assert_eq!(unwrap_token_value(it.next()), 25.into());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("zb".to_string())
);
expect_lexer_end(&mut it);
// Test splitting with whitespace
let mut it = Lexer::new("031/**/32");
assert_eq!(unwrap_token_value(it.next()), 25.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// TODO(kangz): Fix octal numbers consuming 8s and 9s as well. This could be done with extra
// logic but is not worth the complexity for now.
// Test splitting with 8 and 9
// let mut it = Lexer::new("039 038");
// assert_eq!(unwrap_token_value(it.next()), 3.into());
// assert_eq!(unwrap_token_value(it.next()), 9.into());
// assert_eq!(unwrap_token_value(it.next()), 3.into());
// assert_eq!(unwrap_token_value(it.next()), 8.into());
// expect_lexer_end(&mut it);
// Test splitting with punctuation
let mut it = Lexer::new("031+32");
assert_eq!(unwrap_token_value(it.next()), 25.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test that 2^64 produces an overflow error but that 2^64-1 correctly parses (even if it might
// produce an error down the line).
let mut it = Lexer::new("02000000000000000000000");
assert_eq!(unwrap_error(it.next()), PreprocessorError::IntegerOverflow);
let mut it = Lexer::new("01777777777777777777777");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Integer(Integer {
value: 18446744073709551615,
signed: true,
width: 32
})
);
expect_lexer_end(&mut it);
}
#[test]
fn lex_float() {
// Test a couple simple cases.
let mut it = Lexer::new("1.0 0.0");
assert_eq!(unwrap_token_value(it.next()), 1.0f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.0f32.into());
expect_lexer_end(&mut it);
// Test parsing with a leading .
let mut it = Lexer::new(".99 0.01 .00000000");
assert_eq!(unwrap_token_value(it.next()), 0.99f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.01f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.0f32.into());
expect_lexer_end(&mut it);
// Test parsing with nothing after the .
let mut it = Lexer::new("42. 0.");
assert_eq!(unwrap_token_value(it.next()), 42.0f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.0f32.into());
expect_lexer_end(&mut it);
// Test parsing with the float suffix
let mut it = Lexer::new("1000.f 1.f .2f");
assert_eq!(unwrap_token_value(it.next()), 1000.0f32.into());
assert_eq!(unwrap_token_value(it.next()), 1.0f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.2f32.into());
expect_lexer_end(&mut it);
// Test parsing with exponents
// - with / without float suffixes
// - at different points in the float parsing.
let mut it = Lexer::new("3e10 4.1e-10f .01e12F 4.1e+10f");
assert_eq!(unwrap_token_value(it.next()), 3e10f32.into());
assert_eq!(unwrap_token_value(it.next()), 4.1e-10f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.01e12f32.into());
assert_eq!(unwrap_token_value(it.next()), 4.1e+10f32.into());
expect_lexer_end(&mut it);
// Test parsing with exponents
// - After values looking like an octal integer (works)
// - After values looking like a hexadecimal integer (doesn't work)
let mut it = Lexer::new("05e2 0x1e-2");
assert_eq!(unwrap_token_value(it.next()), 5e2f32.into());
assert_eq!(unwrap_token_value(it.next()), 0x1Ei32.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Minus.into());
assert_eq!(unwrap_token_value(it.next()), 2i32.into());
// Test parsing with nothing valid after the 'e' (technically it shouldn't
// be an error, but there's no language where that sequence of tokens is
// valid).
let mut it = Lexer::new("1.0e");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::FloatParsingError
);
// Check that 16bit and 64bit suffixes produce errors
let mut it = Lexer::new("1.0l");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
let mut it = Lexer::new("1.0L");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
let mut it = Lexer::new("1.0h");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
let mut it = Lexer::new("1.0H");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
}
#[test]
fn lex_punctuation() {
// Test parsing some of the tokens (but not all, that'd be too many tests!)
let mut it = Lexer::new("+ != <<=");
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
let token = unwrap_token(it.next());
assert_eq!(token.value, Punct::NotEqual.into());
assert_eq!(token.location, l(1, 2..4),);
let token = unwrap_token(it.next());
assert_eq!(token.value, Punct::LeftShiftAssign.into());
assert_eq!(token.location, l(1, 5..8),);
expect_lexer_end(&mut it);
// Test parsing a token that's a prefix of another one just before EOF
let mut it = Lexer::new("<");
assert_eq!(unwrap_token_value(it.next()), Punct::LeftAngle.into());
expect_lexer_end(&mut it);
// Test \\\n doesn't split the token
let mut it = Lexer::new("=\\\n=");
assert_eq!(unwrap_token_value(it.next()), Punct::EqualEqual.into());
expect_lexer_end(&mut it);
// Test whitespace splits the token
let mut it = Lexer::new("+/**/=");
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Equal.into());
expect_lexer_end(&mut it);
// Test a number stops processing the token
let mut it = Lexer::new("!1");
assert_eq!(unwrap_token_value(it.next()), Punct::Bang.into());
assert_eq!(unwrap_token_value(it.next()), 1.into());
expect_lexer_end(&mut it);
// Test an identifier stops processing the token
let mut it = Lexer::new("&a");
assert_eq!(unwrap_token_value(it.next()), Punct::Ampersand.into());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("a".to_string())
);
expect_lexer_end(&mut it);
// Test whitespace splits the token
let mut it = Lexer::new(">/**/>");
assert_eq!(unwrap_token_value(it.next()), Punct::RightAngle.into());
assert_eq!(unwrap_token_value(it.next()), Punct::RightAngle.into());
expect_lexer_end(&mut it);
// Test that tokens are parsed greedily: `a+++++b` is `a ++ ++ + b` (invalid GLSL) and not
// `(a ++) + (++ b)` (valid GLSL)
let mut it = Lexer::new("+++++");
assert_eq!(unwrap_token_value(it.next()), Punct::Increment.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Increment.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
expect_lexer_end(&mut it);
// Test that an invalid char produces an error
let mut it = Lexer::new("@");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::UnexpectedCharacter
);
// Extra punctuation tests for code coverage.
let mut it = Lexer::new("<= >= += -= &= || |= | ^= { } ] ? .");
assert_eq!(unwrap_token_value(it.next()), Punct::LessEqual.into());
assert_eq!(unwrap_token_value(it.next()), Punct::GreaterEqual.into());
assert_eq!(unwrap_token_value(it.next()), Punct::AddAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::SubAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::AndAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::LogicalOr.into());
assert_eq!(unwrap_token_value(it.next()), Punct::OrAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Pipe.into());
assert_eq!(unwrap_token_value(it.next()), Punct::XorAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::LeftBrace.into());
assert_eq!(unwrap_token_value(it.next()), Punct::RightBrace.into());
assert_eq!(unwrap_token_value(it.next()), Punct::RightBracket.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Question.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Dot.into());
expect_lexer_end(&mut it);
}
#[test]
fn lex_had_comments() {
// Test that had_comments doesn't get set to true if there are no comments.
let mut it = Lexer::new("#version");
assert!(!it.had_comments());
assert_eq!(unwrap_token_value(it.next()), TokenValue::Hash);
assert!(!it.had_comments());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("version".to_string())
);
assert!(!it.had_comments());
expect_lexer_end(&mut it);
// Test that had_comments doesn't get triggered by its sentinel value of '\r'
let mut it = Lexer::new("\r!");
assert!(!it.had_comments());
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(unwrap_token_value(it.next()), Punct::Bang.into());
assert!(!it.had_comments());
expect_lexer_end(&mut it);
// Test that had_comments gets triggered by // comments
let mut it = Lexer::new("//\n!");
assert!(!it.had_comments());
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert!(it.had_comments());
assert_eq!(unwrap_token_value(it.next()), Punct::Bang.into());
assert!(it.had_comments());
expect_lexer_end(&mut it);
// Test that had_comments gets triggered by /**/ comments
let mut it = Lexer::new("/**/#version");
assert!(!it.had_comments());
assert_eq!(unwrap_token_value(it.next()), TokenValue::Hash);
assert!(it.had_comments());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("version".to_string())
);
assert!(it.had_comments());
expect_lexer_end(&mut it);
}
// TODO test leading_whitespace (a sketch follows)
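
A sketch of that missing test (editor's addition, not part of the vendored file; relies on the `leading_whitespace` semantics defined in lexer.rs):

#[test]
fn lex_leading_whitespace() {
    // 'a' is at the start of the input (counted as leading whitespace), '+'
    // follows it directly, and 'b' comes after a space.
    let mut it = Lexer::new("a+ b");
    assert!(unwrap_token(it.next()).leading_whitespace);
    assert!(!unwrap_token(it.next()).leading_whitespace);
    assert!(unwrap_token(it.next()).leading_whitespace);
    expect_lexer_end(&mut it);
}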

vendor/pp-rs/src/lib.rs (vendored, new file, 11 lines)

@@ -0,0 +1,11 @@
extern crate unicode_xid;
#[allow(clippy::match_like_matches_macro)]
mod lexer;
pub mod pp;
pub mod token;
#[cfg(test)]
mod lexer_tests;
#[cfg(test)]
mod pp_tests;

vendor/pp-rs/src/pp.rs (vendored, new file, 1034 lines)

File diff suppressed because it is too large.

vendor/pp-rs/src/pp/if_parser.rs (vendored, new file, 457 lines)

@@ -0,0 +1,457 @@
use crate::token::{Integer, PreprocessorError, Punct};
use super::{Define, Location, MacroProcessor, MeLexer, Step, StepExit, Token, TokenValue};
use std::{collections::HashMap, convert::TryInto, rc::Rc, vec};
struct IfLexer<'macros> {
tokens: vec::IntoIter<Token>,
defines: &'macros HashMap<String, Rc<Define>>,
}
pub(super) struct IfParser<'macros> {
lexer: IfLexer<'macros>,
macro_processor: MacroProcessor,
location: Location,
parsing_if: bool,
carry: Option<Token>,
}
impl<'macros> IfParser<'macros> {
/// Builds a new IfParser that can be reused
///
/// `parsing_if` indicates whether or not undefined macros should be
/// replaced with 0
pub fn new(
tokens: Vec<Token>,
defines: &'macros HashMap<String, Rc<Define>>,
location: Location,
parsing_if: bool,
) -> Self {
IfParser {
lexer: IfLexer {
tokens: tokens.into_iter(),
defines,
},
macro_processor: MacroProcessor::default(),
location,
parsing_if,
carry: None,
}
}
/// Helper method to consume the next token without define expansion
fn raw_next(&mut self) -> Option<Token> {
self.carry
.take()
.or_else(|| self.macro_processor.step(&mut self.lexer).ok())
}
/// Helper method to consume the next token with define expansion
fn next(&mut self) -> Step<Option<Token>> {
let token = match self.raw_next() {
Some(t) => t,
None => return Ok(None),
};
Ok(match token.value {
TokenValue::Ident(ref name) if name != "defined" => {
match self.add_define(name, token.location)? {
Some(t) => Some(t),
None => self.next()?,
}
}
_ => Some(token),
})
}
/// Helper method to peek at the next token with define expansion
pub fn peek(&mut self) -> Step<Option<Token>> {
self.carry = self.next()?;
Ok(self.carry.clone())
}
/// Helper method to consume the next token without define expansion
///
/// Returns an EOI error if there are no further tokens
fn expect_raw_next(&mut self) -> Step<Token> {
self.raw_next().ok_or(StepExit::Error((
PreprocessorError::UnexpectedEndOfInput,
self.location,
)))
}
/// Helper method to consume the next token with define expansion
///
/// Returns an EOI error if there are no further tokens
fn expect_next(&mut self) -> Step<Token> {
self.next()?.ok_or(StepExit::Error((
PreprocessorError::UnexpectedEndOfInput,
self.location,
)))
}
/// Helper method to peek at the next token with define expansion
///
/// Returns an EOI error if there are no further tokens
fn expect_peek(&mut self) -> Step<Token> {
self.peek()?.ok_or(StepExit::Error((
PreprocessorError::UnexpectedEndOfInput,
self.location,
)))
}
fn add_define(&mut self, name: &str, location: Location) -> Step<Option<Token>> {
if self
.macro_processor
.start_define_invocation(name, location, &mut self.lexer)?
{
Ok(None)
} else if self.parsing_if {
Ok(Some(Token {
value: TokenValue::Integer(Integer {
value: 0,
signed: true,
width: 64,
}),
location,
}))
} else {
Err(StepExit::Error((
PreprocessorError::UnexpectedToken(TokenValue::Ident(name.to_string())),
location,
)))
}
}
fn handle_defined(&mut self) -> Step<i64> {
let next = self.expect_raw_next()?;
match next.value {
TokenValue::Ident(ref name) => Ok(self.lexer.defines.get(name).is_some() as i64),
TokenValue::Punct(Punct::LeftParen) => {
let name_token = self.expect_raw_next()?;
let name = match name_token.value {
TokenValue::Ident(name) => Ok(name),
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
name_token.location,
))),
}?;
let close_brace = self.expect_next()?;
match close_brace.value {
TokenValue::Punct(Punct::RightParen) => {
Ok(self.lexer.defines.get(&name).is_some() as i64)
}
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
close_brace.location,
))),
}
}
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
next.location,
))),
}
}
fn parse_atom(&mut self) -> Step<i64> {
let token = self.expect_next()?;
match token.value {
TokenValue::Ident(name) => {
debug_assert_eq!(name, "defined");
self.handle_defined()
}
TokenValue::Integer(int) => Ok(int.value as i64),
TokenValue::Punct(Punct::LeftParen) => {
let val = self.parse_logical_or()?;
let close_brace = self.expect_next()?;
match close_brace.value {
TokenValue::Punct(Punct::RightParen) => Ok(val),
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
close_brace.location,
))),
}
}
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
token.location,
))),
}
}
fn parse_unary(&mut self) -> Step<i64> {
match self.expect_peek()?.value {
TokenValue::Punct(punct) => match punct {
Punct::Plus | Punct::Minus | Punct::Bang | Punct::Tilde => {
self.next()?;
let val = self.parse_unary()?;
Ok(match punct {
Punct::Plus => val,
Punct::Minus => -val,
Punct::Bang => (val == 0) as i64,
Punct::Tilde => !val,
_ => unreachable!(),
})
}
_ => self.parse_atom(),
},
_ => self.parse_atom(),
}
}
fn parse_multiplicative(&mut self) -> Step<i64> {
let mut left = self.parse_unary()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::Star | Punct::Slash | Punct::Percent = punct {
self.next()?;
let right = self.parse_unary()?;
match punct {
Punct::Star => {
left = left.checked_mul(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
Punct::Slash => {
left = left.checked_div(right).ok_or(StepExit::Error((
PreprocessorError::DivisionByZero,
self.location,
)))?
}
Punct::Percent => {
left = left.checked_rem(right).ok_or(StepExit::Error((
PreprocessorError::RemainderByZero,
self.location,
)))?
}
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_additive(&mut self) -> Step<i64> {
let mut left = self.parse_multiplicative()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::Plus | Punct::Minus = punct {
self.next()?;
let right = self.parse_multiplicative()?;
match punct {
Punct::Plus => {
left = left.checked_add(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
Punct::Minus => {
left = left.checked_sub(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_shift(&mut self) -> Step<i64> {
let mut left = self.parse_additive()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::LeftShift | Punct::RightShift = punct {
self.next()?;
let right = self.parse_additive()?;
match punct {
Punct::LeftShift => {
let right = right.try_into().map_err(|_| {
StepExit::Error((PreprocessorError::IntegerOverflow, self.location))
})?;
left = left.checked_shl(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
Punct::RightShift => {
let right = right.try_into().map_err(|_| {
StepExit::Error((PreprocessorError::IntegerOverflow, self.location))
})?;
left = left.checked_shr(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_comparative(&mut self) -> Step<i64> {
let mut left = self.parse_shift()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::LeftAngle | Punct::RightAngle | Punct::LessEqual | Punct::GreaterEqual =
punct
{
self.next()?;
let right = self.parse_shift()?;
match punct {
Punct::LeftAngle => left = (left < right) as i64,
Punct::RightAngle => left = (left > right) as i64,
Punct::LessEqual => left = (left <= right) as i64,
Punct::GreaterEqual => left = (left >= right) as i64,
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_equality(&mut self) -> Step<i64> {
let mut left = self.parse_comparative()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::EqualEqual | Punct::NotEqual = punct {
self.next()?;
let right = self.parse_comparative()?;
match punct {
Punct::EqualEqual => left = (left == right) as i64,
Punct::NotEqual => left = (left != right) as i64,
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_bit_and(&mut self) -> Step<i64> {
let mut left = self.parse_equality()?;
while let Some(TokenValue::Punct(Punct::Ampersand)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_equality()?;
left &= right
}
Ok(left)
}
fn parse_bit_xor(&mut self) -> Step<i64> {
let mut left = self.parse_bit_and()?;
while let Some(TokenValue::Punct(Punct::Caret)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_bit_and()?;
left ^= right
}
Ok(left)
}
fn parse_bit_or(&mut self) -> Step<i64> {
let mut left = self.parse_bit_xor()?;
while let Some(TokenValue::Punct(Punct::Pipe)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_bit_xor()?;
left |= right
}
Ok(left)
}
fn parse_logical_and(&mut self) -> Step<i64> {
let mut left = self.parse_bit_or()?;
while let Some(TokenValue::Punct(Punct::LogicalAnd)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_bit_or()?;
left = (left != 0 && right != 0) as i64;
}
Ok(left)
}
fn parse_logical_or(&mut self) -> Step<i64> {
let mut left = self.parse_logical_and()?;
while let Some(TokenValue::Punct(Punct::LogicalOr)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_logical_and()?;
left = (left != 0 || right != 0) as i64;
}
Ok(left)
}
pub fn evaluate_expression(&mut self) -> Step<i64> {
self.parse_logical_or()
}
}
impl<'macros> MeLexer for IfLexer<'macros> {
fn step(&mut self) -> Step<Token> {
self.tokens.next().ok_or(StepExit::Finished)
}
fn get_define(&self, name: &str) -> Option<&Rc<Define>> {
self.defines.get(name)
}
fn apply_line_offset(&self, line: u32, _: Location) -> Step<u32> {
Ok(line)
}
}
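
For orientation, a minimal sketch of evaluating an `#if` expression with this parser (`eval` is a hypothetical helper, not part of the vendored file; `Token` is the `pp` module's token type with the `value` and `location` fields used above):

use std::collections::HashMap;

fn eval(tokens: Vec<Token>) -> Step<i64> {
    // No macros are defined; with `parsing_if` set, undefined identifiers evaluate to 0.
    let defines = HashMap::new();
    IfParser::new(tokens, &defines, Location::default(), true).evaluate_expression()
}

With tokens lexed from `1 + 2 * 3`, this returns Ok(7): operator precedence falls out of the recursive-descent chain from parse_logical_or down through parse_multiplicative to parse_atom.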

vendor/pp-rs/src/pp_tests.rs (vendored, new file, 1413 lines)

File diff suppressed because it is too large.

vendor/pp-rs/src/token.rs (vendored, new file, 159 lines)

@@ -0,0 +1,159 @@
// TODO: Source file
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Location {
/// Byte offset into the source string where the first char begins
pub start: u32,
/// Byte offset into the source string where the first char not belonging to
/// this `Location` begins
pub end: u32,
/// Line number, used internally by the `#line` directive and the `__LINE__` macro
pub(crate) line: u32,
}
impl Default for Location {
fn default() -> Self {
Location {
start: 0,
end: 0,
line: 1,
}
}
}
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Punct {
// Compound assignments
AddAssign,
SubAssign,
MulAssign,
DivAssign,
ModAssign,
LeftShiftAssign,
RightShiftAssign,
AndAssign,
XorAssign,
OrAssign,
// Two character punctuation
Increment,
Decrement,
LogicalAnd,
LogicalOr,
LogicalXor,
LessEqual,
GreaterEqual,
EqualEqual,
NotEqual,
LeftShift,
RightShift,
// Parenthesis or similar
LeftBrace,
RightBrace,
LeftParen,
RightParen,
LeftBracket,
RightBracket,
// Other one character punctuation
LeftAngle,
RightAngle,
Semicolon,
Comma,
Colon,
Dot,
Equal,
Bang,
Minus,
Tilde,
Plus,
Star,
Slash,
Percent,
Pipe,
Caret,
Ampersand,
Question,
}
#[derive(Clone, PartialEq, Debug)]
// TODO location?
pub enum PreprocessorError {
IntegerOverflow,
FloatParsingError,
UnexpectedCharacter,
UnexpectedToken(TokenValue),
UnexpectedHash,
UnexpectedNewLine,
UnexpectedEndOfInput,
TooFewDefineArguments,
TooManyDefineArguments,
ErrorDirective,
DuplicateParameter,
UnknownDirective,
DefineRedefined,
ElifOutsideOfBlock,
ElseOutsideOfBlock,
EndifOutsideOfBlock,
ElifAfterElse,
MoreThanOneElse,
UnfinishedBlock,
LineOverflow,
NotSupported16BitLiteral,
NotSupported64BitLiteral,
MacroNotDefined,
RecursionLimitReached,
DivisionByZero,
RemainderByZero,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Integer {
pub value: u64,
pub signed: bool,
pub width: i32,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Float {
pub value: f32,
pub width: i32,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Version {
pub tokens: Vec<Token>,
pub is_first_directive: bool,
pub has_comments_before: bool,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Extension {
pub tokens: Vec<Token>,
pub has_non_directive_before: bool,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Pragma {
pub tokens: Vec<Token>,
}
#[derive(Clone, PartialEq, Debug)]
pub enum TokenValue {
Ident(String),
Integer(Integer),
Float(Float),
Punct(Punct),
Version(Version),
Extension(Extension),
Pragma(Pragma),
}
#[derive(Clone, PartialEq, Debug)]
pub struct Token {
pub value: TokenValue,
pub location: Location,
// TODO macro invocation stack?
}
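
Since `start` and `end` are byte offsets, the source text of a token can be recovered by slicing; a small sketch (hypothetical helper, not part of the vendored file):

/// Returns the source text covered by `location` (a half-open byte range).
pub fn location_text<'a>(source: &'a str, location: &Location) -> &'a str {
    &source[location.start as usize..location.end as usize]
}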