Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

1
vendor/pp-rs/.cargo-checksum.json vendored Normal file

@@ -0,0 +1 @@
{"files":{"Cargo.toml":"f0f19a5b1f5b882eacf1098c1c7148e55a64b767d06295944fbcdc240b62497d","LICENSE":"f1f86507901c4e35cdf3454a62ef96bc998f9ae9602e47975268655f3510fe29","README.md":"8d41bf0ee1c02264e2c1adc7e7cc422fe426bfeb8aef4f7eea3dd0e9ea06025d","src/lexer.rs":"26c4f8a6d888fe663d7bac147bf5a3e6a85d0778eccf06ee20dfe4887fac4c36","src/lexer_tests.rs":"e659469db529ec52c1aa2c40ee4ee12d8201bbbb965a99fcd8aa1048bc4fa377","src/lib.rs":"6b3c084d310f60957f506d02a9191655566b649c7e669353f8535dc2e347dfdf","src/pp.rs":"bf104d5ac0736b5e38b3293b754434a9d49cd199d8836d8f6a12100235e82ec2","src/pp/if_parser.rs":"c8deb5d5683a692bca51c99b855bb1f8606ab6abf8ca583f41dbe2b00603fb98","src/pp_tests.rs":"e381a929f93694b696ea0e322309cd7af0d474ba0379a1684de76f160fe65160","src/token.rs":"b1b37c2be6b178a76229b9b4991e716834afeeff29e4bd9c67db80c812d4006f"},"package":"bb458bb7f6e250e6eb79d5026badc10a3ebb8f9a15d1fff0f13d17c71f4d6dee"}

23
vendor/pp-rs/Cargo.toml vendored Normal file

@@ -0,0 +1,23 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "pp-rs"
version = "0.2.1"
authors = ["pp-rs Developers"]
description = "Shader preprocessor"
homepage = "https://github.com/Kangz/glslpp-rs"
keywords = ["shader", "preprocessor", "glsl"]
license = "BSD-3-Clause"
repository = "https://github.com/Kangz/glslpp-rs"
[dependencies.unicode-xid]
version = "0.2"

29
vendor/pp-rs/LICENSE vendored Normal file

@@ -0,0 +1,29 @@
BSD 3-Clause License
Copyright (c) 2020, Corentin Wallez
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

12
vendor/pp-rs/README.md vendored Normal file

@@ -0,0 +1,12 @@
# (glsl)pp-rs
<!-- Some Matrix channel, like Naga's? -->
[![Crates.io](https://img.shields.io/crates/v/pp-rs.svg?label=pp-rs)](https://crates.io/crates/pp-rs)
[![Docs.rs](https://docs.rs/pp-rs/badge.svg)](https://docs.rs/pp-rs)
[![Build Status](https://github.com/Kangz/glslpp-rs/workflows/pipeline/badge.svg)](https://github.com/Kangz/glslpp-rs/actions)
[![codecov](https://codecov.io/gh/Kangz/glslpp-rs/branch/main/graph/badge.svg?token=CMM90W97YO)](https://codecov.io/gh/Kangz/glslpp-rs)
A shader preprocessor and lexer in Rust.
**WARNING**: this crate is a work-in-progress: its interface will change and documentation is extremely lacking.

628
vendor/pp-rs/src/lexer.rs vendored Normal file

@@ -0,0 +1,628 @@
use crate::token::{Float, Integer, Location, PreprocessorError, Punct};
use std::str::Chars;
use unicode_xid::UnicodeXID;
type CharAndLine = (char, u32);
// GLSL ES 3.20 specification section 3.10. Logical Phases of Compilation
// This iterator implements phases 4 and 5 of the logical phases of compilation:
//
// 4. Each {carriage-return, line-feed} and {line-feed, carriage return} sequence is replaced by
// a single newline. All remaining carriage-return and line-feed characters are then each
// replaced by a newline.
//
// 5. Line numbering for each character, which is equal to the number of preceding newlines plus
// one, is noted. Note this can only be subsequently changed by the #line directive and is not
// affected by the removal of newlines in phase 6 of compilation.
//
// It expects that phases 1 to 3 are already done and that valid UTF-8 is passed in.
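//
// For example (a sketch mirroring chars_and_location in lexer_tests.rs):
// feeding "a\r\nb" yields ('a', 1), ('\n', 1), ('b', 2): the \r\n pair is
// collapsed into a single newline and the line counter advances after it.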
#[derive(Clone)]
pub struct CharsAndLine<'a> {
inner: Chars<'a>,
line: u32,
}
impl<'a> CharsAndLine<'a> {
pub fn new(input: &'a str) -> Self {
CharsAndLine {
inner: input.chars(),
line: 1,
}
}
pub fn get_current_ptr(&self) -> *const u8 {
self.inner.as_str().as_ptr()
}
}
impl<'a> Iterator for CharsAndLine<'a> {
type Item = CharAndLine;
fn next(&mut self) -> Option<Self::Item> {
let current = self.inner.next()?;
match current {
'\n' => {
// Consume the token but see if we can grab a \r that follows
let mut peek_inner = self.inner.clone();
if peek_inner.next() == Some('\r') {
self.inner = peek_inner;
}
let res = Some(('\n', self.line));
self.line += 1;
res
}
'\r' => {
// Consume the token but see if we can grab a \n that follows
let mut peek_inner = self.inner.clone();
if peek_inner.next() == Some('\n') {
self.inner = peek_inner;
}
let res = Some(('\n', self.line));
self.line += 1;
res
}
_ => Some((current, self.line)),
}
}
}
// An iterator that adds stage 6 on top of CharsAndLine:
//
// 6. Wherever a backslash ('\') occurs immediately before a newline, both are deleted. Note that
// no whitespace is substituted, thereby allowing a single preprocessing token to span a
// newline. This operation is not recursive; any new {backslash newline} sequences generated
// are not removed.
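//
// For example (cf. skip_backslash_newline in lexer_tests.rs): "a\\\nb"
// yields ('a', 1) then ('b', 2), while a lone backslash as in "a\\b" is
// passed through unchanged.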
#[derive(Clone)]
pub struct SkipBackslashNewline<'a> {
inner: CharsAndLine<'a>,
}
impl<'a> SkipBackslashNewline<'a> {
pub fn new(input: &'a str) -> Self {
SkipBackslashNewline {
inner: CharsAndLine::new(input),
}
}
pub fn get_current_ptr(&self) -> *const u8 {
self.inner.get_current_ptr()
}
}
impl<'a> Iterator for SkipBackslashNewline<'a> {
type Item = CharAndLine;
fn next(&mut self) -> Option<Self::Item> {
let mut current = self.inner.next()?;
while current.0 == '\\' {
let mut peek_inner = self.inner.clone();
if let Some(('\n', _)) = peek_inner.next() {
self.inner = peek_inner;
current = self.next()?;
} else {
return Some(current);
}
}
Some(current)
}
}
// An iterator that adds stage 7 on top of SkipBackslashNewline:
//
// 7. All comments are replaced with a single space. All (non-zero) characters and invalid UTF-8
// byte sequences are allowed within comments. '//' style comments include the initial '//'
// marker and continue up to, but not including, the terminating newline. '/*…*/' comments
// include both the start and end marker.
#[derive(Clone)]
pub struct ReplaceComments<'a> {
inner: SkipBackslashNewline<'a>,
}
// The lexer needs to know when whitespace came from a comment, so it can report whether any
// comments were processed. To avoid adding state we use a sentinel value of '\r', which is
// safe because all '\r' characters have already been turned into '\n' by CharsAndLine.
pub const COMMENT_SENTINEL_VALUE: char = '\r';
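// For example (cf. replace_comments in lexer_tests.rs): "a//foo\nb" yields
// ('a', 1), (COMMENT_SENTINEL_VALUE, 1), ('\n', 1), ('b', 2); the comment
// body is dropped but the terminating newline survives.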
impl<'a> ReplaceComments<'a> {
pub fn new(input: &'a str) -> Self {
ReplaceComments {
inner: SkipBackslashNewline::new(input),
}
}
pub fn get_current_ptr(&self) -> *const u8 {
self.inner.get_current_ptr()
}
}
impl<'a> Iterator for ReplaceComments<'a> {
type Item = CharAndLine;
fn next(&mut self) -> Option<Self::Item> {
let current = self.inner.next()?;
if current.0 != '/' {
debug_assert!(current.0 != COMMENT_SENTINEL_VALUE);
return Some(current);
}
let mut peek_inner = self.inner.clone();
match peek_inner.next() {
// The // case, consume until but not including the next \n
Some(('/', _)) => {
self.inner = peek_inner.clone();
while let Some((next, _)) = peek_inner.next() {
if next == '\n' {
break;
}
self.inner = peek_inner.clone();
}
Some((COMMENT_SENTINEL_VALUE, current.1))
}
// The /*, consume until the next */
Some(('*', _)) => {
let mut was_star = false;
while let Some((next, _)) = peek_inner.next() {
if was_star && next == '/' {
break;
}
was_star = next == '*';
}
self.inner = peek_inner;
Some((COMMENT_SENTINEL_VALUE, current.1))
}
// Not // or /*, do nothing
_ => Some(current),
}
}
}
// A lexer for GLSL tokens that also emits a couple of extra tokens that are useful to the
// preprocessor: # and newlines. It also includes metadata on each token: whether it is at the
// start of the line and whether it has leading whitespace.
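// For example (cf. lex_hash in lexer_tests.rs): lexing "a#b" produces
// Ident("a"), Hash, Ident("b"), followed by the synthetic final NewLine.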
// This is a helper iterator to abstract away the tracking of location data (offset, line) from
// `Lexer`. It looks like a Peekable<Iterator<char>> with `next_char` and `peek_char` but also
// allows querying the last seen/consumed lines / offset.
#[derive(Clone)]
struct LexerCharIterator<'a> {
inner: ReplaceComments<'a>,
peeked: Option<(CharAndLine, *const u8)>,
last_consumed: (CharAndLine, *const u8),
input_start: *const u8,
}
pub const NONE_CONSUMED_SENTINEL_VALUE: char = '\r';
impl<'a> LexerCharIterator<'a> {
pub fn new(input: &'a str) -> Self {
LexerCharIterator {
inner: ReplaceComments::new(input),
peeked: None,
last_consumed: ((NONE_CONSUMED_SENTINEL_VALUE, 0), input.as_bytes().as_ptr()),
input_start: input.as_bytes().as_ptr(),
}
}
fn next_char(&mut self) -> Option<char> {
self.last_consumed = match self.peeked.take() {
Some(v) => v,
None => {
let ptr = self.inner.get_current_ptr();
(self.inner.next()?, ptr)
}
};
Some(self.last_consumed.0 .0)
}
fn peek_char(&mut self) -> Option<char> {
match self.peeked {
Some(v) => Some(v.0 .0),
None => {
let ptr = self.inner.get_current_ptr();
let next = self.inner.next()?;
self.peeked = Some((next, ptr));
Some(next.0)
}
}
}
fn get_last_seen_line(&self) -> u32 {
self.peeked.unwrap_or(self.last_consumed).0 .1
}
fn get_last_seen_start_offset(&self) -> usize {
self.peeked.unwrap_or(self.last_consumed).1 as usize - self.input_start as usize
}
fn get_last_consumed_end_offset(&self) -> usize {
self.last_consumed.1 as usize - self.input_start as usize
+ self.last_consumed.0 .0.len_utf8()
}
}
// A superset of the token value returned by the preprocessor
#[derive(Clone, PartialEq, Debug)]
pub enum TokenValue {
// Preprocessor specific token values
Hash,
NewLine,
// Regular token values
Ident(String),
Integer(Integer),
Float(Float),
Punct(Punct),
}
impl From<Punct> for TokenValue {
fn from(punct: Punct) -> Self {
TokenValue::Punct(punct)
}
}
#[derive(Clone, PartialEq, Debug)]
pub struct Token {
pub value: TokenValue,
pub location: Location,
pub leading_whitespace: bool,
pub start_of_line: bool,
}
pub type LexerItem = Result<Token, (PreprocessorError, Location)>;
pub struct Lexer<'a> {
inner: LexerCharIterator<'a>,
leading_whitespace: bool,
start_of_line: bool,
had_comments: bool,
}
impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Self {
// TODO bail out on source that is too large.
Lexer {
inner: LexerCharIterator::new(input),
leading_whitespace: true,
start_of_line: true,
had_comments: false,
}
}
pub fn had_comments(&self) -> bool {
self.had_comments
}
// TODO: Make a runtime flag to toggle unicode identifier support.
// The GLSL spec only allows ASCII identifiers.
fn parse_identifier(&mut self) -> Result<TokenValue, PreprocessorError> {
let mut identifier = String::default();
if let Some(c) = self.next_char_if(|c| c.is_xid_start() || c == '_') {
identifier.push(c);
}
let rest = self.consume_chars(|c| c.is_xid_continue());
identifier.push_str(&rest);
// TODO check if identifier is larger than the limit.
Ok(TokenValue::Ident(identifier))
}
fn parse_integer_signedness_suffix(&mut self) -> bool {
self.next_char_if(|c| c == 'u' || c == 'U').is_none()
}
fn parse_integer_width_suffix(&mut self) -> Result<i32, PreprocessorError> {
match self.inner.peek_char() {
Some('l') | Some('L') => Err(PreprocessorError::NotSupported64BitLiteral),
Some('s') | Some('S') => Err(PreprocessorError::NotSupported16BitLiteral),
_ => Ok(32),
}
}
fn parse_float_width_suffix(&mut self) -> Result<i32, PreprocessorError> {
match self.inner.peek_char() {
Some('l') | Some('L') => Err(PreprocessorError::NotSupported64BitLiteral),
Some('h') | Some('H') => Err(PreprocessorError::NotSupported16BitLiteral),
Some('f') | Some('F') => {
self.inner.next_char();
Ok(32)
}
_ => Ok(32),
}
}
fn next_char_if(&mut self, predicate: impl FnOnce(char) -> bool) -> Option<char> {
if let Some(c) = self.inner.peek_char() {
if predicate(c) {
return self.inner.next_char();
}
}
None
}
fn consume_chars(&mut self, filter: impl Fn(char) -> bool) -> String {
let mut result: String = Default::default();
while let Some(c) = self.next_char_if(&filter) {
result.push(c);
}
result
}
fn parse_number(&mut self, first_char: char) -> Result<TokenValue, PreprocessorError> {
let mut is_float = false;
let mut integer_radix = 10;
let mut raw: String = Default::default();
raw.push(first_char);
// Handle hexadecimal numbers that need to consume a..f in addition to digits.
if first_char == '0' {
match self.inner.peek_char() {
Some('x') | Some('X') => {
self.inner.next_char();
raw += &self.consume_chars(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F'));
integer_radix = 16;
}
// Octal numbers can also be the prefix of floats, so we need to parse all digits
// and not just 0..7 in case it is a float like 00009.0f. The parsing of all digits
// is done below, but we still need to remember the radix.
Some('0'..='9') => {
integer_radix = 8;
}
_ => {}
};
}
if first_char == '.' {
is_float = true;
} else {
// Parse any digits at the end of integers, or for the non-fractional part of floats.
raw += &self.consume_chars(|c| ('0'..='9').contains(&c));
if self.next_char_if(|c| c == '.').is_some() {
raw.push('.');
is_float = true;
}
}
// At this point either we're an integer missing only suffixes, or we're a float with
// everything up to the . consumed.
if is_float {
raw += &self.consume_chars(|c| ('0'..='9').contains(&c));
}
// Handle scientific notation with an (e|E)(+|-|)\d+ suffix when we're a float, or
// an integer that could turn into a float if we add an exponent to it (so 0x1E-1
// isn't recognized as a float).
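// For example (cf. lex_float in lexer_tests.rs): "05e2" becomes the float
// 5e2 even though it starts like an octal integer, while "0x1e-2" lexes as
// the integer 0x1E followed by Minus and 2.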
if (is_float || integer_radix == 8 || integer_radix == 10)
&& self.next_char_if(|c| c == 'e' || c == 'E').is_some()
{
raw.push('e');
is_float = true;
match self.inner.peek_char() {
Some('+') => {
self.inner.next_char();
raw.push('+');
}
Some('-') => {
self.inner.next_char();
raw.push('-');
}
_ => {}
}
// TODO: what should we do when there is no number after the exponent?
raw += &self.consume_chars(|c| ('0'..='9').contains(&c));
}
if is_float {
// TODO: Depending on the GLSL version make it an error to not have the suffix.
let width = self.parse_float_width_suffix()?;
Ok(TokenValue::Float(Float {
value: raw
.parse::<f32>()
.map_err(|_| PreprocessorError::FloatParsingError)?,
width,
}))
} else {
let signed = self.parse_integer_signedness_suffix();
let width = self.parse_integer_width_suffix()?;
// Skip the initial 0 in hexadecimal or octal (in hexadecimal we never added the 'x').
if integer_radix != 10 {
raw = raw.split_off(1);
}
Ok(TokenValue::Integer(Integer {
value: u64::from_str_radix(&raw, integer_radix)
.map_err(|_err| PreprocessorError::IntegerOverflow)?,
signed,
width,
}))
}
}
fn parse_punctuation(&mut self) -> Result<TokenValue, PreprocessorError> {
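// Greedy matching: read up to three characters, pick the longest
// punctuator they spell, then rewind to the save point and re-consume
// exactly the matched length.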
let save_point = self.inner.clone();
let char0 = self.inner.next_char().unwrap_or('\0');
let char1 = self.inner.next_char().unwrap_or('\0');
let char2 = self.inner.next_char().unwrap_or('\0');
let maybe_punct = match (char0, char1, char2) {
('<', '<', '=') => Some((Punct::LeftShiftAssign, 3)),
('<', '<', _) => Some((Punct::LeftShift, 2)),
('<', '=', _) => Some((Punct::LessEqual, 2)),
('<', _, _) => Some((Punct::LeftAngle, 1)),
('>', '>', '=') => Some((Punct::RightShiftAssign, 3)),
('>', '>', _) => Some((Punct::RightShift, 2)),
('>', '=', _) => Some((Punct::GreaterEqual, 2)),
('>', _, _) => Some((Punct::RightAngle, 1)),
('+', '+', _) => Some((Punct::Increment, 2)),
('+', '=', _) => Some((Punct::AddAssign, 2)),
('+', _, _) => Some((Punct::Plus, 1)),
('-', '-', _) => Some((Punct::Decrement, 2)),
('-', '=', _) => Some((Punct::SubAssign, 2)),
('-', _, _) => Some((Punct::Minus, 1)),
('&', '&', _) => Some((Punct::LogicalAnd, 2)),
('&', '=', _) => Some((Punct::AndAssign, 2)),
('&', _, _) => Some((Punct::Ampersand, 1)),
('|', '|', _) => Some((Punct::LogicalOr, 2)),
('|', '=', _) => Some((Punct::OrAssign, 2)),
('|', _, _) => Some((Punct::Pipe, 1)),
('^', '^', _) => Some((Punct::LogicalXor, 2)),
('^', '=', _) => Some((Punct::XorAssign, 2)),
('^', _, _) => Some((Punct::Caret, 1)),
('=', '=', _) => Some((Punct::EqualEqual, 2)),
('=', _, _) => Some((Punct::Equal, 1)),
('!', '=', _) => Some((Punct::NotEqual, 2)),
('!', _, _) => Some((Punct::Bang, 1)),
('*', '=', _) => Some((Punct::MulAssign, 2)),
('*', _, _) => Some((Punct::Star, 1)),
('/', '=', _) => Some((Punct::DivAssign, 2)),
('/', _, _) => Some((Punct::Slash, 1)),
('%', '=', _) => Some((Punct::ModAssign, 2)),
('%', _, _) => Some((Punct::Percent, 1)),
('(', _, _) => Some((Punct::LeftParen, 1)),
(')', _, _) => Some((Punct::RightParen, 1)),
('{', _, _) => Some((Punct::LeftBrace, 1)),
('}', _, _) => Some((Punct::RightBrace, 1)),
('[', _, _) => Some((Punct::LeftBracket, 1)),
(']', _, _) => Some((Punct::RightBracket, 1)),
(',', _, _) => Some((Punct::Comma, 1)),
(';', _, _) => Some((Punct::Semicolon, 1)),
(':', _, _) => Some((Punct::Colon, 1)),
('~', _, _) => Some((Punct::Tilde, 1)),
('?', _, _) => Some((Punct::Question, 1)),
// Note that Dot (".") is handled in Lexer::next since it can be
// either punctuation or the start of a floating point number.
_ => None,
};
if let Some((punct, size)) = maybe_punct {
self.inner = save_point;
for _ in 0..size {
self.inner.next_char();
}
Ok(punct.into())
} else if char0 == '#' {
self.inner = save_point;
self.inner.next_char();
Ok(TokenValue::Hash)
} else {
Err(PreprocessorError::UnexpectedCharacter)
}
}
}
impl<'a> Iterator for Lexer<'a> {
type Item = LexerItem;
fn next(&mut self) -> Option<Self::Item> {
while let Some(current_char) = self.inner.peek_char() {
let had_leading_whitespace = self.leading_whitespace;
self.leading_whitespace = false;
let mut location = Location {
line: self.inner.get_last_seen_line(),
start: self.inner.get_last_seen_start_offset() as u32,
end: 0,
};
let was_start_of_line = self.start_of_line;
self.start_of_line = false;
let value = match current_char {
' ' | '\t' | '\x0b' | '\x0c' | COMMENT_SENTINEL_VALUE => {
if current_char == COMMENT_SENTINEL_VALUE {
self.had_comments = true;
}
self.start_of_line = was_start_of_line;
self.leading_whitespace = true;
self.inner.next_char();
continue;
}
'\n' => {
self.leading_whitespace = true;
self.start_of_line = true;
self.inner.next_char();
Ok(TokenValue::NewLine)
}
c @ '0'..='9' => {
self.inner.next_char();
self.parse_number(c)
}
// Special case . as a punctuation because it can be the start of a float.
'.' => {
self.inner.next_char();
match self.inner.peek_char() {
Some('0'..='9') => self.parse_number('.'),
_ => Ok(TokenValue::Punct(Punct::Dot)),
}
}
_ => {
// TODO: see todo in `parse_identifier` for information
if current_char.is_xid_start() || current_char == '_' {
self.parse_identifier()
} else {
self.parse_punctuation()
}
}
};
location.end = self.inner.get_last_consumed_end_offset() as u32;
return Some(value.map_err(|e| (e, Default::default())).map(|t| Token {
value: t,
location,
leading_whitespace: had_leading_whitespace,
start_of_line: was_start_of_line,
}));
}
// Do the C hack of always ending with a newline so that preprocessor directives are ended.
if !self.start_of_line {
self.start_of_line = true;
let end_offset = self.inner.get_last_consumed_end_offset() as u32;
Some(Ok(Token {
value: TokenValue::NewLine,
location: Location {
line: self.inner.get_last_seen_line(),
start: end_offset,
end: end_offset,
},
leading_whitespace: self.leading_whitespace,
start_of_line: false,
}))
} else {
None
}
}
}

821
vendor/pp-rs/src/lexer_tests.rs vendored Normal file

@@ -0,0 +1,821 @@
use super::lexer::{
CharsAndLine, Lexer, LexerItem, ReplaceComments, SkipBackslashNewline, Token, TokenValue,
COMMENT_SENTINEL_VALUE,
};
use super::token::{Float, Integer, Location, PreprocessorError, Punct};
use std::ops::Range;
fn c(c: char, line: u32) -> Option<(char, u32)> {
Some((c, line))
}
fn l(line: u32, pos: Range<u32>) -> Location {
Location {
line,
start: pos.start,
end: pos.end,
}
}
fn unwrap_token(item: Option<LexerItem>) -> Token {
item.unwrap().unwrap()
}
fn unwrap_token_value(item: Option<LexerItem>) -> TokenValue {
unwrap_token(item).value
}
fn unwrap_error(item: Option<LexerItem>) -> PreprocessorError {
item.unwrap().unwrap_err().0
}
fn expect_lexer_end(lexer: &mut Lexer) {
assert_eq!(unwrap_token_value(lexer.next()), TokenValue::NewLine);
assert_eq!(lexer.next(), None);
}
impl From<i32> for TokenValue {
fn from(value: i32) -> Self {
TokenValue::Integer(Integer {
value: value as u64,
signed: true,
width: 32,
})
}
}
impl From<u32> for TokenValue {
fn from(value: u32) -> Self {
TokenValue::Integer(Integer {
value: value as u64,
signed: false,
width: 32,
})
}
}
impl From<f32> for TokenValue {
fn from(value: f32) -> Self {
TokenValue::Float(Float { value, width: 32 })
}
}
#[test]
fn chars_and_location() {
// Test handling of characters in a line.
let mut it = CharsAndLine::new("abc");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('b', 1));
assert_eq!(it.next(), c('c', 1));
assert_eq!(it.next(), None);
// Test handling of \n in the regular case.
let mut it = CharsAndLine::new("a\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test handling of \r in the regular case.
let mut it = CharsAndLine::new("a\rb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test handling of \n\r.
let mut it = CharsAndLine::new("a\n\rb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test handling of \r\n.
let mut it = CharsAndLine::new("a\r\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test handling of a mix of \r and \n
let mut it = CharsAndLine::new("\n\r\n\r\r\r\n");
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('\n', 2));
assert_eq!(it.next(), c('\n', 3));
assert_eq!(it.next(), c('\n', 4));
assert_eq!(it.next(), None);
// Unicode handling
let mut it = CharsAndLine::new("a→üs🦀");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('→', 1));
assert_eq!(it.next(), c('ü', 1));
assert_eq!(it.next(), c('s', 1));
assert_eq!(it.next(), c('🦀', 1));
assert_eq!(it.next(), None);
}
#[test]
fn skip_backslash_newline() {
// Test a simple case.
let mut it = SkipBackslashNewline::new("a\\\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test a double case that requires the loop in the algorithm.
let mut it = SkipBackslashNewline::new("a\\\n\\\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('b', 3));
assert_eq!(it.next(), None);
// Test a backslash on its own
let mut it = SkipBackslashNewline::new("a\\b");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\\', 1));
assert_eq!(it.next(), c('b', 1));
assert_eq!(it.next(), None);
// Test a case just before EOF
let mut it = SkipBackslashNewline::new("\\\n");
assert_eq!(it.next(), None);
}
#[test]
fn replace_comments() {
// Test a slash that's not a comment
let mut it = ReplaceComments::new("a/b");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('/', 1));
assert_eq!(it.next(), c('b', 1));
assert_eq!(it.next(), None);
// Test a slash with nothing afterwards
let mut it = ReplaceComments::new("a/");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('/', 1));
assert_eq!(it.next(), None);
// Test a single-line comment
let mut it = ReplaceComments::new("a//foo\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test a single-line comment without an ending newline
let mut it = ReplaceComments::new("//foo");
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test a single-line comment with nothing afterwards
let mut it = ReplaceComments::new("//");
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test a single-line comment with a line continuation
let mut it = ReplaceComments::new("//foo\\\na");
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test a multi-line comment
let mut it = ReplaceComments::new("a/*fo\n\no*/b");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), c('b', 3));
assert_eq!(it.next(), None);
// Test a multi-line comment, without a proper ending (only the *)
let mut it = ReplaceComments::new("a/*fo\n\no*");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test a multi-line comment, without a proper ending (nothing)
let mut it = ReplaceComments::new("a/*fo\n\no");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test that /*/ is not a complete multi-line comment
let mut it = ReplaceComments::new("a/*/b");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
}
#[test]
fn lex_whitespace() {
// Empty input gives nothing.
let mut it = Lexer::new("");
assert_eq!(it.next(), None);
// Pure whitespace gives nothing too
let mut it = Lexer::new("/**/\t //a");
assert_eq!(it.next(), None);
}
#[test]
fn lex_newline() {
let mut it = Lexer::new("\r\n\n");
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(it.next(), None);
// Check a newline is added only if the last token wasn't a newline
let mut it = Lexer::new("\r\n\n\t/**/ //");
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
expect_lexer_end(&mut it);
let mut it = Lexer::new("\r\n\n#");
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(unwrap_token_value(it.next()), TokenValue::Hash);
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(it.next(), None);
}
#[test]
fn lex_hash() {
let mut it = Lexer::new("a#b");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("a".to_string())
);
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Hash);
assert_eq!(token.location, l(1, 1..2));
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("b".to_string())
);
expect_lexer_end(&mut it);
let mut it = Lexer::new("\nvoid #");
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("void".into())
);
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Hash);
assert_eq!(token.location, l(2, 6..7));
expect_lexer_end(&mut it);
}
#[test]
fn lex_metadata() {
// Test the metadata of the first token
let mut it = Lexer::new("1");
assert_eq!(
unwrap_token(it.next()),
Token {
value: 1.into(),
location: l(1, 0..1),
leading_whitespace: true,
start_of_line: true
}
);
expect_lexer_end(&mut it);
// Test that the various kinds of whitespace are recognized (or the lack thereof)
let mut it = Lexer::new(" 1/*\n*/2\t3+\n4");
// 1 is the first token and the whitespace doesn't prevent it from being the start of the
// line
assert_eq!(
unwrap_token(it.next()),
Token {
value: 1.into(),
location: l(1, 1..2),
leading_whitespace: true,
start_of_line: true
}
);
// 2 is not at the start of the line because the \n in the /**/ doesn't count; however, its
// location correctly lists the second line.
assert_eq!(
unwrap_token(it.next()),
Token {
value: 2.into(),
location: l(2, 7..8),
leading_whitespace: true,
start_of_line: false
}
);
assert_eq!(
unwrap_token(it.next()),
Token {
value: 3.into(),
location: l(2, 9..10),
leading_whitespace: true,
start_of_line: false
}
);
// + doesn't have a leading whitespace
assert_eq!(
unwrap_token(it.next()),
Token {
value: Punct::Plus.into(),
location: l(2, 10..11),
leading_whitespace: false,
start_of_line: false
}
);
// The newline is correctly tagged on the preceding line
assert_eq!(
unwrap_token(it.next()),
Token {
value: TokenValue::NewLine,
location: l(2, 11..12),
leading_whitespace: false,
start_of_line: false
}
);
// 4 is after a newline that correctly sets start_of_line
assert_eq!(
unwrap_token(it.next()),
Token {
value: 4.into(),
location: l(3, 12..13),
leading_whitespace: true,
start_of_line: true
}
);
// The final newline added by the lexer is at the correct position
assert_eq!(
unwrap_token(it.next()),
Token {
value: TokenValue::NewLine,
location: l(3, 13..13),
leading_whitespace: false,
start_of_line: false
}
);
assert_eq!(it.next(), None);
}
#[test]
fn lex_identifiers() {
// Test some basic identifier cases
let mut it = Lexer::new("foo BA_R baz0");
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Ident("foo".to_string()));
assert_eq!(token.location, l(1, 0..3),);
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Ident("BA_R".to_string()));
assert_eq!(token.location, l(1, 4..8),);
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Ident("baz0".to_string()));
assert_eq!(token.location, l(1, 9..13),);
expect_lexer_end(&mut it);
// Test _ is a valid identifier
let mut it = Lexer::new("_");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("_".to_string())
);
expect_lexer_end(&mut it);
// Test that identifiers are not split by escaped newlines
let mut it = Lexer::new("a\\\nb");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("ab".to_string())
);
expect_lexer_end(&mut it);
// Test that identifiers are split by other whitespace like /**/
let mut it = Lexer::new("a/**/b");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("a".to_string())
);
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("b".to_string())
);
expect_lexer_end(&mut it);
}
#[test]
fn lex_decimal() {
// Test some basic cases
let mut it = Lexer::new("1 0u 42 65536U");
assert_eq!(unwrap_token_value(it.next()), 1.into());
let token = unwrap_token(it.next());
assert_eq!(token.value, 0u32.into());
assert_eq!(token.location, l(1, 2..4),);
let token = unwrap_token(it.next());
assert_eq!(token.value, 42.into());
assert_eq!(token.location, l(1, 5..7),);
let token = unwrap_token(it.next());
assert_eq!(token.value, 65536u32.into());
assert_eq!(token.location, l(1, 8..14),);
expect_lexer_end(&mut it);
// Test splitting with identifiers
let mut it = Lexer::new("31ab");
assert_eq!(unwrap_token_value(it.next()), 31.into());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("ab".to_string())
);
expect_lexer_end(&mut it);
// Test splitting with whitespace
let mut it = Lexer::new("31/**/32");
assert_eq!(unwrap_token_value(it.next()), 31.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test splitting with punctuation
let mut it = Lexer::new("31+32");
assert_eq!(unwrap_token_value(it.next()), 31.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test that 2^64 produces an overflow error but that 2^64-1 correctly parses (even if it might
// produce an error down the line).
let mut it = Lexer::new("18446744073709551616");
assert_eq!(unwrap_error(it.next()), PreprocessorError::IntegerOverflow);
let mut it = Lexer::new("18446744073709551615");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Integer(Integer {
value: 18446744073709551615,
signed: true,
width: 32
})
);
expect_lexer_end(&mut it);
// Check that the 16bit or 64bit suffixes produce errors (for now).
let mut it = Lexer::new("13s");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
let mut it = Lexer::new("13S");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
let mut it = Lexer::new("13l");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
let mut it = Lexer::new("13L");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
// Check that they produce unsupported errors even when combined with an unsigned suffix.
let mut it = Lexer::new("13uS");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
let mut it = Lexer::new("13Ul");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
}
#[test]
fn lex_hexadecimal() {
// Test some basic cases
let mut it = Lexer::new("0x1 0X0u 0xBaFfe 0XcaFeU");
assert_eq!(unwrap_token_value(it.next()), 1.into());
assert_eq!(unwrap_token_value(it.next()), 0u32.into());
let token = unwrap_token(it.next());
assert_eq!(token.value, 0xBAFFE.into());
assert_eq!(token.location, l(1, 9..16),);
let token = unwrap_token(it.next());
assert_eq!(token.value, 0xCAFEu32.into());
assert_eq!(token.location, l(1, 17..24),);
expect_lexer_end(&mut it);
// Test with redundant zeroes
let mut it = Lexer::new("0x000 0x000000000000001");
assert_eq!(unwrap_token_value(it.next()), 0.into());
assert_eq!(unwrap_token_value(it.next()), 1.into());
expect_lexer_end(&mut it);
// Test splitting with identifiers
let mut it = Lexer::new("0x31zb");
assert_eq!(unwrap_token_value(it.next()), 0x31.into());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("zb".to_string())
);
expect_lexer_end(&mut it);
// Test splitting with whitespace
let mut it = Lexer::new("0x31/**/32");
assert_eq!(unwrap_token_value(it.next()), 0x31.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test splitting with punctuation
let mut it = Lexer::new("0x31+32");
assert_eq!(unwrap_token_value(it.next()), 0x31.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test that 2^64 produces an overflow error but that 2^64-1 correctly parses (even if it might
// produce an error down the line).
let mut it = Lexer::new("0x10000000000000000");
assert_eq!(unwrap_error(it.next()), PreprocessorError::IntegerOverflow);
let mut it = Lexer::new("0xFFFFFFFFFFFFFFFF");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Integer(Integer {
value: 18446744073709551615,
signed: true,
width: 32
})
);
expect_lexer_end(&mut it);
}
#[test]
fn lex_octal() {
// Test some basic cases
let mut it = Lexer::new("01 00u 07654 01234u");
assert_eq!(unwrap_token_value(it.next()), 1.into());
assert_eq!(unwrap_token_value(it.next()), 0u32.into());
assert_eq!(unwrap_token_value(it.next()), 4012.into());
assert_eq!(unwrap_token_value(it.next()), 668u32.into());
expect_lexer_end(&mut it);
// Test with redundant zeroes
let mut it = Lexer::new("0000 0000000000000001");
assert_eq!(unwrap_token_value(it.next()), 0.into());
assert_eq!(unwrap_token_value(it.next()), 1.into());
expect_lexer_end(&mut it);
// Test splitting with identifiers
let mut it = Lexer::new("031zb");
assert_eq!(unwrap_token_value(it.next()), 25.into());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("zb".to_string())
);
expect_lexer_end(&mut it);
// Test splitting with whitespace
let mut it = Lexer::new("031/**/32");
assert_eq!(unwrap_token_value(it.next()), 25.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// TODO(kangz): Fix octal numbers consuming 8 and 9s as well. This can be done with extra logic
// already but is not worth the complexity.
// Test splitting with 8 and 9
// let mut it = Lexer::new("039 038");
// assert_eq!(unwrap_token_value(it.next()), 3.into());
// assert_eq!(unwrap_token_value(it.next()), 9.into());
// assert_eq!(unwrap_token_value(it.next()), 3.into());
// assert_eq!(unwrap_token_value(it.next()), 8.into());
// expect_lexer_end(&mut it);
// Test splitting with punctuation
let mut it = Lexer::new("031+32");
assert_eq!(unwrap_token_value(it.next()), 25.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test that 2^64 produces an overflow error but that 2^64-1 correctly parses (even if it might
// produce an error down the line).
let mut it = Lexer::new("02000000000000000000000");
assert_eq!(unwrap_error(it.next()), PreprocessorError::IntegerOverflow);
let mut it = Lexer::new("01777777777777777777777");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Integer(Integer {
value: 18446744073709551615,
signed: true,
width: 32
})
);
expect_lexer_end(&mut it);
}
#[test]
fn lex_float() {
// Test a couple simple cases.
let mut it = Lexer::new("1.0 0.0");
assert_eq!(unwrap_token_value(it.next()), 1.0f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.0f32.into());
expect_lexer_end(&mut it);
// Test parsing with a leading .
let mut it = Lexer::new(".99 0.01 .00000000");
assert_eq!(unwrap_token_value(it.next()), 0.99f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.01f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.0f32.into());
expect_lexer_end(&mut it);
// Test parsing with nothing after the .
let mut it = Lexer::new("42. 0.");
assert_eq!(unwrap_token_value(it.next()), 42.0f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.0f32.into());
expect_lexer_end(&mut it);
// Test parsing with the float suffix
let mut it = Lexer::new("1000.f 1.f .2f");
assert_eq!(unwrap_token_value(it.next()), 1000.0f32.into());
assert_eq!(unwrap_token_value(it.next()), 1.0f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.2f32.into());
expect_lexer_end(&mut it);
// Test parsing with exponents
// - with / without float suffixes
// - at different points in the float parsing.
let mut it = Lexer::new("3e10 4.1e-10f .01e12F 4.1e+10f");
assert_eq!(unwrap_token_value(it.next()), 3e10f32.into());
assert_eq!(unwrap_token_value(it.next()), 4.1e-10f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.01e12f32.into());
assert_eq!(unwrap_token_value(it.next()), 4.1e+10f32.into());
expect_lexer_end(&mut it);
// Test parsing with exponents
// - After values looking like octal integer (works)
// - After values looking like hexadecimal integer (doesn't work)
let mut it = Lexer::new("05e2 0x1e-2");
assert_eq!(unwrap_token_value(it.next()), 5e2f32.into());
assert_eq!(unwrap_token_value(it.next()), 0x1Ei32.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Minus.into());
assert_eq!(unwrap_token_value(it.next()), 2i32.into());
// Test parsing with nothing valid after the 'e' (technically it shouldn't
// be an error, but there's no language where that sequence of tokens is
// valid).
let mut it = Lexer::new("1.0e");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::FloatParsingError
);
// Check that 16bit and 64bit suffixes produce errors
let mut it = Lexer::new("1.0l");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
let mut it = Lexer::new("1.0L");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
let mut it = Lexer::new("1.0h");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
let mut it = Lexer::new("1.0H");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
}
#[test]
fn lex_punctuation() {
// Test parsing some of the token (but not all, that'd be too many tests!)
let mut it = Lexer::new("+ != <<=");
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
let token = unwrap_token(it.next());
assert_eq!(token.value, Punct::NotEqual.into());
assert_eq!(token.location, l(1, 2..4),);
let token = unwrap_token(it.next());
assert_eq!(token.value, Punct::LeftShiftAssign.into());
assert_eq!(token.location, l(1, 5..8),);
expect_lexer_end(&mut it);
// Test parsing a token that's a prefix of another one just before EOF
let mut it = Lexer::new("<");
assert_eq!(unwrap_token_value(it.next()), Punct::LeftAngle.into());
expect_lexer_end(&mut it);
// Test \\\n doesn't split the token
let mut it = Lexer::new("=\\\n=");
assert_eq!(unwrap_token_value(it.next()), Punct::EqualEqual.into());
expect_lexer_end(&mut it);
// Test whitespace splits the token
let mut it = Lexer::new("+/**/=");
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Equal.into());
expect_lexer_end(&mut it);
// Test a number stops processing the token
let mut it = Lexer::new("!1");
assert_eq!(unwrap_token_value(it.next()), Punct::Bang.into());
assert_eq!(unwrap_token_value(it.next()), 1.into());
expect_lexer_end(&mut it);
// Test an identifier stops processing the token
let mut it = Lexer::new("&a");
assert_eq!(unwrap_token_value(it.next()), Punct::Ampersand.into());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("a".to_string())
);
expect_lexer_end(&mut it);
// Test whitespace splits the token
let mut it = Lexer::new(">/**/>");
assert_eq!(unwrap_token_value(it.next()), Punct::RightAngle.into());
assert_eq!(unwrap_token_value(it.next()), Punct::RightAngle.into());
expect_lexer_end(&mut it);
// Test that tokens are parsed greedily: `a+++++b` is `a ++ ++ + b` (invalid GLSL) and not
// `(a ++) + (++ b)` (valid GLSL)
let mut it = Lexer::new("+++++");
assert_eq!(unwrap_token_value(it.next()), Punct::Increment.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Increment.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
expect_lexer_end(&mut it);
// Test that an invalid char produces an error
let mut it = Lexer::new("@");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::UnexpectedCharacter
);
// Extra punctuation tests for code coverage.
let mut it = Lexer::new("<= >= += -= &= || |= | ^= { } ] ? .");
assert_eq!(unwrap_token_value(it.next()), Punct::LessEqual.into());
assert_eq!(unwrap_token_value(it.next()), Punct::GreaterEqual.into());
assert_eq!(unwrap_token_value(it.next()), Punct::AddAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::SubAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::AndAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::LogicalOr.into());
assert_eq!(unwrap_token_value(it.next()), Punct::OrAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Pipe.into());
assert_eq!(unwrap_token_value(it.next()), Punct::XorAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::LeftBrace.into());
assert_eq!(unwrap_token_value(it.next()), Punct::RightBrace.into());
assert_eq!(unwrap_token_value(it.next()), Punct::RightBracket.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Question.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Dot.into());
expect_lexer_end(&mut it);
}
#[test]
fn lex_had_comments() {
// Test that had_comments doesn't get set to true if there are no comments.
let mut it = Lexer::new("#version");
assert!(!it.had_comments());
assert_eq!(unwrap_token_value(it.next()), TokenValue::Hash);
assert!(!it.had_comments());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("version".to_string())
);
assert!(!it.had_comments());
expect_lexer_end(&mut it);
// Test that had_comments doesn't get triggered by its sentinel value of '\r'
let mut it = Lexer::new("\r!");
assert!(!it.had_comments());
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(unwrap_token_value(it.next()), Punct::Bang.into());
assert!(!it.had_comments());
expect_lexer_end(&mut it);
// Test that had_comments gets triggered by // comments
let mut it = Lexer::new("//\n!");
assert!(!it.had_comments());
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert!(it.had_comments());
assert_eq!(unwrap_token_value(it.next()), Punct::Bang.into());
assert!(it.had_comments());
expect_lexer_end(&mut it);
// Test that had_comments gets triggered by /**/ comments
let mut it = Lexer::new("/**/#version");
assert!(!it.had_comments());
assert_eq!(unwrap_token_value(it.next()), TokenValue::Hash);
assert!(it.had_comments());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("version".to_string())
);
assert!(it.had_comments());
expect_lexer_end(&mut it);
}
// TODO test has_whitespace

11
vendor/pp-rs/src/lib.rs vendored Normal file
View File

@@ -0,0 +1,11 @@
extern crate unicode_xid;
#[allow(clippy::match_like_matches_macro)]
mod lexer;
pub mod pp;
pub mod token;
#[cfg(test)]
mod lexer_tests;
#[cfg(test)]
mod pp_tests;

1034
vendor/pp-rs/src/pp.rs vendored Normal file

File diff suppressed because it is too large

457
vendor/pp-rs/src/pp/if_parser.rs vendored Normal file

@@ -0,0 +1,457 @@
use crate::token::{Integer, PreprocessorError, Punct};
use super::{Define, Location, MacroProcessor, MeLexer, Step, StepExit, Token, TokenValue};
use std::{collections::HashMap, convert::TryInto, rc::Rc, vec};
struct IfLexer<'macros> {
tokens: vec::IntoIter<Token>,
defines: &'macros HashMap<String, Rc<Define>>,
}
pub(super) struct IfParser<'macros> {
lexer: IfLexer<'macros>,
macro_processor: MacroProcessor,
location: Location,
parsing_if: bool,
carry: Option<Token>,
}
impl<'macros> IfParser<'macros> {
/// Builds a new IfParser that can be reused
///
/// `parsing_if` indicates whether or not undefined macros should be
/// replaced with 0
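///
/// For example, when evaluating `#if FOO + 1` with `FOO` undefined, `FOO`
/// expands to 0 and the whole expression evaluates to 1 (true).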
pub fn new(
tokens: Vec<Token>,
defines: &'macros HashMap<String, Rc<Define>>,
location: Location,
parsing_if: bool,
) -> Self {
IfParser {
lexer: IfLexer {
tokens: tokens.into_iter(),
defines,
},
macro_processor: MacroProcessor::default(),
location,
parsing_if,
carry: None,
}
}
/// Helper method to consume the next token without define expansion
fn raw_next(&mut self) -> Option<Token> {
self.carry
.take()
.or_else(|| self.macro_processor.step(&mut self.lexer).ok())
}
/// Helper method to consume the next token with define expansion
fn next(&mut self) -> Step<Option<Token>> {
let token = match self.raw_next() {
Some(t) => t,
None => return Ok(None),
};
Ok(match token.value {
TokenValue::Ident(ref name) if name != "defined" => {
match self.add_define(name, token.location)? {
Some(t) => Some(t),
None => self.next()?,
}
}
_ => Some(token),
})
}
/// Helper method to peek at the next token with define expansion
pub fn peek(&mut self) -> Step<Option<Token>> {
self.carry = self.next()?;
Ok(self.carry.clone())
}
/// Helper method to consume the next token without define expansion
///
/// Returns an EOI error if there are no further tokens
fn expect_raw_next(&mut self) -> Step<Token> {
self.raw_next().ok_or(StepExit::Error((
PreprocessorError::UnexpectedEndOfInput,
self.location,
)))
}
/// Helper method to consume the next token with define expansion
///
/// Returns an EOI error if there are no further tokens
fn expect_next(&mut self) -> Step<Token> {
self.next()?.ok_or(StepExit::Error((
PreprocessorError::UnexpectedEndOfInput,
self.location,
)))
}
/// Helper method to peek at the next token with define expansion
///
/// Returns an EOI error if there are no further tokens
fn expect_peek(&mut self) -> Step<Token> {
self.peek()?.ok_or(StepExit::Error((
PreprocessorError::UnexpectedEndOfInput,
self.location,
)))
}
fn add_define(&mut self, name: &str, location: Location) -> Step<Option<Token>> {
if self
.macro_processor
.start_define_invocation(name, location, &mut self.lexer)?
{
Ok(None)
} else if self.parsing_if {
Ok(Some(Token {
value: TokenValue::Integer(Integer {
value: 0,
signed: true,
width: 64,
}),
location,
}))
} else {
Err(StepExit::Error((
PreprocessorError::UnexpectedToken(TokenValue::Ident(name.to_string())),
location,
)))
}
}
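/// Evaluates the `defined` operator: both the `defined NAME` and the
/// `defined(NAME)` forms are accepted, yielding 1 if the name is currently
/// defined and 0 otherwise.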
fn handle_defined(&mut self) -> Step<i64> {
let next = self.expect_raw_next()?;
match next.value {
TokenValue::Ident(ref name) => Ok(self.lexer.defines.get(name).is_some() as i64),
TokenValue::Punct(Punct::LeftParen) => {
let name_token = self.expect_raw_next()?;
let name = match name_token.value {
TokenValue::Ident(name) => Ok(name),
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
name_token.location,
))),
}?;
let close_brace = self.expect_next()?;
match close_brace.value {
TokenValue::Punct(Punct::RightParen) => {
Ok(self.lexer.defines.get(&name).is_some() as i64)
}
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
close_brace.location,
))),
}
}
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
next.location,
))),
}
}
fn parse_atom(&mut self) -> Step<i64> {
let token = self.expect_next()?;
match token.value {
TokenValue::Ident(name) => {
debug_assert_eq!(name, "defined");
self.handle_defined()
}
TokenValue::Integer(int) => Ok(int.value as i64),
TokenValue::Punct(Punct::LeftParen) => {
let val = self.parse_logical_or()?;
let close_brace = self.expect_next()?;
match close_brace.value {
TokenValue::Punct(Punct::RightParen) => Ok(val),
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
close_brace.location,
))),
}
}
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
token.location,
))),
}
}
fn parse_unary(&mut self) -> Step<i64> {
match self.expect_peek()?.value {
TokenValue::Punct(punct) => match punct {
Punct::Plus | Punct::Minus | Punct::Bang | Punct::Tilde => {
self.next()?;
let val = self.parse_unary()?;
Ok(match punct {
Punct::Plus => val,
Punct::Minus => -val,
Punct::Bang => (val == 0) as i64,
Punct::Tilde => !val,
_ => unreachable!(),
})
}
_ => self.parse_atom(),
},
_ => self.parse_atom(),
}
}
fn parse_multiplicative(&mut self) -> Step<i64> {
let mut left = self.parse_unary()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::Star | Punct::Slash | Punct::Percent = punct {
self.next()?;
let right = self.parse_unary()?;
match punct {
Punct::Star => {
left = left.checked_mul(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
Punct::Slash => {
left = left.checked_div(right).ok_or(StepExit::Error((
PreprocessorError::DivisionByZero,
self.location,
)))?
}
Punct::Percent => {
left = left.checked_rem(right).ok_or(StepExit::Error((
PreprocessorError::DivisionByZero,
self.location,
)))?
}
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_additive(&mut self) -> Step<i64> {
let mut left = self.parse_multiplicative()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::Plus | Punct::Minus = punct {
self.next()?;
let right = self.parse_multiplicative()?;
match punct {
Punct::Plus => {
left = left.checked_add(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
Punct::Minus => {
left = left.checked_sub(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_shift(&mut self) -> Step<i64> {
let mut left = self.parse_additive()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::LeftShift | Punct::RightShift = punct {
self.next()?;
let right = self.parse_additive()?;
match punct {
Punct::LeftShift => {
let right = right.try_into().map_err(|_| {
StepExit::Error((PreprocessorError::IntegerOverflow, self.location))
})?;
left = left.checked_shl(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
Punct::RightShift => {
let right = right.try_into().map_err(|_| {
StepExit::Error((PreprocessorError::IntegerOverflow, self.location))
})?;
left = left.checked_shr(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_comparative(&mut self) -> Step<i64> {
let mut left = self.parse_shift()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::LeftAngle | Punct::RightAngle | Punct::LessEqual | Punct::GreaterEqual =
punct
{
self.next()?;
let right = self.parse_shift()?;
match punct {
Punct::LeftAngle => left = (left < right) as i64,
Punct::RightAngle => left = (left > right) as i64,
Punct::LessEqual => left = (left <= right) as i64,
Punct::GreaterEqual => left = (left >= right) as i64,
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_equality(&mut self) -> Step<i64> {
let mut left = self.parse_comparative()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::EqualEqual | Punct::NotEqual = punct {
self.next()?;
let right = self.parse_comparative()?;
match punct {
Punct::EqualEqual => left = (left == right) as i64,
Punct::NotEqual => left = (left != right) as i64,
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_bit_and(&mut self) -> Step<i64> {
let mut left = self.parse_equality()?;
while let Some(TokenValue::Punct(Punct::Ampersand)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_equality()?;
left &= right
}
Ok(left)
}
fn parse_bit_xor(&mut self) -> Step<i64> {
let mut left = self.parse_bit_and()?;
while let Some(TokenValue::Punct(Punct::Caret)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_bit_and()?;
left ^= right
}
Ok(left)
}
fn parse_bit_or(&mut self) -> Step<i64> {
let mut left = self.parse_bit_xor()?;
while let Some(TokenValue::Punct(Punct::Pipe)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_bit_xor()?;
left |= right
}
Ok(left)
}
fn parse_logical_and(&mut self) -> Step<i64> {
let mut left = self.parse_bit_or()?;
while let Some(TokenValue::Punct(Punct::LogicalAnd)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_bit_or()?;
left = (left != 0 && right != 0) as i64;
}
Ok(left)
}
fn parse_logical_or(&mut self) -> Step<i64> {
let mut left = self.parse_logical_and()?;
while let Some(TokenValue::Punct(Punct::LogicalOr)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_logical_and()?;
left = (left != 0 || right != 0) as i64;
}
Ok(left)
}
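/// Evaluates a full preprocessor expression. The parse_* methods above form
/// a recursive-descent precedence ladder matching C: logical OR binds
/// loosest, then logical AND, bitwise OR, XOR, AND, equality, comparisons,
/// shifts, additive and multiplicative operators, with unary operators and
/// atoms binding tightest.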
pub fn evaluate_expression(&mut self) -> Step<i64> {
self.parse_logical_or()
}
}
impl<'macros> MeLexer for IfLexer<'macros> {
fn step(&mut self) -> Step<Token> {
self.tokens.next().ok_or(StepExit::Finished)
}
fn get_define(&self, name: &str) -> Option<&Rc<Define>> {
self.defines.get(name)
}
fn apply_line_offset(&self, line: u32, _: Location) -> Step<u32> {
Ok(line)
}
}

1413
vendor/pp-rs/src/pp_tests.rs vendored Normal file

File diff suppressed because it is too large

159
vendor/pp-rs/src/token.rs vendored Normal file

@@ -0,0 +1,159 @@
//TODO: Source file
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Location {
/// Byte offset into the source string where the first char begins
pub start: u32,
/// Byte offset into the source string where the first char not belonging to
/// this `Location` begins
pub end: u32,
/// Used internally in the `#line` directive and the `__LINE__` macro
pub(crate) line: u32,
}
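// For example, lexing "a#b" gives the Hash token
// Location { start: 1, end: 2, line: 1 } (cf. lex_hash in lexer_tests.rs).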
impl Default for Location {
fn default() -> Self {
Location {
start: 0,
end: 0,
line: 1,
}
}
}
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Punct {
// Compound assignments
AddAssign,
SubAssign,
MulAssign,
DivAssign,
ModAssign,
LeftShiftAssign,
RightShiftAssign,
AndAssign,
XorAssign,
OrAssign,
// Two character punctuation
Increment,
Decrement,
LogicalAnd,
LogicalOr,
LogicalXor,
LessEqual,
GreaterEqual,
EqualEqual,
NotEqual,
LeftShift,
RightShift,
// Parenthesis or similar
LeftBrace,
RightBrace,
LeftParen,
RightParen,
LeftBracket,
RightBracket,
// Other one character punctuation
LeftAngle,
RightAngle,
Semicolon,
Comma,
Colon,
Dot,
Equal,
Bang,
Minus,
Tilde,
Plus,
Star,
Slash,
Percent,
Pipe,
Caret,
Ampersand,
Question,
}
#[derive(Clone, PartialEq, Debug)]
// TODO location?
pub enum PreprocessorError {
IntegerOverflow,
FloatParsingError,
UnexpectedCharacter,
UnexpectedToken(TokenValue),
UnexpectedHash,
UnexpectedNewLine,
UnexpectedEndOfInput,
TooFewDefineArguments,
TooManyDefineArguments,
ErrorDirective,
DuplicateParameter,
UnknownDirective,
DefineRedefined,
ElifOutsideOfBlock,
ElseOutsideOfBlock,
EndifOutsideOfBlock,
ElifAfterElse,
MoreThanOneElse,
UnfinishedBlock,
LineOverflow,
NotSupported16BitLiteral,
NotSupported64BitLiteral,
MacroNotDefined,
RecursionLimitReached,
DivisionByZero,
RemainderByZero,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Integer {
pub value: u64,
pub signed: bool,
pub width: i32,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Float {
pub value: f32,
pub width: i32,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Version {
pub tokens: Vec<Token>,
pub is_first_directive: bool,
pub has_comments_before: bool,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Extension {
pub tokens: Vec<Token>,
pub has_non_directive_before: bool,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Pragma {
pub tokens: Vec<Token>,
}
#[derive(Clone, PartialEq, Debug)]
pub enum TokenValue {
Ident(String),
Integer(Integer),
Float(Float),
Punct(Punct),
Version(Version),
Extension(Extension),
Pragma(Pragma),
}
#[derive(Clone, PartialEq, Debug)]
pub struct Token {
pub value: TokenValue,
pub location: Location,
// TODO macro invocation stack?
}