Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

vendor/pp-rs/src/lexer.rs (vendored, new file, 628 lines)

@@ -0,0 +1,628 @@
use crate::token::{Float, Integer, Location, PreprocessorError, Punct};
use std::str::Chars;
use unicode_xid::UnicodeXID;
type CharAndLine = (char, u32);
// GLSL ES 3.20 specification section 3.10. Logical Phases of Compilation
// This iterator implements phases 4 and 5 of the logical phases of compilation:
//
// 4. Each {carriage-return, line-feed} and {line-feed, carriage return} sequence is replaced by
// a single newline. All remaining carriage-return and line-feed characters are then each
// replaced by a newline.
//
// 5. Line numbering for each character, which is equal to the number of preceding newlines plus
// one, is noted. Note this can only be subsequently changed by the #line directive and is not
// affected by the removal of newlines in phase 6 of compilation.
//
// It expects that phases 1 to 3 are already done and that valid utf8 is passed in.
#[derive(Clone)]
pub struct CharsAndLine<'a> {
inner: Chars<'a>,
line: u32,
}
impl<'a> CharsAndLine<'a> {
pub fn new(input: &'a str) -> Self {
CharsAndLine {
inner: input.chars(),
line: 1,
}
}
pub fn get_current_ptr(&self) -> *const u8 {
self.inner.as_str().as_ptr()
}
}
impl<'a> Iterator for CharsAndLine<'a> {
type Item = CharAndLine;
fn next(&mut self) -> Option<Self::Item> {
let current = self.inner.next()?;
match current {
'\n' => {
// Consume the character but see if we can grab a \r that follows
let mut peek_inner = self.inner.clone();
if peek_inner.next() == Some('\r') {
self.inner = peek_inner;
}
let res = Some(('\n', self.line));
self.line += 1;
res
}
'\r' => {
// Consume the character but see if we can grab a \n that follows
let mut peek_inner = self.inner.clone();
if peek_inner.next() == Some('\n') {
self.inner = peek_inner;
}
let res = Some(('\n', self.line));
self.line += 1;
res
}
_ => Some((current, self.line)),
}
}
}
// An iterator that adds stage 6 on top of CharsAndLine:
//
// 6. Wherever a backslash ('\') occurs immediately before a newline, both are deleted. Note that
// no whitespace is substituted, thereby allowing a single preprocessing token to span a
// newline. This operation is not recursive; any new {backslash newline} sequences generated
// are not removed.
#[derive(Clone)]
pub struct SkipBackslashNewline<'a> {
inner: CharsAndLine<'a>,
}
impl<'a> SkipBackslashNewline<'a> {
pub fn new(input: &'a str) -> Self {
SkipBackslashNewline {
inner: CharsAndLine::new(input),
}
}
pub fn get_current_ptr(&self) -> *const u8 {
self.inner.get_current_ptr()
}
}
impl<'a> Iterator for SkipBackslashNewline<'a> {
type Item = CharAndLine;
fn next(&mut self) -> Option<Self::Item> {
let mut current = self.inner.next()?;
while current.0 == '\\' {
let mut peek_inner = self.inner.clone();
if let Some(('\n', _)) = peek_inner.next() {
self.inner = peek_inner;
current = self.next()?;
} else {
return Some(current);
}
}
Some(current)
}
}
// An iterator that adds stage 7 on top of SkipBackslashNewline:
//
// 7. All comments are replaced with a single space. All (non-zero) characters and invalid UTF-8
// byte sequences are allowed within comments. '//' style comments include the initial '//'
// marker and continue up to, but not including, the terminating newline. '/*' style comments
// include both the start and end marker.
#[derive(Clone)]
pub struct ReplaceComments<'a> {
inner: SkipBackslashNewline<'a>,
}
// The lexer needs to know which whitespace came from a comment so it can report whether any
// comment was ever processed. To avoid adding state we use a sentinel value of '\r', because
// all '\r' have already been consumed and turned into '\n' by CharsAndLine.
pub const COMMENT_SENTINEL_VALUE: char = '\r';
impl<'a> ReplaceComments<'a> {
pub fn new(input: &'a str) -> Self {
ReplaceComments {
inner: SkipBackslashNewline::new(input),
}
}
pub fn get_current_ptr(&self) -> *const u8 {
self.inner.get_current_ptr()
}
}
impl<'a> Iterator for ReplaceComments<'a> {
type Item = CharAndLine;
fn next(&mut self) -> Option<Self::Item> {
let current = self.inner.next()?;
if current.0 != '/' {
debug_assert!(current.0 != COMMENT_SENTINEL_VALUE);
return Some(current);
}
let mut peek_inner = self.inner.clone();
match peek_inner.next() {
// The // case, consume until but not including the next \n
Some(('/', _)) => {
self.inner = peek_inner.clone();
while let Some((next, _)) = peek_inner.next() {
if next == '\n' {
break;
}
self.inner = peek_inner.clone();
}
Some((COMMENT_SENTINEL_VALUE, current.1))
}
// The /* case, consume until the closing */
Some(('*', _)) => {
let mut was_star = false;
while let Some((next, _)) = peek_inner.next() {
if was_star && next == '/' {
break;
}
was_star = next == '*';
}
self.inner = peek_inner;
Some((COMMENT_SENTINEL_VALUE, current.1))
}
// Not // or /*, do nothing
_ => Some(current),
}
}
}
// A lexer for GLSL tokens that also emits a couple of extra tokens that are useful to the
// preprocessor: # and newlines. It also includes metadata on each token: whether it is at the
// start of a line and whether it has leading whitespace.
// This is a helper iterator to abstract away the tracking of location data (offset, line) from
// `Lexer`. It looks like a Peekable<Iterator<char>> with `next_char` and `peek_char` but also
// allows querying the last seen/consumed lines / offset.
#[derive(Clone)]
struct LexerCharIterator<'a> {
inner: ReplaceComments<'a>,
peeked: Option<(CharAndLine, *const u8)>,
last_consumed: (CharAndLine, *const u8),
input_start: *const u8,
}
pub const NONE_CONSUMED_SENTINEL_VALUE: char = '\r';
impl<'a> LexerCharIterator<'a> {
pub fn new(input: &'a str) -> Self {
LexerCharIterator {
inner: ReplaceComments::new(input),
peeked: None,
last_consumed: ((NONE_CONSUMED_SENTINEL_VALUE, 0), input.as_bytes().as_ptr()),
input_start: input.as_bytes().as_ptr(),
}
}
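// Consume the next character: take the peeked value if there is one, otherwise pull from
// `inner`, recording the pointer to the character's start before advancing.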
fn next_char(&mut self) -> Option<char> {
self.last_consumed = match self.peeked.take() {
Some(v) => v,
None => {
let ptr = self.inner.get_current_ptr();
(self.inner.next()?, ptr)
}
};
Some(self.last_consumed.0 .0)
}
fn peek_char(&mut self) -> Option<char> {
match self.peeked {
Some(v) => Some(v.0 .0),
None => {
let ptr = self.inner.get_current_ptr();
let next = self.inner.next()?;
self.peeked = Some((next, ptr));
Some(next.0)
}
}
}
fn get_last_seen_line(&self) -> u32 {
self.peeked.unwrap_or(self.last_consumed).0 .1
}
fn get_last_seen_start_offset(&self) -> usize {
self.peeked.unwrap_or(self.last_consumed).1 as usize - self.input_start as usize
}
fn get_last_consumed_end_offset(&self) -> usize {
self.last_consumed.1 as usize - self.input_start as usize
+ self.last_consumed.0 .0.len_utf8()
}
}
// A superset of the token value returned by the preprocessor
#[derive(Clone, PartialEq, Debug)]
pub enum TokenValue {
// Preprocessor specific token values
Hash,
NewLine,
// Regular token values
Ident(String),
Integer(Integer),
Float(Float),
Punct(Punct),
}
impl From<Punct> for TokenValue {
fn from(punct: Punct) -> Self {
TokenValue::Punct(punct)
}
}
#[derive(Clone, PartialEq, Debug)]
pub struct Token {
pub value: TokenValue,
pub location: Location,
pub leading_whitespace: bool,
pub start_of_line: bool,
}
pub type LexerItem = Result<Token, (PreprocessorError, Location)>;
pub struct Lexer<'a> {
inner: LexerCharIterator<'a>,
leading_whitespace: bool,
start_of_line: bool,
had_comments: bool,
}
impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Self {
// TODO bail out on source that is too large.
Lexer {
inner: LexerCharIterator::new(input),
leading_whitespace: true,
start_of_line: true,
had_comments: false,
}
}
pub fn had_comments(&self) -> bool {
self.had_comments
}
// TODO: Make a runtime flag to toggle unicode identifier support.
// The GLSL spec only allows ASCII identifiers.
fn parse_identifier(&mut self) -> Result<TokenValue, PreprocessorError> {
let mut identifier = String::default();
if let Some(c) = self.next_char_if(|c| c.is_xid_start() || c == '_') {
identifier.push(c);
}
let rest = self.consume_chars(|c| c.is_xid_continue());
identifier.push_str(&rest);
// TODO check if identifier is larger than the limit.
Ok(TokenValue::Ident(identifier))
}
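// Consume an optional 'u'/'U' suffix; returns true when the literal is signed (i.e. when no
// unsigned suffix was present).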
fn parse_integer_signedness_suffix(&mut self) -> bool {
self.next_char_if(|c| c == 'u' || c == 'U').is_none()
}
fn parse_integer_width_suffix(&mut self) -> Result<i32, PreprocessorError> {
match self.inner.peek_char() {
Some('l') | Some('L') => Err(PreprocessorError::NotSupported64BitLiteral),
Some('s') | Some('S') => Err(PreprocessorError::NotSupported16BitLiteral),
_ => Ok(32),
}
}
fn parse_float_width_suffix(&mut self) -> Result<i32, PreprocessorError> {
match self.inner.peek_char() {
Some('l') | Some('L') => Err(PreprocessorError::NotSupported64BitLiteral),
Some('h') | Some('H') => Err(PreprocessorError::NotSupported16BitLiteral),
Some('f') | Some('F') => {
self.inner.next_char();
Ok(32)
}
_ => Ok(32),
}
}
fn next_char_if(&mut self, predicate: impl FnOnce(char) -> bool) -> Option<char> {
if let Some(c) = self.inner.peek_char() {
if predicate(c) {
return self.inner.next_char();
}
}
None
}
fn consume_chars(&mut self, filter: impl Fn(char) -> bool) -> String {
let mut result: String = Default::default();
while let Some(c) = self.next_char_if(&filter) {
result.push(c);
}
result
}
fn parse_number(&mut self, first_char: char) -> Result<TokenValue, PreprocessorError> {
let mut is_float = false;
let mut integer_radix = 10;
let mut raw: String = Default::default();
raw.push(first_char);
// Handle hexadecimal numbers that need to consume a..f in addition to digits.
if first_char == '0' {
match self.inner.peek_char() {
Some('x') | Some('X') => {
self.inner.next_char();
raw += &self.consume_chars(|c| matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F'));
integer_radix = 16;
}
// Octal numbers can also be the prefix of a float like 00009.0f, so we parse all digits
// and not just 0..7 (that parsing is done below); here we only need to remember the
// radix.
Some('0'..='9') => {
integer_radix = 8;
}
_ => {}
};
}
if first_char == '.' {
is_float = true;
} else {
// Parse any digits at the end of integers, or for the non-fractional part of floats.
raw += &self.consume_chars(|c| ('0'..='9').contains(&c));
if self.next_char_if(|c| c == '.').is_some() {
raw.push('.');
is_float = true;
}
}
// At this point either we're an integer missing only suffixes, or we're a float with
// everything up to the . consumed.
if is_float {
raw += &self.consume_chars(|c| ('0'..='9').contains(&c));
}
// Handle scientific notation with an (e|E)(+|-|)\d+ suffix when we're a float, or an
// integer that could turn into a float by adding an exponent to it (so 0x1E-1
// isn't recognized as a float).
if (is_float || integer_radix == 8 || integer_radix == 10)
&& self.next_char_if(|c| c == 'e' || c == 'E').is_some()
{
raw.push('e');
is_float = true;
match self.inner.peek_char() {
Some('+') => {
self.inner.next_char();
raw.push('+');
}
Some('-') => {
self.inner.next_char();
raw.push('-');
}
_ => {}
}
// TODO: what should we do when there is no number after the exponent?
raw += &self.consume_chars(|c| ('0'..='9').contains(&c));
}
if is_float {
// TODO: Depending on the GLSL version make it an error to not have the suffix.
let width = self.parse_float_width_suffix()?;
Ok(TokenValue::Float(Float {
value: raw
.parse::<f32>()
.map_err(|_| PreprocessorError::FloatParsingError)?,
width,
}))
} else {
let signed = self.parse_integer_signedness_suffix();
let width = self.parse_integer_width_suffix()?;
// Skip the initial 0 of hexadecimal and octal literals (for hexadecimal the 'x' was never added to raw).
if integer_radix != 10 {
raw = raw.split_off(1);
}
Ok(TokenValue::Integer(Integer {
value: u64::from_str_radix(&raw, integer_radix)
.map_err(|_err| PreprocessorError::IntegerOverflow)?,
signed,
width,
}))
}
}
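// Parse punctuation by greedily matching up to three characters against the table below, then
// rewinding to the save point and re-consuming exactly the length of the match.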
fn parse_punctuation(&mut self) -> Result<TokenValue, PreprocessorError> {
let save_point = self.inner.clone();
let char0 = self.inner.next_char().unwrap_or('\0');
let char1 = self.inner.next_char().unwrap_or('\0');
let char2 = self.inner.next_char().unwrap_or('\0');
let maybe_punct = match (char0, char1, char2) {
('<', '<', '=') => Some((Punct::LeftShiftAssign, 3)),
('<', '<', _) => Some((Punct::LeftShift, 2)),
('<', '=', _) => Some((Punct::LessEqual, 2)),
('<', _, _) => Some((Punct::LeftAngle, 1)),
('>', '>', '=') => Some((Punct::RightShiftAssign, 3)),
('>', '>', _) => Some((Punct::RightShift, 2)),
('>', '=', _) => Some((Punct::GreaterEqual, 2)),
('>', _, _) => Some((Punct::RightAngle, 1)),
('+', '+', _) => Some((Punct::Increment, 2)),
('+', '=', _) => Some((Punct::AddAssign, 2)),
('+', _, _) => Some((Punct::Plus, 1)),
('-', '-', _) => Some((Punct::Decrement, 2)),
('-', '=', _) => Some((Punct::SubAssign, 2)),
('-', _, _) => Some((Punct::Minus, 1)),
('&', '&', _) => Some((Punct::LogicalAnd, 2)),
('&', '=', _) => Some((Punct::AndAssign, 2)),
('&', _, _) => Some((Punct::Ampersand, 1)),
('|', '|', _) => Some((Punct::LogicalOr, 2)),
('|', '=', _) => Some((Punct::OrAssign, 2)),
('|', _, _) => Some((Punct::Pipe, 1)),
('^', '^', _) => Some((Punct::LogicalXor, 2)),
('^', '=', _) => Some((Punct::XorAssign, 2)),
('^', _, _) => Some((Punct::Caret, 1)),
('=', '=', _) => Some((Punct::EqualEqual, 2)),
('=', _, _) => Some((Punct::Equal, 1)),
('!', '=', _) => Some((Punct::NotEqual, 2)),
('!', _, _) => Some((Punct::Bang, 1)),
('*', '=', _) => Some((Punct::MulAssign, 2)),
('*', _, _) => Some((Punct::Star, 1)),
('/', '=', _) => Some((Punct::DivAssign, 2)),
('/', _, _) => Some((Punct::Slash, 1)),
('%', '=', _) => Some((Punct::ModAssign, 2)),
('%', _, _) => Some((Punct::Percent, 1)),
('(', _, _) => Some((Punct::LeftParen, 1)),
(')', _, _) => Some((Punct::RightParen, 1)),
('{', _, _) => Some((Punct::LeftBrace, 1)),
('}', _, _) => Some((Punct::RightBrace, 1)),
('[', _, _) => Some((Punct::LeftBracket, 1)),
(']', _, _) => Some((Punct::RightBracket, 1)),
(',', _, _) => Some((Punct::Comma, 1)),
(';', _, _) => Some((Punct::Semicolon, 1)),
(':', _, _) => Some((Punct::Colon, 1)),
('~', _, _) => Some((Punct::Tilde, 1)),
('?', _, _) => Some((Punct::Question, 1)),
// Note that Dot (".") is handled in Lexer::next since it can be
// either punctuation or the start of a floating point number.
_ => None,
};
if let Some((punct, size)) = maybe_punct {
self.inner = save_point;
for _ in 0..size {
self.inner.next_char();
}
Ok(punct.into())
} else if char0 == '#' {
self.inner = save_point;
self.inner.next_char();
Ok(TokenValue::Hash)
} else {
Err(PreprocessorError::UnexpectedCharacter)
}
}
}
impl<'a> Iterator for Lexer<'a> {
type Item = LexerItem;
fn next(&mut self) -> Option<Self::Item> {
while let Some(current_char) = self.inner.peek_char() {
let had_leading_whitespace = self.leading_whitespace;
self.leading_whitespace = false;
let mut location = Location {
line: self.inner.get_last_seen_line(),
start: self.inner.get_last_seen_start_offset() as u32,
end: 0,
};
let was_start_of_line = self.start_of_line;
self.start_of_line = false;
let value = match current_char {
' ' | '\t' | '\x0b' | '\x0c' | COMMENT_SENTINEL_VALUE => {
if current_char == COMMENT_SENTINEL_VALUE {
self.had_comments = true;
}
self.start_of_line = was_start_of_line;
self.leading_whitespace = true;
self.inner.next_char();
continue;
}
'\n' => {
self.leading_whitespace = true;
self.start_of_line = true;
self.inner.next_char();
Ok(TokenValue::NewLine)
}
c @ '0'..='9' => {
self.inner.next_char();
self.parse_number(c)
}
// Special case . as a punctuation because it can be the start of a float.
'.' => {
self.inner.next_char();
match self.inner.peek_char() {
Some('0'..='9') => self.parse_number('.'),
_ => Ok(TokenValue::Punct(Punct::Dot)),
}
}
_ => {
// TODO: see todo in `parse_identifier` for information
if current_char.is_xid_start() || current_char == '_' {
self.parse_identifier()
} else {
self.parse_punctuation()
}
}
};
location.end = self.inner.get_last_consumed_end_offset() as u32;
return Some(value.map_err(|e| (e, Default::default())).map(|t| Token {
value: t,
location,
leading_whitespace: had_leading_whitespace,
start_of_line: was_start_of_line,
}));
}
// Do the C trick of always ending with a newline so that preprocessor directives are terminated.
if !self.start_of_line {
self.start_of_line = true;
let end_offset = self.inner.get_last_consumed_end_offset() as u32;
Some(Ok(Token {
value: TokenValue::NewLine,
location: Location {
line: self.inner.get_last_seen_line(),
start: end_offset,
end: end_offset,
},
leading_whitespace: self.leading_whitespace,
start_of_line: false,
}))
} else {
None
}
}
}
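
A minimal sketch of driving this lexer from inside the crate (`dump_tokens` is a hypothetical helper, not part of the vendored file; the `lexer` module is private, see lib.rs below):

use crate::lexer::Lexer;

fn dump_tokens(input: &str) {
    for item in Lexer::new(input) {
        match item {
            // Each token carries its value plus location and whitespace metadata.
            Ok(token) => println!(
                "{:?} at bytes {}..{}",
                token.value, token.location.start, token.location.end
            ),
            // Errors carry the offending location; stop at the first one.
            Err((error, location)) => {
                println!("error {:?} on line {}", error, location.line);
                break;
            }
        }
    }
}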

vendor/pp-rs/src/lexer_tests.rs (vendored, new file, 821 lines)

@@ -0,0 +1,821 @@
use super::lexer::{
CharsAndLine, Lexer, LexerItem, ReplaceComments, SkipBackslashNewline, Token, TokenValue,
COMMENT_SENTINEL_VALUE,
};
use super::token::{Float, Integer, Location, PreprocessorError, Punct};
use std::ops::Range;
fn c(c: char, line: u32) -> Option<(char, u32)> {
Some((c, line))
}
fn l(line: u32, pos: Range<u32>) -> Location {
Location {
line,
start: pos.start,
end: pos.end,
}
}
fn unwrap_token(item: Option<LexerItem>) -> Token {
item.unwrap().unwrap()
}
fn unwrap_token_value(item: Option<LexerItem>) -> TokenValue {
unwrap_token(item).value
}
fn unwrap_error(item: Option<LexerItem>) -> PreprocessorError {
item.unwrap().unwrap_err().0
}
fn expect_lexer_end(lexer: &mut Lexer) {
assert_eq!(unwrap_token_value(lexer.next()), TokenValue::NewLine);
assert_eq!(lexer.next(), None);
}
impl From<i32> for TokenValue {
fn from(value: i32) -> Self {
TokenValue::Integer(Integer {
value: value as u64,
signed: true,
width: 32,
})
}
}
impl From<u32> for TokenValue {
fn from(value: u32) -> Self {
TokenValue::Integer(Integer {
value: value as u64,
signed: false,
width: 32,
})
}
}
impl From<f32> for TokenValue {
fn from(value: f32) -> Self {
TokenValue::Float(Float { value, width: 32 })
}
}
#[test]
fn chars_and_line() {
// Test handling of characters in a line.
let mut it = CharsAndLine::new("abc");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('b', 1));
assert_eq!(it.next(), c('c', 1));
assert_eq!(it.next(), None);
// Test handling of \n in the regular case.
let mut it = CharsAndLine::new("a\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test handling of \r in the regular case.
let mut it = CharsAndLine::new("a\rb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test handling of \n\r.
let mut it = CharsAndLine::new("a\n\rb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test handling of \r\n.
let mut it = CharsAndLine::new("a\r\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test handling of a mix of \r and \n
let mut it = CharsAndLine::new("\n\r\n\r\r\r\n");
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('\n', 2));
assert_eq!(it.next(), c('\n', 3));
assert_eq!(it.next(), c('\n', 4));
assert_eq!(it.next(), None);
// Unicode handling
let mut it = CharsAndLine::new("a→üs🦀");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('→', 1));
assert_eq!(it.next(), c('ü', 1));
assert_eq!(it.next(), c('s', 1));
assert_eq!(it.next(), c('🦀', 1));
assert_eq!(it.next(), None);
}
#[test]
fn skip_backslash_newline() {
// Test a simple case.
let mut it = SkipBackslashNewline::new("a\\\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test a double case that requires the loop in the algorithm.
let mut it = SkipBackslashNewline::new("a\\\n\\\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('b', 3));
assert_eq!(it.next(), None);
// Test a backslash on its own
let mut it = SkipBackslashNewline::new("a\\b");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('\\', 1));
assert_eq!(it.next(), c('b', 1));
assert_eq!(it.next(), None);
// Test a case just before EOF
let mut it = SkipBackslashNewline::new("\\\n");
assert_eq!(it.next(), None);
}
#[test]
fn replace_comments() {
// Test a slash that's not a comment
let mut it = ReplaceComments::new("a/b");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('/', 1));
assert_eq!(it.next(), c('b', 1));
assert_eq!(it.next(), None);
// Test a slash with nothing afterwards
let mut it = ReplaceComments::new("a/");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c('/', 1));
assert_eq!(it.next(), None);
// Test a single-line comment
let mut it = ReplaceComments::new("a//foo\nb");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), c('\n', 1));
assert_eq!(it.next(), c('b', 2));
assert_eq!(it.next(), None);
// Test a single-line comment without an ending newline
let mut it = ReplaceComments::new("//foo");
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test a single-line comment with nothing afterwards
let mut it = ReplaceComments::new("//");
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test a single-line comment with a line continuation
let mut it = ReplaceComments::new("//foo\\\na");
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test a multi-line comment
let mut it = ReplaceComments::new("a/*fo\n\no*/b");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), c('b', 3));
assert_eq!(it.next(), None);
// Test a multi-line comment, without a proper ending (only the *)
let mut it = ReplaceComments::new("a/*fo\n\no*");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test a multi-line comment, without a proper ending (nothing)
let mut it = ReplaceComments::new("a/*fo\n\no");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
// Test that /*/ is not a complete multi-line comment
let mut it = ReplaceComments::new("a/*/b");
assert_eq!(it.next(), c('a', 1));
assert_eq!(it.next(), c(COMMENT_SENTINEL_VALUE, 1));
assert_eq!(it.next(), None);
}
#[test]
fn lex_whitespace() {
// Empty input gives nothing.
let mut it = Lexer::new("");
assert_eq!(it.next(), None);
// Pure whitespace gives nothing too
let mut it = Lexer::new("/**/\t //a");
assert_eq!(it.next(), None);
}
#[test]
fn lex_newline() {
let mut it = Lexer::new("\r\n\n");
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(it.next(), None);
// Check a newline is added only if the last token wasn't a newline
let mut it = Lexer::new("\r\n\n\t/**/ //");
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
expect_lexer_end(&mut it);
let mut it = Lexer::new("\r\n\n#");
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(unwrap_token_value(it.next()), TokenValue::Hash);
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(it.next(), None);
}
#[test]
fn lex_hash() {
let mut it = Lexer::new("a#b");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("a".to_string())
);
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Hash);
assert_eq!(token.location, l(1, 1..2));
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("b".to_string())
);
expect_lexer_end(&mut it);
let mut it = Lexer::new("\nvoid #");
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("void".into())
);
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Hash);
assert_eq!(token.location, l(2, 6..7));
expect_lexer_end(&mut it);
}
#[test]
fn lex_metadata() {
// Test the metadata of the first token
let mut it = Lexer::new("1");
assert_eq!(
unwrap_token(it.next()),
Token {
value: 1.into(),
location: l(1, 0..1),
leading_whitespace: true,
start_of_line: true
}
);
expect_lexer_end(&mut it);
// Test various whitespaces are recognized (or lack of)
let mut it = Lexer::new(" 1/*\n*/2\t3+\n4");
// 1 is the first token and the whitespace doesn't prevent it from being the start of the
// line
assert_eq!(
unwrap_token(it.next()),
Token {
value: 1.into(),
location: l(1, 1..2),
leading_whitespace: true,
start_of_line: true
}
);
// 2 is not at the start of the line because the \n in the /**/ doesn't count, however its
// location correctly lists the second line.
assert_eq!(
unwrap_token(it.next()),
Token {
value: 2.into(),
location: l(2, 7..8),
leading_whitespace: true,
start_of_line: false
}
);
assert_eq!(
unwrap_token(it.next()),
Token {
value: 3.into(),
location: l(2, 9..10),
leading_whitespace: true,
start_of_line: false
}
);
// + doesn't have a leading whitespace
assert_eq!(
unwrap_token(it.next()),
Token {
value: Punct::Plus.into(),
location: l(2, 10..11),
leading_whitespace: false,
start_of_line: false
}
);
// The newline is correctly tagged on the preceding line
assert_eq!(
unwrap_token(it.next()),
Token {
value: TokenValue::NewLine,
location: l(2, 11..12),
leading_whitespace: false,
start_of_line: false
}
);
// 4 is after a newline that correctly sets start_of_line
assert_eq!(
unwrap_token(it.next()),
Token {
value: 4.into(),
location: l(3, 12..13),
leading_whitespace: true,
start_of_line: true
}
);
// The final newline added by the lexer is at the correct position
assert_eq!(
unwrap_token(it.next()),
Token {
value: TokenValue::NewLine,
location: l(3, 13..13),
leading_whitespace: false,
start_of_line: false
}
);
assert_eq!(it.next(), None);
}
#[test]
fn lex_identifiers() {
// Test some basic identifier cases
let mut it = Lexer::new("foo BA_R baz0");
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Ident("foo".to_string()));
assert_eq!(token.location, l(1, 0..3),);
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Ident("BA_R".to_string()));
assert_eq!(token.location, l(1, 4..8),);
let token = unwrap_token(it.next());
assert_eq!(token.value, TokenValue::Ident("baz0".to_string()));
assert_eq!(token.location, l(1, 9..13),);
expect_lexer_end(&mut it);
// Test _ is a valid identifier
let mut it = Lexer::new("_");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("_".to_string())
);
expect_lexer_end(&mut it);
// Test that identifiers are not split by escaped newlines
let mut it = Lexer::new("a\\\nb");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("ab".to_string())
);
expect_lexer_end(&mut it);
// Test that identifiers are split by other whitespace like /**/
let mut it = Lexer::new("a/**/b");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("a".to_string())
);
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("b".to_string())
);
expect_lexer_end(&mut it);
}
#[test]
fn lex_decimal() {
// Test some basic cases
let mut it = Lexer::new("1 0u 42 65536U");
assert_eq!(unwrap_token_value(it.next()), 1.into());
let token = unwrap_token(it.next());
assert_eq!(token.value, 0u32.into());
assert_eq!(token.location, l(1, 2..4),);
let token = unwrap_token(it.next());
assert_eq!(token.value, 42.into());
assert_eq!(token.location, l(1, 5..7),);
let token = unwrap_token(it.next());
assert_eq!(token.value, 65536u32.into());
assert_eq!(token.location, l(1, 8..14),);
expect_lexer_end(&mut it);
// Test splitting with identifiers
let mut it = Lexer::new("31ab");
assert_eq!(unwrap_token_value(it.next()), 31.into());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("ab".to_string())
);
expect_lexer_end(&mut it);
// Test splitting with whitespace
let mut it = Lexer::new("31/**/32");
assert_eq!(unwrap_token_value(it.next()), 31.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test splitting with punctuation
let mut it = Lexer::new("31+32");
assert_eq!(unwrap_token_value(it.next()), 31.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test that 2^64 produces an overflow error but that 2^64-1 correctly parses (even if it might
// produce an error down the line).
let mut it = Lexer::new("18446744073709551616");
assert_eq!(unwrap_error(it.next()), PreprocessorError::IntegerOverflow);
let mut it = Lexer::new("18446744073709551615");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Integer(Integer {
value: 18446744073709551615,
signed: true,
width: 32
})
);
expect_lexer_end(&mut it);
// Check that the 16bit or 64bit suffixes produce errors (for now).
let mut it = Lexer::new("13s");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
let mut it = Lexer::new("13S");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
let mut it = Lexer::new("13l");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
let mut it = Lexer::new("13L");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
// Check that they produce unsupported errors even if they happen with an unsigned suffix too.
let mut it = Lexer::new("13uS");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
let mut it = Lexer::new("13Ul");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
}
#[test]
fn lex_hexadecimal() {
// Test some basic cases
let mut it = Lexer::new("0x1 0X0u 0xBaFfe 0XcaFeU");
assert_eq!(unwrap_token_value(it.next()), 1.into());
assert_eq!(unwrap_token_value(it.next()), 0u32.into());
let token = unwrap_token(it.next());
assert_eq!(token.value, 0xBAFFE.into());
assert_eq!(token.location, l(1, 9..16),);
let token = unwrap_token(it.next());
assert_eq!(token.value, 0xCAFEu32.into());
assert_eq!(token.location, l(1, 17..24),);
expect_lexer_end(&mut it);
// Test with redundant zeroes
let mut it = Lexer::new("0x000 0x000000000000001");
assert_eq!(unwrap_token_value(it.next()), 0.into());
assert_eq!(unwrap_token_value(it.next()), 1.into());
expect_lexer_end(&mut it);
// Test splitting with identifiers
let mut it = Lexer::new("0x31zb");
assert_eq!(unwrap_token_value(it.next()), 0x31.into());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("zb".to_string())
);
expect_lexer_end(&mut it);
// Test splitting with whitespace
let mut it = Lexer::new("0x31/**/32");
assert_eq!(unwrap_token_value(it.next()), 0x31.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test splitting with punctuation
let mut it = Lexer::new("0x31+32");
assert_eq!(unwrap_token_value(it.next()), 0x31.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test that 2^64 produces an overflow error but that 2^64-1 correctly parses (even if it might
// produce an error down the line).
let mut it = Lexer::new("0x10000000000000000");
assert_eq!(unwrap_error(it.next()), PreprocessorError::IntegerOverflow);
let mut it = Lexer::new("0xFFFFFFFFFFFFFFFF");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Integer(Integer {
value: 18446744073709551615,
signed: true,
width: 32
})
);
expect_lexer_end(&mut it);
}
#[test]
fn lex_octal() {
// Test some basic cases
let mut it = Lexer::new("01 00u 07654 01234u");
assert_eq!(unwrap_token_value(it.next()), 1.into());
assert_eq!(unwrap_token_value(it.next()), 0u32.into());
assert_eq!(unwrap_token_value(it.next()), 4012.into());
assert_eq!(unwrap_token_value(it.next()), 668u32.into());
expect_lexer_end(&mut it);
// Test with redundant zeroes
let mut it = Lexer::new("0000 0000000000000001");
assert_eq!(unwrap_token_value(it.next()), 0.into());
assert_eq!(unwrap_token_value(it.next()), 1.into());
expect_lexer_end(&mut it);
// Test splitting with identifiers
let mut it = Lexer::new("031zb");
assert_eq!(unwrap_token_value(it.next()), 25.into());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("zb".to_string())
);
expect_lexer_end(&mut it);
// Test splitting with whitespace
let mut it = Lexer::new("031/**/32");
assert_eq!(unwrap_token_value(it.next()), 25.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// TODO(kangz): Fix octal numbers consuming 8s and 9s as well. This could be done with extra
// logic but is not worth the complexity for now.
// Test splitting with 8 and 9
// let mut it = Lexer::new("039 038");
// assert_eq!(unwrap_token_value(it.next()), 3.into());
// assert_eq!(unwrap_token_value(it.next()), 9.into());
// assert_eq!(unwrap_token_value(it.next()), 3.into());
// assert_eq!(unwrap_token_value(it.next()), 8.into());
// expect_lexer_end(&mut it);
// Test splitting with punctuation
let mut it = Lexer::new("031+32");
assert_eq!(unwrap_token_value(it.next()), 25.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
assert_eq!(unwrap_token_value(it.next()), 32.into());
expect_lexer_end(&mut it);
// Test that 2^64 produces an overflow error but that 2^64-1 correctly parses (even if it might
// produce an error down the line).
let mut it = Lexer::new("02000000000000000000000");
assert_eq!(unwrap_error(it.next()), PreprocessorError::IntegerOverflow);
let mut it = Lexer::new("01777777777777777777777");
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Integer(Integer {
value: 18446744073709551615,
signed: true,
width: 32
})
);
expect_lexer_end(&mut it);
}
#[test]
fn lex_float() {
// Test a couple simple cases.
let mut it = Lexer::new("1.0 0.0");
assert_eq!(unwrap_token_value(it.next()), 1.0f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.0f32.into());
expect_lexer_end(&mut it);
// Test parsing with a leading .
let mut it = Lexer::new(".99 0.01 .00000000");
assert_eq!(unwrap_token_value(it.next()), 0.99f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.01f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.0f32.into());
expect_lexer_end(&mut it);
// Test parsing with nothing after the .
let mut it = Lexer::new("42. 0.");
assert_eq!(unwrap_token_value(it.next()), 42.0f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.0f32.into());
expect_lexer_end(&mut it);
// Test parsing with the float suffix
let mut it = Lexer::new("1000.f 1.f .2f");
assert_eq!(unwrap_token_value(it.next()), 1000.0f32.into());
assert_eq!(unwrap_token_value(it.next()), 1.0f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.2f32.into());
expect_lexer_end(&mut it);
// Test parsing with exponents
// - with / without float suffixes
// - at different points in the float parsing.
let mut it = Lexer::new("3e10 4.1e-10f .01e12F 4.1e+10f");
assert_eq!(unwrap_token_value(it.next()), 3e10f32.into());
assert_eq!(unwrap_token_value(it.next()), 4.1e-10f32.into());
assert_eq!(unwrap_token_value(it.next()), 0.01e12f32.into());
assert_eq!(unwrap_token_value(it.next()), 4.1e+10f32.into());
expect_lexer_end(&mut it);
// Test parsing with exponents
// - After values looking like an octal integer (works)
// - After values looking like a hexadecimal integer (doesn't work)
let mut it = Lexer::new("05e2 0x1e-2");
assert_eq!(unwrap_token_value(it.next()), 5e2f32.into());
assert_eq!(unwrap_token_value(it.next()), 0x1Ei32.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Minus.into());
assert_eq!(unwrap_token_value(it.next()), 2i32.into());
// Test parsing with nothing valid after the 'e' (technically it shouldn't
// be an error, but there's no language where that sequence of tokens is
// valid).
let mut it = Lexer::new("1.0e");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::FloatParsingError
);
// Check that 16bit and 64bit suffixes produce errors
let mut it = Lexer::new("1.0l");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
let mut it = Lexer::new("1.0L");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported64BitLiteral
);
let mut it = Lexer::new("1.0h");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
let mut it = Lexer::new("1.0H");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::NotSupported16BitLiteral
);
}
#[test]
fn lex_punctuation() {
// Test parsing some of the tokens (but not all, that'd be too many tests!)
let mut it = Lexer::new("+ != <<=");
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
let token = unwrap_token(it.next());
assert_eq!(token.value, Punct::NotEqual.into());
assert_eq!(token.location, l(1, 2..4),);
let token = unwrap_token(it.next());
assert_eq!(token.value, Punct::LeftShiftAssign.into());
assert_eq!(token.location, l(1, 5..8),);
expect_lexer_end(&mut it);
// Test parsing a token that's a prefix of another one just before EOF
let mut it = Lexer::new("<");
assert_eq!(unwrap_token_value(it.next()), Punct::LeftAngle.into());
expect_lexer_end(&mut it);
// Test \\\n doesn't split the token
let mut it = Lexer::new("=\\\n=");
assert_eq!(unwrap_token_value(it.next()), Punct::EqualEqual.into());
expect_lexer_end(&mut it);
// Test whitespace splits the token
let mut it = Lexer::new("+/**/=");
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Equal.into());
expect_lexer_end(&mut it);
// Test a number stops processing the token
let mut it = Lexer::new("!1");
assert_eq!(unwrap_token_value(it.next()), Punct::Bang.into());
assert_eq!(unwrap_token_value(it.next()), 1.into());
expect_lexer_end(&mut it);
// Test an identifier stops processing the token
let mut it = Lexer::new("&a");
assert_eq!(unwrap_token_value(it.next()), Punct::Ampersand.into());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("a".to_string())
);
expect_lexer_end(&mut it);
// Test whitespace splits the token
let mut it = Lexer::new(">/**/>");
assert_eq!(unwrap_token_value(it.next()), Punct::RightAngle.into());
assert_eq!(unwrap_token_value(it.next()), Punct::RightAngle.into());
expect_lexer_end(&mut it);
// Test that tokens are parsed greedily: `a+++++b` is `a ++ ++ + b` (invalid GLSL) and not
// `(a ++) + (++ b)` (valid GLSL)
let mut it = Lexer::new("+++++");
assert_eq!(unwrap_token_value(it.next()), Punct::Increment.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Increment.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Plus.into());
expect_lexer_end(&mut it);
// Test that an invalid char produces an error
let mut it = Lexer::new("@");
assert_eq!(
unwrap_error(it.next()),
PreprocessorError::UnexpectedCharacter
);
// Extra punctuation tests for code coverage.
let mut it = Lexer::new("<= >= += -= &= || |= | ^= { } ] ? .");
assert_eq!(unwrap_token_value(it.next()), Punct::LessEqual.into());
assert_eq!(unwrap_token_value(it.next()), Punct::GreaterEqual.into());
assert_eq!(unwrap_token_value(it.next()), Punct::AddAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::SubAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::AndAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::LogicalOr.into());
assert_eq!(unwrap_token_value(it.next()), Punct::OrAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Pipe.into());
assert_eq!(unwrap_token_value(it.next()), Punct::XorAssign.into());
assert_eq!(unwrap_token_value(it.next()), Punct::LeftBrace.into());
assert_eq!(unwrap_token_value(it.next()), Punct::RightBrace.into());
assert_eq!(unwrap_token_value(it.next()), Punct::RightBracket.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Question.into());
assert_eq!(unwrap_token_value(it.next()), Punct::Dot.into());
expect_lexer_end(&mut it);
}
#[test]
fn lex_had_comments() {
// Test that had_comments doesn't get set to true if there are no comments.
let mut it = Lexer::new("#version");
assert!(!it.had_comments());
assert_eq!(unwrap_token_value(it.next()), TokenValue::Hash);
assert!(!it.had_comments());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("version".to_string())
);
assert!(!it.had_comments());
expect_lexer_end(&mut it);
// Test that had_comments doesn't get triggered by its sentinel value of '\r'
let mut it = Lexer::new("\r!");
assert!(!it.had_comments());
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert_eq!(unwrap_token_value(it.next()), Punct::Bang.into());
assert!(!it.had_comments());
expect_lexer_end(&mut it);
// Test that had_comments gets triggered by // comments
let mut it = Lexer::new("//\n!");
assert!(!it.had_comments());
assert_eq!(unwrap_token_value(it.next()), TokenValue::NewLine);
assert!(it.had_comments());
assert_eq!(unwrap_token_value(it.next()), Punct::Bang.into());
assert!(it.had_comments());
expect_lexer_end(&mut it);
// Test that had_comments gets triggered by /**/ comments
let mut it = Lexer::new("/**/#version");
assert!(!it.had_comments());
assert_eq!(unwrap_token_value(it.next()), TokenValue::Hash);
assert!(it.had_comments());
assert_eq!(
unwrap_token_value(it.next()),
TokenValue::Ident("version".to_string())
);
assert!(it.had_comments());
expect_lexer_end(&mut it);
}
// TODO test leading_whitespace (a sketch follows)
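
A sketch of that missing test (editor's addition, not part of the vendored file; relies on the `leading_whitespace` semantics defined in lexer.rs):

#[test]
fn lex_leading_whitespace() {
    // 'a' is at the start of the input (counted as leading whitespace), '+'
    // follows it directly, and 'b' comes after a space.
    let mut it = Lexer::new("a+ b");
    assert!(unwrap_token(it.next()).leading_whitespace);
    assert!(!unwrap_token(it.next()).leading_whitespace);
    assert!(unwrap_token(it.next()).leading_whitespace);
    expect_lexer_end(&mut it);
}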

vendor/pp-rs/src/lib.rs (vendored, new file, 11 lines)

@@ -0,0 +1,11 @@
extern crate unicode_xid;
#[allow(clippy::match_like_matches_macro)]
mod lexer;
pub mod pp;
pub mod token;
#[cfg(test)]
mod lexer_tests;
#[cfg(test)]
mod pp_tests;

vendor/pp-rs/src/pp.rs (vendored, new file, 1034 lines)

File diff suppressed because it is too large.

vendor/pp-rs/src/pp/if_parser.rs (vendored, new file, 457 lines)

@@ -0,0 +1,457 @@
use crate::token::{Integer, PreprocessorError, Punct};
use super::{Define, Location, MacroProcessor, MeLexer, Step, StepExit, Token, TokenValue};
use std::{collections::HashMap, convert::TryInto, rc::Rc, vec};
struct IfLexer<'macros> {
tokens: vec::IntoIter<Token>,
defines: &'macros HashMap<String, Rc<Define>>,
}
pub(super) struct IfParser<'macros> {
lexer: IfLexer<'macros>,
macro_processor: MacroProcessor,
location: Location,
parsing_if: bool,
carry: Option<Token>,
}
impl<'macros> IfParser<'macros> {
/// Builds a new IfParser that can be reused
///
/// `parsing_if` indicates whether or not undefined macros should be
/// replaced with 0
pub fn new(
tokens: Vec<Token>,
defines: &'macros HashMap<String, Rc<Define>>,
location: Location,
parsing_if: bool,
) -> Self {
IfParser {
lexer: IfLexer {
tokens: tokens.into_iter(),
defines,
},
macro_processor: MacroProcessor::default(),
location,
parsing_if,
carry: None,
}
}
/// Helper method to consume the next token without define expansion
fn raw_next(&mut self) -> Option<Token> {
self.carry
.take()
.or_else(|| self.macro_processor.step(&mut self.lexer).ok())
}
/// Helper method to consume the next token with define expansion
fn next(&mut self) -> Step<Option<Token>> {
let token = match self.raw_next() {
Some(t) => t,
None => return Ok(None),
};
Ok(match token.value {
TokenValue::Ident(ref name) if name != "defined" => {
match self.add_define(name, token.location)? {
Some(t) => Some(t),
None => self.next()?,
}
}
_ => Some(token),
})
}
/// Helper method to peek at the next token with define expansion
pub fn peek(&mut self) -> Step<Option<Token>> {
self.carry = self.next()?;
Ok(self.carry.clone())
}
/// Helper method to consume the next token without define expansion
///
/// Returns an EOI error if there are no further tokens
fn expect_raw_next(&mut self) -> Step<Token> {
self.raw_next().ok_or(StepExit::Error((
PreprocessorError::UnexpectedEndOfInput,
self.location,
)))
}
/// Helper method to consume the next token with define expansion
///
/// Returns an EOI error if there are no further tokens
fn expect_next(&mut self) -> Step<Token> {
self.next()?.ok_or(StepExit::Error((
PreprocessorError::UnexpectedEndOfInput,
self.location,
)))
}
/// Helper method to peek at the next token with define expansion
///
/// Returns an EOI error if there are no further tokens
fn expect_peek(&mut self) -> Step<Token> {
self.peek()?.ok_or(StepExit::Error((
PreprocessorError::UnexpectedEndOfInput,
self.location,
)))
}
fn add_define(&mut self, name: &str, location: Location) -> Step<Option<Token>> {
if self
.macro_processor
.start_define_invocation(name, location, &mut self.lexer)?
{
Ok(None)
} else if self.parsing_if {
Ok(Some(Token {
value: TokenValue::Integer(Integer {
value: 0,
signed: true,
width: 64,
}),
location,
}))
} else {
Err(StepExit::Error((
PreprocessorError::UnexpectedToken(TokenValue::Ident(name.to_string())),
location,
)))
}
}
fn handle_defined(&mut self) -> Step<i64> {
let next = self.expect_raw_next()?;
match next.value {
TokenValue::Ident(ref name) => Ok(self.lexer.defines.get(name).is_some() as i64),
TokenValue::Punct(Punct::LeftParen) => {
let name_token = self.expect_raw_next()?;
let name = match name_token.value {
TokenValue::Ident(name) => Ok(name),
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
name_token.location,
))),
}?;
let close_brace = self.expect_next()?;
match close_brace.value {
TokenValue::Punct(Punct::RightParen) => {
Ok(self.lexer.defines.get(&name).is_some() as i64)
}
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
close_brace.location,
))),
}
}
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
next.location,
))),
}
}
fn parse_atom(&mut self) -> Step<i64> {
let token = self.expect_next()?;
match token.value {
TokenValue::Ident(name) => {
debug_assert_eq!(name, "defined");
self.handle_defined()
}
TokenValue::Integer(int) => Ok(int.value as i64),
TokenValue::Punct(Punct::LeftParen) => {
let val = self.parse_logical_or()?;
let close_brace = self.expect_next()?;
match close_brace.value {
TokenValue::Punct(Punct::RightParen) => Ok(val),
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
close_brace.location,
))),
}
}
value => Err(StepExit::Error((
PreprocessorError::UnexpectedToken(value),
token.location,
))),
}
}
fn parse_unary(&mut self) -> Step<i64> {
match self.expect_peek()?.value {
TokenValue::Punct(punct) => match punct {
Punct::Plus | Punct::Minus | Punct::Bang | Punct::Tilde => {
self.next()?;
let val = self.parse_unary()?;
Ok(match punct {
Punct::Plus => val,
Punct::Minus => -val,
Punct::Bang => (val == 0) as i64,
Punct::Tilde => !val,
_ => unreachable!(),
})
}
_ => self.parse_atom(),
},
_ => self.parse_atom(),
}
}
fn parse_multiplicative(&mut self) -> Step<i64> {
let mut left = self.parse_unary()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::Star | Punct::Slash | Punct::Percent = punct {
self.next()?;
let right = self.parse_unary()?;
match punct {
Punct::Star => {
left = left.checked_mul(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
Punct::Slash => {
left = left.checked_div(right).ok_or(StepExit::Error((
PreprocessorError::DivisionByZero,
self.location,
)))?
}
Punct::Percent => {
left = left.checked_rem(right).ok_or(StepExit::Error((
PreprocessorError::RemainderByZero,
self.location,
)))?
}
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_additive(&mut self) -> Step<i64> {
let mut left = self.parse_multiplicative()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::Plus | Punct::Minus = punct {
self.next()?;
let right = self.parse_multiplicative()?;
match punct {
Punct::Plus => {
left = left.checked_add(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
Punct::Minus => {
left = left.checked_sub(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_shift(&mut self) -> Step<i64> {
let mut left = self.parse_additive()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::LeftShift | Punct::RightShift = punct {
self.next()?;
let right = self.parse_additive()?;
match punct {
Punct::LeftShift => {
let right = right.try_into().map_err(|_| {
StepExit::Error((PreprocessorError::IntegerOverflow, self.location))
})?;
left = left.checked_shl(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
Punct::RightShift => {
let right = right.try_into().map_err(|_| {
StepExit::Error((PreprocessorError::IntegerOverflow, self.location))
})?;
left = left.checked_shr(right).ok_or(StepExit::Error((
PreprocessorError::IntegerOverflow,
self.location,
)))?
}
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_comparative(&mut self) -> Step<i64> {
let mut left = self.parse_shift()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::LeftAngle | Punct::RightAngle | Punct::LessEqual | Punct::GreaterEqual =
punct
{
self.next()?;
let right = self.parse_shift()?;
match punct {
Punct::LeftAngle => left = (left < right) as i64,
Punct::RightAngle => left = (left > right) as i64,
Punct::LessEqual => left = (left <= right) as i64,
Punct::GreaterEqual => left = (left >= right) as i64,
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_equality(&mut self) -> Step<i64> {
let mut left = self.parse_comparative()?;
while let Some(TokenValue::Punct(punct)) = self.peek()?.map(|t| t.value) {
if let Punct::EqualEqual | Punct::NotEqual = punct {
self.next()?;
let right = self.parse_comparative()?;
match punct {
Punct::EqualEqual => left = (left == right) as i64,
Punct::NotEqual => left = (left != right) as i64,
_ => unreachable!(),
}
} else {
break;
}
}
Ok(left)
}
fn parse_bit_and(&mut self) -> Step<i64> {
let mut left = self.parse_equality()?;
while let Some(TokenValue::Punct(Punct::Ampersand)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_equality()?;
left &= right
}
Ok(left)
}
fn parse_bit_xor(&mut self) -> Step<i64> {
let mut left = self.parse_bit_and()?;
while let Some(TokenValue::Punct(Punct::Caret)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_bit_and()?;
left ^= right
}
Ok(left)
}
fn parse_bit_or(&mut self) -> Step<i64> {
let mut left = self.parse_bit_xor()?;
while let Some(TokenValue::Punct(Punct::Pipe)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_bit_xor()?;
left |= right
}
Ok(left)
}
fn parse_logical_and(&mut self) -> Step<i64> {
let mut left = self.parse_bit_or()?;
while let Some(TokenValue::Punct(Punct::LogicalAnd)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_bit_or()?;
left = (left != 0 && right != 0) as i64;
}
Ok(left)
}
fn parse_logical_or(&mut self) -> Step<i64> {
let mut left = self.parse_logical_and()?;
while let Some(TokenValue::Punct(Punct::LogicalOr)) = self.peek()?.map(|t| t.value) {
self.next()?;
let right = self.parse_logical_and()?;
left = (left != 0 || right != 0) as i64;
}
Ok(left)
}
pub fn evaluate_expression(&mut self) -> Step<i64> {
self.parse_logical_or()
}
}
impl<'macros> MeLexer for IfLexer<'macros> {
fn step(&mut self) -> Step<Token> {
self.tokens.next().ok_or(StepExit::Finished)
}
fn get_define(&self, name: &str) -> Option<&Rc<Define>> {
self.defines.get(name)
}
fn apply_line_offset(&self, line: u32, _: Location) -> Step<u32> {
Ok(line)
}
}
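
For orientation, a minimal sketch of evaluating an `#if` expression with this parser (`eval` is a hypothetical helper, not part of the vendored file; `Token` is the `pp` module's token type with the `value` and `location` fields used above):

use std::collections::HashMap;

fn eval(tokens: Vec<Token>) -> Step<i64> {
    // No macros are defined; with `parsing_if` set, undefined identifiers evaluate to 0.
    let defines = HashMap::new();
    IfParser::new(tokens, &defines, Location::default(), true).evaluate_expression()
}

With tokens lexed from `1 + 2 * 3`, this returns Ok(7): operator precedence falls out of the recursive-descent chain from parse_logical_or down through parse_multiplicative to parse_atom.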

vendor/pp-rs/src/pp_tests.rs (vendored, new file, 1413 lines)

File diff suppressed because it is too large.

vendor/pp-rs/src/token.rs (vendored, new file, 159 lines)

@@ -0,0 +1,159 @@
// TODO: Source file
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Location {
/// Byte offset into the source string where the first char begins
pub start: u32,
/// Byte offset into the source string where the first char not belonging to
/// this `Location` begins
pub end: u32,
/// Line number, used internally by the `#line` directive and the `__LINE__` macro
pub(crate) line: u32,
}
impl Default for Location {
fn default() -> Self {
Location {
start: 0,
end: 0,
line: 1,
}
}
}
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Punct {
// Compound assignments
AddAssign,
SubAssign,
MulAssign,
DivAssign,
ModAssign,
LeftShiftAssign,
RightShiftAssign,
AndAssign,
XorAssign,
OrAssign,
// Two character punctuation
Increment,
Decrement,
LogicalAnd,
LogicalOr,
LogicalXor,
LessEqual,
GreaterEqual,
EqualEqual,
NotEqual,
LeftShift,
RightShift,
// Parenthesis or similar
LeftBrace,
RightBrace,
LeftParen,
RightParen,
LeftBracket,
RightBracket,
// Other one character punctuation
LeftAngle,
RightAngle,
Semicolon,
Comma,
Colon,
Dot,
Equal,
Bang,
Minus,
Tilde,
Plus,
Star,
Slash,
Percent,
Pipe,
Caret,
Ampersand,
Question,
}
#[derive(Clone, PartialEq, Debug)]
// TODO location?
pub enum PreprocessorError {
IntegerOverflow,
FloatParsingError,
UnexpectedCharacter,
UnexpectedToken(TokenValue),
UnexpectedHash,
UnexpectedNewLine,
UnexpectedEndOfInput,
TooFewDefineArguments,
TooManyDefineArguments,
ErrorDirective,
DuplicateParameter,
UnknownDirective,
DefineRedefined,
ElifOutsideOfBlock,
ElseOutsideOfBlock,
EndifOutsideOfBlock,
ElifAfterElse,
MoreThanOneElse,
UnfinishedBlock,
LineOverflow,
NotSupported16BitLiteral,
NotSupported64BitLiteral,
MacroNotDefined,
RecursionLimitReached,
DivisionByZero,
RemainderByZero,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Integer {
pub value: u64,
pub signed: bool,
pub width: i32,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Float {
pub value: f32,
pub width: i32,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Version {
pub tokens: Vec<Token>,
pub is_first_directive: bool,
pub has_comments_before: bool,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Extension {
pub tokens: Vec<Token>,
pub has_non_directive_before: bool,
}
#[derive(Clone, PartialEq, Debug)]
pub struct Pragma {
pub tokens: Vec<Token>,
}
#[derive(Clone, PartialEq, Debug)]
pub enum TokenValue {
Ident(String),
Integer(Integer),
Float(Float),
Punct(Punct),
Version(Version),
Extension(Extension),
Pragma(Pragma),
}
#[derive(Clone, PartialEq, Debug)]
pub struct Token {
pub value: TokenValue,
pub location: Location,
// TODO macro invocation stack?
}
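
Since `start` and `end` are byte offsets, the source text of a token can be recovered by slicing; a small sketch (hypothetical helper, not part of the vendored file):

/// Returns the source text covered by `location` (a half-open byte range).
pub fn location_text<'a>(source: &'a str, location: &Location) -> &'a str {
    &source[location.start as usize..location.end as usize]
}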