Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions
--- a/vendor/regex-automata/src/meta/error.rs
+++ b/vendor/regex-automata/src/meta/error.rs
@@ -0,0 +1,241 @@
+use regex_syntax::{ast, hir};
+
+use crate::{nfa, util::search::MatchError, PatternID};
+
+/// An error that occurs when construction of a `Regex` fails.
+///
+/// A build error is generally a result of one of two possible failure
+/// modes. First is a parse or syntax error in the concrete syntax of a
+/// pattern. Second is that the construction of the underlying regex matcher
+/// fails, usually because it gets too big with respect to limits like
+/// [`Config::nfa_size_limit`](crate::meta::Config::nfa_size_limit).
+///
+/// This error provides very few introspection capabilities. You can:
+///
+/// * Ask for the [`PatternID`] of the pattern that caused an error, if one
+/// is available. This is available for things like syntax errors, but not for
+/// cases where build limits are exceeded.
+/// * Ask for the configured size limit via [`BuildError::size_limit`], but
+/// only if the error was caused by exceeding that limit.
+/// * Ask for the underlying syntax error, but only if the error is a syntax
+/// error.
+/// * Ask for a human readable message corresponding to the underlying error.
+/// * The `BuildError::source` method (from the `std::error::Error`
+/// trait implementation) may be used to query for an underlying error if one
+/// exists. There are no API guarantees about which error is returned.
+///
+/// When the `std` feature is enabled, this implements `std::error::Error`.
+#[derive(Clone, Debug)]
+pub struct BuildError {
+    // The concrete failure mode. This field is private: callers inspect it
+    // only through the accessor methods defined on this type.
+    kind: BuildErrorKind,
+}
+
+/// The internal classification of a [`BuildError`].
+#[derive(Clone, Debug)]
+enum BuildErrorKind {
+    /// A pattern could not be processed by `regex-syntax`. `pid` identifies
+    /// the offending pattern and `err` is the underlying syntax error.
+    Syntax { pid: PatternID, err: regex_syntax::Error },
+    /// Construction of the Thompson NFA failed.
+    NFA(nfa::thompson::BuildError),
+}
+
+impl BuildError {
+    /// Returns the ID of the pattern responsible for this build error, when
+    /// that is known.
+    ///
+    /// Not every error can be traced back to one particular pattern. Errors
+    /// raised while parsing a pattern, however, are guaranteed to carry the
+    /// ID of the pattern that failed to parse.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{meta::Regex, PatternID};
+    ///
+    /// let err = Regex::new_many(&["a", "b", r"\p{Foo}", "c"]).unwrap_err();
+    /// assert_eq!(Some(PatternID::must(2)), err.pattern());
+    /// ```
+    pub fn pattern(&self) -> Option<PatternID> {
+        match &self.kind {
+            BuildErrorKind::Syntax { pid, .. } => Some(*pid),
+            BuildErrorKind::NFA(_) => None,
+        }
+    }
+
+    /// Returns the configured size limit when this error was caused by the
+    /// regex exceeding that limit before it could be built.
+    ///
+    /// The value reported is the limit that was configured. It corresponds
+    /// to the maximum permitted heap usage, in bytes.
+    pub fn size_limit(&self) -> Option<usize> {
+        match &self.kind {
+            BuildErrorKind::NFA(err) => err.size_limit(),
+            BuildErrorKind::Syntax { .. } => None,
+        }
+    }
+
+    /// Returns a reference to the underlying syntax error, but only when
+    /// this error actually is a syntax error.
+    pub fn syntax_error(&self) -> Option<&regex_syntax::Error> {
+        if let BuildErrorKind::Syntax { err, .. } = &self.kind {
+            Some(err)
+        } else {
+            None
+        }
+    }
+
+    /// Wraps an AST (parse) error for the pattern identified by `pid`.
+    pub(crate) fn ast(pid: PatternID, err: ast::Error) -> BuildError {
+        let kind = BuildErrorKind::Syntax {
+            pid,
+            err: regex_syntax::Error::from(err),
+        };
+        BuildError { kind }
+    }
+
+    /// Wraps an HIR (translation) error for the pattern identified by `pid`.
+    pub(crate) fn hir(pid: PatternID, err: hir::Error) -> BuildError {
+        let kind = BuildErrorKind::Syntax {
+            pid,
+            err: regex_syntax::Error::from(err),
+        };
+        BuildError { kind }
+    }
+
+    /// Wraps an error produced while building the Thompson NFA.
+    pub(crate) fn nfa(err: nfa::thompson::BuildError) -> BuildError {
+        let kind = BuildErrorKind::NFA(err);
+        BuildError { kind }
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for BuildError {
+    /// Exposes the wrapped syntax or NFA construction error as the cause.
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        let cause: &(dyn std::error::Error + 'static) = match &self.kind {
+            BuildErrorKind::Syntax { err, .. } => err,
+            BuildErrorKind::NFA(err) => err,
+        };
+        Some(cause)
+    }
+}
+
+impl core::fmt::Display for BuildError {
+    /// Writes a short summary of the failure. The details of the wrapped
+    /// error are not included in this message.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match &self.kind {
+            BuildErrorKind::NFA(_) => f.write_str("error building NFA"),
+            BuildErrorKind::Syntax { pid, .. } => {
+                let index = pid.as_usize();
+                write!(f, "error parsing pattern {}", index)
+            }
+        }
+    }
+}
+
+/// An error that occurs when a search should be retried.
+///
+/// This retry error distinguishes between two different failure modes.
+///
+/// The first is one where potential quadratic behavior has been detected.
+/// In this case, whatever optimization that led to this behavior should be
+/// stopped, and the next best strategy should be used.
+///
+/// The second indicates that the underlying regex engine has failed for some
+/// reason. This usually occurs because either a lazy DFA's cache has become
+/// ineffective or because a non-ASCII byte has been seen *and* a Unicode word
+/// boundary was used in one of the patterns. In this failure case, a different
+/// regex engine that won't fail in these ways (PikeVM, backtracker or the
+/// one-pass DFA) should be used.
+///
+/// This is an internal error only and should never bleed into the public
+/// API.
+#[derive(Debug)]
+pub(crate) enum RetryError {
+    /// Potential quadratic behavior was detected (the first failure mode
+    /// described above).
+    Quadratic(RetryQuadraticError),
+    /// The underlying regex engine failed (the second failure mode described
+    /// above).
+    Fail(RetryFailError),
+}
+
+// Marker implementation: none of the trait's methods are overridden.
+#[cfg(feature = "std")]
+impl std::error::Error for RetryError {}
+
+impl core::fmt::Display for RetryError {
+    /// Delegates formatting to whichever inner error this value wraps.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match self {
+            RetryError::Fail(err) => err.fmt(f),
+            RetryError::Quadratic(err) => err.fmt(f),
+        }
+    }
+}
+
+impl From<MatchError> for RetryError {
+    /// Converts a `MatchError` into the "fail" flavor of a retry error.
+    fn from(merr: MatchError) -> RetryError {
+        let fail: RetryFailError = merr.into();
+        RetryError::Fail(fail)
+    }
+}
+
+/// An error that occurs when potential quadratic behavior has been detected
+/// when applying either the "reverse suffix" or "reverse inner" optimizations.
+///
+/// When this error occurs, callers should abandon the "reverse" optimization
+/// and use a normal forward search.
+///
+/// The single `()` field is private and carries no data.
+#[derive(Debug)]
+pub(crate) struct RetryQuadraticError(());
+
+impl RetryQuadraticError {
+    /// Creates a new "potentially quadratic" error. It carries no data.
+    pub(crate) fn new() -> RetryQuadraticError {
+        Self(())
+    }
+}
+
+// Marker implementation: none of the trait's methods are overridden.
+#[cfg(feature = "std")]
+impl std::error::Error for RetryQuadraticError {}
+
+impl core::fmt::Display for RetryQuadraticError {
+    /// Writes a fixed message; this error has no variable parts.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.write_str("regex engine gave up to avoid quadratic behavior")
+    }
+}
+
+impl From<RetryQuadraticError> for RetryError {
+    /// Wraps the quadratic error in the corresponding `RetryError` variant.
+    fn from(err: RetryQuadraticError) -> RetryError {
+        Self::Quadratic(err)
+    }
+}
+
+/// An error that occurs when a regex engine "gives up" for some reason before
+/// finishing a search. Usually this occurs because of heuristic Unicode word
+/// boundary support or because of ineffective cache usage in the lazy DFA.
+///
+/// When this error occurs, callers should retry the regex search with a
+/// different regex engine.
+///
+/// Note that this has a convenient `From` impl that will automatically
+/// convert a `MatchError` into this error. This works because the meta
+/// regex engine internals guarantee that errors like `HaystackTooLong` and
+/// `UnsupportedAnchored` will never occur. The only errors left are `Quit` and
+/// `GaveUp`, which both correspond to this "failure" error.
+#[derive(Debug)]
+pub(crate) struct RetryFailError {
+    // The offset at which the regex engine failed. It is reported in this
+    // error's `Display` output.
+    offset: usize,
+}
+
+impl RetryFailError {
+    /// Creates a failure error recording the offset at which the regex
+    /// engine failed.
+    pub(crate) fn from_offset(offset: usize) -> RetryFailError {
+        Self { offset }
+    }
+}
+
+// Marker implementation: none of the trait's methods are overridden.
+#[cfg(feature = "std")]
+impl std::error::Error for RetryFailError {}
+
+impl core::fmt::Display for RetryFailError {
+    /// Writes a message that includes the offset at which the engine failed.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let offset = self.offset;
+        write!(f, "regex engine failed at offset {offset:?}")
+    }
+}
+
+impl From<RetryFailError> for RetryError {
+    /// Wraps the failure error in the corresponding `RetryError` variant.
+    fn from(err: RetryFailError) -> RetryError {
+        Self::Fail(err)
+    }
+}
+
+impl From<MatchError> for RetryFailError {
+    /// Converts a `Quit` or `GaveUp` match error into a failure error that
+    /// records the same offset.
+    ///
+    /// # Panics
+    ///
+    /// Panics if given a `HaystackTooLong` or `UnsupportedAnchored` error.
+    fn from(merr: MatchError) -> RetryFailError {
+        use crate::util::search::MatchErrorKind::*;
+
+        let offset = match *merr.kind() {
+            Quit { offset, .. } | GaveUp { offset } => offset,
+            // Neither of these is possible here: they are ruled out either
+            // by construction or by higher level control flow logic. As one
+            // example, the backtracker's wrapper never hands out a
+            // backtracker engine when the haystack would be too long.
+            HaystackTooLong { .. } | UnsupportedAnchored { .. } => {
+                unreachable!("found impossible error in meta engine: {merr}")
+            }
+        };
+        RetryFailError::from_offset(offset)
+    }
+}
--- a/vendor/regex-automata/src/meta/limited.rs
+++ b/vendor/regex-automata/src/meta/limited.rs
@@ -0,0 +1,251 @@
+/*!
+This module defines two bespoke reverse DFA searching routines. (One for the
+lazy DFA and one for the fully compiled DFA.) These routines differ from the
+usual ones by permitting the caller to specify a minimum starting position.
+That is, the search will begin at `input.end()` and will usually stop at
+`input.start()`, unless `min_start > input.start()`, in which case, the search
+will stop at `min_start`.
+
+In other words, this lets you say, "no, the search must not extend past this
+point, even if it's within the bounds of the given `Input`." And if the search
+*does* want to go past that point, it stops and returns a "may be quadratic"
+error, which indicates that the caller should retry using some other technique.
+
+These routines specifically exist to protect against quadratic behavior when
+employing the "reverse suffix" and "reverse inner" optimizations. Without the
+backstop these routines provide, it is possible for parts of the haystack to
+get re-scanned over and over again. The backstop not only prevents this, but
+*tells you when it is happening* so that you can change the strategy.
+
+Why can't we just use the normal search routines? We could use the normal
+search routines and just set the start bound on the provided `Input` to our
+`min_start` position. The problem here is that it's impossible to distinguish
+between "no match because we reached the end of input" and "determined there
+was no match well before the end of input." The former case is what we care
+about with respect to quadratic behavior. The latter case is totally fine.
+
+Why don't we modify the normal search routines to report the position at which
+the search stops? I considered this, and I still wonder if it is indeed the
+right thing to do. However, I think the straight-forward thing to do there
+would be to complicate the return type signature of almost every search routine
+in this crate, which I really do not want to do. It therefore might make more
+sense to provide a richer way for search routines to report meta data, but that
+was beyond my bandwidth to work on at the time of writing.
+
+See the 'opt/reverse-inner' and 'opt/reverse-suffix' benchmarks in rebar for a
+real demonstration of how quadratic behavior is mitigated.
+*/
+
+use crate::{
+    meta::error::{RetryError, RetryQuadraticError},
+    HalfMatch, Input, MatchError,
+};
+
+/// Runs a reverse search with a fully compiled dense DFA, refusing to move
+/// the search position before `min_start`.
+///
+/// The search walks backwards from `input.end()`, feeding one haystack byte
+/// at a time to the DFA and remembering the most recent match state seen.
+/// On success, the last match recorded is returned, or `None` if no match
+/// state was seen.
+///
+/// # Errors
+///
+/// * `RetryError::Quadratic` is returned when the search position would move
+/// before `min_start`, or when the search reaches `input.start()` without the
+/// DFA being in a dead state while the recorded match begins after
+/// `input.start()`.
+/// * A `MatchError` converted into a `RetryError` is returned when computing
+/// the start state fails or when the DFA enters a quit state.
+#[cfg(feature = "dfa-build")]
+pub(crate) fn dfa_try_search_half_rev(
+    dfa: &crate::dfa::dense::DFA<alloc::vec::Vec<u32>>,
+    input: &Input<'_>,
+    min_start: usize,
+) -> Result<Option<HalfMatch>, RetryError> {
+    use crate::dfa::Automaton;
+
+    let mut mat = None;
+    let mut sid = dfa.start_state_reverse(input)?;
+    // An empty span has no bytes to walk over, so only the final
+    // (end-of-input or look-behind byte) transition is applied.
+    if input.start() == input.end() {
+        dfa_eoi_rev(dfa, input, &mut sid, &mut mat)?;
+        return Ok(mat);
+    }
+    let mut at = input.end() - 1;
+    loop {
+        sid = dfa.next_state(sid, input.haystack()[at]);
+        if dfa.is_special_state(sid) {
+            if dfa.is_match_state(sid) {
+                let pattern = dfa.match_pattern(sid, 0);
+                // Since reverse searches report the beginning of a
+                // match and the beginning is inclusive (not exclusive
+                // like the end of a match), we add 1 to make it
+                // inclusive.
+                mat = Some(HalfMatch::new(pattern, at + 1));
+            } else if dfa.is_dead_state(sid) {
+                return Ok(mat);
+            } else if dfa.is_quit_state(sid) {
+                return Err(MatchError::quit(input.haystack()[at], at).into());
+            }
+        }
+        if at == input.start() {
+            break;
+        }
+        at -= 1;
+        // The backstop: the byte at `min_start` itself may still be
+        // consumed, but stepping to any position before it is refused.
+        if at < min_start {
+            trace!(
+                "reached position {at} which is before the previous literal \
+				 match, quitting to avoid quadratic behavior",
+            );
+            return Err(RetryError::Quadratic(RetryQuadraticError::new()));
+        }
+    }
+    // Record whether the DFA was dead *before* the final transition below,
+    // since that transition overwrites `sid`.
+    let was_dead = dfa.is_dead_state(sid);
+    dfa_eoi_rev(dfa, input, &mut sid, &mut mat)?;
+    // If we reach the beginning of the search and we could otherwise still
+    // potentially keep matching if there was more to match, then we actually
+    // return an error to indicate giving up on this optimization. Why? Because
+    // we can't prove that the real match begins at where we would report it.
+    //
+    // This only happens when all of the following are true:
+    //
+    // 1) We reach the starting point of our search span.
+    // 2) The match we found is before the starting point. (In terms of
+    //    haystack offsets, the match offset is greater than `input.start()`.)
+    // 3) The FSM reports we could possibly find a longer match.
+    //
+    // We need (1) because otherwise the search stopped before the starting
+    // point and there is no possible way to find a more leftmost position.
+    //
+    // We need (2) because if the match found has an offset equal to the minimum
+    // possible offset, then there is no possible more leftmost match.
+    //
+    // We need (3) because if the FSM couldn't continue anyway (i.e., it's in
+    // a dead state), then we know we couldn't find anything more leftmost
+    // than what we have. (We have to check the state we were in prior to the
+    // EOI transition since the EOI transition will usually bring us to a dead
+    // state by virtue of it representing the end-of-input.)
+    if at == input.start()
+        && mat.map_or(false, |m| m.offset() > input.start())
+        && !was_dead
+    {
+        trace!(
+            "reached beginning of search at offset {at} without hitting \
+             a dead state, quitting to avoid potential false positive match",
+        );
+        return Err(RetryError::Quadratic(RetryQuadraticError::new()));
+    }
+    Ok(mat)
+}
+
+/// Runs a reverse search with a lazy (hybrid) DFA, refusing to move the
+/// search position before `min_start`.
+///
+/// The search walks backwards from `input.end()`, feeding one haystack byte
+/// at a time to the DFA and remembering the most recent match state seen.
+/// On success, the last match recorded is returned, or `None` if no match
+/// state was seen. `cache` is the mutable cache passed to every lazy DFA
+/// operation performed here.
+///
+/// # Errors
+///
+/// * `RetryError::Quadratic` is returned when the search position would move
+/// before `min_start`, or when the search reaches `input.start()` without the
+/// DFA being in a dead state while the recorded match begins after
+/// `input.start()`.
+/// * A `MatchError` converted into a `RetryError` is returned when computing
+/// the start state or a transition fails, or when the DFA enters a quit
+/// state.
+#[cfg(feature = "hybrid")]
+pub(crate) fn hybrid_try_search_half_rev(
+    dfa: &crate::hybrid::dfa::DFA,
+    cache: &mut crate::hybrid::dfa::Cache,
+    input: &Input<'_>,
+    min_start: usize,
+) -> Result<Option<HalfMatch>, RetryError> {
+    let mut mat = None;
+    let mut sid = dfa.start_state_reverse(cache, input)?;
+    // An empty span has no bytes to walk over, so only the final
+    // (end-of-input or look-behind byte) transition is applied.
+    if input.start() == input.end() {
+        hybrid_eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
+        return Ok(mat);
+    }
+    let mut at = input.end() - 1;
+    loop {
+        // A failure to compute the next state is reported as the engine
+        // giving up at the current position.
+        sid = dfa
+            .next_state(cache, sid, input.haystack()[at])
+            .map_err(|_| MatchError::gave_up(at))?;
+        if sid.is_tagged() {
+            if sid.is_match() {
+                let pattern = dfa.match_pattern(cache, sid, 0);
+                // Since reverse searches report the beginning of a
+                // match and the beginning is inclusive (not exclusive
+                // like the end of a match), we add 1 to make it
+                // inclusive.
+                mat = Some(HalfMatch::new(pattern, at + 1));
+            } else if sid.is_dead() {
+                return Ok(mat);
+            } else if sid.is_quit() {
+                return Err(MatchError::quit(input.haystack()[at], at).into());
+            }
+        }
+        if at == input.start() {
+            break;
+        }
+        at -= 1;
+        // The backstop: the byte at `min_start` itself may still be
+        // consumed, but stepping to any position before it is refused.
+        if at < min_start {
+            trace!(
+                "reached position {at} which is before the previous literal \
+				 match, quitting to avoid quadratic behavior",
+            );
+            return Err(RetryError::Quadratic(RetryQuadraticError::new()));
+        }
+    }
+    // Record whether the DFA was dead *before* the final transition below,
+    // since that transition overwrites `sid`.
+    let was_dead = sid.is_dead();
+    hybrid_eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
+    // See the comments in the full DFA routine above for why we need this.
+    if at == input.start()
+        && mat.map_or(false, |m| m.offset() > input.start())
+        && !was_dead
+    {
+        trace!(
+            "reached beginning of search at offset {at} without hitting \
+             a dead state, quitting to avoid potential false positive match",
+        );
+        return Err(RetryError::Quadratic(RetryQuadraticError::new()));
+    }
+    Ok(mat)
+}
+
+/// Applies the final transition of a reverse search with a dense DFA.
+///
+/// When the search span begins at offset `0`, the special end-of-input
+/// transition is taken. Otherwise, the haystack byte immediately preceding
+/// the span is fed to the DFA. In both cases `sid` is updated in place and
+/// `mat` is overwritten if the resulting state is a match state.
+///
+/// An error is returned only when feeding the preceding byte leads to a quit
+/// state.
+#[cfg(feature = "dfa-build")]
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn dfa_eoi_rev(
+    dfa: &crate::dfa::dense::DFA<alloc::vec::Vec<u32>>,
+    input: &Input<'_>,
+    sid: &mut crate::util::primitives::StateID,
+    mat: &mut Option<HalfMatch>,
+) -> Result<(), MatchError> {
+    use crate::dfa::Automaton;
+
+    let start = input.get_span().start;
+    if start == 0 {
+        *sid = dfa.next_eoi_state(*sid);
+        if dfa.is_match_state(*sid) {
+            let pattern = dfa.match_pattern(*sid, 0);
+            *mat = Some(HalfMatch::new(pattern, 0));
+        }
+        // N.B. There is no 'is_quit' check in this case since the EOI
+        // transition can never lead to a quit state.
+        debug_assert!(!dfa.is_quit_state(*sid));
+        return Ok(());
+    }
+
+    let prev = start - 1;
+    let byte = input.haystack()[prev];
+    *sid = dfa.next_state(*sid, byte);
+    if dfa.is_match_state(*sid) {
+        let pattern = dfa.match_pattern(*sid, 0);
+        *mat = Some(HalfMatch::new(pattern, start));
+    } else if dfa.is_quit_state(*sid) {
+        return Err(MatchError::quit(byte, prev));
+    }
+    Ok(())
+}
+
+/// Applies the final transition of a reverse search with a lazy DFA.
+///
+/// When the search span begins at offset `0`, the special end-of-input
+/// transition is taken. Otherwise, the haystack byte immediately preceding
+/// the span is fed to the DFA. In both cases `sid` is updated in place and
+/// `mat` is overwritten if the resulting state is a match state.
+///
+/// An error is returned when a transition cannot be computed (reported as
+/// "gave up" at the span's start offset) or when feeding the preceding byte
+/// leads to a quit state.
+#[cfg(feature = "hybrid")]
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn hybrid_eoi_rev(
+    dfa: &crate::hybrid::dfa::DFA,
+    cache: &mut crate::hybrid::dfa::Cache,
+    input: &Input<'_>,
+    sid: &mut crate::hybrid::LazyStateID,
+    mat: &mut Option<HalfMatch>,
+) -> Result<(), MatchError> {
+    let start = input.get_span().start;
+    if start == 0 {
+        let next = dfa.next_eoi_state(cache, *sid);
+        *sid = next.map_err(|_| MatchError::gave_up(start))?;
+        if sid.is_match() {
+            let pattern = dfa.match_pattern(cache, *sid, 0);
+            *mat = Some(HalfMatch::new(pattern, 0));
+        }
+        // N.B. There is no 'is_quit' check in this case since the EOI
+        // transition can never lead to a quit state.
+        debug_assert!(!sid.is_quit());
+        return Ok(());
+    }
+
+    let prev = start - 1;
+    let byte = input.haystack()[prev];
+    let next = dfa.next_state(cache, *sid, byte);
+    *sid = next.map_err(|_| MatchError::gave_up(start))?;
+    if sid.is_match() {
+        let pattern = dfa.match_pattern(cache, *sid, 0);
+        *mat = Some(HalfMatch::new(pattern, start));
+    } else if sid.is_quit() {
+        return Err(MatchError::quit(byte, prev));
+    }
+    Ok(())
+}
--- a/vendor/regex-automata/src/meta/literal.rs
+++ b/vendor/regex-automata/src/meta/literal.rs
@@ -0,0 +1,81 @@
+use alloc::{vec, vec::Vec};
+
+use regex_syntax::hir::Hir;
+
+use crate::{meta::regex::RegexInfo, util::search::MatchKind};
+
+/// Extracts the alternates of a pure alternation-of-literals regex as a list
+/// of byte strings.
+///
+/// Many things cause this to bail out with `None`. Roughly, only regexes
+/// shaped like 'foo|bar|baz|...|quux' qualify, and even then `None` is
+/// returned when there are "too few" alternates, since the regex engine is
+/// likely the faster choice in that case.
+///
+/// At present, `None` is always returned unless 'hirs.len() == 1'.
+pub(crate) fn alternation_literals(
+    info: &RegexInfo,
+    hirs: &[&Hir],
+) -> Option<Vec<Vec<u8>>> {
+    use regex_syntax::hir::{HirKind, Literal};
+
+    // No Aho-Corasick searcher can be built without this feature, so there
+    // is no point in doing any of the work below.
+    if !cfg!(feature = "perf-literal-multisubstring") {
+        return None;
+    }
+    // Checked first so that `props()[0]` is only touched in the
+    // single-pattern case.
+    if hirs.len() != 1 {
+        return None;
+    }
+    // A bit of a hack: once `is_alternation_literal` holds, the shape of the
+    // HIR is known, which is what justifies the `unreachable!` calls below.
+    let props = &info.props()[0];
+    let eligible = props.look_set().is_empty()
+        && props.explicit_captures_len() == 0
+        && props.is_alternation_literal()
+        && info.config().get_match_kind() == MatchKind::LeftmostFirst;
+    if !eligible {
+        return None;
+    }
+    let alts = match hirs[0].kind() {
+        HirKind::Alternation(alts) => alts,
+        _ => return None, // one literal isn't worth it
+    };
+
+    let mut lits = vec![];
+    for alt in alts.iter() {
+        let lit: Vec<u8> = match alt.kind() {
+            HirKind::Literal(Literal(bytes)) => bytes.to_vec(),
+            HirKind::Concat(exprs) => {
+                let mut buf = vec![];
+                for e in exprs.iter() {
+                    if let HirKind::Literal(Literal(bytes)) = e.kind() {
+                        buf.extend_from_slice(bytes);
+                    } else {
+                        unreachable!("expected literal, got {e:?}");
+                    }
+                }
+                buf
+            }
+            _ => unreachable!("expected literal or concat, got {alt:?}"),
+        };
+        lits.push(lit);
+    }
+    // Why the threshold? With only a few literals, the lazy DFA will likely
+    // be chosen, and it in turn is likely to beat Aho-Corasick. That's mostly
+    // because Aho-Corasick has no "lazy DFA" of its own: it has a contiguous
+    // NFA or a full DFA. The full DFA is rarely used since it is so hungry
+    // (in time and space), and the contiguous NFA is decently fast, but not
+    // as fast as a well oiled lazy DFA.
+    //
+    // Once the number of literals gets large, though, the lazy DFA is likely
+    // to start thrashing because of its modest default cache size. Exactly
+    // when that happens is unknown. The cutoff below is a guess based on ad
+    // hoc benchmarking; past it, Aho-Corasick is preferred, where even the
+    // contiguous NFA is likely to do much better.
+    if lits.len() >= 3000 {
+        return Some(lits);
+    }
+    debug!("skipping Aho-Corasick because there are too few literals");
+    None
+}
--- a/vendor/regex-automata/src/meta/mod.rs
+++ b/vendor/regex-automata/src/meta/mod.rs
@@ -0,0 +1,62 @@
+/*!
+Provides a regex matcher that composes several other regex matchers
+automatically.
+
+This module is home to a meta [`Regex`], which provides a convenient high
+level API for executing regular expressions in linear time.
+
+# Comparison with the `regex` crate
+
+A meta `Regex` is the implementation used directly by the `regex` crate.
+Indeed, the `regex` crate API is essentially just a light wrapper over a meta
+`Regex`. This means that if you need the full flexibility offered by this
+API, then you should be able to switch to using this API directly without
+any changes in match semantics or syntax. However, there are some API level
+differences:
+
+* The `regex` crate API returns match objects that include references to the
+haystack itself, which in turn makes it easy to access the matching strings
+without having to slice the haystack yourself. In contrast, a meta `Regex`
+returns match objects that only have offsets in them.
+* At time of writing, a meta `Regex` doesn't have some of the convenience
+routines that the `regex` crate has, such as replacements. Note though that
+[`Captures::interpolate_string`](crate::util::captures::Captures::interpolate_string)
+will handle the replacement string interpolation for you.
+* A meta `Regex` supports the [`Input`](crate::Input) abstraction, which
+provides a way to configure a search in more ways than is supported by the
+`regex` crate. For example, [`Input::anchored`](crate::Input::anchored) can
+be used to run an anchored search, regardless of whether the pattern is itself
+anchored with a `^`.
+* A meta `Regex` supports multi-pattern searching everywhere.
+Indeed, every [`Match`](crate::Match) returned by the search APIs
+includes a [`PatternID`](crate::PatternID) indicating which pattern
+matched. In the single pattern case, all matches correspond to
+[`PatternID::ZERO`](crate::PatternID::ZERO). In contrast, the `regex` crate
+has distinct `Regex` and `RegexSet` APIs. The former only supports a single
+pattern, while the latter supports multiple patterns but cannot report the
+offsets of a match.
+* A meta `Regex` provides the explicit capability of bypassing its internal
+memory pool for automatically acquiring mutable scratch space required by its
+internal regex engines. Namely, a [`Cache`] can be explicitly provided to lower
+level routines such as [`Regex::search_with`].
+
+*/
+
+pub use self::{
+    error::BuildError,
+    regex::{
+        Builder, Cache, CapturesMatches, Config, FindMatches, Regex, Split,
+        SplitN,
+    },
+};
+
+mod error;
+#[cfg(any(feature = "dfa-build", feature = "hybrid"))]
+mod limited;
+mod literal;
+mod regex;
+mod reverse_inner;
+#[cfg(any(feature = "dfa-build", feature = "hybrid"))]
+mod stopat;
+mod strategy;
+mod wrappers;
--- a/vendor/regex-automata/src/meta/regex.rs
+++ b/vendor/regex-automata/src/meta/regex.rs
--- a/vendor/regex-automata/src/meta/reverse_inner.rs
+++ b/vendor/regex-automata/src/meta/reverse_inner.rs
@@ -0,0 +1,220 @@
+/*!
+A module dedicated to plucking inner literals out of a regex pattern, and
+then constructing a prefilter for them. We also include a regex pattern
+"prefix" that corresponds to the bits of the regex that need to match before
+the literals do. The reverse inner optimization then proceeds by looking for
+matches of the inner literal(s), and then doing a reverse search of the prefix
+from the start of the literal match to find the overall start position of the
+match.
+
+The essential invariant we want to uphold here is that the literals we return
+reflect a set where *at least* one of them must match in order for the overall
+regex to match. We also need to maintain the invariant that the regex prefix
+returned corresponds to the entirety of the regex up until the literals we
+return.
+
+This somewhat limits what we can do. That is, if we have a regex like
+`\w+(@!|%%)\w+`, then we can pluck the `{@!, %%}` out and build a prefilter
+from it. Then we just need to compile `\w+` in reverse. No fuss no muss. But if
+we have a regex like `\d+@!|\w+%%`, then we get kind of stymied. Technically,
+we could still extract `{@!, %%}`, and it is true that at least one of them
+must match. But then, what is our regex prefix? Again, in theory, that could be
+`\d+|\w+`, but that's not quite right, because the `\d+` only matches when `@!`
+matches, and `\w+` only matches when `%%` matches.
+
+All of that is technically possible to do, but it seemingly requires a lot of
+sophistication and machinery. Probably the way to tackle that is with some kind
+of formalism and approach this problem more generally.
+
+For now, the code below basically just looks for a top-level concatenation.
+And if it can find one, it looks for literals in each of the direct child
+sub-expressions of that concatenation. If some good ones are found, we return
+those and a concatenation of the Hir expressions seen up to that point.
+*/
+
+use alloc::vec::Vec;
+
+use regex_syntax::hir::{self, literal, Hir, HirKind};
+
+use crate::{util::prefilter::Prefilter, MatchKind};
+
+/// Attempts to extract an "inner" prefilter from the given HIR expressions. If
+/// one was found, then a concatenation of the HIR expressions that precede it
+/// is returned along with the prefilter.
+///
+/// The idea here is that the prefilter returned can be used to find candidate
+/// matches. And then the HIR returned can be used to build a reverse regex
+/// matcher, which will find the start of the candidate match. Finally, the
+/// match still has to be confirmed with a normal anchored forward scan to find
+/// the end position of the match.
+///
+/// `None` is returned when `hirs` does not contain exactly one pattern, when
+/// no top-level concatenation can be found in it, or when none of the
+/// concatenation's sub-expressions (other than the first) yields a fast
+/// prefilter.
+///
+/// Note that this assumes leftmost-first match semantics, so callers must
+/// not call this otherwise.
+pub(crate) fn extract(hirs: &[&Hir]) -> Option<(Hir, Prefilter)> {
+    if hirs.len() != 1 {
+        debug!(
+            "skipping reverse inner optimization since it only \
+             supports 1 pattern, {} were given",
+            hirs.len(),
+        );
+        return None;
+    }
+    let mut concat = match top_concat(hirs[0]) {
+        Some(concat) => concat,
+        None => {
+            debug!(
+                "skipping reverse inner optimization because a top-level \
+                 concatenation could not be found",
+            );
+            return None;
+        }
+    };
+    // We skip the first HIR because if it did have a prefix prefilter in it,
+    // we probably wouldn't be here looking for an inner prefilter.
+    for i in 1..concat.len() {
+        let hir = &concat[i];
+        let pre = match prefilter(hir) {
+            None => continue,
+            Some(pre) => pre,
+        };
+        // Even if we got a prefilter, if it isn't considered "fast," then we
+        // probably don't want to bother with it. Namely, since the reverse
+        // inner optimization requires some overhead, it likely only makes
+        // sense if the prefilter scan itself is (believed) to be much faster
+        // than the regex engine.
+        if !pre.is_fast() {
+            debug!(
+                "skipping extracted inner prefilter because \
+                 it probably isn't fast"
+            );
+            continue;
+        }
+        // Split the concatenation at `i`: everything from `i` onward is the
+        // suffix that begins with the literals, and what remains in `concat`
+        // is the prefix that gets compiled in reverse.
+        let concat_suffix = Hir::concat(concat.split_off(i));
+        let concat_prefix = Hir::concat(concat);
+        // Look for a prefilter again. Why? Because above we only looked for
+        // a prefilter on the individual 'hir', but we might be able to find
+        // something better and more discriminatory by looking at the entire
+        // suffix. We don't do this above to avoid making this loop worst case
+        // quadratic in the length of 'concat'.
+        let pre = match prefilter(&concat_suffix) {
+            Some(better) if better.is_fast() => better,
+            _ => pre,
+        };
+        return Some((concat_prefix, pre));
+    }
+    debug!(
+        "skipping reverse inner optimization because a top-level \
+         sub-expression with a fast prefilter could not be found"
+    );
+    None
+}
+
+/// Tries to build a prefilter from the prefix literals of an HIR expression.
+///
+/// Some massaging is done along the way so that whatever prefilter comes out
+/// is *probably* fast. The false positive rate matters much more for things
+/// like the reverse inner optimization, since each candidate match can
+/// require a good deal of additional work.
+///
+/// Note that this assumes leftmost-first match semantics, so callers must
+/// not call this otherwise.
+fn prefilter(hir: &Hir) -> Option<Prefilter> {
+    let mut prefixes = literal::Extractor::new()
+        .kind(literal::ExtractKind::Prefix)
+        .extract(hir);
+    debug!(
+        "inner prefixes (len={:?}) extracted before optimization: {:?}",
+        prefixes.len(),
+        prefixes
+    );
+    // These are inner literals, so they can never be exact, but the
+    // extractor has no way of knowing that. Marking them inexact matters
+    // because it can change how the literals get optimized: optimization
+    // gives "all literals are exact" a very high weight, on the presumption
+    // that any literal match is an overall match. That isn't true here.
+    //
+    // In practice, this avoids plucking out an ASCII-only \s as an
+    // alternation of single-byte whitespace characters.
+    prefixes.make_inexact();
+    prefixes.optimize_for_prefix_by_preference();
+    debug!(
+        "inner prefixes (len={:?}) extracted after optimization: {:?}",
+        prefixes.len(),
+        prefixes
+    );
+    let lits = prefixes.literals()?;
+    Prefilter::new(MatchKind::LeftmostFirst, lits)
+}
+
+/// Finds a "top level" concatenation in the given HIR and returns copies of
+/// its sub-expressions.
+///
+/// Enclosing capturing groups are looked through, so a concat embedded in a
+/// capturing group is still found. Any other kind of expression at the top
+/// yields `None`. In other words, this is pretty conservative about what it
+/// returns.
+///
+/// The returned sub-expressions have all capturing groups removed, i.e., the
+/// concat is "flattened" with respect to capturing groups. That keeps the
+/// prefix detection logic above a bit simpler, and it is correct because 1)
+/// capturing groups never influence whether a match occurs or not and 2)
+/// capturing groups are not used when doing the reverse inner search to find
+/// the start of the match.
+fn top_concat(mut hir: &Hir) -> Option<Vec<Hir>> {
+    // Peel away enclosing capture groups until something else shows up.
+    let subs = loop {
+        match hir.kind() {
+            HirKind::Capture(hir::Capture { sub, .. }) => hir = sub,
+            HirKind::Concat(subs) => break subs,
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Look(_)
+            | HirKind::Repetition(_)
+            | HirKind::Alternation(_) => return None,
+        }
+    };
+    // The flattening copy is only performed once a "top level" concat is
+    // known to exist, which avoids extra work in cases where the result
+    // definitely would not be used. (The copy may still be wasted if no
+    // literals can be extracted from it later on.)
+    let flattened: Vec<Hir> = subs.iter().map(flatten).collect();
+    match Hir::concat(flattened).into_kind() {
+        HirKind::Concat(xs) => Some(xs),
+        // This case really can occur: 'Hir::concat' may simplify the
+        // expression so much that the concatenation itself disappears. One
+        // wonders whether literals should still be extracted in such
+        // cases, but in theory, getting here means that a "real" prefilter
+        // already failed to be extracted, so it is probably best to leave
+        // well enough alone. (A "real" prefilter is unbothered by
+        // "top-level concats" and "capturing groups.")
+        _ => None,
+    }
+}
+
+/// Produces a copy of the given HIR in which every capturing group has been
+/// replaced by the sub-expression it wraps. All other kinds of expressions
+/// are rebuilt as they are, with their children flattened recursively.
+fn flatten(hir: &Hir) -> Hir {
+    match hir.kind() {
+        // The interesting case: the group itself is discarded and only its
+        // (flattened) child expression is kept.
+        HirKind::Capture(hir::Capture { sub, .. }) => flatten(sub),
+        HirKind::Empty => Hir::empty(),
+        HirKind::Literal(hir::Literal(bytes)) => Hir::literal(bytes.clone()),
+        HirKind::Class(class) => Hir::class(class.clone()),
+        HirKind::Look(look) => Hir::look(*look),
+        HirKind::Repetition(rep) => Hir::repetition(rep.with(flatten(&rep.sub))),
+        HirKind::Concat(subs) => Hir::concat(subs.iter().map(flatten).collect()),
+        HirKind::Alternation(alts) => {
+            Hir::alternation(alts.iter().map(flatten).collect())
+        }
+    }
+}
--- a/vendor/regex-automata/src/meta/stopat.rs
+++ b/vendor/regex-automata/src/meta/stopat.rs
@@ -0,0 +1,212 @@
+/*!
+This module defines two bespoke forward DFA search routines. One for the lazy
+DFA and one for the fully compiled DFA. These routines differ from the normal
+ones by reporting the position at which the search terminates when a match
+*isn't* found.
+
+The position at which a search terminates is useful in contexts where the meta
+regex engine runs optimizations that could go quadratic if we aren't careful.
+Namely, a regex search *could* scan to the end of the haystack only to report a
+non-match. If the caller doesn't know that the search scanned to the end of the
+haystack, it might restart the search at the next literal candidate it finds
+and repeat the process.
+
+Providing the caller with the position at which the search stopped provides a
+way for the caller to determine the point at which subsequent scans should not
+pass. This is principally used in the "reverse inner" optimization, which works
+like this:
+
+1. Look for a match of an inner literal. Say, 'Z' in '\w+Z\d+'.
+2. At the spot where 'Z' matches, do a reverse anchored search from there for
+'\w+'.
+3. If the reverse search matches, it corresponds to the start position of a
+(possible) match. At this point, do a forward anchored search to find the end
+position. If an end position is found, then we have a match and we know its
+bounds.
+
+If the forward anchored search in (3) searches the entire rest of the haystack
+but reports a non-match, then a naive implementation of the above will continue
+back at step 1 looking for more candidates. There might still be a match to be
+found! It's possible. But we already scanned the whole haystack. So if we keep
+repeating the process, then we might wind up taking quadratic time in the size
+of the haystack, which is not great.
+
+So if the forward anchored search in (3) reports the position at which it
+stops, then we can detect whether quadratic behavior might be occurring in
+steps (1) and (2). For (1), it occurs if the literal candidate found occurs
+*before* the end of the previous search in (3), since that means we're now
+going to look for another match in a place where the forward search has already
+scanned. It is *correct* to do so, but our technique has become inefficient.
+For (2), quadratic behavior occurs similarly when its reverse search extends
+past the point where the previous forward search in (3) terminated. Indeed, to
+implement (2), we use the sibling 'limited' module for ensuring our reverse
+scan doesn't go further than we want.
+
+See the 'opt/reverse-inner' benchmarks in rebar for a real demonstration of
+how quadratic behavior is mitigated.
+*/
+
+use crate::{meta::error::RetryFailError, HalfMatch, Input, MatchError};
+
+/// Runs a forward half-match search with a fully compiled dense DFA, also
+/// reporting where the search stopped when no match was found.
+///
+/// Returns `Ok(Ok(m))` when a match is found and `Ok(Err(at))` otherwise,
+/// where `at` is the offset at which the search stopped. An `Err` is returned
+/// when computing the start state fails or when a quit state is entered.
+#[cfg(feature = "dfa-build")]
+pub(crate) fn dfa_try_search_half_fwd(
+    dfa: &crate::dfa::dense::DFA<alloc::vec::Vec<u32>>,
+    input: &Input<'_>,
+) -> Result<Result<HalfMatch, usize>, RetryFailError> {
+    use crate::dfa::{accel, Automaton};
+
+    let (haystack, end) = (input.haystack(), input.end());
+    let mut last_match = None;
+    let mut sid = dfa.start_state_forward(input)?;
+    let mut at = input.start();
+    while at < end {
+        sid = dfa.next_state(sid, haystack[at]);
+        if !dfa.is_special_state(sid) {
+            at += 1;
+            continue;
+        }
+        let is_match = dfa.is_match_state(sid);
+        if is_match {
+            let hm = HalfMatch::new(dfa.match_pattern(sid, 0), at);
+            last_match = Some(hm);
+            if input.get_earliest() {
+                return Ok(Ok(hm));
+            }
+        }
+        // An accelerated state, matching or not, moves 'at' to wherever
+        // 'accel::find_fwd' points, or to the end of the search if it finds
+        // nothing.
+        if dfa.is_accel_state(sid) {
+            let needles = dfa.accelerator(sid);
+            at = accel::find_fwd(needles, haystack, at).unwrap_or(end);
+            continue;
+        }
+        if !is_match {
+            if dfa.is_dead_state(sid) {
+                return Ok(last_match.ok_or(at));
+            } else if dfa.is_quit_state(sid) {
+                return Err(MatchError::quit(haystack[at], at).into());
+            }
+            // Neither match, accel, dead nor quit: only a specialized start
+            // state remains. Ideally the DFA used here would not specialize
+            // start states at all, but in practice the DFA built for the
+            // full regex is reused, and that one specializes start states
+            // whenever there is a prefilter.
+            debug_assert!(dfa.is_start_state(sid));
+        }
+        at += 1;
+    }
+    dfa_eoi_fwd(dfa, input, &mut sid, &mut last_match)?;
+    Ok(last_match.ok_or(at))
+}
+
+/// Runs a forward half-match search with a lazy DFA, also reporting where the
+/// search stopped when no match was found.
+///
+/// Returns `Ok(Ok(m))` when a match is found and `Ok(Err(at))` otherwise,
+/// where `at` is the offset at which the search stopped. An `Err` is returned
+/// when computing the start state fails, when a transition cannot be computed
+/// (reported as "gave up") or when a quit state is entered.
+#[cfg(feature = "hybrid")]
+pub(crate) fn hybrid_try_search_half_fwd(
+    dfa: &crate::hybrid::dfa::DFA,
+    cache: &mut crate::hybrid::dfa::Cache,
+    input: &Input<'_>,
+) -> Result<Result<HalfMatch, usize>, RetryFailError> {
+    let (haystack, end) = (input.haystack(), input.end());
+    let mut last_match = None;
+    let mut sid = dfa.start_state_forward(cache, input)?;
+    let mut at = input.start();
+    while at < end {
+        sid = match dfa.next_state(cache, sid, haystack[at]) {
+            Ok(next) => next,
+            Err(_) => return Err(MatchError::gave_up(at).into()),
+        };
+        // Untagged states need no special handling, so just move on.
+        if !sid.is_tagged() {
+            at += 1;
+            continue;
+        }
+        if sid.is_match() {
+            let hm = HalfMatch::new(dfa.match_pattern(cache, sid, 0), at);
+            last_match = Some(hm);
+            if input.get_earliest() {
+                return Ok(Ok(hm));
+            }
+        } else if sid.is_dead() {
+            return Ok(last_match.ok_or(at));
+        } else if sid.is_quit() {
+            return Err(MatchError::quit(haystack[at], at).into());
+        } else {
+            // dfa.next_state() should NEVER hand back an unknown state ID.
+            debug_assert!(!sid.is_unknown());
+            // That leaves only a specialized start state. Ideally the lazy
+            // DFA used here would not specialize start states at all, but in
+            // practice the lazy DFA built for the full regex is reused, and
+            // that one specializes start states whenever there is a
+            // prefilter.
+            debug_assert!(sid.is_start());
+        }
+        at += 1;
+    }
+    hybrid_eoi_fwd(dfa, cache, input, &mut sid, &mut last_match)?;
+    Ok(last_match.ok_or(at))
+}
+
+/// Performs the final transition of a forward dense DFA search.
+///
+/// If the haystack has a byte at the end offset of the search span, the
+/// transition is taken on that byte. Otherwise the special end-of-input
+/// transition is taken.
+///
+/// `sid` is updated to the resulting state, and `mat` is overwritten when
+/// that state is a match state. An error is returned only when the byte
+/// following the span leads to a quit state.
+#[cfg(feature = "dfa-build")]
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn dfa_eoi_fwd(
+    dfa: &crate::dfa::dense::DFA<alloc::vec::Vec<u32>>,
+    input: &Input<'_>,
+    sid: &mut crate::util::primitives::StateID,
+    mat: &mut Option<HalfMatch>,
+) -> Result<(), MatchError> {
+    use crate::dfa::Automaton;
+
+    let haystack = input.haystack();
+    let end = input.get_span().end;
+    if let Some(&byte) = haystack.get(end) {
+        *sid = dfa.next_state(*sid, byte);
+        if dfa.is_match_state(*sid) {
+            *mat = Some(HalfMatch::new(dfa.match_pattern(*sid, 0), end));
+        } else if dfa.is_quit_state(*sid) {
+            return Err(MatchError::quit(byte, end));
+        }
+    } else {
+        *sid = dfa.next_eoi_state(*sid);
+        if dfa.is_match_state(*sid) {
+            let pid = dfa.match_pattern(*sid, 0);
+            *mat = Some(HalfMatch::new(pid, haystack.len()));
+        }
+        // N.B. There is no need for an 'is_quit' check here, since the EOI
+        // transition can never lead to a quit state.
+        debug_assert!(!dfa.is_quit_state(*sid));
+    }
+    Ok(())
+}
+
+/// Performs the final transition of a forward lazy DFA search.
+///
+/// If the haystack has a byte at the end offset of the search span, the
+/// transition is taken on that byte. Otherwise the special end-of-input
+/// transition is taken.
+///
+/// `sid` is updated to the resulting state, and `mat` is overwritten when
+/// that state is a match state. An error is returned when a transition cannot
+/// be computed (reported as "gave up") or when the byte following the span
+/// leads to a quit state.
+#[cfg(feature = "hybrid")]
+#[cfg_attr(feature = "perf-inline", inline(always))]
+fn hybrid_eoi_fwd(
+    dfa: &crate::hybrid::dfa::DFA,
+    cache: &mut crate::hybrid::dfa::Cache,
+    input: &Input<'_>,
+    sid: &mut crate::hybrid::LazyStateID,
+    mat: &mut Option<HalfMatch>,
+) -> Result<(), MatchError> {
+    let haystack = input.haystack();
+    let end = input.get_span().end;
+    if let Some(&byte) = haystack.get(end) {
+        *sid = match dfa.next_state(cache, *sid, byte) {
+            Ok(next) => next,
+            Err(_) => return Err(MatchError::gave_up(end)),
+        };
+        if sid.is_match() {
+            let pid = dfa.match_pattern(cache, *sid, 0);
+            *mat = Some(HalfMatch::new(pid, end));
+        } else if sid.is_quit() {
+            return Err(MatchError::quit(byte, end));
+        }
+    } else {
+        *sid = match dfa.next_eoi_state(cache, *sid) {
+            Ok(next) => next,
+            Err(_) => return Err(MatchError::gave_up(haystack.len())),
+        };
+        if sid.is_match() {
+            let pid = dfa.match_pattern(cache, *sid, 0);
+            *mat = Some(HalfMatch::new(pid, haystack.len()));
+        }
+        // N.B. There is no need for an 'is_quit' check here, since the EOI
+        // transition can never lead to a quit state.
+        debug_assert!(!sid.is_quit());
+    }
+    Ok(())
+}
--- a/vendor/regex-automata/src/meta/strategy.rs
+++ b/vendor/regex-automata/src/meta/strategy.rs
--- a/vendor/regex-automata/src/meta/wrappers.rs
+++ b/vendor/regex-automata/src/meta/wrappers.rs