Vendor dependencies for 0.3.0 release

This commit is contained in:
2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

307
vendor/memchr/src/tests/memchr/mod.rs vendored Normal file
View File

@@ -0,0 +1,307 @@
use alloc::{
string::{String, ToString},
vec,
vec::Vec,
};
use crate::ext::Byte;
pub(crate) mod naive;
#[macro_use]
pub(crate) mod prop;
/// Hand-written seed cases for the byte search test `Runner`.
///
/// Each seed names a haystack, the needle bytes to look for and every offset
/// in the haystack at which one of those needles occurs (in ascending order).
/// `Seed::generate` expands each seed into many padded variants.
const SEEDS: &[Seed] = &[
    // one needle (offered to every runner, since `needle_len` is at least 1)
    Seed { haystack: "a", needles: &[b'a'], positions: &[0] },
    Seed { haystack: "aa", needles: &[b'a'], positions: &[0, 1] },
    Seed { haystack: "aaa", needles: &[b'a'], positions: &[0, 1, 2] },
    Seed { haystack: "", needles: &[b'a'], positions: &[] },
    Seed { haystack: "z", needles: &[b'a'], positions: &[] },
    Seed { haystack: "zz", needles: &[b'a'], positions: &[] },
    Seed { haystack: "zza", needles: &[b'a'], positions: &[2] },
    Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] },
    Seed { haystack: "zzza", needles: &[b'a'], positions: &[3] },
    Seed { haystack: "\x00a", needles: &[b'a'], positions: &[1] },
    Seed { haystack: "\x00", needles: &[b'\x00'], positions: &[0] },
    Seed { haystack: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1] },
    Seed { haystack: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2] },
    Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16] },
    Seed {
        haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
        needles: &[b'a'],
        positions: &[32],
    },
    // two needles (applied to memchr2 + memchr3)
    Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
    // NOTE(review): identical to the previous seed; kept as-is.
    Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
    Seed { haystack: "az", needles: &[b'x', b'y'], positions: &[] },
    Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[0] },
    Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[1] },
    Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5] },
    Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5] },
    // three needles (applied to memchr3)
    Seed {
        haystack: "xyz",
        needles: &[b'x', b'y', b'z'],
        positions: &[0, 1, 2],
    },
    Seed {
        haystack: "zxy",
        needles: &[b'x', b'y', b'z'],
        positions: &[0, 1, 2],
    },
    Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1] },
    Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[0] },
    Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[2] },
];
/// Runs a host of byte search (memchr-style) tests built from `SEEDS`.
///
/// The closure handed to each `Runner` method may return `None` for a
/// haystack/needle combination it does not support, in which case that
/// particular test is skipped.
pub(crate) struct Runner {
/// The largest number of needle bytes a seed may have and still be run.
/// Seeds with more needles than this are skipped.
needle_len: usize,
}
impl Runner {
/// Create a new test runner for forward and reverse byte search
/// implementations.
///
/// The `needle_len` given must be at most `3` and at least `1`. It
/// corresponds to the number of needle bytes to search for.
pub(crate) fn new(needle_len: usize) -> Runner {
assert!(needle_len >= 1, "needle_len must be at least 1");
assert!(needle_len <= 3, "needle_len must be at most 3");
Runner { needle_len }
}
/// Run all tests. This panics on the first failure.
///
/// If the implementation being tested returns `None` for a particular
/// haystack/needle combination, then that test is skipped.
pub(crate) fn forward_iter<F>(self, mut test: F)
where
F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,
{
for seed in SEEDS.iter() {
if seed.needles.len() > self.needle_len {
continue;
}
for t in seed.generate() {
let results = match test(t.haystack.as_bytes(), &t.needles) {
None => continue,
Some(results) => results,
};
assert_eq!(
t.expected,
results,
"needles: {:?}, haystack: {:?}",
t.needles
.iter()
.map(|&b| b.to_char())
.collect::<Vec<char>>(),
t.haystack,
);
}
}
}
/// Run all tests in the reverse direction. This panics on the first
/// failure.
///
/// If the implementation being tested returns `None` for a particular
/// haystack/needle combination, then that test is skipped.
pub(crate) fn reverse_iter<F>(self, mut test: F)
where
F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,
{
for seed in SEEDS.iter() {
if seed.needles.len() > self.needle_len {
continue;
}
for t in seed.generate() {
let mut results = match test(t.haystack.as_bytes(), &t.needles)
{
None => continue,
Some(results) => results,
};
results.reverse();
assert_eq!(
t.expected,
results,
"needles: {:?}, haystack: {:?}",
t.needles
.iter()
.map(|&b| b.to_char())
.collect::<Vec<char>>(),
t.haystack,
);
}
}
}
/// Run all tests as counting tests. This panics on the first failure.
///
/// That is, this only checks that the number of matches is correct and
/// not whether the offsets of each match are.
pub(crate) fn count_iter<F>(self, mut test: F)
where
F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static,
{
for seed in SEEDS.iter() {
if seed.needles.len() > self.needle_len {
continue;
}
for t in seed.generate() {
let got = match test(t.haystack.as_bytes(), &t.needles) {
None => continue,
Some(got) => got,
};
assert_eq!(
t.expected.len(),
got,
"needles: {:?}, haystack: {:?}",
t.needles
.iter()
.map(|&b| b.to_char())
.collect::<Vec<char>>(),
t.haystack,
);
}
}
}
/// Like `Runner::forward`, but for a function that returns only the next
/// match and not all matches.
///
/// If the function returns `None`, then it is skipped.
pub(crate) fn forward_oneshot<F>(self, mut test: F)
where
F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
{
self.forward_iter(move |haystack, needles| {
let mut start = 0;
let mut results = vec![];
while let Some(i) = test(&haystack[start..], needles)? {
results.push(start + i);
start += i + 1;
}
Some(results)
})
}
/// Like `Runner::reverse`, but for a function that returns only the last
/// match and not all matches.
///
/// If the function returns `None`, then it is skipped.
pub(crate) fn reverse_oneshot<F>(self, mut test: F)
where
F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
{
self.reverse_iter(move |haystack, needles| {
let mut end = haystack.len();
let mut results = vec![];
while let Some(i) = test(&haystack[..end], needles)? {
results.push(i);
end = i;
}
Some(results)
})
}
}
/// A single test for memr?chr{,2,3}.
///
/// Tests are produced from a `Seed` by `Seed::generate`.
#[derive(Clone, Debug)]
struct Test {
/// The string to search in.
haystack: String,
/// The needles to look for.
needles: Vec<u8>,
/// The offsets that are expected to be found for all needles in the
/// forward direction.
expected: Vec<usize>,
}
impl Test {
    /// Builds the unpadded test described by `seed`, copying the seed's
    /// borrowed haystack, needles and positions into owned values.
    fn new(seed: &Seed) -> Test {
        let haystack = seed.haystack.to_string();
        let needles = Vec::from(seed.needles);
        let expected = Vec::from(seed.positions);
        Test { haystack, needles, expected }
    }
}
/// Data that can be expanded into many memchr tests by padding out the corpus.
#[derive(Clone, Debug)]
struct Seed {
/// The thing to search. We use `&str` instead of `&[u8]` because they
/// are nicer to write in tests, and we don't miss much since memchr
/// doesn't care about UTF-8.
///
/// Corpora must not use '%' as a needle: `Seed::generate` pads haystacks
/// with that byte and relies on it never matching.
///
/// NOTE(review): nothing visible in this file checks for '%' (or '#') in a
/// corpus, so this is a convention rather than an enforced rule.
haystack: &'static str,
/// The needles to search for. This is intended to be an alternation of
/// needles. The number of needles may cause this test to be skipped for
/// some memchr variants. For example, a test with 2 needles cannot be used
/// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
/// However, a test with only 1 needle can be used to test all of `memchr`,
/// `memchr2` and `memchr3`.
///
/// NOTE(review): `Seed::generate` passes the needles through unchanged; it
/// does not fill missing needles with unused bytes. Presumably callers deal
/// with a needle slice that is shorter than they need — confirm.
needles: &'static [u8],
/// The positions expected to match for all of the needles.
positions: &'static [usize],
}
impl Seed {
    /// Controls how much we expand the haystack on either side for each test.
    /// We lower this on Miri because otherwise running the tests would take
    /// forever.
    const EXPAND_LEN: usize = {
        #[cfg(not(miri))]
        {
            515
        }
        #[cfg(miri)]
        {
            6
        }
    };

    /// Expand this test into many variations of the same test.
    ///
    /// In particular, this will generate more tests with larger corpus sizes.
    /// The expected positions are updated to maintain the integrity of the
    /// test.
    ///
    /// This is important in testing a memchr implementation, because there are
    /// often different cases depending on the length of the corpus.
    ///
    /// Note that we extend the corpus by adding `%` bytes, which we
    /// don't otherwise use as a needle.
    ///
    /// The first generated test (zero bytes of padding at the start) is the
    /// seed itself.
    fn generate(&self) -> impl Iterator<Item = Test> {
        // `EXPAND_LEN` tests padded at the start plus `EXPAND_LEN - 1` tests
        // padded at the end.
        let mut more = Vec::with_capacity(2 * Seed::EXPAND_LEN);
        // Add bytes to the start of the corpus. Every expected position
        // shifts right by the amount of padding.
        for i in 0..Seed::EXPAND_LEN {
            let mut t = Test::new(self);
            let mut padded = "%".repeat(i);
            padded.push_str(&t.haystack);
            t.haystack = padded;
            for position in t.expected.iter_mut() {
                *position += i;
            }
            more.push(t);
        }
        // Add bytes to the end of the corpus. Expected positions are
        // unaffected. Starts at 1 since zero padding is covered above.
        for i in 1..Seed::EXPAND_LEN {
            let mut t = Test::new(self);
            t.haystack.push_str(&"%".repeat(i));
            more.push(t);
        }
        more.into_iter()
    }
}

33
vendor/memchr/src/tests/memchr/naive.rs vendored Normal file
View File

@@ -0,0 +1,33 @@
/// Returns the offset of the first byte in `haystack` equal to `n1`, or
/// `None` if there is no such byte.
pub(crate) fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
    for (offset, &byte) in haystack.iter().enumerate() {
        if byte == n1 {
            return Some(offset);
        }
    }
    None
}
/// Returns the offset of the first byte in `haystack` equal to `n1` or `n2`,
/// or `None` if there is no such byte.
pub(crate) fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
    let wanted = [n1, n2];
    haystack.iter().position(|byte| wanted.contains(byte))
}
/// Returns the offset of the first byte in `haystack` equal to `n1`, `n2` or
/// `n3`, or `None` if there is no such byte.
pub(crate) fn memchr3(
    n1: u8,
    n2: u8,
    n3: u8,
    haystack: &[u8],
) -> Option<usize> {
    for (offset, byte) in haystack.iter().enumerate() {
        if [n1, n2, n3].contains(byte) {
            return Some(offset);
        }
    }
    None
}
/// Returns the offset of the last byte in `haystack` equal to `n1`, or `None`
/// if there is no such byte.
pub(crate) fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
    for offset in (0..haystack.len()).rev() {
        if haystack[offset] == n1 {
            return Some(offset);
        }
    }
    None
}
/// Returns the offset of the last byte in `haystack` equal to `n1` or `n2`,
/// or `None` if there is no such byte.
pub(crate) fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
    let last = haystack.len().checked_sub(1)?;
    haystack
        .iter()
        .rev()
        .position(|&byte| byte == n1 || byte == n2)
        .map(|from_end| last - from_end)
}
/// Returns the offset of the last byte in `haystack` equal to `n1`, `n2` or
/// `n3`, or `None` if there is no such byte.
pub(crate) fn memrchr3(
    n1: u8,
    n2: u8,
    n3: u8,
    haystack: &[u8],
) -> Option<usize> {
    let wanted = [n1, n2, n3];
    haystack.iter().rposition(|byte| wanted.contains(byte))
}

323
vendor/memchr/src/tests/memchr/prop.rs vendored Normal file
View File

@@ -0,0 +1,323 @@
/// Defines a host of quickcheck tests for the given memchr searcher.
///
/// This is the variant compiled under Miri: it accepts any tokens and expands
/// to nothing, so no quickcheck tests are defined in that configuration.
#[cfg(miri)]
#[macro_export]
macro_rules! define_memchr_quickcheck {
($($tt:tt)*) => {};
}
/// Defines a host of quickcheck tests for the given memchr searcher.
///
/// `$mod` is a module path providing `One`, `Two` and `Three` searcher types.
/// `$cons` names the constructor to call on each of them and defaults to
/// `new`. The constructor must return an `Option`; when it returns `None`
/// the corresponding test case is discarded rather than failed.
///
/// The searchers are compared against the naive implementations in
/// `crate::tests::memchr::naive` and the naive iterators in
/// `crate::tests::memchr::prop`.
#[cfg(not(miri))]
#[macro_export]
macro_rules! define_memchr_quickcheck {
// Shorthand: use `new` as the constructor.
($mod:ident) => {
define_memchr_quickcheck!($mod, new);
};
($mod:ident, $cons:ident) => {
use alloc::vec::Vec;
use quickcheck::TestResult;
use crate::tests::memchr::{
naive,
prop::{double_ended_take, naive1_iter, naive2_iter, naive3_iter},
};
quickcheck::quickcheck! {
// One-shot `find`/`rfind` must agree with the naive implementations.
fn qc_memchr_matches_naive(n1: u8, corpus: Vec<u8>) -> TestResult {
let expected = naive::memchr(n1, &corpus);
let got = match $mod::One::$cons(n1) {
None => return TestResult::discard(),
Some(f) => f.find(&corpus),
};
TestResult::from_bool(expected == got)
}
fn qc_memrchr_matches_naive(n1: u8, corpus: Vec<u8>) -> TestResult {
let expected = naive::memrchr(n1, &corpus);
let got = match $mod::One::$cons(n1) {
None => return TestResult::discard(),
Some(f) => f.rfind(&corpus),
};
TestResult::from_bool(expected == got)
}
fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> TestResult {
let expected = naive::memchr2(n1, n2, &corpus);
let got = match $mod::Two::$cons(n1, n2) {
None => return TestResult::discard(),
Some(f) => f.find(&corpus),
};
TestResult::from_bool(expected == got)
}
fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> TestResult {
let expected = naive::memrchr2(n1, n2, &corpus);
let got = match $mod::Two::$cons(n1, n2) {
None => return TestResult::discard(),
Some(f) => f.rfind(&corpus),
};
TestResult::from_bool(expected == got)
}
fn qc_memchr3_matches_naive(
n1: u8, n2: u8, n3: u8,
corpus: Vec<u8>
) -> TestResult {
let expected = naive::memchr3(n1, n2, n3, &corpus);
let got = match $mod::Three::$cons(n1, n2, n3) {
None => return TestResult::discard(),
Some(f) => f.find(&corpus),
};
TestResult::from_bool(expected == got)
}
fn qc_memrchr3_matches_naive(
n1: u8, n2: u8, n3: u8,
corpus: Vec<u8>
) -> TestResult {
let expected = naive::memrchr3(n1, n2, n3, &corpus);
let got = match $mod::Three::$cons(n1, n2, n3) {
None => return TestResult::discard(),
Some(f) => f.rfind(&corpus),
};
TestResult::from_bool(expected == got)
}
// Pulling items from both ends of the iterator, in the order dictated by
// `take_side` (cycled), must reproduce the naive forward sequence.
fn qc_memchr_double_ended_iter(
needle: u8, data: Vec<u8>, take_side: Vec<bool>
) -> TestResult {
// make nonempty
let mut take_side = take_side;
if take_side.is_empty() { take_side.push(true) };
let finder = match $mod::One::$cons(needle) {
None => return TestResult::discard(),
Some(finder) => finder,
};
let iter = finder.iter(&data);
let got = double_ended_take(
iter,
take_side.iter().cycle().cloned(),
);
let expected = naive1_iter(needle, &data);
TestResult::from_bool(got.iter().cloned().eq(expected))
}
fn qc_memchr2_double_ended_iter(
needle1: u8, needle2: u8, data: Vec<u8>, take_side: Vec<bool>
) -> TestResult {
// make nonempty
let mut take_side = take_side;
if take_side.is_empty() { take_side.push(true) };
let finder = match $mod::Two::$cons(needle1, needle2) {
None => return TestResult::discard(),
Some(finder) => finder,
};
let iter = finder.iter(&data);
let got = double_ended_take(
iter,
take_side.iter().cycle().cloned(),
);
let expected = naive2_iter(needle1, needle2, &data);
TestResult::from_bool(got.iter().cloned().eq(expected))
}
fn qc_memchr3_double_ended_iter(
needle1: u8, needle2: u8, needle3: u8,
data: Vec<u8>, take_side: Vec<bool>
) -> TestResult {
// make nonempty
let mut take_side = take_side;
if take_side.is_empty() { take_side.push(true) };
let finder = match $mod::Three::$cons(needle1, needle2, needle3) {
None => return TestResult::discard(),
Some(finder) => finder,
};
let iter = finder.iter(&data);
let got = double_ended_take(
iter,
take_side.iter().cycle().cloned(),
);
let expected = naive3_iter(needle1, needle2, needle3, &data);
TestResult::from_bool(got.iter().cloned().eq(expected))
}
// Forward and reversed iteration with fixed needles (0, 1 and 2) must
// agree with the naive iterators.
fn qc_memchr1_iter(data: Vec<u8>) -> TestResult {
let needle = 0;
let finder = match $mod::One::$cons(needle) {
None => return TestResult::discard(),
Some(finder) => finder,
};
let got = finder.iter(&data);
let expected = naive1_iter(needle, &data);
TestResult::from_bool(got.eq(expected))
}
fn qc_memchr1_rev_iter(data: Vec<u8>) -> TestResult {
let needle = 0;
let finder = match $mod::One::$cons(needle) {
None => return TestResult::discard(),
Some(finder) => finder,
};
let got = finder.iter(&data).rev();
let expected = naive1_iter(needle, &data).rev();
TestResult::from_bool(got.eq(expected))
}
fn qc_memchr2_iter(data: Vec<u8>) -> TestResult {
let needle1 = 0;
let needle2 = 1;
let finder = match $mod::Two::$cons(needle1, needle2) {
None => return TestResult::discard(),
Some(finder) => finder,
};
let got = finder.iter(&data);
let expected = naive2_iter(needle1, needle2, &data);
TestResult::from_bool(got.eq(expected))
}
fn qc_memchr2_rev_iter(data: Vec<u8>) -> TestResult {
let needle1 = 0;
let needle2 = 1;
let finder = match $mod::Two::$cons(needle1, needle2) {
None => return TestResult::discard(),
Some(finder) => finder,
};
let got = finder.iter(&data).rev();
let expected = naive2_iter(needle1, needle2, &data).rev();
TestResult::from_bool(got.eq(expected))
}
fn qc_memchr3_iter(data: Vec<u8>) -> TestResult {
let needle1 = 0;
let needle2 = 1;
let needle3 = 2;
let finder = match $mod::Three::$cons(needle1, needle2, needle3) {
None => return TestResult::discard(),
Some(finder) => finder,
};
let got = finder.iter(&data);
let expected = naive3_iter(needle1, needle2, needle3, &data);
TestResult::from_bool(got.eq(expected))
}
fn qc_memchr3_rev_iter(data: Vec<u8>) -> TestResult {
let needle1 = 0;
let needle2 = 1;
let needle3 = 2;
let finder = match $mod::Three::$cons(needle1, needle2, needle3) {
None => return TestResult::discard(),
Some(finder) => finder,
};
let got = finder.iter(&data).rev();
let expected = naive3_iter(needle1, needle2, needle3, &data).rev();
TestResult::from_bool(got.eq(expected))
}
fn qc_memchr1_iter_size_hint(data: Vec<u8>) -> TestResult {
// test that the size hint is within reasonable bounds
let needle = 0;
let finder = match $mod::One::$cons(needle) {
None => return TestResult::discard(),
Some(finder) => finder,
};
let mut iter = finder.iter(&data);
// Number of matches not yet yielded by `iter`.
let mut real_count = data
.iter()
.filter(|&&elt| elt == needle)
.count();
while let Some(index) = iter.next() {
real_count -= 1;
let (lower, upper) = iter.size_hint();
assert!(lower <= real_count);
assert!(upper.unwrap() >= real_count);
// The upper bound may not exceed the number of bytes from the
// current match to the end of the data.
assert!(upper.unwrap() <= data.len() - index);
}
TestResult::passed()
}
}
};
}
// Drain a double-ended iterator from both sides. For every `true` yielded by
// `take_side` an item is taken from the front, and for every `false` an item
// is taken from the back. Draining stops as soon as `take_side` is exhausted
// or the requested side has no more items.
//
// The result is every item taken from the front, in the order taken,
// followed by the items taken from the back in the reverse of the order in
// which they were taken.
#[cfg(not(miri))]
pub(crate) fn double_ended_take<I, J>(
    mut iter: I,
    take_side: J,
) -> alloc::vec::Vec<I::Item>
where
    I: DoubleEndedIterator,
    J: Iterator<Item = bool>,
{
    let mut fronts = alloc::vec::Vec::new();
    let mut backs = alloc::vec::Vec::new();
    for from_front in take_side {
        let taken = if from_front { iter.next() } else { iter.next_back() };
        match taken {
            Some(item) if from_front => fronts.push(item),
            Some(item) => backs.push(item),
            None => break,
        }
    }
    backs.reverse();
    fronts.append(&mut backs);
    fronts
}
// Returns a double-ended iterator over the 0-based offsets in `haystack` at
// which the byte equals `n1`, in ascending order.
#[cfg(not(miri))]
pub(crate) fn naive1_iter<'a>(
    n1: u8,
    haystack: &'a [u8],
) -> impl DoubleEndedIterator<Item = usize> + 'a {
    haystack.iter().enumerate().filter_map(move |(offset, &byte)| {
        if byte == n1 {
            Some(offset)
        } else {
            None
        }
    })
}
// Returns a double-ended iterator over the 0-based offsets in `haystack` at
// which the byte equals `n1` or `n2`, in ascending order.
#[cfg(not(miri))]
pub(crate) fn naive2_iter<'a>(
    n1: u8,
    n2: u8,
    haystack: &'a [u8],
) -> impl DoubleEndedIterator<Item = usize> + 'a {
    (0..haystack.len()).filter(move |&offset| {
        let byte = haystack[offset];
        byte == n1 || byte == n2
    })
}
// Returns a double-ended iterator over the 0-based offsets in `haystack` at
// which the byte equals `n1`, `n2` or `n3`, in ascending order.
#[cfg(not(miri))]
pub(crate) fn naive3_iter<'a>(
    n1: u8,
    n2: u8,
    n3: u8,
    haystack: &'a [u8],
) -> impl DoubleEndedIterator<Item = usize> + 'a {
    let wanted = [n1, n2, n3];
    haystack.iter().enumerate().filter_map(move |(offset, byte)| {
        if wanted.contains(byte) {
            Some(offset)
        } else {
            None
        }
    })
}

15
vendor/memchr/src/tests/mod.rs vendored Normal file
View File

@@ -0,0 +1,15 @@
#[macro_use]
pub(crate) mod memchr;
pub(crate) mod packedpair;
#[macro_use]
pub(crate) mod substring;
// Prints the byte order of the target being tested to stderr. This exists
// purely as a debugging aid, particularly for reading CI logs.
#[test]
fn byte_order() {
    if cfg!(target_endian = "little") {
        std::eprintln!("LITTLE ENDIAN");
    }
    if cfg!(target_endian = "big") {
        std::eprintln!("BIG ENDIAN");
    }
}

216
vendor/memchr/src/tests/packedpair.rs vendored Normal file
View File

@@ -0,0 +1,216 @@
use alloc::{boxed::Box, vec, vec::Vec};
/// A set of "packed pair" test seeds. Each seed serves as the base for the
/// generation of many other tests. In essence, the seed captures the pair of
/// bytes we use for a predicate and the first byte of our needle. The tests
/// generated from each seed essentially vary the length of the needle and
/// haystack, while using the rare/first byte configuration from the seed.
///
/// The purpose of this is to test many different needle/haystack lengths.
/// In particular, some of the vector optimizations might only have bugs
/// in haystacks of a certain size.
///
/// Every seed uses `b'x'` as its first byte. The seeds differ in whether the
/// two index bytes are distinct from the first byte and from each other.
const SEEDS: &[Seed] = &[
// Why not use different 'first' bytes? It seemed like a good idea to be
// able to configure it, but when I wrote the test generator below, it
// didn't seem necessary to use for reasons that I forget.
Seed { first: b'x', index1: b'y', index2: b'z' },
Seed { first: b'x', index1: b'x', index2: b'z' },
Seed { first: b'x', index1: b'y', index2: b'x' },
Seed { first: b'x', index1: b'x', index2: b'x' },
Seed { first: b'x', index1: b'y', index2: b'y' },
];
/// Runs a host of "packed pair" search tests.
///
/// These tests specifically look for the occurrence of a possible substring
/// match based on a pair of bytes matching at the right offsets.
pub(crate) struct Runner {
/// The forward search under test. `Runner::run` calls it as
/// `fwd(haystack, needle, index1, index2)`. This is `None` until
/// `Runner::fwd` is called, in which case `Runner::run` does nothing.
fwd: Option<
Box<
dyn FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static,
>,
>,
}
impl Runner {
    /// Create a new test runner for "packed pair" substring search. No
    /// implementation is set initially.
    pub(crate) fn new() -> Runner {
        Runner { fwd: None }
    }

    /// Run all tests. This panics on the first failure.
    ///
    /// If the implementation being tested returns `None` for a particular
    /// haystack/needle combination, then that test is skipped.
    ///
    /// If no forward implementation was set via `Runner::fwd`, then this
    /// does nothing.
    pub(crate) fn run(self) {
        let mut search = match self.fwd {
            Some(search) => search,
            None => return,
        };
        for t in SEEDS.iter().flat_map(|seed| seed.generate()) {
            let got =
                match search(&t.haystack, &t.needle, t.index1, t.index2) {
                    Some(got) => got,
                    None => continue,
                };
            assert_eq!(
                t.fwd, got,
                "FORWARD, needle: {:?}, haystack: {:?}, \
                 index1: {:?}, index2: {:?}",
                t.needle, t.haystack, t.index1, t.index2,
            );
        }
    }

    /// Set the implementation for forward "packed pair" substring search.
    ///
    /// The closure is called as `search(haystack, needle, index1, index2)`.
    ///
    /// If the closure returns `None`, then it is assumed that the given
    /// test cannot be applied to the particular implementation and it is
    /// skipped. For example, if a particular implementation only supports
    /// needles or haystacks for some minimum length.
    ///
    /// If this is not set, then forward "packed pair" search is not tested.
    pub(crate) fn fwd(
        mut self,
        search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static,
    ) -> Runner {
        let boxed = Box::new(search);
        self.fwd = Some(boxed);
        self
    }
}
/// A test that represents the input and expected output to a "packed pair"
/// search function. The test should be able to run with any "packed pair"
/// implementation and get the expected output.
struct Test {
/// The bytes to search in.
haystack: Vec<u8>,
/// The bytes to search for.
needle: Vec<u8>,
/// Offset into `needle` of the first byte of the pair used as a predicate.
index1: u8,
/// Offset into `needle` of the second byte of the pair used as a predicate.
index2: u8,
/// The expected result of a forward search: the offset at which `needle`
/// was written into `haystack`, or `None` if it was not written at all.
fwd: Option<usize>,
}
impl Test {
    /// Create a new "packed pair" test from a seed and some given offsets to
    /// the pair of bytes to use as a predicate in the seed's needle.
    ///
    /// When `fwd` is `Some(i)`, the needle is copied into the haystack at
    /// offset `i`, so that a forward search is expected to report `i`.
    ///
    /// If a valid test could not be constructed, then None is returned.
    /// (Currently, we take the approach of massaging tests to be valid
    /// instead of rejecting them outright, so this always returns `Some`.)
    ///
    /// This panics if either index does not fit in a `u8` or is not a valid
    /// offset into a needle of length `needle_len`.
    fn new(
        seed: Seed,
        index1: usize,
        index2: usize,
        haystack_len: usize,
        needle_len: usize,
        fwd: Option<usize>,
    ) -> Option<Test> {
        let requested1 = u8::try_from(index1).unwrap();
        let requested2 = u8::try_from(index2).unwrap();
        // The '#' byte is never used in a haystack (unless we're expecting
        // a match), while the '@' byte is never used in a needle.
        let mut haystack = vec![b'@'; haystack_len];
        let mut needle = vec![b'#'; needle_len];
        needle[0] = seed.first;
        needle[usize::from(requested1)] = seed.index1;
        needle[usize::from(requested2)] = seed.index2;
        // An expected match means the needle has to actually occur in the
        // haystack at the expected position.
        if let Some(at) = fwd {
            haystack[at..at + needle.len()].copy_from_slice(&needle);
        }
        // The writes above may have left a requested offset pointing at a
        // non-first occurrence of its byte. When that happens, point the
        // offset at the first occurrence instead. This might lead to
        // redundant tests, but it's simpler than changing how tests are
        // generated.
        let index1 = crate::memchr(seed.index1, &needle)
            .map_or(requested1, |i| u8::try_from(i).unwrap());
        let index2 = crate::memchr(seed.index2, &needle)
            .map_or(requested2, |i| u8::try_from(i).unwrap());
        Some(Test { haystack, needle, index1, index2, fwd })
    }
}
/// Data that describes a single prefilter test seed.
#[derive(Clone, Copy)]
struct Seed {
/// The byte written to the first position of every generated needle.
first: u8,
/// The byte written into the needle at the first offset of the pair.
index1: u8,
/// The byte written into the needle at the second offset of the pair.
index2: u8,
}
impl Seed {
/// The longest needle (inclusive) for which tests are generated. This is
/// lowered when running under Miri.
const NEEDLE_LENGTH_LIMIT: usize = {
#[cfg(not(miri))]
{
33
}
#[cfg(miri)]
{
5
}
};
/// The longest haystack (inclusive) for which tests are generated. This is
/// lowered when running under Miri.
const HAYSTACK_LENGTH_LIMIT: usize = {
#[cfg(not(miri))]
{
65
}
#[cfg(miri)]
{
8
}
};
/// Generate a series of prefilter tests from this seed.
///
/// Tests are produced lazily for every combination of needle length
/// (starting at 2), pair of offsets `index1 <= index2` (both at least 1 and
/// less than the needle length) and haystack length (at least the needle
/// length). For each combination there is one test that expects no match,
/// followed by one test per position at which the needle fits in the
/// haystack.
fn generate(self) -> impl Iterator<Item = Test> {
let len_start = 2;
// The iterator below generates *a lot* of tests. The number of
// tests was chosen somewhat empirically to be "bearable" when
// running the test suite.
//
// We use an iterator here because the collective haystacks of all
// these test cases add up to enough memory to OOM a conservative
// sandbox or a small laptop.
(len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| {
// Offset 0 always holds the seed's first byte, so the pair offsets
// start at 1.
let index_start = len_start - 1;
(index_start..needle_len).flat_map(move |index1| {
(index1..needle_len).flat_map(move |index2| {
(needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map(
move |haystack_len| {
// First the test where the needle does not occur at all...
Test::new(
self,
index1,
index2,
haystack_len,
needle_len,
None,
)
.into_iter()
.chain(
// ...then one test for each offset at which it does.
(0..=(haystack_len - needle_len)).flat_map(
move |output| {
Test::new(
self,
index1,
index2,
haystack_len,
needle_len,
Some(output),
)
},
),
)
},
)
})
})
})
}
}

232
vendor/memchr/src/tests/substring/mod.rs vendored Normal file
View File

@@ -0,0 +1,232 @@
/*!
This module defines tests and test helpers for substring implementations.
*/
use alloc::{
boxed::Box,
format,
string::{String, ToString},
};
pub(crate) mod naive;
#[macro_use]
pub(crate) mod prop;
/// Hand-written seed cases for the substring test `Runner`.
///
/// Each entry is `Seed::new(needle, haystack, fwd, rev)`, where `fwd` and
/// `rev` are the offsets expected from a forward and a reverse search
/// respectively. `Seed::generate` expands each seed into many padded
/// variants.
const SEEDS: &[Seed] = &[
    Seed::new("", "", Some(0), Some(0)),
    Seed::new("", "a", Some(0), Some(1)),
    Seed::new("", "ab", Some(0), Some(2)),
    Seed::new("", "abc", Some(0), Some(3)),
    Seed::new("a", "", None, None),
    Seed::new("a", "a", Some(0), Some(0)),
    Seed::new("a", "aa", Some(0), Some(1)),
    Seed::new("a", "ba", Some(1), Some(1)),
    Seed::new("a", "bba", Some(2), Some(2)),
    Seed::new("a", "bbba", Some(3), Some(3)),
    Seed::new("a", "bbbab", Some(3), Some(3)),
    Seed::new("a", "bbbabb", Some(3), Some(3)),
    Seed::new("a", "bbbabbb", Some(3), Some(3)),
    Seed::new("a", "bbbbbb", None, None),
    Seed::new("ab", "", None, None),
    Seed::new("ab", "a", None, None),
    Seed::new("ab", "b", None, None),
    Seed::new("ab", "ab", Some(0), Some(0)),
    Seed::new("ab", "aab", Some(1), Some(1)),
    Seed::new("ab", "aaab", Some(2), Some(2)),
    Seed::new("ab", "abaab", Some(0), Some(3)),
    Seed::new("ab", "baaab", Some(3), Some(3)),
    Seed::new("ab", "acb", None, None),
    Seed::new("ab", "abba", Some(0), Some(0)),
    Seed::new("abc", "ab", None, None),
    Seed::new("abc", "abc", Some(0), Some(0)),
    Seed::new("abc", "abcz", Some(0), Some(0)),
    Seed::new("abc", "abczz", Some(0), Some(0)),
    Seed::new("abc", "zabc", Some(1), Some(1)),
    Seed::new("abc", "zzabc", Some(2), Some(2)),
    Seed::new("abc", "azbc", None, None),
    Seed::new("abc", "abzc", None, None),
    Seed::new("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)),
    Seed::new("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)),
    Seed::new(
        "xyz",
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz",
        Some(32),
        Some(32),
    ),
    Seed::new("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)),
    Seed::new("\u{0}\u{1e}", "\u{1e}\u{0}", None, None),
];
/// Runs a host of substring search tests.
///
/// This has support for "partial" substring search implementations that only
/// work for a subset of needles/haystacks. For example, the "packed pair"
/// substring search implementation only works for haystacks of some minimum
/// length based on the pair of bytes selected and the size of the vector used.
pub(crate) struct Runner {
/// The forward search under test, called as `fwd(haystack, needle)`. This
/// is `None` until `Runner::fwd` is called.
fwd: Option<
Box<dyn FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static>,
>,
/// The reverse search under test, called as `rev(haystack, needle)`. This
/// is `None` until `Runner::rev` is called.
rev: Option<
Box<dyn FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static>,
>,
}
impl Runner {
    /// Create a new test runner for forward and reverse substring search
    /// implementations. Neither implementation is set initially.
    pub(crate) fn new() -> Runner {
        Runner { fwd: None, rev: None }
    }

    /// Run all tests. This panics on the first failure.
    ///
    /// If the implementation being tested returns `None` for a particular
    /// haystack/needle combination, then that test is skipped.
    ///
    /// All forward tests are run first, followed by all reverse tests. If
    /// either implementation (or both) was never set, then the tests for it
    /// are skipped.
    pub(crate) fn run(self) {
        let Runner { fwd, rev } = self;
        if let Some(mut search) = fwd {
            for t in SEEDS.iter().flat_map(|seed| seed.generate()) {
                let got =
                    match search(t.haystack.as_bytes(), t.needle.as_bytes())
                    {
                        Some(got) => got,
                        None => continue,
                    };
                assert_eq!(
                    t.fwd, got,
                    "FORWARD, needle: {:?}, haystack: {:?}",
                    t.needle, t.haystack,
                );
            }
        }
        if let Some(mut search) = rev {
            for t in SEEDS.iter().flat_map(|seed| seed.generate()) {
                let got =
                    match search(t.haystack.as_bytes(), t.needle.as_bytes())
                    {
                        Some(got) => got,
                        None => continue,
                    };
                assert_eq!(
                    t.rev, got,
                    "REVERSE, needle: {:?}, haystack: {:?}",
                    t.needle, t.haystack,
                );
            }
        }
    }

    /// Set the implementation for forward substring search.
    ///
    /// The closure is called as `search(haystack, needle)`.
    ///
    /// If the closure returns `None`, then it is assumed that the given
    /// test cannot be applied to the particular implementation and it is
    /// skipped. For example, if a particular implementation only supports
    /// needles or haystacks for some minimum length.
    ///
    /// If this is not set, then forward substring search is not tested.
    pub(crate) fn fwd(
        mut self,
        search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
    ) -> Runner {
        let boxed = Box::new(search);
        self.fwd = Some(boxed);
        self
    }

    /// Set the implementation for reverse substring search.
    ///
    /// The closure is called as `search(haystack, needle)`.
    ///
    /// If the closure returns `None`, then it is assumed that the given
    /// test cannot be applied to the particular implementation and it is
    /// skipped. For example, if a particular implementation only supports
    /// needles or haystacks for some minimum length.
    ///
    /// If this is not set, then reverse substring search is not tested.
    pub(crate) fn rev(
        mut self,
        search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
    ) -> Runner {
        let boxed = Box::new(search);
        self.rev = Some(boxed);
        self
    }
}
/// A single substring test for forward and reverse searches.
///
/// Tests are produced from a `Seed` by `Seed::generate`.
#[derive(Clone, Debug)]
struct Test {
/// The string to search for.
needle: String,
/// The string to search in.
haystack: String,
/// The offset expected from a forward search, or `None` for no match.
fwd: Option<usize>,
/// The offset expected from a reverse search, or `None` for no match.
rev: Option<usize>,
}
/// A seed from which many substring tests for forward and reverse searches
/// are generated.
///
/// Each seed is valid on its own, but it also serves as a starting point
/// to generate more tests. Namely, we pad out the haystacks with other
/// characters so that we get more complete coverage. This is especially useful
/// for testing vector algorithms that tend to have weird special cases for
/// alignment and loop unrolling.
///
/// Padding works by assuming certain characters never otherwise appear in a
/// needle or a haystack. Neither should contain a `#` character;
/// `Seed::generate` panics if one does.
#[derive(Clone, Copy, Debug)]
struct Seed {
/// The string to search for.
needle: &'static str,
/// The unpadded string to search in.
haystack: &'static str,
/// The offset expected from a forward search of the unpadded haystack.
fwd: Option<usize>,
/// The offset expected from a reverse search of the unpadded haystack.
rev: Option<usize>,
}
impl Seed {
    /// The maximum number of `#` characters (inclusive) added to either side
    /// of a seed's haystack.
    const MAX_PAD: usize = 34;

    /// Create a seed from a needle, a haystack and the offsets expected from
    /// a forward and a reverse search.
    const fn new(
        needle: &'static str,
        haystack: &'static str,
        fwd: Option<usize>,
        rev: Option<usize>,
    ) -> Seed {
        Seed { needle, haystack, fwd, rev }
    }

    /// Expand this seed into many tests by padding its haystack with `#`.
    ///
    /// The haystack is first padded at the front with `0..=MAX_PAD`
    /// characters (zero padding being the seed itself) and then at the back
    /// with `1..=MAX_PAD` characters. Expected offsets are adjusted to
    /// account for the padding.
    ///
    /// This panics if the needle or the haystack contains a `#`.
    fn generate(self) -> impl Iterator<Item = Test> {
        assert!(!self.needle.contains('#'), "needle must not contain '#'");
        assert!(!self.haystack.contains('#'), "haystack must not contain '#'");
        // Padding at the beginning of the haystack shifts every match.
        let front_padded = (0..=Seed::MAX_PAD).map(move |pad| {
            let haystack = format!("{}{}", "#".repeat(pad), self.haystack);
            self.padded_test(haystack, pad)
        });
        // Padding at the end of the haystack leaves matches where they were.
        let back_padded = (1..=Seed::MAX_PAD).map(move |pad| {
            let haystack = format!("{}{}", self.haystack, "#".repeat(pad));
            self.padded_test(haystack, 0)
        });
        front_padded.chain(back_padded)
    }

    /// Builds the test for an already padded `haystack` in which the seed's
    /// matches have moved right by `shift` bytes.
    ///
    /// An empty needle is a special case: it is expected at offset `0` going
    /// forward and at the very end of the haystack in reverse.
    fn padded_test(self, haystack: String, shift: usize) -> Test {
        let needle = self.needle.to_string();
        let (fwd, rev) = if needle.is_empty() {
            (Some(0), Some(haystack.len()))
        } else {
            (self.fwd.map(|i| shift + i), self.rev.map(|i| shift + i))
        };
        Test { needle, haystack, fwd, rev }
    }
}

View File

@@ -0,0 +1,45 @@
/*!
This module defines "naive" implementations of substring search.
These are sometimes useful to compare with "real" substring implementations.
The idea is that they are so simple that they are unlikely to be incorrect.
*/
/// Naively search forwards for the given needle in the given haystack.
///
/// Returns the smallest offset at which `needle` occurs in `haystack`, or
/// `None` if it does not occur. An empty needle is found at offset `0`.
pub(crate) fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    // One past the last offset at which the needle could still fit. When the
    // needle is longer than the haystack, there are no candidates at all.
    let end = match haystack.len().checked_sub(needle.len()) {
        Some(last_start) => last_start + 1,
        None => 0,
    };
    (0..end).find(|&start| haystack[start..].starts_with(needle))
}
/// Naively search in reverse for the given needle in the given haystack.
///
/// Returns the largest offset at which `needle` occurs in `haystack`, or
/// `None` if it does not occur. An empty needle is found at the end of the
/// haystack, i.e., at offset `haystack.len()`.
pub(crate) fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    // One past the last offset at which the needle could still fit. When the
    // needle is longer than the haystack, there are no candidates at all.
    let end = match haystack.len().checked_sub(needle.len()) {
        Some(last_start) => last_start + 1,
        None => 0,
    };
    (0..end).rev().find(|&start| haystack[start..].starts_with(needle))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::substring;

    /// Runs the shared substring test suite against the naive forward
    /// search.
    #[test]
    fn forward() {
        let runner = substring::Runner::new()
            .fwd(|haystack, needle| Some(find(haystack, needle)));
        runner.run();
    }

    /// Runs the shared substring test suite against the naive reverse
    /// search.
    #[test]
    fn reverse() {
        let runner = substring::Runner::new()
            .rev(|haystack, needle| Some(rfind(haystack, needle)));
        runner.run();
    }
}

View File

@@ -0,0 +1,126 @@
/*!
This module defines a few quickcheck properties for substring search.
It also provides a forward and reverse macro for conveniently defining
quickcheck tests that run these properties over any substring search
implementation.
*/
use crate::tests::substring::naive;
/// Defines quickcheck tests for a forward substring search implementation.
///
/// $fwd is a `impl FnMut(haystack, needle) -> Option<Option<usize>>`. When the
/// routine returns `None`, then it's skipped, which is useful for substring
/// implementations that don't work for all inputs.
///
/// The generated tests are not compiled under Miri.
#[macro_export]
macro_rules! define_substring_forward_quickcheck {
($fwd:expr) => {
#[cfg(not(miri))]
quickcheck::quickcheck! {
fn qc_fwd_prefix_is_substring(bs: alloc::vec::Vec<u8>) -> bool {
crate::tests::substring::prop::prefix_is_substring(&bs, $fwd)
}
fn qc_fwd_suffix_is_substring(bs: alloc::vec::Vec<u8>) -> bool {
crate::tests::substring::prop::suffix_is_substring(&bs, $fwd)
}
fn qc_fwd_matches_naive(
haystack: alloc::vec::Vec<u8>,
needle: alloc::vec::Vec<u8>
) -> bool {
// `false` selects the forward naive search as the reference.
crate::tests::substring::prop::same_as_naive(
false,
&haystack,
&needle,
$fwd,
)
}
}
};
}
/// Defines quickcheck tests for a reverse substring search implementation.
///
/// $rev is a `impl FnMut(haystack, needle) -> Option<Option<usize>>`. When the
/// routine returns `None`, then it's skipped, which is useful for substring
/// implementations that don't work for all inputs.
///
/// The generated tests are not compiled under Miri.
#[macro_export]
macro_rules! define_substring_reverse_quickcheck {
($rev:expr) => {
#[cfg(not(miri))]
quickcheck::quickcheck! {
fn qc_rev_prefix_is_substring(bs: alloc::vec::Vec<u8>) -> bool {
crate::tests::substring::prop::prefix_is_substring(&bs, $rev)
}
fn qc_rev_suffix_is_substring(bs: alloc::vec::Vec<u8>) -> bool {
crate::tests::substring::prop::suffix_is_substring(&bs, $rev)
}
fn qc_rev_matches_naive(
haystack: alloc::vec::Vec<u8>,
needle: alloc::vec::Vec<u8>
) -> bool {
// `true` selects the reverse naive search as the reference.
crate::tests::substring::prop::same_as_naive(
true,
&haystack,
&needle,
$rev,
)
}
}
};
}
/// Check that every prefix of the given byte string is a substring.
///
/// `search` is called as `search(haystack, needle)` with `bs` as the haystack
/// and each prefix of `bs` as the needle, from the empty prefix up to and
/// including `bs` itself. When `search` returns `None`, that prefix is
/// skipped.
///
/// Returns `false` as soon as `search` reports that some prefix was not
/// found, and `true` otherwise.
pub(crate) fn prefix_is_substring(
    bs: &[u8],
    mut search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>>,
) -> bool {
    // The range is inclusive so that the longest prefixes, including the
    // whole input, are checked too.
    for end in 0..=bs.len() {
        if let Some(None) = search(bs, &bs[..end]) {
            return false;
        }
    }
    true
}
/// Check that every suffix of the given byte string is a substring.
///
/// `search` is called as `search(haystack, needle)` with `bs` as the haystack
/// and each suffix of `bs` as the needle, from `bs` itself down to and
/// including the empty suffix. When `search` returns `None`, that suffix is
/// skipped.
///
/// Returns `false` as soon as `search` reports that some suffix was not
/// found, and `true` otherwise.
pub(crate) fn suffix_is_substring(
    bs: &[u8],
    mut search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>>,
) -> bool {
    // The range is inclusive so that the shortest suffixes, including the
    // empty one, are checked too.
    for start in 0..=bs.len() {
        if let Some(None) = search(bs, &bs[start..]) {
            return false;
        }
    }
    true
}
/// Check that naive substring search matches the result of the given search
/// algorithm.
///
/// `search` is called as `search(haystack, needle)`. When it returns `None`,
/// the input is treated as unsupported and the check passes. Otherwise its
/// result is compared with `naive::rfind` when `reverse` is true and with
/// `naive::find` when it is false.
pub(crate) fn same_as_naive(
    reverse: bool,
    haystack: &[u8],
    needle: &[u8],
    mut search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>>,
) -> bool {
    search(haystack, needle).map_or(true, |got| {
        let expected = if reverse {
            naive::rfind(haystack, needle)
        } else {
            naive::find(haystack, needle)
        };
        got == expected
    })
}