Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions


@@ -0,0 +1,59 @@
//! Compares the performance of `UnicodeSegmentation::graphemes` with stdlib's scalar-value
//! iterator `std::str::chars`.
//!
//! It is expected that `std::str::chars` is faster than `UnicodeSegmentation::graphemes`, since it
//! does not have to deal with the complexity of grapheme clusters. The question this benchmark
//! answers is how much slower full Unicode handling is.

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::fs;
use unicode_segmentation::UnicodeSegmentation;

const FILES: &[&str] = &[
    "arabic",
    "english",
    "hindi",
    "japanese",
    "korean",
    "mandarin",
    "russian",
    "source_code",
];

#[inline(always)]
fn grapheme(text: &str) {
    // `black_box` keeps the compiler from optimizing the iteration away.
    for c in UnicodeSegmentation::graphemes(black_box(text), true) {
        black_box(c);
    }
}

#[inline(always)]
fn scalar(text: &str) {
    for c in black_box(text).chars() {
        black_box(c);
    }
}

fn bench_all(c: &mut Criterion) {
    let mut group = c.benchmark_group("chars");
    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("grapheme", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| grapheme(content)),
        );
    }
    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("scalar", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| scalar(content)),
        );
    }
}

criterion_group!(benches, bench_all);
criterion_main!(benches);
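
As a quick illustration of what the two iterators being compared actually count, here is a minimal sketch (not part of the vendored file; the string literal is an arbitrary example): a combining-mark sequence is two scalar values for `chars` but a single extended grapheme cluster for `graphemes`.

use unicode_segmentation::UnicodeSegmentation;

fn main() {
    // "e" followed by U+0301 COMBINING ACUTE ACCENT: two scalar values, one grapheme cluster.
    let s = "e\u{301}";
    assert_eq!(s.chars().count(), 2);
    assert_eq!(s.graphemes(true).count(), 1);
}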


@@ -0,0 +1,37 @@
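//! Benchmarks `UnicodeSegmentation::split_word_bounds` over the sample texts.
//!
//! Unlike the other benchmarks in this suite, only the Unicode-aware iterator is measured here;
//! there is no stdlib comparison.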
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::fs;
use unicode_segmentation::UnicodeSegmentation;

const FILES: &[&str] = &[
    "arabic",
    "english",
    "hindi",
    "japanese",
    "korean",
    "mandarin",
    "russian",
    "source_code",
];

#[inline(always)]
fn grapheme(text: &str) {
    for w in text.split_word_bounds() {
        black_box(w);
    }
}

fn bench_all(c: &mut Criterion) {
    let mut group = c.benchmark_group("word_bounds");
    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("grapheme", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| grapheme(content)),
        );
    }
}

criterion_group!(benches, bench_all);
criterion_main!(benches);
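
For a sense of what `split_word_bounds` produces (a sketch, not part of the vendored file; the input string is an arbitrary example): it yields every segment of the input, separators and punctuation included, so the pieces concatenate back to the original string.

use unicode_segmentation::UnicodeSegmentation;

fn main() {
    let s = "Hello, world!";
    let segments: Vec<&str> = s.split_word_bounds().collect();
    // Boundaries fall around punctuation and spaces as well as words.
    println!("{:?}", segments); // e.g. ["Hello", ",", " ", "world", "!"]
    assert_eq!(segments.concat(), s);
}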


@@ -0,0 +1,59 @@
//! Compares the performance of `UnicodeSegmentation::unicode_words` with stdlib's
//! whitespace-based `std::str::split_whitespace`.
//!
//! It is expected that `std::str::split_whitespace` is faster than
//! `UnicodeSegmentation::unicode_words`, since it only splits on whitespace instead of applying
//! the full Unicode word-boundary rules. The question this benchmark answers is how much slower
//! full Unicode handling is.

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::fs;
use unicode_segmentation::UnicodeSegmentation;

const FILES: &[&str] = &[
    "arabic",
    "english",
    "hindi",
    "japanese",
    "korean",
    "mandarin",
    "russian",
    "source_code",
];

#[inline(always)]
fn grapheme(text: &str) {
    for w in text.unicode_words() {
        black_box(w);
    }
}

#[inline(always)]
fn scalar(text: &str) {
    for w in text.split_whitespace() {
        black_box(w);
    }
}

fn bench_all(c: &mut Criterion) {
    let mut group = c.benchmark_group("words");
    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("grapheme", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| grapheme(content)),
        );
    }
    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("scalar", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| scalar(content)),
        );
    }
}

criterion_group!(benches, bench_all);
criterion_main!(benches);
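
To make the difference between the two word iterators concrete, a minimal sketch (not part of the vendored file; the input string is an arbitrary example): `split_whitespace` cuts only on whitespace and leaves punctuation attached, while `unicode_words` applies the Unicode word-boundary rules and yields just the words.

use unicode_segmentation::UnicodeSegmentation;

fn main() {
    let s = "The quick (\"brown\") fox";
    // Whitespace splitting keeps punctuation glued to the neighbouring word.
    let by_whitespace: Vec<&str> = s.split_whitespace().collect();
    println!("{:?}", by_whitespace); // ["The", "quick", "(\"brown\")", "fox"]
    // Unicode word segmentation drops the punctuation.
    let by_words: Vec<&str> = s.unicode_words().collect();
    println!("{:?}", by_words); // ["The", "quick", "brown", "fox"]
}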