Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions


@@ -0,0 +1,59 @@
//! Compares the performance of `UnicodeSegmentation::graphemes` with stdlib's scalar-value
//! iterator `std::str::chars`.
//!
//! It is expected that `std::str::chars` is faster than `UnicodeSegmentation::graphemes`, since it
//! does not have to deal with the complexity of grapheme clusters. The question this benchmark
//! answers is how much slower full Unicode handling is.

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::fs;
use unicode_segmentation::UnicodeSegmentation;

const FILES: &[&str] = &[
    "arabic",
    "english",
    "hindi",
    "japanese",
    "korean",
    "mandarin",
    "russian",
    "source_code",
];

#[inline(always)]
fn grapheme(text: &str) {
    // `black_box` keeps the compiler from optimizing the iteration away.
    for c in UnicodeSegmentation::graphemes(black_box(text), true) {
        black_box(c);
    }
}

#[inline(always)]
fn scalar(text: &str) {
    for c in black_box(text).chars() {
        black_box(c);
    }
}

fn bench_all(c: &mut Criterion) {
    let mut group = c.benchmark_group("chars");
    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("grapheme", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| grapheme(content)),
        );
    }
    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("scalar", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| scalar(content)),
        );
    }
}

criterion_group!(benches, bench_all);
criterion_main!(benches);
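
As a quick illustration of what the two iterators being compared actually count, here is a minimal sketch (not part of the vendored file; the string literal is an arbitrary example): a combining-mark sequence is two scalar values for `chars` but a single extended grapheme cluster for `graphemes`.

use unicode_segmentation::UnicodeSegmentation;

fn main() {
    // "e" followed by U+0301 COMBINING ACUTE ACCENT: two scalar values, one grapheme cluster.
    let s = "e\u{301}";
    assert_eq!(s.chars().count(), 2);
    assert_eq!(s.graphemes(true).count(), 1);
}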


@@ -0,0 +1,37 @@
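//! Benchmarks `UnicodeSegmentation::split_word_bounds` over the sample texts.
//!
//! Unlike the other benchmarks in this suite, only the Unicode-aware iterator is measured here;
//! there is no stdlib comparison.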
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::fs;
use unicode_segmentation::UnicodeSegmentation;

const FILES: &[&str] = &[
    "arabic",
    "english",
    "hindi",
    "japanese",
    "korean",
    "mandarin",
    "russian",
    "source_code",
];

#[inline(always)]
fn grapheme(text: &str) {
    for w in text.split_word_bounds() {
        black_box(w);
    }
}

fn bench_all(c: &mut Criterion) {
    let mut group = c.benchmark_group("word_bounds");
    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("grapheme", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| grapheme(content)),
        );
    }
}

criterion_group!(benches, bench_all);
criterion_main!(benches);
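
For a sense of what `split_word_bounds` produces (a sketch, not part of the vendored file; the input string is an arbitrary example): it yields every segment of the input, separators and punctuation included, so the pieces concatenate back to the original string.

use unicode_segmentation::UnicodeSegmentation;

fn main() {
    let s = "Hello, world!";
    let segments: Vec<&str> = s.split_word_bounds().collect();
    // Boundaries fall around punctuation and spaces as well as words.
    println!("{:?}", segments); // e.g. ["Hello", ",", " ", "world", "!"]
    assert_eq!(segments.concat(), s);
}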


@@ -0,0 +1,59 @@
//! Compares the performance of `UnicodeSegmentation::unicode_words` with stdlib's
//! whitespace-based `std::str::split_whitespace`.
//!
//! It is expected that `std::str::split_whitespace` is faster than
//! `UnicodeSegmentation::unicode_words`, since it only splits on whitespace instead of applying
//! the full Unicode word-boundary rules. The question this benchmark answers is how much slower
//! full Unicode handling is.

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::fs;
use unicode_segmentation::UnicodeSegmentation;

const FILES: &[&str] = &[
    "arabic",
    "english",
    "hindi",
    "japanese",
    "korean",
    "mandarin",
    "russian",
    "source_code",
];

#[inline(always)]
fn grapheme(text: &str) {
    for w in text.unicode_words() {
        black_box(w);
    }
}

#[inline(always)]
fn scalar(text: &str) {
    for w in text.split_whitespace() {
        black_box(w);
    }
}

fn bench_all(c: &mut Criterion) {
    let mut group = c.benchmark_group("words");
    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("grapheme", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| grapheme(content)),
        );
    }
    for file in FILES {
        group.bench_with_input(
            BenchmarkId::new("scalar", file),
            &fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
            |b, content| b.iter(|| scalar(content)),
        );
    }
}

criterion_group!(benches, bench_all);
criterion_main!(benches);
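
To make the difference between the two word iterators concrete, a minimal sketch (not part of the vendored file; the input string is an arbitrary example): `split_whitespace` cuts only on whitespace and leaves punctuation attached, while `unicode_words` applies the Unicode word-boundary rules and yields just the words.

use unicode_segmentation::UnicodeSegmentation;

fn main() {
    let s = "The quick (\"brown\") fox";
    // Whitespace splitting keeps punctuation glued to the neighbouring word.
    let by_whitespace: Vec<&str> = s.split_whitespace().collect();
    println!("{:?}", by_whitespace); // ["The", "quick", "(\"brown\")", "fox"]
    // Unicode word segmentation drops the punctuation.
    let by_words: Vec<&str> = s.unicode_words().collect();
    println!("{:?}", by_words); // ["The", "quick", "brown", "fox"]
}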