Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

vendor/radsort/src/double_buffer.rs vendored Normal file

@@ -0,0 +1,130 @@
use core::{mem::MaybeUninit, slice};
use alloc::{boxed::Box, vec::Vec};
/// Double buffer. Wraps a mutable slice and allocates scratch memory of the same size, so that
/// elements can be freely scattered from buffer to buffer.
///
/// # Drop behavior
///
/// Drop ensures that the mutable slice this buffer was constructed with contains all the original
/// elements.
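///
/// # Example
///
/// A minimal sketch of the intended flow (crate-internal, so not a doctest);
/// the indexer must visit every index exactly once before `swap` is called:
///
/// ```rust,ignore
/// let mut data = [30u8, 10, 20];
/// let mut buf = DoubleBuffer::new(&mut data);
/// // scatter each element to index `x / 10 - 1`, a permutation of 0..=2
/// buf.scatter(|&x| (x / 10 - 1) as usize);
/// // SAFETY: the indexer above wrote every index exactly once.
/// unsafe { buf.swap(); }
/// drop(buf); // Drop copies the committed buffer back into `data`
/// assert_eq!(data, [10, 20, 30]);
/// ```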
pub struct DoubleBuffer<'a, T> {
slice: &'a mut [MaybeUninit<T>],
scratch: Box<[MaybeUninit<T>]>,
slice_is_write: bool,
}
impl<'a, T> DoubleBuffer<'a, T> {
/// Creates a double buffer, allocating a scratch buffer of the same length as the input slice.
///
/// The supplied slice becomes the read buffer, the scratch buffer becomes the write buffer.
pub fn new(slice: &'a mut [T]) -> Self {
// SAFETY: The Drop impl ensures that the slice is initialized.
let slice = unsafe { slice_as_uninit_mut(slice) };
let scratch = {
let mut v = Vec::with_capacity(slice.len());
// SAFETY: we just allocated this capacity, and `MaybeUninit<T>` does not require initialization.
unsafe {
v.set_len(slice.len());
}
v.into_boxed_slice()
};
DoubleBuffer {
slice,
scratch,
slice_is_write: false,
}
}
/// Scatters the elements from the read buffer to the computed indices in
/// the write buffer. The read buffer is iterated from the beginning.
///
/// Call `swap` after this function to commit the write buffer state.
///
/// Returning an out-of-bounds index from the indexer causes this function
/// to return immediately, without visiting the remaining elements.
pub fn scatter<F>(&mut self, mut indexer: F)
where
F: FnMut(&T) -> usize,
{
let (read, write) = self.as_read_write();
let len = write.len();
for t in read {
let index = indexer(t);
if index >= len {
return;
}
let write_ptr = write[index].as_mut_ptr();
unsafe {
// SAFETY: both pointers are valid for T, aligned, and nonoverlapping
write_ptr.copy_from_nonoverlapping(t as *const T, 1);
}
}
}
/// Returns the current read and write buffers.
fn as_read_write(&mut self) -> (&[T], &mut [MaybeUninit<T>]) {
let (read, write): (&[MaybeUninit<T>], &mut [MaybeUninit<T>]) = if self.slice_is_write {
(self.scratch.as_ref(), self.slice)
} else {
(self.slice, self.scratch.as_mut())
};
// SAFETY: The read buffer is always initialized.
let read = unsafe { slice_assume_init_ref(read) };
(read, write)
}
/// Swaps the read and write buffer, committing the write buffer state.
///
/// # Safety
///
/// The caller must ensure that every element of the write buffer was
/// written to before calling this function.
pub unsafe fn swap(&mut self) {
self.slice_is_write = !self.slice_is_write;
}
}
/// Ensures that the input slice contains all the original elements.
impl<'a, T> Drop for DoubleBuffer<'a, T> {
fn drop(&mut self) {
if self.slice_is_write {
// The input slice is the write buffer, copy the consistent state from the read buffer
unsafe {
// SAFETY: `scratch` is the read buffer, it is initialized. The length is the same.
self.slice
.as_mut_ptr()
.copy_from_nonoverlapping(self.scratch.as_ptr(), self.slice.len());
}
self.slice_is_write = false;
}
}
}
/// Get a slice of the initialized items.
///
/// # Safety
///
/// The caller must ensure that all the items are initialized.
#[inline(always)]
pub unsafe fn slice_assume_init_ref<T>(slice: &[MaybeUninit<T>]) -> &[T] {
// SAFETY: `[MaybeUninit<T>]` and `[T]` have the same layout.
unsafe { slice::from_raw_parts(slice.as_ptr() as *const T, slice.len()) }
}
/// View the mutable slice of `T` as a slice of `MaybeUninit<T>`.
///
/// # Safety
///
/// The caller must ensure that all the items of the returned slice are
/// initialized before dropping it.
#[inline(always)]
pub unsafe fn slice_as_uninit_mut<T>(slice: &mut [T]) -> &mut [MaybeUninit<T>] {
// SAFETY: `[MaybeUninit<T>]` and `[T]` have the same layout.
unsafe { slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut MaybeUninit<T>, slice.len()) }
}

vendor/radsort/src/lib.rs vendored Normal file

@@ -0,0 +1,436 @@
//! `radsort` is a radix sort implementation for sorting by scalar keys
//! (integers, floats, chars, bools).
//!
//! All built-in scalar types can be used as sorting keys: Booleans, characters,
//! integers, and floating point-numbers. To sort by multiple keys, put them in
//! a tuple, starting from the most significant key. See [`Key`] for a full list
//! of supported keys.
//!
//! - best and worst-case running time is `O(n)`; see [benchmarks] for more
//!   detailed performance characteristics
//! - space complexity is `O(n)`; direct sort allocates temporary storage the
//!   size of the slice, for indirect see [`sort_by_cached_key`]
//! - stable, i.e. does not reorder equal elements
//! - uses `#![no_std]`, but needs an allocator
//!
//! This sort can be several times faster than `slice::sort` and
//! `slice::sort_unstable`, typically on large slices (hundreds of elements or
//! more). It performs worse on short slices and when using wide keys
//! (16 bytes). See [benchmarks] to get a better picture of the performance
//! characteristics.
//!
//! `radsort` is an implementation of LSB radix sort, using counting sort to
//! sort the slice by each digit (byte) of the key. As an optimization, the
//! slice is sorted only by digits which differ between the keys. See the
//! [`unopt`] module for more details and functions which don't use this
//! optimization.
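//!
//! For instance (a sketch of the passes), sorting `[0x0201u16, 0x0102]` first
//! reorders by the low byte (`0x01` sorts before `0x02`) and then, stably, by
//! the high byte, ending with the sorted `[0x0102, 0x0201]`.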
//!
//! This implementation is based on radix sort by Pierre Terdiman,
//! published at
//! [http://codercorner.com/RadixSortRevisited.htm](http://codercorner.com/RadixSortRevisited.htm),
//! with select optimizations published by Michael Herf at
//! [http://stereopsis.com/radix.html](http://stereopsis.com/radix.html).
//!
//! # Floating-point numbers
//!
//! Floating-point number keys are effectively sorted according to their partial
//! order (see [`PartialOrd`]), with `NaN` values at the beginning (before the
//! negative infinity) and at the end (after the positive infinity), depending
//! on the sign bit of each `NaN`.
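//!
//! For example, a sketch of the resulting order (this assumes the platform's
//! `f32::NAN` is a positive quiet NaN, which is the common case):
//! ```rust
//! let mut data = [1.0f32, f32::NAN, -1.0, -f32::NAN];
//!
//! radsort::sort(&mut data);
//!
//! // the negative NaN sorts first, the positive NaN last
//! assert!(data[0].is_nan() && data[0].is_sign_negative());
//! assert_eq!(data[1], -1.0);
//! assert_eq!(data[2], 1.0);
//! assert!(data[3].is_nan() && data[3].is_sign_positive());
//! ```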
//!
//! # Examples
//!
//! Slices of scalar types (integers, floating-point numbers, Booleans, and
//! characters) can be sorted directly:
//! ```rust
//! let mut data = [2i32, -1, 1, 0, -2];
//!
//! radsort::sort(&mut data);
//!
//! assert_eq!(data, [-2, -1, 0, 1, 2]);
//! ```
//!
//! Use a key extraction function to sort other types:
//! ```rust
//! let mut friends = ["Punchy", "Isabelle", "Sly", "Puddles", "Gladys"];
//!
//! // sort by the length of the string in bytes
//! radsort::sort_by_key(&mut friends, |s| s.len());
//!
//! assert_eq!(friends, ["Sly", "Punchy", "Gladys", "Puddles", "Isabelle"]);
//! ```
//!
//! To sort by two or more keys, put them in a tuple, starting with the most
//! significant key:
//! ```rust
//! # #[derive(Debug, PartialEq)]
//! struct Height { feet: u8, inches: u8, }
//!
//! let mut heights = [
//! Height { feet: 6, inches: 1 },
//! Height { feet: 5, inches: 9 },
//! Height { feet: 6, inches: 0 },
//! ];
//!
//! // sort by feet, if feet are equal, sort by inches
//! radsort::sort_by_key(&mut heights, |h| (h.feet, h.inches));
//!
//! assert_eq!(heights, [
//! Height { feet: 5, inches: 9 },
//! Height { feet: 6, inches: 0 },
//! Height { feet: 6, inches: 1 },
//! ]);
//! ```
//!
//! [`Key`]: ./trait.Key.html
//! [`unopt`]: ./unopt/index.html
//! [benchmarks]: https://github.com/JakubValtar/radsort/wiki/Benchmarks
//! [`sort_by_cached_key`]: ./fn.sort_by_cached_key.html
//! [`PartialOrd`]: https://doc.rust-lang.org/std/cmp/trait.PartialOrd.html
#![no_std]
extern crate alloc;
use alloc::vec::Vec;
mod double_buffer;
mod scalar;
mod sort;
use scalar::Scalar;
use sort::RadixKey;
/// Sorts the slice.
///
/// Slice elements can be any scalar type. See [`Key`] for a full list.
///
/// This sort is stable (i.e., does not reorder equal elements) and `O(w n)`,
/// where `w` is the size of the key in bytes.
///
/// Allocates temporary storage the size of the slice.
///
/// # Examples
/// ```rust
/// let mut data = [5i32, -1, 3, 15, -42];
///
/// radsort::sort(&mut data);
///
/// assert_eq!(data, [-42, -1, 3, 5, 15]);
/// ```
/// [`Key`]: trait.Key.html
#[inline]
pub fn sort<T: Key>(slice: &mut [T]) {
Key::sort_by_key(slice, |v| *v, false);
}
/// Sorts the slice using a key extraction function.
///
/// Key can be any scalar type. See [`Key`] for a full list.
///
/// This sort is stable (i.e., does not reorder equal elements) and `O(w m n)`,
/// where the key function is `O(m)` and `w` is the size of the key in bytes.
///
/// Allocates temporary storage the size of the slice.
///
/// See [`sort_by_cached_key`] if you use an expensive key function or if you
/// need to sort large elements.
///
/// # Panics
///
/// Can panic if the key function returns different keys for the same element
/// when called repeatedly. The panic is on a best-effort basis. In case of
/// panic, the order of elements in the slice is not specified.
///
/// # Examples
///
/// ```rust
/// let mut friends = ["Punchy", "Isabelle", "Sly", "Puddles", "Gladys"];
///
/// // sort by the length of the string in bytes
/// radsort::sort_by_key(&mut friends, |s| s.len());
///
/// assert_eq!(friends, ["Sly", "Punchy", "Gladys", "Puddles", "Isabelle"]);
/// ```
///
/// [`Key`]: trait.Key.html
/// [`sort_by_cached_key`]: fn.sort_by_cached_key.html
#[inline]
pub fn sort_by_key<T, F, K>(slice: &mut [T], mut key_fn: F)
where
F: FnMut(&T) -> K,
K: Key,
{
Key::sort_by_key(slice, |t| key_fn(t), false);
}
/// Sorts the slice indirectly, using a key extraction function and caching the keys.
///
/// Key can be any scalar type. See [`Key`] for a full list.
///
/// This sort is stable (i.e., does not reorder equal elements) and
/// `O(m n + w n)`, where the key function is `O(m)`.
///
/// This function can be significantly faster for sorting by an expensive key
/// function or for sorting large elements. The keys are extracted, sorted, and
/// then the elements of the slice are reordered in-place. This saves CPU cycles
/// in case of an expensive key function and saves memory bandwidth in case of
/// large elements.
///
/// For sorting small elements by simple key functions (e.g., functions that are
/// property accesses or basic operations), [`sort_by_key`] is likely to be
/// faster.
///
/// In the worst case, allocates temporary storage in a `Vec<(K, usize)>` twice
/// the length of the slice.
///
/// # Examples
///
/// ```rust
/// let mut data = ["-6", "2", "15", "-1", "0"];
///
/// radsort::sort_by_cached_key(&mut data, |s| s.parse::<i32>().unwrap());
///
/// assert_eq!(data, ["-6", "-1", "0", "2", "15"]);
/// ```
///
/// [`Key`]: ./trait.Key.html
/// [`sort_by_key`]: fn.sort_by_key.html
#[inline]
pub fn sort_by_cached_key<T, F, K>(slice: &mut [T], key_fn: F)
where
F: FnMut(&T) -> K,
K: Key,
{
sort_by_cached_key_internal(slice, key_fn, false);
}
/// Sorting functions which don't use optimizations based on the values
/// of the keys. Useful for benchmarks and consistent performance.
///
/// For each digit (byte) of the key, `radsort` reorders the slice once.
/// Functions in the crate root sort only by the bytes which differ between the
/// keys. This can lead to large differences in sorting time, based on the
/// values in the slice.
///
/// For example, sorting `u32` keys that are all less than `u8::MAX` will sort
/// only by the least significant byte and skip the three most significant
/// bytes, which are zero; this cuts the sorting time to roughly one quarter,
/// plus the overhead of analyzing the keys.
///
/// Unlike functions in the crate root, functions in this module don't use
/// this optimization and sort by all bytes of the key. This leads to worse but
/// more consistent performance. The effects of the CPU cache will still play a
/// role, but at least the number of executed instructions will not depend on
/// the values in the slice, only on the length of the slice and the width of
/// the key type.
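///
/// A sketch of the difference in use (the results are identical; only the
/// running time differs):
///
/// ```rust
/// let mut a: Vec<u32> = (0..100).rev().collect(); // keys fit in one byte
/// let mut b = a.clone();
///
/// radsort::sort(&mut a);        // analyzes keys, sorts by the low byte only
/// radsort::unopt::sort(&mut b); // always sorts by all four bytes
///
/// assert_eq!(a, b);
/// ```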
pub mod unopt {
use super::*;
/// Version of [`sort`](../fn.sort.html) which does not skip digits (bytes).
///
/// See the [module documentation](./index.html) for more details.
#[inline]
pub fn sort<T: Key>(slice: &mut [T]) {
Key::sort_by_key(slice, |v| *v, true);
}
/// Version of [`sort_by_key`](../fn.sort_by_key.html) which does not skip digits (bytes).
///
/// See the [module documentation](./index.html) for more details.
#[inline]
pub fn sort_by_key<T, F, K>(slice: &mut [T], mut key_fn: F)
where
F: FnMut(&T) -> K,
K: Key,
{
Key::sort_by_key(slice, |t| key_fn(t), true);
}
/// Version of [`sort_by_cached_key`](../fn.sort_by_cached_key.html) which does not skip digits (bytes).
///
/// See the [module documentation](./index.html) for more details.
#[inline]
pub fn sort_by_cached_key<T, F, K>(slice: &mut [T], key_fn: F)
where
F: FnMut(&T) -> K,
K: Key,
{
sort_by_cached_key_internal(slice, key_fn, true);
}
}
#[inline]
fn sort_by_cached_key_internal<T, F, K>(slice: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> K,
K: Key,
{
// Adapted from std::slice::sort_by_cached_key
macro_rules! radsort_by_cached_key {
($index:ty) => {{
let mut indices: Vec<(K, $index)> = slice
.iter()
.map(|t| key_fn(t))
.enumerate()
.map(|(i, k)| (k, i as $index))
.collect();
Key::sort_by_key(&mut indices, |(k, _)| *k, unopt);
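// `indices` is now sorted by key: `indices[i].1` holds the original
// position of the element that belongs at rank `i`. Swap elements into
// place, following forwarded indices for values that have already moved.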
for i in 0..slice.len() {
let mut index = indices[i].1;
while (index as usize) < i {
// The previous value was swapped somewhere else. The index to which
// the original value was swapped was marked into the index array.
// Follow the indices to find out where the original value ended up.
index = indices[index as usize].1;
}
// Mark down the index to which the current value goes
indices[i].1 = index;
slice.swap(i, index as usize);
}
}};
}
let len = slice.len();
if len < 2 {
return;
}
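// Pick the smallest index type that can address every element, but only
// where the narrower index actually shrinks the (key, index) pair;
// alignment padding can make e.g. (u64, u8) as large as (u64, u16).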
let sz_u8 = core::mem::size_of::<(K, u8)>();
let sz_u16 = core::mem::size_of::<(K, u16)>();
#[cfg(not(target_pointer_width = "16"))]
let sz_u32 = core::mem::size_of::<(K, u32)>();
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
let sz_usize = core::mem::size_of::<(K, usize)>();
if sz_u8 < sz_u16 && len <= (u8::MAX as usize + 1) {
return radsort_by_cached_key!(u8);
}
#[cfg(not(target_pointer_width = "16"))]
if sz_u16 < sz_u32 && len <= (u16::MAX as usize + 1) {
return radsort_by_cached_key!(u16);
}
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
if sz_u32 < sz_usize && len <= (u32::MAX as usize + 1) {
return radsort_by_cached_key!(u32);
}
radsort_by_cached_key!(usize)
}
/// Types which can be used as sorting keys.
///
/// Implemented for all scalar types and their tuples.
///
/// Slices of types for which `Key` is implemented can be sorted directly using
/// [`sort`]. Slices of other types can be sorted using [`sort_by_key`] with a
/// key extraction function.
///
/// [`sort`]: fn.sort.html
/// [`sort_by_key`]: fn.sort_by_key.html
pub trait Key: Copy + private::Sealed {
// If this crate didn't support tuples, this trait wouldn't be needed and
// Scalar could be exposed directly to users as the `Key` trait.
/// Sorts the slice using `Self` as the type of the key.
///
/// You shouldn't need to call this directly, use one of the functions in
/// the [crate root](index.html#functions) instead.
#[doc(hidden)]
fn sort_by_key<T, F>(slice: &mut [T], key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self;
}
macro_rules! impl_for_scalar { ($($t:ty)*) => ($(
impl Key for $t {
#[doc(hidden)]
#[inline]
fn sort_by_key<T, F>(slice: &mut [T], mut key_fn: F, unopt: bool)
where F: FnMut(&T) -> Self
{
RadixKey::radix_sort(slice, |t| key_fn(t).to_radix_key(), unopt);
}
}
)*) }
impl_for_scalar! {
bool char
u8 u16 u32 u64 u128 usize
i8 i16 i32 i64 i128 isize
f32 f64
}
impl<A: Key> Key for (A,) {
#[doc(hidden)]
#[inline]
fn sort_by_key<T, F>(slice: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self,
{
A::sort_by_key(slice, |t| key_fn(t).0, unopt);
}
}
impl<A: Key, B: Key> Key for (A, B) {
#[doc(hidden)]
#[inline]
fn sort_by_key<T, F>(slice: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self,
{
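// The sort is stable, so sorting by the less significant key first and
// the more significant key last yields lexicographic (tuple) order.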
B::sort_by_key(slice, |t| key_fn(t).1, unopt);
A::sort_by_key(slice, |t| key_fn(t).0, unopt);
}
}
impl<A: Key, B: Key, C: Key> Key for (A, B, C) {
#[doc(hidden)]
#[inline]
fn sort_by_key<T, F>(slice: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self,
{
C::sort_by_key(slice, |t| key_fn(t).2, unopt);
B::sort_by_key(slice, |t| key_fn(t).1, unopt);
A::sort_by_key(slice, |t| key_fn(t).0, unopt);
}
}
impl<A: Key, B: Key, C: Key, D: Key> Key for (A, B, C, D) {
#[doc(hidden)]
#[inline]
fn sort_by_key<T, F>(slice: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self,
{
D::sort_by_key(slice, |t| key_fn(t).3, unopt);
C::sort_by_key(slice, |t| key_fn(t).2, unopt);
B::sort_by_key(slice, |t| key_fn(t).1, unopt);
A::sort_by_key(slice, |t| key_fn(t).0, unopt);
}
}
mod private {
use super::*;
/// This trait serves as a seal for the `Key` trait to prevent downstream
/// implementations.
pub trait Sealed {}
macro_rules! sealed_impl { ($($t:ty)*) => ($(
impl Sealed for $t {}
)*) }
sealed_impl! {
bool char
u8 u16 u32 u64 u128 usize
i8 i16 i32 i64 i128 isize
f32 f64
}
impl<A: Key> Sealed for (A,) {}
impl<A: Key, B: Key> Sealed for (A, B) {}
impl<A: Key, B: Key, C: Key> Sealed for (A, B, C) {}
impl<A: Key, B: Key, C: Key, D: Key> Sealed for (A, B, C, D) {}
}

vendor/radsort/src/scalar.rs vendored Normal file

@@ -0,0 +1,310 @@
//! Conversions from scalar types to radix keys, which can be sorted bitwise.
use core::mem;
use crate::sort::RadixKey;
/// Scalar types which can be converted to radix sorting keys.
pub trait Scalar: Copy + private::Sealed {
type ToRadixKey: RadixKey;
/// Maps the value to a radix sorting key, preserving the sorting order.
fn to_radix_key(self) -> Self::ToRadixKey;
}
/// Implements `Scalar` for the given unsigned integer type(s).
///
/// Since we use unsigned integers as radix sorting keys, we directly return the
/// value.
macro_rules! key_impl_unsigned {
($($t:ty)*) => ($( key_impl_unsigned!($t => $t); )*);
($t:ty => $radix_key:ty) => (
impl Scalar for $t {
type ToRadixKey = $radix_key;
#[inline(always)]
fn to_radix_key(self) -> Self::ToRadixKey {
self as $radix_key
}
}
)
}
key_impl_unsigned! { u8 u16 u32 u64 u128 }
#[cfg(target_pointer_width = "16")]
key_impl_unsigned!(usize => u16);
#[cfg(target_pointer_width = "32")]
key_impl_unsigned!(usize => u32);
#[cfg(target_pointer_width = "64")]
key_impl_unsigned!(usize => u64);
key_impl_unsigned!(bool => u8);
key_impl_unsigned!(char => u32);
/// Implements `Scalar` for the given signed integer type(s).
///
/// Signed integers are mapped to unsigned integers of the same width.
///
/// # Conversion
///
/// In two's complement, negative integers have the most significant bit set.
/// When we cast to an unsigned integer, we end up with negative integers
/// ordered after positive integers. To correct the order, we flip the sign bit.
///
/// ```plaintext
/// -128: 1000_0000       0000_0000
///   -1: 1111_1111       0111_1111
///    0: 0000_0000  ->   1000_0000
///    1: 0000_0001       1000_0001
///  127: 0111_1111       1111_1111
/// ```
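///
/// A sketch of the resulting mapping for `i8` (crate-internal, so not a
/// doctest):
///
/// ```rust,ignore
/// assert_eq!((-128i8).to_radix_key(), 0x00u8);
/// assert_eq!((-1i8).to_radix_key(), 0x7F);
/// assert_eq!(0i8.to_radix_key(), 0x80);
/// assert_eq!(127i8.to_radix_key(), 0xFF);
/// ```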
macro_rules! key_impl_signed {
($($t:ty => $radix_key:ty),*) => ($(
impl Scalar for $t {
type ToRadixKey = $radix_key;
#[inline(always)]
fn to_radix_key(self) -> Self::ToRadixKey {
const BIT_COUNT: usize = 8 * mem::size_of::<$t>();
const SIGN_BIT: $radix_key = 1 << (BIT_COUNT-1);
(self as $radix_key) ^ SIGN_BIT
}
}
)*)
}
key_impl_signed! {
i8 => u8,
i16 => u16,
i32 => u32,
i64 => u64,
i128 => u128
}
#[cfg(target_pointer_width = "16")]
key_impl_signed!(isize => u16);
#[cfg(target_pointer_width = "32")]
key_impl_signed!(isize => u32);
#[cfg(target_pointer_width = "64")]
key_impl_signed!(isize => u64);
/// Implements `Scalar` for the given floating-point number type(s).
///
/// Floating-point numbers are mapped to unsigned integers of the same width.
///
/// # Conversion
///
/// IEEE 754 floating point numbers have a sign bit, an exponent, and a
/// mantissa. We can treat the exponent and the mantissa as a single block
/// denoting the magnitude.
///
/// This leaves us with a sign-magnitude representation. Magnitude increases
/// away from zero and the sign bit tells us in which direction.
///
/// After transmuting to unsigned integers, we have two problems:
/// - because of the sign bit, negative numbers end up after the positive
/// - negative numbers go in the opposite direction, because we went from
/// sign-magnitude representation (increases away from zero) to two's
/// complement (increases away from negative infinity)
///
/// To fix these problems, we:
/// - flip the sign bit, which makes negative numbers sort before positive ones
/// - flip the magnitude bits of negative numbers, which reverses the order of
///   negative values
///
/// This gives us a simple way to map floating-point numbers to unsigned
/// integers:
/// - sign bit 0: flip the sign bit
/// - sign bit 1: flip all the bits
///
/// These are halves (~`f16`) for brevity; `f32` and `f64` just have more bits
/// in the middle.
///
/// ```plaintext
/// negative NaN 1_11111_xxxxxxxxx1 0_00000_xxxxxxxxx0
/// NEG_INFINITY 1_11111_0000000000 0_00000_1111111111
/// MIN 1_11110_1111111111 -> 0_00001_0000000000 flip all the bits
/// -1.0 1_01111_0000000000 0_10000_1111111111
/// MAX_NEGATIVE 1_00000_0000000001 0_11111_1111111110
/// -0.0 1_00000_0000000000 0_11111_1111111111
/// --------------------------------------------------------------------------
/// 0.0 0_00000_0000000000 1_00000_0000000000
/// MIN_POSITIVE 0_00000_0000000001 1_00000_0000000001
/// 1.0 0_01111_0000000000 -> 1_01111_0000000000 flip the sign bit
/// MAX 0_11110_1111111111 1_11110_1111111111
/// INFINITY 0_11111_0000000000 1_11111_0000000000
/// positive NaN 0_11111_xxxxxxxxx1 1_11111_xxxxxxxxx1
/// ```
///
/// # Special values
///
/// As shown above, infinities are sorted correctly before and after min and max
/// values. NaN values, depending on their sign bit, end up in two blocks at the
/// very beginning and at the very end.
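///
/// A sketch of the resulting mapping for `f32` (crate-internal, so not a
/// doctest):
///
/// ```rust,ignore
/// assert_eq!(0.0f32.to_radix_key(), 0x8000_0000u32); // flip the sign bit
/// assert_eq!((-0.0f32).to_radix_key(), 0x7FFF_FFFF); // flip all the bits
/// assert!((-1.0f32).to_radix_key() < 1.0f32.to_radix_key());
/// ```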
macro_rules! key_impl_float {
// signed_key type is needed for arithmetic right shift
($($t:ty => $radix_key:ty : $signed_key:ty),*) => ($(
impl Scalar for $t {
type ToRadixKey = $radix_key;
#[inline(always)]
fn to_radix_key(self) -> Self::ToRadixKey {
const BIT_COUNT: usize = 8 * mem::size_of::<$t>();
// all floats need to have the sign bit flipped
const FLIP_SIGN_MASK: $radix_key = 1 << (BIT_COUNT-1); // 0x800...
let bits = self.to_bits();
// negative floats need to have the rest flipped as well, extend the sign bit to the
// whole width with arithmetic right shift to get a flip mask 0x00...0 or 0xFF...F
let flip_negative_mask = ((bits as $signed_key) >> (BIT_COUNT-1)) as $radix_key;
bits ^ (flip_negative_mask | FLIP_SIGN_MASK)
}
}
)*)
}
key_impl_float! {
f32 => u32 : i32,
f64 => u64 : i64
}
mod private {
/// This trait serves as a seal for the `Scalar` trait to prevent downstream
/// implementations.
pub trait Sealed {}
macro_rules! sealed_impl { ($($t:ty)*) => ($(
impl Sealed for $t {}
)*) }
sealed_impl! {
bool char
u8 u16 u32 u64 u128 usize
i8 i16 i32 i64 i128 isize
f32 f64
}
}
#[cfg(test)]
mod tests {
//! Tests that `to_radix_key` implementations preserve the order of the
//! values. Tests use `std::slice::sort_by_key` to make sure that the
//! sorting function is reliable.
use super::*;
#[test]
fn test_key_bool() {
assert!(false.to_radix_key() < true.to_radix_key());
}
#[test]
fn test_key_char() {
#[rustfmt::skip]
let mut actual = [
'\u{0}', '\u{1}', '\u{F}', '\u{7F}', // 1-byte sequence
'\u{80}', '\u{81}', '\u{FF}', '\u{7FF}', // 2-byte sequence
'\u{800}', '\u{801}', '\u{FFF}', '\u{FFFF}', // 3-byte sequence
'\u{10000}', '\u{10001}', '\u{FFFFF}', '\u{10FFFF}' // 4-byte sequence
];
let expected = actual;
actual.reverse();
actual.sort_by_key(|v| v.to_radix_key());
assert_eq!(actual, expected);
}
#[test]
fn test_key_numeric() {
macro_rules! implement {
($($t:ident)*) => ($(
let mut actual = [
$t::MIN, $t::MIN+1, $t::MIN / 2,
$t::MIN >> (mem::size_of::<$t>() * 8 / 2),
$t::MAX, $t::MAX-1, $t::MAX / 2,
$t::MAX >> (mem::size_of::<$t>() * 8 / 2),
(-1i8) as $t, 0, 1,
];
let mut expected = actual;
expected.sort();
actual.sort_by_key(|v| v.to_radix_key());
assert_eq!(actual, expected);
)*)
}
implement! {
u8 u16 u32 u64 u128 usize
i8 i16 i32 i64 i128 isize
}
}
#[test]
#[allow(clippy::inconsistent_digit_grouping)]
fn test_key_float() {
{
// F32
#[allow(clippy::unusual_byte_groupings)]
let mut actual = [
f32::from_bits(0b1_11111111_11111111111111111111111), // negative NaN
f32::from_bits(0b1_11111111_00000000000000000000001), // negative NaN
f32::from_bits(0b1_11111111_00000000000000000000000), // negative infinity
f32::from_bits(0b1_11111110_11111111111111111111111), // min
f32::from_bits(0b1_01111111_00000000000000000000000), // negative one
f32::from_bits(0b1_01111110_11111111111111111111111), // smallest larger than negative one
f32::from_bits(0b1_00000001_00000000000000000000000), // max negative
f32::from_bits(0b1_00000000_11111111111111111111111), // min negative subnormal
f32::from_bits(0b1_00000000_00000000000000000000001), // max negative subnormal
f32::from_bits(0b1_00000000_00000000000000000000000), // negative zero
f32::from_bits(0b0_00000000_00000000000000000000000), // positive zero
f32::from_bits(0b0_00000000_00000000000000000000001), // min positive subnormal
f32::from_bits(0b0_00000000_11111111111111111111111), // max positive subnormal
f32::from_bits(0b0_00000001_00000000000000000000000), // min positive
f32::from_bits(0b0_01111110_11111111111111111111111), // largest smaller than positive one
f32::from_bits(0b0_01111111_00000000000000000000000), // positive one
f32::from_bits(0b0_11111110_11111111111111111111111), // max
f32::from_bits(0b0_11111111_00000000000000000000000), // positive infinity
f32::from_bits(0b0_11111111_00000000000000000000001), // positive NaN
f32::from_bits(0b0_11111111_11111111111111111111111), // positive NaN
];
let expected = actual;
actual.reverse();
actual.sort_by_key(|v| v.to_radix_key());
for (a, e) in actual.iter().zip(expected.iter()) {
assert_eq!(a.to_bits(), e.to_bits());
}
}
{
// F64
#[rustfmt::skip]
#[allow(clippy::unusual_byte_groupings)]
let mut actual = [
f64::from_bits(0b1_11111111111_1111111111111111111111111111111111111111111111111111), // negative NaN
f64::from_bits(0b1_11111111111_0000000000000000000000000000000000000000000000000001), // negative NaN
f64::from_bits(0b1_11111111111_0000000000000000000000000000000000000000000000000000), // negative infinity
f64::from_bits(0b1_11111111110_1111111111111111111111111111111111111111111111111111), // min
f64::from_bits(0b1_01111111111_0000000000000000000000000000000000000000000000000000), // negative one
f64::from_bits(0b1_01111111110_1111111111111111111111111111111111111111111111111111), // min larger than negative one
f64::from_bits(0b1_00000000001_0000000000000000000000000000000000000000000000000000), // max negative
f64::from_bits(0b1_00000000000_1111111111111111111111111111111111111111111111111111), // min negative subnormal
f64::from_bits(0b1_00000000000_0000000000000000000000000000000000000000000000000001), // max negative subnormal
f64::from_bits(0b1_00000000000_0000000000000000000000000000000000000000000000000000), // negative zero
f64::from_bits(0b0_00000000000_0000000000000000000000000000000000000000000000000000), // positive zero
f64::from_bits(0b0_00000000000_0000000000000000000000000000000000000000000000000001), // min positive subnormal
f64::from_bits(0b0_00000000000_1111111111111111111111111111111111111111111111111111), // max positive subnormal
f64::from_bits(0b0_00000000001_0000000000000000000000000000000000000000000000000000), // min positive
f64::from_bits(0b0_01111111110_1111111111111111111111111111111111111111111111111111), // max smaller than positive one
f64::from_bits(0b0_01111111111_0000000000000000000000000000000000000000000000000000), // positive one
f64::from_bits(0b0_11111111110_1111111111111111111111111111111111111111111111111111), // max
f64::from_bits(0b0_11111111111_0000000000000000000000000000000000000000000000000000), // positive infinity
f64::from_bits(0b0_11111111111_0000000000000000000000000000000000000000000000000001), // positive NaN
f64::from_bits(0b0_11111111111_1111111111111111111111111111111111111111111111111111), // positive NaN
];
let expected = actual;
actual.reverse();
actual.sort_by_key(|v| v.to_radix_key());
for (a, e) in actual.iter().zip(expected.iter()) {
assert_eq!(a.to_bits(), e.to_bits());
}
}
}
}

vendor/radsort/src/sort.rs vendored Normal file

@@ -0,0 +1,215 @@
//! Implementations of radix keys and sorting functions.
use core::mem;
use crate::{double_buffer::DoubleBuffer, Key};
/// Unsigned integers used as sorting keys for radix sort.
///
/// These keys can be sorted bitwise. For conversion from scalar types, see
/// [`Scalar::to_radix_key()`].
///
/// [`Scalar::to_radix_key()`]: ../scalar/trait.Scalar.html#tymethod.to_radix_key
pub trait RadixKey: Key {
/// Sorts the slice using the provided key extraction function.
/// Runs one of the other functions, based on the length of the slice.
#[inline]
fn radix_sort<T, F>(slice: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self,
{
// Sorting has no meaningful behavior on zero-sized types.
if mem::size_of::<T>() == 0 {
return;
}
let len = slice.len();
if len < 2 {
return;
}
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
{
if len <= u32::MAX as usize {
Self::radix_sort_u32(slice, |t| key_fn(t), unopt);
return;
}
}
Self::radix_sort_usize(slice, |t| key_fn(t), unopt);
}
/// Sorting for slices with up to `u32::MAX` elements, which covers the
/// majority of cases. Uses `u32` indices for histograms and offsets to save
/// cache space.
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
fn radix_sort_u32<T, F>(slice: &mut [T], key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self;
/// Sorting function for slices with up to `usize::MAX` elements.
fn radix_sort_usize<T, F>(slice: &mut [T], key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self;
}
macro_rules! sort_impl {
($name:ident, $radix_key_type:ty, $offset_type:ty) => {
#[inline(never)] // Don't inline, the offset array needs a lot of stack
fn $name<T, F>(input: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> $radix_key_type,
{
// This implementation is radix 256, so the size of a digit is 8 bits / one byte.
// You can experiment with different digit sizes by changing this constant, but
// according to my benchmarks, the overhead from arbitrary shifting and masking
// will be higher than what you save by having fewer digits.
const DIGIT_BITS: usize = 8;
const RADIX_KEY_BITS: usize = mem::size_of::<$radix_key_type>() * 8;
// Have one bucket for each possible value of the digit
const BUCKET_COUNT: usize = 1 << DIGIT_BITS;
const DIGIT_COUNT: usize = (RADIX_KEY_BITS + DIGIT_BITS - 1) / DIGIT_BITS;
let digit_skip_enabled: bool = !unopt;
/// Extracts the digit from the key, starting with the least significant digit.
/// The digit is used as a bucket index.
#[inline(always)]
fn extract_digit(key: $radix_key_type, digit: usize) -> usize {
const DIGIT_MASK: $radix_key_type = ((1 << DIGIT_BITS) - 1) as $radix_key_type;
((key >> (digit * DIGIT_BITS)) & DIGIT_MASK) as usize
}
// In the worst case (`u128` key, `input.len() >= u32::MAX`) uses 32 KiB on the stack.
let mut offsets = [[0 as $offset_type; BUCKET_COUNT]; DIGIT_COUNT];
let mut skip_digit = [false; DIGIT_COUNT];
{
// Calculate bucket offsets for each digit.
// Calculate histograms/bucket sizes and store in `offsets`.
for t in input.iter() {
let key = key_fn(t);
for digit in 0..DIGIT_COUNT {
offsets[digit][extract_digit(key, digit)] += 1;
}
}
if digit_skip_enabled {
// For each digit, check if all the elements are in the same bucket.
// If so, we can skip the whole digit. Instead of checking all the buckets,
// we pick a key and check whether the bucket contains all the elements.
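// For example, when all the `u32` keys are below 256, every key has a
// zero byte in digits 1..4, so `offsets[digit][0] == len` for those
// digits and their passes are skipped entirely.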
let last_key = key_fn(input.last().unwrap());
for digit in 0..DIGIT_COUNT {
let last_bucket = extract_digit(last_key, digit);
let skip = offsets[digit][last_bucket] == input.len() as $offset_type;
skip_digit[digit] = skip;
}
}
// Turn the histogram/bucket sizes into bucket offsets by calculating a prefix sum.
// Sizes: |---b1---|-b2-|---b3---|----b4----|
// Offsets: 0 b1 b1+b2 b1+b2+b3
for digit in 0..DIGIT_COUNT {
if !(digit_skip_enabled && skip_digit[digit]) {
let mut offset_acc = 0;
for count in offsets[digit].iter_mut() {
let offset = offset_acc;
offset_acc += *count;
*count = offset;
}
}
}
// The `offsets` array now contains bucket offsets for each digit.
}
let len = input.len();
// Drop impl of DoubleBuffer ensures that `input` is consistent,
// e.g. in case of panic in the key function.
let mut buffer = DoubleBuffer::new(input);
// This is the main sorting loop. We sort the elements by each digit of the key,
// starting from the least-significant. After sorting by the last, most significant
// digit, our elements are sorted.
for digit in 0..DIGIT_COUNT {
if !(digit_skip_enabled && skip_digit[digit]) {
// Initial offset of each bucket.
let init_offsets = &offsets[digit];
// Offset of the first empty index in each bucket.
let mut working_offsets = *init_offsets;
buffer.scatter(|t| {
let key = key_fn(t);
let bucket = extract_digit(key, digit);
let offset = &mut working_offsets[bucket];
let index = *offset as usize;
// Increment the offset of the bucket. Use wrapping add in case the
// key function is unreliable and the bucket overflowed.
*offset = offset.wrapping_add(1);
index
});
// Check that each bucket had the same number of insertions as we expected.
// If this is not true, then the key function is unreliable and some elements
// in the write buffer were not written to.
//
// If the key function is unreliable, but the sizes of buckets ended up being
// the same, it would not get detected. This is sound; the only consequence
// is that the elements won't be sorted correctly.
{
// The `working_offsets` array now contains the end offset of each bucket.
// If the bucket is full, the working offset is now equal to the original
// offset of the next bucket. The working offset of the last bucket should
// be equal to the number of elements.
let bucket_sizes_match = working_offsets[0..BUCKET_COUNT - 1]
== offsets[digit][1..BUCKET_COUNT]
&& working_offsets[BUCKET_COUNT - 1] == len as $offset_type;
if !bucket_sizes_match {
// The bucket sizes do not match expected sizes, the key function is
// unreliable (programming mistake).
//
// The Drop impl will copy the last completed buffer into the slice.
drop(buffer);
panic!(
"The key function is not reliable: when called repeatedly, \
it returned different keys for the same element."
)
}
}
unsafe {
// SAFETY: we just ensured that every index was written to.
buffer.swap();
}
}
}
// The Drop impl will copy the last completed buffer into the slice.
drop(buffer);
}
};
}
macro_rules! radix_key_impl {
($($key_type:ty)*) => ($(
impl RadixKey for $key_type {
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
sort_impl!(radix_sort_u32, $key_type, u32);
sort_impl!(radix_sort_usize, $key_type, usize);
}
)*)
}
radix_key_impl! { u8 u16 u32 u64 u128 }