Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

vendor/radsort/src/double_buffer.rs vendored Normal file

@@ -0,0 +1,130 @@
use core::{mem::MaybeUninit, slice};
use alloc::{boxed::Box, vec::Vec};
/// Double buffer. Wraps a mutable slice and allocates scratch memory of the same size, so that
/// elements can be freely scattered from buffer to buffer.
///
/// # Drop behavior
///
/// Drop ensures that the mutable slice this buffer was constructed with contains all the original
/// elements.
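///
/// # Example
///
/// A minimal sketch of the intended flow (crate-internal, so not a doctest);
/// the indexer must visit every index exactly once before `swap` is called:
///
/// ```rust,ignore
/// let mut data = [30u8, 10, 20];
/// let mut buf = DoubleBuffer::new(&mut data);
/// // scatter each element to index `x / 10 - 1`, a permutation of 0..=2
/// buf.scatter(|&x| (x / 10 - 1) as usize);
/// // SAFETY: the indexer above wrote every index exactly once.
/// unsafe { buf.swap(); }
/// drop(buf); // Drop copies the committed buffer back into `data`
/// assert_eq!(data, [10, 20, 30]);
/// ```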
pub struct DoubleBuffer<'a, T> {
slice: &'a mut [MaybeUninit<T>],
scratch: Box<[MaybeUninit<T>]>,
slice_is_write: bool,
}
impl<'a, T> DoubleBuffer<'a, T> {
/// Creates a double buffer, allocating a scratch buffer of the same length as the input slice.
///
/// The supplied slice becomes the read buffer, the scratch buffer becomes the write buffer.
pub fn new(slice: &'a mut [T]) -> Self {
// SAFETY: The Drop impl ensures that the slice is initialized.
let slice = unsafe { slice_as_uninit_mut(slice) };
let scratch = {
let mut v = Vec::with_capacity(slice.len());
// SAFETY: we just allocated this capacity, and `MaybeUninit<T>` does not require initialization.
unsafe {
v.set_len(slice.len());
}
v.into_boxed_slice()
};
DoubleBuffer {
slice,
scratch,
slice_is_write: false,
}
}
/// Scatters the elements from the read buffer to the computed indices in
/// the write buffer. The read buffer is iterated from the beginning.
///
/// Call `swap` after this function to commit the write buffer state.
///
/// Returning an out-of-bounds index from the indexer causes this function
/// to return immediately, without visiting the remaining elements.
pub fn scatter<F>(&mut self, mut indexer: F)
where
F: FnMut(&T) -> usize,
{
let (read, write) = self.as_read_write();
let len = write.len();
for t in read {
let index = indexer(t);
if index >= len {
return;
}
let write_ptr = write[index].as_mut_ptr();
unsafe {
// SAFETY: both pointers are valid for T, aligned, and nonoverlapping
write_ptr.copy_from_nonoverlapping(t as *const T, 1);
}
}
}
/// Returns the current read and write buffers.
fn as_read_write(&mut self) -> (&[T], &mut [MaybeUninit<T>]) {
let (read, write): (&[MaybeUninit<T>], &mut [MaybeUninit<T>]) = if self.slice_is_write {
(self.scratch.as_ref(), self.slice)
} else {
(self.slice, self.scratch.as_mut())
};
// SAFETY: The read buffer is always initialized.
let read = unsafe { slice_assume_init_ref(read) };
(read, write)
}
/// Swaps the read and write buffer, committing the write buffer state.
///
/// # Safety
///
/// The caller must ensure that every element of the write buffer was
/// written to before calling this function.
pub unsafe fn swap(&mut self) {
self.slice_is_write = !self.slice_is_write;
}
}
/// Ensures that the input slice contains all the original elements.
impl<'a, T> Drop for DoubleBuffer<'a, T> {
fn drop(&mut self) {
if self.slice_is_write {
// The input slice is the write buffer, copy the consistent state from the read buffer
unsafe {
// SAFETY: `scratch` is the read buffer, it is initialized. The length is the same.
self.slice
.as_mut_ptr()
.copy_from_nonoverlapping(self.scratch.as_ptr(), self.slice.len());
}
self.slice_is_write = false;
}
}
}
/// Get a slice of the initialized items.
///
/// # Safety
///
/// The caller must ensure that all the items are initialized.
#[inline(always)]
pub unsafe fn slice_assume_init_ref<T>(slice: &[MaybeUninit<T>]) -> &[T] {
// SAFETY: `[MaybeUninit<T>]` and `[T]` have the same layout.
unsafe { slice::from_raw_parts(slice.as_ptr() as *const T, slice.len()) }
}
/// View the mutable slice of `T` as a slice of `MaybeUninit<T>`.
///
/// # Safety
///
/// The caller must ensure that all the items of the returned slice are
/// initialized before dropping it.
#[inline(always)]
pub unsafe fn slice_as_uninit_mut<T>(slice: &mut [T]) -> &mut [MaybeUninit<T>] {
// SAFETY: `[MaybeUninit<T>]` and `[T]` have the same layout.
unsafe { slice::from_raw_parts_mut(slice.as_mut_ptr() as *mut MaybeUninit<T>, slice.len()) }
}

vendor/radsort/src/lib.rs vendored Normal file

@@ -0,0 +1,436 @@
//! `radsort` is a radix sort implementation for sorting by scalar keys
//! (integers, floats, chars, bools).
//!
//! All built-in scalar types can be used as sorting keys: Booleans, characters,
//! integers, and floating point-numbers. To sort by multiple keys, put them in
//! a tuple, starting from the most significant key. See [`Key`] for a full list
//! of supported keys.
//!
//! - best and worst-case running time is `O(n)`; see [benchmarks] for more
//!   detailed performance characteristics
//! - space complexity is `O(n)`; direct sort allocates temporary storage the
//!   size of the slice, for indirect see [`sort_by_cached_key`]
//! - stable, i.e. does not reorder equal elements
//! - uses `#![no_std]`, but needs an allocator
//!
//! This sort can be several times faster than `slice::sort` and
//! `slice::sort_unstable`, typically on large slices (hundreds of elements or
//! more). It performs worse on short slices and when using wide keys
//! (16 bytes). See [benchmarks] to get a better picture of the performance
//! characteristics.
//!
//! `radsort` is an implementation of LSB radix sort, using counting sort to
//! sort the slice by each digit (byte) of the key. As an optimization, the
//! slice is sorted only by digits which differ between the keys. See the
//! [`unopt`] module for more details and functions which don't use this
//! optimization.
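//!
//! For instance (a sketch of the passes), sorting `[0x0201u16, 0x0102]` first
//! reorders by the low byte (`0x01` sorts before `0x02`) and then, stably, by
//! the high byte, ending with the sorted `[0x0102, 0x0201]`.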
//!
//! This implementation is based on radix sort by Pierre Terdiman,
//! published at
//! [http://codercorner.com/RadixSortRevisited.htm](http://codercorner.com/RadixSortRevisited.htm),
//! with select optimizations published by Michael Herf at
//! [http://stereopsis.com/radix.html](http://stereopsis.com/radix.html).
//!
//! # Floating-point numbers
//!
//! Floating-point number keys are effectively sorted according to their partial
//! order (see [`PartialOrd`]), with `NaN` values at the beginning (before the
//! negative infinity) and at the end (after the positive infinity), depending
//! on the sign bit of each `NaN`.
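//!
//! For example, a sketch of the resulting order (this assumes the platform's
//! `f32::NAN` is a positive quiet NaN, which is the common case):
//! ```rust
//! let mut data = [1.0f32, f32::NAN, -1.0, -f32::NAN];
//!
//! radsort::sort(&mut data);
//!
//! // the negative NaN sorts first, the positive NaN last
//! assert!(data[0].is_nan() && data[0].is_sign_negative());
//! assert_eq!(data[1], -1.0);
//! assert_eq!(data[2], 1.0);
//! assert!(data[3].is_nan() && data[3].is_sign_positive());
//! ```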
//!
//! # Examples
//!
//! Slices of scalar types (integers, floating-point numbers, Booleans, and
//! characters) can be sorted directly:
//! ```rust
//! let mut data = [2i32, -1, 1, 0, -2];
//!
//! radsort::sort(&mut data);
//!
//! assert_eq!(data, [-2, -1, 0, 1, 2]);
//! ```
//!
//! Use a key extraction function to sort other types:
//! ```rust
//! let mut friends = ["Punchy", "Isabelle", "Sly", "Puddles", "Gladys"];
//!
//! // sort by the length of the string in bytes
//! radsort::sort_by_key(&mut friends, |s| s.len());
//!
//! assert_eq!(friends, ["Sly", "Punchy", "Gladys", "Puddles", "Isabelle"]);
//! ```
//!
//! To sort by two or more keys, put them in a tuple, starting with the most
//! significant key:
//! ```rust
//! # #[derive(Debug, PartialEq)]
//! struct Height { feet: u8, inches: u8, }
//!
//! let mut heights = [
//! Height { feet: 6, inches: 1 },
//! Height { feet: 5, inches: 9 },
//! Height { feet: 6, inches: 0 },
//! ];
//!
//! // sort by feet, if feet are equal, sort by inches
//! radsort::sort_by_key(&mut heights, |h| (h.feet, h.inches));
//!
//! assert_eq!(heights, [
//! Height { feet: 5, inches: 9 },
//! Height { feet: 6, inches: 0 },
//! Height { feet: 6, inches: 1 },
//! ]);
//! ```
//!
//! [`Key`]: ./trait.Key.html
//! [`unopt`]: ./unopt/index.html
//! [benchmarks]: https://github.com/JakubValtar/radsort/wiki/Benchmarks
//! [`sort_by_cached_key`]: ./fn.sort_by_cached_key.html
//! [`PartialOrd`]: https://doc.rust-lang.org/std/cmp/trait.PartialOrd.html
#![no_std]
extern crate alloc;
use alloc::vec::Vec;
mod double_buffer;
mod scalar;
mod sort;
use scalar::Scalar;
use sort::RadixKey;
/// Sorts the slice.
///
/// Slice elements can be any scalar type. See [`Key`] for a full list.
///
/// This sort is stable (i.e., does not reorder equal elements) and `O(w n)`,
/// where `w` is the size of the key in bytes.
///
/// Allocates temporary storage the size of the slice.
///
/// # Examples
/// ```rust
/// let mut data = [5i32, -1, 3, 15, -42];
///
/// radsort::sort(&mut data);
///
/// assert_eq!(data, [-42, -1, 3, 5, 15]);
/// ```
/// [`Key`]: trait.Key.html
#[inline]
pub fn sort<T: Key>(slice: &mut [T]) {
Key::sort_by_key(slice, |v| *v, false);
}
/// Sorts the slice using a key extraction function.
///
/// Key can be any scalar type. See [`Key`] for a full list.
///
/// This sort is stable (i.e., does not reorder equal elements) and `O(w m n)`,
/// where the key function is `O(m)` and `w` is the size of the key in bytes.
///
/// Allocates temporary storage the size of the slice.
///
/// See [`sort_by_cached_key`] if you use an expensive key function or if you
/// need to sort large elements.
///
/// # Panics
///
/// Can panic if the key function returns different keys for the same element
/// when called repeatedly. The panic is on a best-effort basis. In case of
/// panic, the order of elements in the slice is not specified.
///
/// # Examples
///
/// ```rust
/// let mut friends = ["Punchy", "Isabelle", "Sly", "Puddles", "Gladys"];
///
/// // sort by the length of the string in bytes
/// radsort::sort_by_key(&mut friends, |s| s.len());
///
/// assert_eq!(friends, ["Sly", "Punchy", "Gladys", "Puddles", "Isabelle"]);
/// ```
///
/// [`Key`]: trait.Key.html
/// [`sort_by_cached_key`]: fn.sort_by_cached_key.html
#[inline]
pub fn sort_by_key<T, F, K>(slice: &mut [T], mut key_fn: F)
where
F: FnMut(&T) -> K,
K: Key,
{
Key::sort_by_key(slice, |t| key_fn(t), false);
}
/// Sorts the slice indirectly, using a key extraction function and caching the keys.
///
/// Key can be any scalar type. See [`Key`] for a full list.
///
/// This sort is stable (i.e., does not reorder equal elements) and
/// `O(m n + w n)`, where the key function is `O(m)`.
///
/// This function can be significantly faster for sorting by an expensive key
/// function or for sorting large elements. The keys are extracted, sorted, and
/// then the elements of the slice are reordered in-place. This saves CPU cycles
/// in case of an expensive key function and saves memory bandwidth in case of
/// large elements.
///
/// For sorting small elements by simple key functions (e.g., functions that are
/// property accesses or basic operations), [`sort_by_key`] is likely to be
/// faster.
///
/// In the worst case, allocates temporary storage in a `Vec<(K, usize)>` twice
/// the length of the slice.
///
/// # Examples
///
/// ```rust
/// let mut data = ["-6", "2", "15", "-1", "0"];
///
/// radsort::sort_by_cached_key(&mut data, |s| s.parse::<i32>().unwrap());
///
/// assert_eq!(data, ["-6", "-1", "0", "2", "15"]);
/// ```
///
/// [`Key`]: ./trait.Key.html
/// [`sort_by_key`]: fn.sort_by_key.html
#[inline]
pub fn sort_by_cached_key<T, F, K>(slice: &mut [T], key_fn: F)
where
F: FnMut(&T) -> K,
K: Key,
{
sort_by_cached_key_internal(slice, key_fn, false);
}
/// Sorting functions which don't use optimizations based on the values
/// of the keys. Useful for benchmarks and consistent performance.
///
/// For each digit (byte) of the key, `radsort` reorders the slice once.
/// Functions in the crate root sort only by the bytes which differ between the
/// keys. This can lead to large differences in sorting time, based on the
/// values in the slice.
///
/// For example, sorting `u32` keys that are all less than `u8::MAX` will sort
/// only by the least significant byte and skip the three most significant
/// bytes, which are zero; this cuts the sorting time to roughly one quarter,
/// plus the overhead of analyzing the keys.
///
/// Unlike functions in the crate root, functions in this module don't use
/// this optimization and sort by all bytes of the key. This leads to worse but
/// more consistent performance. The effects of the CPU cache will still play a
/// role, but at least the number of executed instructions will not depend on
/// the values in the slice, only on the length of the slice and the width of
/// the key type.
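///
/// A sketch of the difference in use (the results are identical; only the
/// running time differs):
///
/// ```rust
/// let mut a: Vec<u32> = (0..100).rev().collect(); // keys fit in one byte
/// let mut b = a.clone();
///
/// radsort::sort(&mut a);        // analyzes keys, sorts by the low byte only
/// radsort::unopt::sort(&mut b); // always sorts by all four bytes
///
/// assert_eq!(a, b);
/// ```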
pub mod unopt {
use super::*;
/// Version of [`sort`](../fn.sort.html) which does not skip digits (bytes).
///
/// See the [module documentation](./index.html) for more details.
#[inline]
pub fn sort<T: Key>(slice: &mut [T]) {
Key::sort_by_key(slice, |v| *v, true);
}
/// Version of [`sort_by_key`](../fn.sort_by_key.html) which does not skip digits (bytes).
///
/// See the [module documentation](./index.html) for more details.
#[inline]
pub fn sort_by_key<T, F, K>(slice: &mut [T], mut key_fn: F)
where
F: FnMut(&T) -> K,
K: Key,
{
Key::sort_by_key(slice, |t| key_fn(t), true);
}
/// Version of [`sort_by_cached_key`](../fn.sort_by_cached_key.html) which does not skip digits (bytes).
///
/// See the [module documentation](./index.html) for more details.
#[inline]
pub fn sort_by_cached_key<T, F, K>(slice: &mut [T], key_fn: F)
where
F: FnMut(&T) -> K,
K: Key,
{
sort_by_cached_key_internal(slice, key_fn, true);
}
}
#[inline]
fn sort_by_cached_key_internal<T, F, K>(slice: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> K,
K: Key,
{
// Adapted from std::slice::sort_by_cached_key
macro_rules! radsort_by_cached_key {
($index:ty) => {{
let mut indices: Vec<(K, $index)> = slice
.iter()
.map(|t| key_fn(t))
.enumerate()
.map(|(i, k)| (k, i as $index))
.collect();
Key::sort_by_key(&mut indices, |(k, _)| *k, unopt);
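// `indices` is now sorted by key: `indices[i].1` holds the original
// position of the element that belongs at rank `i`. Swap elements into
// place, following forwarded indices for values that have already moved.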
for i in 0..slice.len() {
let mut index = indices[i].1;
while (index as usize) < i {
// The previous value was swapped somewhere else. The index to which
// the original value was swapped was marked into the index array.
// Follow the indices to find out where the original value ended up.
index = indices[index as usize].1;
}
// Mark down the index to which the current value goes
indices[i].1 = index;
slice.swap(i, index as usize);
}
}};
}
let len = slice.len();
if len < 2 {
return;
}
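// Pick the smallest index type that can address every element, but only
// where the narrower index actually shrinks the (key, index) pair;
// alignment padding can make e.g. (u64, u8) as large as (u64, u16).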
let sz_u8 = core::mem::size_of::<(K, u8)>();
let sz_u16 = core::mem::size_of::<(K, u16)>();
#[cfg(not(target_pointer_width = "16"))]
let sz_u32 = core::mem::size_of::<(K, u32)>();
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
let sz_usize = core::mem::size_of::<(K, usize)>();
if sz_u8 < sz_u16 && len <= (u8::MAX as usize + 1) {
return radsort_by_cached_key!(u8);
}
#[cfg(not(target_pointer_width = "16"))]
if sz_u16 < sz_u32 && len <= (u16::MAX as usize + 1) {
return radsort_by_cached_key!(u16);
}
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
if sz_u32 < sz_usize && len <= (u32::MAX as usize + 1) {
return radsort_by_cached_key!(u32);
}
radsort_by_cached_key!(usize)
}
/// Types which can be used as sorting keys.
///
/// Implemented for all scalar types and their tuples.
///
/// Slices of types for which `Key` is implemented can be sorted directly using
/// [`sort`]. Slices of other types can be sorted using [`sort_by_key`] with a
/// key extraction function.
///
/// [`sort`]: fn.sort.html
/// [`sort_by_key`]: fn.sort_by_key.html
pub trait Key: Copy + private::Sealed {
// If this crate didn't support tuples, this trait wouldn't be needed and
// Scalar could be exposed directly to users as the `Key` trait.
/// Sorts the slice using `Self` as the type of the key.
///
/// You shouldn't need to call this directly, use one of the functions in
/// the [crate root](index.html#functions) instead.
#[doc(hidden)]
fn sort_by_key<T, F>(slice: &mut [T], key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self;
}
macro_rules! impl_for_scalar { ($($t:ty)*) => ($(
impl Key for $t {
#[doc(hidden)]
#[inline]
fn sort_by_key<T, F>(slice: &mut [T], mut key_fn: F, unopt: bool)
where F: FnMut(&T) -> Self
{
RadixKey::radix_sort(slice, |t| key_fn(t).to_radix_key(), unopt);
}
}
)*) }
impl_for_scalar! {
bool char
u8 u16 u32 u64 u128 usize
i8 i16 i32 i64 i128 isize
f32 f64
}
impl<A: Key> Key for (A,) {
#[doc(hidden)]
#[inline]
fn sort_by_key<T, F>(slice: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self,
{
A::sort_by_key(slice, |t| key_fn(t).0, unopt);
}
}
impl<A: Key, B: Key> Key for (A, B) {
#[doc(hidden)]
#[inline]
fn sort_by_key<T, F>(slice: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self,
{
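// The sort is stable, so sorting by the less significant key first and
// the more significant key last yields lexicographic (tuple) order.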
B::sort_by_key(slice, |t| key_fn(t).1, unopt);
A::sort_by_key(slice, |t| key_fn(t).0, unopt);
}
}
impl<A: Key, B: Key, C: Key> Key for (A, B, C) {
#[doc(hidden)]
#[inline]
fn sort_by_key<T, F>(slice: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self,
{
C::sort_by_key(slice, |t| key_fn(t).2, unopt);
B::sort_by_key(slice, |t| key_fn(t).1, unopt);
A::sort_by_key(slice, |t| key_fn(t).0, unopt);
}
}
impl<A: Key, B: Key, C: Key, D: Key> Key for (A, B, C, D) {
#[doc(hidden)]
#[inline]
fn sort_by_key<T, F>(slice: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self,
{
D::sort_by_key(slice, |t| key_fn(t).3, unopt);
C::sort_by_key(slice, |t| key_fn(t).2, unopt);
B::sort_by_key(slice, |t| key_fn(t).1, unopt);
A::sort_by_key(slice, |t| key_fn(t).0, unopt);
}
}
mod private {
use super::*;
/// This trait serves as a seal for the `Key` trait to prevent downstream
/// implementations.
pub trait Sealed {}
macro_rules! sealed_impl { ($($t:ty)*) => ($(
impl Sealed for $t {}
)*) }
sealed_impl! {
bool char
u8 u16 u32 u64 u128 usize
i8 i16 i32 i64 i128 isize
f32 f64
}
impl<A: Key> Sealed for (A,) {}
impl<A: Key, B: Key> Sealed for (A, B) {}
impl<A: Key, B: Key, C: Key> Sealed for (A, B, C) {}
impl<A: Key, B: Key, C: Key, D: Key> Sealed for (A, B, C, D) {}
}

vendor/radsort/src/scalar.rs vendored Normal file

@@ -0,0 +1,310 @@
//! Conversions from scalar types to radix keys, which can be sorted bitwise.
use core::mem;
use crate::sort::RadixKey;
/// Scalar types which can be converted to radix sorting keys.
pub trait Scalar: Copy + private::Sealed {
type ToRadixKey: RadixKey;
/// Maps the value to a radix sorting key, preserving the sorting order.
fn to_radix_key(self) -> Self::ToRadixKey;
}
/// Implements `Scalar` for the given unsigned integer type(s).
///
/// Since we use unsigned integers as radix sorting keys, we directly return the
/// value.
macro_rules! key_impl_unsigned {
($($t:ty)*) => ($( key_impl_unsigned!($t => $t); )*);
($t:ty => $radix_key:ty) => (
impl Scalar for $t {
type ToRadixKey = $radix_key;
#[inline(always)]
fn to_radix_key(self) -> Self::ToRadixKey {
self as $radix_key
}
}
)
}
key_impl_unsigned! { u8 u16 u32 u64 u128 }
#[cfg(target_pointer_width = "16")]
key_impl_unsigned!(usize => u16);
#[cfg(target_pointer_width = "32")]
key_impl_unsigned!(usize => u32);
#[cfg(target_pointer_width = "64")]
key_impl_unsigned!(usize => u64);
key_impl_unsigned!(bool => u8);
key_impl_unsigned!(char => u32);
/// Implements `Scalar` for the given signed integer type(s).
///
/// Signed integers are mapped to unsigned integers of the same width.
///
/// # Conversion
///
/// In two's complement, negative integers have the most significant bit set.
/// When we cast to an unsigned integer, we end up with negative integers
/// ordered after positive integers. To correct the order, we flip the sign bit.
///
/// ```plaintext
/// -128: 1000_0000       0000_0000
///   -1: 1111_1111       0111_1111
///    0: 0000_0000  ->   1000_0000
///    1: 0000_0001       1000_0001
///  127: 0111_1111       1111_1111
/// ```
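///
/// A sketch of the resulting mapping for `i8` (crate-internal, so not a
/// doctest):
///
/// ```rust,ignore
/// assert_eq!((-128i8).to_radix_key(), 0x00u8);
/// assert_eq!((-1i8).to_radix_key(), 0x7F);
/// assert_eq!(0i8.to_radix_key(), 0x80);
/// assert_eq!(127i8.to_radix_key(), 0xFF);
/// ```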
macro_rules! key_impl_signed {
($($t:ty => $radix_key:ty),*) => ($(
impl Scalar for $t {
type ToRadixKey = $radix_key;
#[inline(always)]
fn to_radix_key(self) -> Self::ToRadixKey {
const BIT_COUNT: usize = 8 * mem::size_of::<$t>();
const SIGN_BIT: $radix_key = 1 << (BIT_COUNT-1);
(self as $radix_key) ^ SIGN_BIT
}
}
)*)
}
key_impl_signed! {
i8 => u8,
i16 => u16,
i32 => u32,
i64 => u64,
i128 => u128
}
#[cfg(target_pointer_width = "16")]
key_impl_signed!(isize => u16);
#[cfg(target_pointer_width = "32")]
key_impl_signed!(isize => u32);
#[cfg(target_pointer_width = "64")]
key_impl_signed!(isize => u64);
/// Implements `Scalar` for the given floating-point number type(s).
///
/// Floating-point numbers are mapped to unsigned integers of the same width.
///
/// # Conversion
///
/// IEEE 754 floating point numbers have a sign bit, an exponent, and a
/// mantissa. We can treat the exponent and the mantissa as a single block
/// denoting the magnitude.
///
/// This leaves us with a sign-magnitude representation. Magnitude increases
/// away from zero and the sign bit tells us in which direction.
///
/// After transmuting to unsigned integers, we have two problems:
/// - because of the sign bit, negative numbers end up after the positive
/// - negative numbers go in the opposite direction, because we went from
/// sign-magnitude representation (increases away from zero) to two's
/// complement (increases away from negative infinity)
///
/// To fix these problems, we:
/// - flip the sign bit, which makes negative numbers sort before positive ones
/// - flip the magnitude bits of negative numbers, which reverses the order of
///   negative values
///
/// This gives us a simple way to map floating-point numbers to unsigned
/// integers:
/// - sign bit 0: flip the sign bit
/// - sign bit 1: flip all the bits
///
/// These are halves (~`f16`) for brevity; `f32` and `f64` just have more bits
/// in the middle.
///
/// ```plaintext
/// negative NaN 1_11111_xxxxxxxxx1 0_00000_xxxxxxxxx0
/// NEG_INFINITY 1_11111_0000000000 0_00000_1111111111
/// MIN 1_11110_1111111111 -> 0_00001_0000000000 flip all the bits
/// -1.0 1_01111_0000000000 0_10000_1111111111
/// MAX_NEGATIVE 1_00000_0000000001 0_11111_1111111110
/// -0.0 1_00000_0000000000 0_11111_1111111111
/// --------------------------------------------------------------------------
/// 0.0 0_00000_0000000000 1_00000_0000000000
/// MIN_POSITIVE 0_00000_0000000001 1_00000_0000000001
/// 1.0 0_01111_0000000000 -> 1_01111_0000000000 flip the sign bit
/// MAX 0_11110_1111111111 1_11110_1111111111
/// INFINITY 0_11111_0000000000 1_11111_0000000000
/// positive NaN 0_11111_xxxxxxxxx1 1_11111_xxxxxxxxx1
/// ```
///
/// # Special values
///
/// As shown above, infinities are sorted correctly before and after min and max
/// values. NaN values, depending on their sign bit, end up in two blocks at the
/// very beginning and at the very end.
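///
/// A sketch of the resulting mapping for `f32` (crate-internal, so not a
/// doctest):
///
/// ```rust,ignore
/// assert_eq!(0.0f32.to_radix_key(), 0x8000_0000u32); // flip the sign bit
/// assert_eq!((-0.0f32).to_radix_key(), 0x7FFF_FFFF); // flip all the bits
/// assert!((-1.0f32).to_radix_key() < 1.0f32.to_radix_key());
/// ```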
macro_rules! key_impl_float {
// signed_key type is needed for arithmetic right shift
($($t:ty => $radix_key:ty : $signed_key:ty),*) => ($(
impl Scalar for $t {
type ToRadixKey = $radix_key;
#[inline(always)]
fn to_radix_key(self) -> Self::ToRadixKey {
const BIT_COUNT: usize = 8 * mem::size_of::<$t>();
// all floats need to have the sign bit flipped
const FLIP_SIGN_MASK: $radix_key = 1 << (BIT_COUNT-1); // 0x800...
let bits = self.to_bits();
// negative floats need to have the rest flipped as well, extend the sign bit to the
// whole width with arithmetic right shift to get a flip mask 0x00...0 or 0xFF...F
let flip_negative_mask = ((bits as $signed_key) >> (BIT_COUNT-1)) as $radix_key;
bits ^ (flip_negative_mask | FLIP_SIGN_MASK)
}
}
)*)
}
key_impl_float! {
f32 => u32 : i32,
f64 => u64 : i64
}
mod private {
/// This trait serves as a seal for the `Scalar` trait to prevent downstream
/// implementations.
pub trait Sealed {}
macro_rules! sealed_impl { ($($t:ty)*) => ($(
impl Sealed for $t {}
)*) }
sealed_impl! {
bool char
u8 u16 u32 u64 u128 usize
i8 i16 i32 i64 i128 isize
f32 f64
}
}
#[cfg(test)]
mod tests {
//! Tests that `to_radix_key` implementations preserve the order of the
//! values. Tests use `std::slice::sort_by_key` to make sure that the
//! sorting function is reliable.
use super::*;
#[test]
fn test_key_bool() {
assert!(false.to_radix_key() < true.to_radix_key());
}
#[test]
fn test_key_char() {
#[rustfmt::skip]
let mut actual = [
'\u{0}', '\u{1}', '\u{F}', '\u{7F}', // 1-byte sequence
'\u{80}', '\u{81}', '\u{FF}', '\u{7FF}', // 2-byte sequence
'\u{800}', '\u{801}', '\u{FFF}', '\u{FFFF}', // 3-byte sequence
'\u{10000}', '\u{10001}', '\u{FFFFF}', '\u{10FFFF}' // 4-byte sequence
];
let expected = actual;
actual.reverse();
actual.sort_by_key(|v| v.to_radix_key());
assert_eq!(actual, expected);
}
#[test]
fn test_key_numeric() {
macro_rules! implement {
($($t:ident)*) => ($(
let mut actual = [
$t::MIN, $t::MIN+1, $t::MIN / 2,
$t::MIN >> (mem::size_of::<$t>() * 8 / 2),
$t::MAX, $t::MAX-1, $t::MAX / 2,
$t::MAX >> (mem::size_of::<$t>() * 8 / 2),
(-1i8) as $t, 0, 1,
];
let mut expected = actual;
expected.sort();
actual.sort_by_key(|v| v.to_radix_key());
assert_eq!(actual, expected);
)*)
}
implement! {
u8 u16 u32 u64 u128 usize
i8 i16 i32 i64 i128 isize
}
}
#[test]
#[allow(clippy::inconsistent_digit_grouping)]
fn test_key_float() {
{
// F32
#[allow(clippy::unusual_byte_groupings)]
let mut actual = [
f32::from_bits(0b1_11111111_11111111111111111111111), // negative NaN
f32::from_bits(0b1_11111111_00000000000000000000001), // negative NaN
f32::from_bits(0b1_11111111_00000000000000000000000), // negative infinity
f32::from_bits(0b1_11111110_11111111111111111111111), // min
f32::from_bits(0b1_01111111_00000000000000000000000), // negative one
f32::from_bits(0b1_01111110_11111111111111111111111), // smallest larger than negative one
f32::from_bits(0b1_00000001_00000000000000000000000), // max negative
f32::from_bits(0b1_00000000_11111111111111111111111), // min negative subnormal
f32::from_bits(0b1_00000000_00000000000000000000001), // max negative subnormal
f32::from_bits(0b1_00000000_00000000000000000000000), // negative zero
f32::from_bits(0b0_00000000_00000000000000000000000), // positive zero
f32::from_bits(0b0_00000000_00000000000000000000001), // min positive subnormal
f32::from_bits(0b0_00000000_11111111111111111111111), // max positive subnormal
f32::from_bits(0b0_00000001_00000000000000000000000), // min positive
f32::from_bits(0b0_01111110_11111111111111111111111), // largest smaller than positive one
f32::from_bits(0b0_01111111_00000000000000000000000), // positive one
f32::from_bits(0b0_11111110_11111111111111111111111), // max
f32::from_bits(0b0_11111111_00000000000000000000000), // positive infinity
f32::from_bits(0b0_11111111_00000000000000000000001), // positive NaN
f32::from_bits(0b0_11111111_11111111111111111111111), // positive NaN
];
let expected = actual;
actual.reverse();
actual.sort_by_key(|v| v.to_radix_key());
for (a, e) in actual.iter().zip(expected.iter()) {
assert_eq!(a.to_bits(), e.to_bits());
}
}
{
// F64
#[rustfmt::skip]
#[allow(clippy::unusual_byte_groupings)]
let mut actual = [
f64::from_bits(0b1_11111111111_1111111111111111111111111111111111111111111111111111), // negative NaN
f64::from_bits(0b1_11111111111_0000000000000000000000000000000000000000000000000001), // negative NaN
f64::from_bits(0b1_11111111111_0000000000000000000000000000000000000000000000000000), // negative infinity
f64::from_bits(0b1_11111111110_1111111111111111111111111111111111111111111111111111), // min
f64::from_bits(0b1_01111111111_0000000000000000000000000000000000000000000000000000), // negative one
f64::from_bits(0b1_01111111110_1111111111111111111111111111111111111111111111111111), // min larger than negative one
f64::from_bits(0b1_00000000001_0000000000000000000000000000000000000000000000000000), // max negative
f64::from_bits(0b1_00000000000_1111111111111111111111111111111111111111111111111111), // min negative subnormal
f64::from_bits(0b1_00000000000_0000000000000000000000000000000000000000000000000001), // max negative subnormal
f64::from_bits(0b1_00000000000_0000000000000000000000000000000000000000000000000000), // negative zero
f64::from_bits(0b0_00000000000_0000000000000000000000000000000000000000000000000000), // positive zero
f64::from_bits(0b0_00000000000_0000000000000000000000000000000000000000000000000001), // min positive subnormal
f64::from_bits(0b0_00000000000_1111111111111111111111111111111111111111111111111111), // max positive subnormal
f64::from_bits(0b0_00000000001_0000000000000000000000000000000000000000000000000000), // min positive
f64::from_bits(0b0_01111111110_1111111111111111111111111111111111111111111111111111), // max smaller than positive one
f64::from_bits(0b0_01111111111_0000000000000000000000000000000000000000000000000000), // positive one
f64::from_bits(0b0_11111111110_1111111111111111111111111111111111111111111111111111), // max
f64::from_bits(0b0_11111111111_0000000000000000000000000000000000000000000000000000), // positive infinity
f64::from_bits(0b0_11111111111_0000000000000000000000000000000000000000000000000001), // positive NaN
f64::from_bits(0b0_11111111111_1111111111111111111111111111111111111111111111111111), // positive NaN
];
let expected = actual;
actual.reverse();
actual.sort_by_key(|v| v.to_radix_key());
for (a, e) in actual.iter().zip(expected.iter()) {
assert_eq!(a.to_bits(), e.to_bits());
}
}
}
}

vendor/radsort/src/sort.rs vendored Normal file

@@ -0,0 +1,215 @@
//! Implementations of radix keys and sorting functions.
use core::mem;
use crate::{double_buffer::DoubleBuffer, Key};
/// Unsigned integers used as sorting keys for radix sort.
///
/// These keys can be sorted bitwise. For conversion from scalar types, see
/// [`Scalar::to_radix_key()`].
///
/// [`Scalar::to_radix_key()`]: ../scalar/trait.Scalar.html#tymethod.to_radix_key
pub trait RadixKey: Key {
/// Sorts the slice using the provided key extraction function.
/// Runs one of the other functions, based on the length of the slice.
#[inline]
fn radix_sort<T, F>(slice: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self,
{
// Sorting has no meaningful behavior on zero-sized types.
if mem::size_of::<T>() == 0 {
return;
}
let len = slice.len();
if len < 2 {
return;
}
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
{
if len <= u32::MAX as usize {
Self::radix_sort_u32(slice, |t| key_fn(t), unopt);
return;
}
}
Self::radix_sort_usize(slice, |t| key_fn(t), unopt);
}
/// Sorting for slices with up to `u32::MAX` elements, which covers the
/// majority of cases. Uses `u32` indices for histograms and offsets to save
/// cache space.
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
fn radix_sort_u32<T, F>(slice: &mut [T], key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self;
/// Sorting function for slices with up to `usize::MAX` elements.
fn radix_sort_usize<T, F>(slice: &mut [T], key_fn: F, unopt: bool)
where
F: FnMut(&T) -> Self;
}
macro_rules! sort_impl {
($name:ident, $radix_key_type:ty, $offset_type:ty) => {
#[inline(never)] // Don't inline, the offset array needs a lot of stack
fn $name<T, F>(input: &mut [T], mut key_fn: F, unopt: bool)
where
F: FnMut(&T) -> $radix_key_type,
{
// This implementation is radix 256, so the size of a digit is 8 bits / one byte.
// You can experiment with different digit sizes by changing this constant, but
// according to my benchmarks, the overhead from arbitrary shifting and masking
// will be higher than what you save by having fewer digits.
const DIGIT_BITS: usize = 8;
const RADIX_KEY_BITS: usize = mem::size_of::<$radix_key_type>() * 8;
// Have one bucket for each possible value of the digit
const BUCKET_COUNT: usize = 1 << DIGIT_BITS;
const DIGIT_COUNT: usize = (RADIX_KEY_BITS + DIGIT_BITS - 1) / DIGIT_BITS;
let digit_skip_enabled: bool = !unopt;
/// Extracts the digit from the key, starting with the least significant digit.
/// The digit is used as a bucket index.
#[inline(always)]
fn extract_digit(key: $radix_key_type, digit: usize) -> usize {
const DIGIT_MASK: $radix_key_type = ((1 << DIGIT_BITS) - 1) as $radix_key_type;
((key >> (digit * DIGIT_BITS)) & DIGIT_MASK) as usize
}
// In the worst case (`u128` key, `input.len() >= u32::MAX`) uses 32 KiB on the stack.
let mut offsets = [[0 as $offset_type; BUCKET_COUNT]; DIGIT_COUNT];
let mut skip_digit = [false; DIGIT_COUNT];
{
// Calculate bucket offsets for each digit.
// Calculate histograms/bucket sizes and store in `offsets`.
for t in input.iter() {
let key = key_fn(t);
for digit in 0..DIGIT_COUNT {
offsets[digit][extract_digit(key, digit)] += 1;
}
}
if digit_skip_enabled {
// For each digit, check if all the elements are in the same bucket.
// If so, we can skip the whole digit. Instead of checking all the buckets,
// we pick a key and check whether the bucket contains all the elements.
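// For example, when all the `u32` keys are below 256, every key has a
// zero byte in digits 1..4, so `offsets[digit][0] == len` for those
// digits and their passes are skipped entirely.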
let last_key = key_fn(input.last().unwrap());
for digit in 0..DIGIT_COUNT {
let last_bucket = extract_digit(last_key, digit);
let skip = offsets[digit][last_bucket] == input.len() as $offset_type;
skip_digit[digit] = skip;
}
}
// Turn the histogram/bucket sizes into bucket offsets by calculating a prefix sum.
// Sizes: |---b1---|-b2-|---b3---|----b4----|
// Offsets: 0 b1 b1+b2 b1+b2+b3
for digit in 0..DIGIT_COUNT {
if !(digit_skip_enabled && skip_digit[digit]) {
let mut offset_acc = 0;
for count in offsets[digit].iter_mut() {
let offset = offset_acc;
offset_acc += *count;
*count = offset;
}
}
}
// The `offsets` array now contains bucket offsets for each digit.
}
let len = input.len();
// Drop impl of DoubleBuffer ensures that `input` is consistent,
// e.g. in case of panic in the key function.
let mut buffer = DoubleBuffer::new(input);
// This is the main sorting loop. We sort the elements by each digit of the key,
// starting from the least-significant. After sorting by the last, most significant
// digit, our elements are sorted.
for digit in 0..DIGIT_COUNT {
if !(digit_skip_enabled && skip_digit[digit]) {
// Initial offset of each bucket.
let init_offsets = &offsets[digit];
// Offset of the first empty index in each bucket.
let mut working_offsets = *init_offsets;
buffer.scatter(|t| {
let key = key_fn(t);
let bucket = extract_digit(key, digit);
let offset = &mut working_offsets[bucket];
let index = *offset as usize;
// Increment the offset of the bucket. Use wrapping add in case the
// key function is unreliable and the bucket overflowed.
*offset = offset.wrapping_add(1);
index
});
// Check that each bucket had the same number of insertions as we expected.
// If this is not true, then the key function is unreliable and some elements
// in the write buffer were not written to.
//
// If the key function is unreliable, but the sizes of buckets ended up being
// the same, it would not get detected. This is sound; the only consequence
// is that the elements won't be sorted correctly.
{
// The `working_offsets` array now contains the end offset of each bucket.
// If the bucket is full, the working offset is now equal to the original
// offset of the next bucket. The working offset of the last bucket should
// be equal to the number of elements.
let bucket_sizes_match = working_offsets[0..BUCKET_COUNT - 1]
== offsets[digit][1..BUCKET_COUNT]
&& working_offsets[BUCKET_COUNT - 1] == len as $offset_type;
if !bucket_sizes_match {
// The bucket sizes do not match expected sizes, the key function is
// unreliable (programming mistake).
//
// The Drop impl will copy the last completed buffer into the slice.
drop(buffer);
panic!(
"The key function is not reliable: when called repeatedly, \
it returned different keys for the same element."
)
}
}
unsafe {
// SAFETY: we just ensured that every index was written to.
buffer.swap();
}
}
}
// The Drop impl will copy the last completed buffer into the slice.
drop(buffer);
}
};
}
macro_rules! radix_key_impl {
($($key_type:ty)*) => ($(
impl RadixKey for $key_type {
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
sort_impl!(radix_sort_u32, $key_type, u32);
sort_impl!(radix_sort_usize, $key_type, usize);
}
)*)
}
radix_key_impl! { u8 u16 u32 u64 u128 }