Vendor dependencies for 0.3.0 release

commit 82ab7f317b (parent 0c8d39d483)
2025-09-27 10:29:08 -05:00
26803 changed files with 16134934 additions and 0 deletions

vendor/half/.cargo-checksum.json vendored Normal file (+1)

@@ -0,0 +1 @@
{"files":{"CHANGELOG.md":"fcef01224dc41fb792a3d33e817e9de950ed94e74b7645512fd30545b5f30930","Cargo.lock":"3c0026d74bc22500e3fd5289c1c2b9dcd07d66ead302361cd6b379fffd91e3bc","Cargo.toml":"a2d670b4c0afe5ac5115de4da5a83c6c3e5feeeafe0b5b169a5e5c2b03ef2f11","LICENSE-APACHE":"a6cba85bc92e0cff7a450b1d873c0eaa2e9fc96bf472df0247a26bec77bf3ff9","LICENSE-MIT":"508a77d2e7b51d98adeed32648ad124b7b30241a8e70b2e72c99f92d8e5874d1","Makefile.toml":"9a7c73f926e9a03e24331852fe31c5c0b034215ea18c23de91b5d98e097b9ed9","README.md":"f48dacc314fbd367551ec9e00edab552449117668bc34e83b1941cb3babbd08b","benches/convert.rs":"95c905467f132ea37e79b704a8fc3a0843f5f9dc6d61750c4abc57408321b8f6","src/bfloat.rs":"c9d8984d4be5a3536d8dd7bb0a587856094492c8b2c49351c324801319f40c93","src/bfloat/convert.rs":"daab845ea86e45449550f7039f6b5a550da763ca65839b8b3ced1fcc9fa439cc","src/binary16.rs":"5119ccd45f3697b58813c5d55cc8e969a6dcedc37393382d0406e23e6aaa837b","src/binary16/arch.rs":"f34369c39545e53a8ee9ef3a29f6c837272e5cbf70d86cb815234c42f5bf721c","src/binary16/arch/aarch64.rs":"a912917e13c01f175576edcdad720d19ce6e53f32e37e5b39027b1188fc99fb0","src/binary16/arch/x86.rs":"c2f4ae517f3216c0716dca52837f8aff89186856e6e7eac894e8b644b2124a75","src/leading_zeros.rs":"37fe7595856f9d0b1ed0a806314ab7a8860bdebf4223fb3747768a077935bea0","src/lib.rs":"62fd970b472f946253ad3a79b0865fa77a098e0e2059240e17c2f3128cfeaf57","src/num_traits.rs":"79e6c08a5aa5cc47543e0ab65797adfb50bbc8092dd1f7f292d8640f3806b2b2","src/rand_distr.rs":"0a738b423beae579017291180b1e89ef9e2edc2a484392838b388b3a200bab1e","src/slice.rs":"b42be83032cd4beaa43c2ca6e66bd86db6908a1add00feabf1b9c8c8bd48b1fc","src/vec.rs":"1858a6485b26027ad7160a834977c424b05983ab42de2e618aa3f9d11221b04d"},"package":"459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"}

vendor/half/CHANGELOG.md vendored Normal file (+419)

@@ -0,0 +1,419 @@
# Changelog
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
## [2.6.0] - 2024-04-08 <a name="2.6.0"></a>
### Changed
- Fixed some incorrect minimum supported versions of dependencies that weren't caught due to an
improper `Cargo.lock`:
* `num-traits` 0.2.14 -> 0.2.16
* `zerocopy` 0.8.0 -> 0.8.23
* `arbitrary` 1.3.2 -> 1.4.1
### Added
- `f16` and `bf16` now implement `Immutable` and `KnownLayout` for `zerocopy` crate. By [@usamoi].
## [2.5.0] - 2024-03-13 <a name="2.5.0"></a>
### Changed
- Updated optional dependencies to latest major versions:
* `zerocopy` 0.6 -> 0.8
* `rand` 0.8 -> 0.9
* `rand_distr` 0.4 -> 0.5
* `rkyv` 0.7 -> 0.8
* (dev) `criterion` 0.4 -> 0.5
- Minimum supported Rust version has been changed to 1.81 due to above dependency updates.
- Minor restructuring of included license file locations to be more consistent with crates ecosystem.
### Added
- Added support for `arbitrary` crate. Fixes [#110]. By [@FL33TW00D].
- New `num-traits` implementations: `FromBytes` and `ToBytes` for `f16` and `bf16`. By [@kpreid].
### Fixed
- Suppressed `unexpected_cfgs` lint warnings on newer versions of stable Rust.
- Resolved ambiguous rustdoc warnings due to the new unstable `f16` primitive in the compiler.
## [2.4.1] - 2024-04-06 <a name="2.4.1"></a>
### Fixed
- Missing macro import causing build failure on `no_std` + `alloc` feature set. Fixes [#107].
- Clippy warning on nightly rust.
## [2.4.0] - 2024-02-25 <a name="2.4.0"></a>
### Added
- Optional `rkyv` support. Fixes [#100], by [@comath].
- New `num-traits` implementations: `AsPrimitive<f16>` for `bf16` and `AsPrimitive<bf16>` for
`f16`, allowing lossy conversions between the two types. By [@charles-r-earp].
- `Cargo.lock` added to vcs as is now recommended for library crates.
### Fixed
- Remove some unit NaN conversion sign tests due to non-deterministic hardware. Fixes [#103].
- Redundant import warnings on nightly Rust.
## [2.3.1] - 2023-06-24 <a name="2.3.1"></a>
### Fixed
- Compile error on x86 (not x86_64) targets. Fixes [#93].
## [2.3.0] - 2023-06-24 <a name="2.3.0"></a>
### Added
- Support for Kani Rust Verifier. By [@cameron1024].
- Support for `rand_distr::Distribution` implementations behind `rand_distr` optional cargo
feature. By [@coreylowman].
- Floating point formatting options in `Display` and `Debug` implementations. By [@eiz].
### Changed
- **Breaking Change** Minimum supported Rust version is now 1.70.
- **Breaking Change** Minimum supported Rust version policy reverted to original policy of allowing
minimum supported Rust version updates for minor releases instead of only major to avoid
segmentation and allow optimizing hardware implementations without unnecessary major releases.
- Hardware intrinsics/assembly is finally available on stable Rust, including using hardware
feature detection (`std` only), including:
- AArch64 now uses FP16 hardware instructions for conversions and math operations when
available.
- x86/x86-64 now uses F16C hardware instructions for conversions (but no math operations) when
available. Fixes [#54].
### Deprecated
- `use-intrinsics` cargo feature no longer used. Hardware support will now always be used whenever
possible. A future version may output deprecation warnings if this feature is enabled.
### Fixed
- Improve code generation of `leading_zeros` functions by inlining. By [@encounter].
- `Sum` implementation of `bf16` incorrectly performed product instead of sum. By [@wx-csy].
- Compile failed when `serde` cargo feature enabled but `std` not enabled.
- Incorrect black boxing of benchmark tests.
- Rustdoc cfg display on docs.rs not getting enabled.
## [2.2.1] - 2023-01-08 <a name="2.2.1"></a>
### Changed
- Reduced unnecessary bounds checks for SIMD operations on slices. By [@Shnatsel].
- Further slice conversion optimizations for slices. Resolves [#66].
## [2.2.0] - 2022-12-30 <a name="2.2.0"></a>
### Added
- Add `serialize_as_f32` and `serialize_as_string` functions when `serde` cargo feature is enabled.
They allow customizing the serialization by using the
`#[serde(serialize_with="f16::serialize_as_f32")]` attribute in serde derive macros (see the
sketch below). Closes [#60].
- Deserialize now supports deserializing from `f32`, `f64`, and string values in addition to its
previous default deserialization. Closes [#60].
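
For illustration, a minimal sketch of the attribute usage described above (the `Reading` struct
and its `value` field are hypothetical):

```rust
use half::f16;
use serde::Serialize;

#[derive(Serialize)]
struct Reading {
    // Serialize the half-precision value as an f32 number instead of raw bits.
    #[serde(serialize_with = "f16::serialize_as_f32")]
    value: f16,
}
```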
### Changed
- Add `#[inline]` on fallback functions, which improved conversion execution on non-nightly rust
by up to 50%. By [@Shnatsel].
## [2.1.0] - 2022-07-18 <a name="2.1.0"></a>
### Added
- Add support for target_arch `spirv`. Some traits and functions are unavailable on this
architecture. By [@charles-r-earp].
- Add `total_cmp` method to both float types. Closes [#55], by [@joseluis].
## [2.0.0] - 2022-06-21 <a name="2.0.0"></a>
### Changed
- **Breaking Change** Minimum supported Rust version is now 1.58.
- **Breaking Change** `std` is now enabled as a default cargo feature. Disable default features to
continue using `no_std` support.
- Migrated to Rust Edition 2021.
- Added `#[must_use]` attributes to functions, as appropriate.
### Fixed
- Fix a soundness bug with `slice::as_ptr` not correctly using mutable reference. By [@Nilstrieb].
### Added
- Added `const` conversion methods to both `f16` and `bf16`. These methods never use hardware
intrinsics, unlike the current conversion methods, which is why they are separated into new
methods. The following `const` methods were added:
- `from_f32_const`
- `from_f64_const`
- `to_f32_const`
- `to_f64_const`
- Added `Neg` trait support for borrowed values `&f16` and `&bf16`. By [@pthariensflame].
- Added `AsPrimitive` implementations from and to self, `usize`, and `isize`. By [@kali].
### Removed
- **Breaking Change** The deprecated `serialize` cargo feature has been removed. Use `serde` cargo
feature instead.
- **Breaking Change** The deprecated `consts` module has been removed. Use associated constants on
`f16` instead.
- **Breaking Change** The following deprecated functions have been removed:
- `f16::as_bits`
- `slice::from_bits_mut`
- `slice::to_bits_mut`
- `slice::from_bits`
- `slice::to_bits`
- `vec::from_bits`
- `vec::to_bits`
## [1.8.2] - 2021-10-22 <a name="1.8.2"></a>
### Fixed
- Remove cargo resolver=2 from manifest to resolve errors in older versions of Rust that still
worked with 1.8.0. Going forward, MSRV increases will be major version increases. Fixes [#48].
## [1.8.1] - 2021-10-21 - **Yanked** <a name="1.8.1"></a>
### ***Yanked***
*Not recommended due to introducing compilation error in Rust versions that worked with 1.8.0.*
### Changed
- Now uses cargo resolver version 2 to prevent dev-dependencies from enabling `std` feature on
optional dependencies.
### Fixed
- Fixed compile failure when `std` feature is not enabled and `num-traits` is enabled under new
resolver. Now properly uses `libm` num-traits feature.
## [1.8.0] - 2021-10-13 <a name="1.8.0"></a>
### Changed
- Now always implements `Add`, `Div`, `Mul`, `Neg`, `Rem`, and `Sub` traits.
Previously, these were only implemented under the `num-traits` feature. Keep in mind they still
convert to `f32` and back in the implementation.
- Minimum supported Rust version is now 1.51.
- Made crate package [REUSE compliant](https://reuse.software/).
- Docs now use intra-doc links instead of manual (and hard to maintain) links.
- The following methods on both `f16` and `bf16` are now `const`:
- `to_le_bytes`
- `to_be_bytes`
- `to_ne_bytes`
- `from_le_bytes`
- `from_be_bytes`
- `from_ne_bytes`
- `is_normal`
- `classify`
- `signum`
### Added
- Added optional implementations of `zerocopy` traits `AsBytes` and `FromBytes`
under `zerocopy` cargo feature. By [@samcrow].
- Implemented the `core::iter::Product` and `core::iter::Sum` traits, with the same caveat as above
about converting to `f32` and back under the hood.
- Added new associated const `NEG_ONE` to both `f16` and `bf16`.
- Added the following new methods on both `f16` and `bf16`:
- `copysign`
- `max`
- `min`
- `clamp`
### Fixed
- Fixed a number of minor lints discovered due to improved CI.
## [1.7.1] - 2021-01-17 <a name="1.7.1"></a>
### Fixed
- Docs.rs now generates docs for `bytemuck` and `num-traits` optional features.
## [1.7.0] - 2021-01-17 <a name="1.7.0"></a>
### Added
- Added optional implementations of `bytemuck` traits `Zeroable` and `Pod` under `bytemuck` cargo
feature. By [@charles-r-earp].
- Added optional implementations of `num-traits` traits `ToPrimitive` and `FromPrimitive` under
`num-traits` cargo feature. By [@charles-r-earp].
- Added implementations of `Binary`, `Octal`, `LowerHex`, and `UpperHex` string format traits to
format raw `f16`/`bf16` bytes to string.
### Changed
- `Debug` trait implementation now formats `f16`/`bf16` as float instead of raw bytes hex. Use newly
implemented formatting traits to format in hex instead of `Debug`. Fixes [#37].
## [1.6.0] - 2020-05-09 <a name="1.6.0"></a>
### Added
- Added `LOG2_10` and `LOG10_2` constants to both `f16` and `bf16`, which were added to `f32` and
`f64` in the standard library in 1.43.0. By [@tspiteri].
- Added `to_le/be/ne_bytes` and `from_le/be/ne_bytes` to both `f16` and `bf16`, which were added to
the standard library in 1.40.0. By [@bzm3r].
## [1.5.0] - 2020-03-03 <a name="1.5.0"></a>
### Added
- Added the `alloc` feature to support the `alloc` crate in `no_std` environments. By [@zserik]. The
`vec` module is now available with either `alloc` or `std` feature.
## [1.4.1] - 2020-02-10 <a name="1.4.1"></a>
### Fixed
- Added `#[repr(transparent)]` to `f16`/`bf16` to remove undefined behavior. By [@jfrimmel].
## [1.4.0] - 2019-10-13 <a name="1.4.0"></a>
### Added
- Added a `bf16` type implementing the alternative
[`bfloat16`](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format) 16-bit floating point
format. By [@tspiteri].
- `f16::from_bits`, `f16::to_bits`, `f16::is_nan`, `f16::is_infinite`, `f16::is_finite`,
`f16::is_sign_positive`, and `f16::is_sign_negative` are now `const` fns.
- `slice::HalfBitsSliceExt` and `slice::HalfFloatSliceExt` extension traits have been added for
performing efficient reinterpret casts and conversions of slices to and from `[f16]` and
`[bf16]`. These traits will use hardware SIMD conversion instructions when available and the
`use-intrinsics` cargo feature is enabled.
- `vec::HalfBitsVecExt` and `vec::HalfFloatVecExt` extension traits have been added for
performing efficient reinterpret casts to and from `Vec<f16>` and `Vec<bf16>`. These traits
are only available with the `std` cargo feature.
- `prelude` has been added, for easy importing of most common functionality. Currently the
prelude imports `f16`, `bf16`, and the new slice and vec extension traits.
- New associated constants on `f16` type to replace deprecated `consts` module.
### Fixed
- Software conversion (when not using `use-intrinsics` feature) now matches hardware rounding
by rounding to nearest, ties to even. Fixes [#24], by [@tspiteri].
- NaN value conversions now behave like `f32` to `f64` conversions, retaining sign. Fixes [#23],
by [@tspiteri].
### Changed
- Minimum rustc version bumped to 1.32.
- Runtime target host feature detection is now used if both `std` and `use-intrinsics` features are
enabled and the compile target host does not support required features.
- When `use-intrinsics` feature is enabled, will now always compile and run without error correctly
regardless of compile target options.
### Deprecated
- `consts` module and all its constants have been deprecated; use the associated constants on `f16`
instead.
- `slice::from_bits` has been deprecated; use `slice::HalfBitsSliceExt::reinterpret_cast` instead.
- `slice::from_bits_mut` has been deprecated; use `slice::HalfBitsSliceExt::reinterpret_cast_mut`
instead.
- `slice::to_bits` has been deprecated; use `slice::HalfFloatSliceExt::reinterpret_cast` instead.
- `slice::to_bits_mut` has been deprecated; use `slice::HalfFloatSliceExt::reinterpret_cast_mut`
instead.
- `vec::from_bits` has been deprecated; use `vec::HalfBitsVecExt::reinterpret_into` instead.
- `vec::to_bits` has been deprecated; use `vec::HalfFloatVecExt::reinterpret_into` instead.
## [1.3.1] - 2019-10-04 <a name="1.3.1"></a>
### Fixed
- Corrected values of constants `EPSILON`, `MAX_10_EXP`, `MAX_EXP`, `MIN_10_EXP`, and `MIN_EXP`
in `consts` module, as well as setting `consts::NAN` to match value of `f32::NAN` converted to
`f16`. By [@tspiteri].
## [1.3.0] - 2018-10-02 <a name="1.3.0"></a>
### Added
- `slice::from_bits_mut` and `slice::to_bits_mut` for conversion between mutable `u16` and `f16`
slices. Fixes [#16], by [@johannesvollmer].
## [1.2.0] - 2018-09-03 <a name="1.2.0"></a>
### Added
- `slice` and optional `vec` (only included with `std` feature) modules for conversions between
`u16` and `f16` buffers. Fixes [#14], by [@johannesvollmer].
- `to_bits` added to replace `as_bits`. Fixes [#12], by [@tspiteri].
### Fixed
- `serde` optional dependency no longer uses its default `std` feature.
### Deprecated
- `as_bits` has been deprecated; use `to_bits` instead.
- `serialize` cargo feature is deprecated; use `serde` instead.
## [1.1.2] - 2018-07-12 <a name="1.1.2"></a>
### Fixed
- Fixed compilation error in 1.1.1 on rustc < 1.27, now compiles again on rustc >= 1.10. Fixes
[#11].
## [1.1.1] - 2018-06-24 - **Yanked** <a name="1.1.1"></a>
### ***Yanked***
*Not recommended due to introducing compilation error on rustc versions prior to 1.27.*
### Fixed
- Fix subnormal float conversions when `use-intrinsics` is not enabled. By [@Moongoodboy-K].
## [1.1.0] - 2018-03-17 <a name="1.1.0"></a>
### Added
- Made `to_f32` and `to_f64` public. Fixes [#7], by [@PSeitz].
## [1.0.2] - 2018-01-12 <a name="1.0.2"></a>
### Changed
- Update behavior of `is_sign_positive` and `is_sign_negative` to match the IEEE754 conforming
behavior of the standard library since Rust 1.20.0. Fixes [#3], by [@tspiteri].
- Small optimization on `is_nan` and `is_infinite` from [@tspiteri].
### Fixed
- Fix comparisons of +0 to -0 and comparisons involving negative numbers. Fixes [#2], by
[@tspiteri].
- Fix loss of sign when converting `f16` and `f32` to `f16`, and case where `f64` NaN could be
converted to `f16` infinity instead of NaN. Fixes [#5], by [@tspiteri].
## [1.0.1] - 2017-08-30 <a name="1.0.1"></a>
### Added
- More README documentation.
- Badges and categories in crate metadata.
### Changed
- `serde` dependency updated to 1.0 stable.
- Writing changelog manually.
## [1.0.0] - 2017-02-03 <a name="1.0.0"></a>
### Added
- Update to `serde` 0.9 and stable Rust 1.15 for `serialize` feature.
## [0.1.1] - 2017-01-08 <a name="0.1.1"></a>
### Added
- Add `serde` support under new `serialize` feature.
### Changed
- Use `no_std` for crate by default.
## 0.1.0 - 2016-03-17 <a name="0.1.0"></a>
### Added
- Initial release of `f16` type.
[#2]: https://github.com/starkat99/half-rs/issues/2
[#3]: https://github.com/starkat99/half-rs/issues/3
[#5]: https://github.com/starkat99/half-rs/issues/5
[#7]: https://github.com/starkat99/half-rs/issues/7
[#11]: https://github.com/starkat99/half-rs/issues/11
[#12]: https://github.com/starkat99/half-rs/issues/12
[#14]: https://github.com/starkat99/half-rs/issues/14
[#16]: https://github.com/starkat99/half-rs/issues/16
[#23]: https://github.com/starkat99/half-rs/issues/23
[#24]: https://github.com/starkat99/half-rs/issues/24
[#37]: https://github.com/starkat99/half-rs/issues/37
[#48]: https://github.com/starkat99/half-rs/issues/48
[#54]: https://github.com/starkat99/half-rs/issues/54
[#55]: https://github.com/starkat99/half-rs/issues/55
[#60]: https://github.com/starkat99/half-rs/issues/60
[#66]: https://github.com/starkat99/half-rs/issues/66
[#93]: https://github.com/starkat99/half-rs/issues/93
[#100]: https://github.com/starkat99/half-rs/issues/100
[#103]: https://github.com/starkat99/half-rs/issues/103
[#107]: https://github.com/starkat99/half-rs/issues/107
[#110]: https://github.com/starkat99/half-rs/issues/110
[@tspiteri]: https://github.com/tspiteri
[@PSeitz]: https://github.com/PSeitz
[@Moongoodboy-K]: https://github.com/Moongoodboy-K
[@johannesvollmer]: https://github.com/johannesvollmer
[@jfrimmel]: https://github.com/jfrimmel
[@zserik]: https://github.com/zserik
[@bzm3r]: https://github.com/bzm3r
[@charles-r-earp]: https://github.com/charles-r-earp
[@samcrow]: https://github.com/samcrow
[@pthariensflame]: https://github.com/pthariensflame
[@kali]: https://github.com/kali
[@Nilstrieb]: https://github.com/Nilstrieb
[@joseluis]: https://github.com/joseluis
[@Shnatsel]: https://github.com/Shnatsel
[@cameron1024]: https://github.com/cameron1024
[@encounter]: https://github.com/encounter
[@coreylowman]: https://github.com/coreylowman
[@wx-csy]: https://github.com/wx-csy
[@eiz]: https://github.com/eiz
[@comath]: https://github.com/comath
[@FL33TW00D]: https://github.com/FL33TW00D
[@kpreid]: https://github.com/kpreid
[@usamoi]: https://github.com/usamoi
[Unreleased]: https://github.com/starkat99/half-rs/compare/v2.6.0...HEAD
[2.6.0]: https://github.com/starkat99/half-rs/compare/v2.5.0...v2.6.0
[2.5.0]: https://github.com/starkat99/half-rs/compare/v2.4.1...v2.5.0
[2.4.1]: https://github.com/starkat99/half-rs/compare/v2.4.0...v2.4.1
[2.4.0]: https://github.com/starkat99/half-rs/compare/v2.3.1...v2.4.0
[2.3.1]: https://github.com/starkat99/half-rs/compare/v2.3.0...v2.3.1
[2.3.0]: https://github.com/starkat99/half-rs/compare/v2.2.1...v2.3.0
[2.2.1]: https://github.com/starkat99/half-rs/compare/v2.2.0...v2.2.1
[2.2.0]: https://github.com/starkat99/half-rs/compare/v2.1.0...v2.2.0
[2.1.0]: https://github.com/starkat99/half-rs/compare/v2.0.0...v2.1.0
[2.0.0]: https://github.com/starkat99/half-rs/compare/v1.8.2...v2.0.0
[1.8.2]: https://github.com/starkat99/half-rs/compare/v1.8.1...v1.8.2
[1.8.1]: https://github.com/starkat99/half-rs/compare/v1.8.0...v1.8.1
[1.8.0]: https://github.com/starkat99/half-rs/compare/v1.7.1...v1.8.0
[1.7.1]: https://github.com/starkat99/half-rs/compare/v1.7.0...v1.7.1
[1.7.0]: https://github.com/starkat99/half-rs/compare/v1.6.0...v1.7.0
[1.6.0]: https://github.com/starkat99/half-rs/compare/v1.5.0...v1.6.0
[1.5.0]: https://github.com/starkat99/half-rs/compare/v1.4.1...v1.5.0
[1.4.1]: https://github.com/starkat99/half-rs/compare/v1.4.0...v1.4.1
[1.4.0]: https://github.com/starkat99/half-rs/compare/v1.3.1...v1.4.0
[1.3.1]: https://github.com/starkat99/half-rs/compare/v1.3.0...v1.3.1
[1.3.0]: https://github.com/starkat99/half-rs/compare/v1.2.0...v1.3.0
[1.2.0]: https://github.com/starkat99/half-rs/compare/v1.1.2...v1.2.0
[1.1.2]: https://github.com/starkat99/half-rs/compare/v1.1.1...v1.1.2
[1.1.1]: https://github.com/starkat99/half-rs/compare/v1.1.0...v1.1.1
[1.1.0]: https://github.com/starkat99/half-rs/compare/v1.0.2...v1.1.0
[1.0.2]: https://github.com/starkat99/half-rs/compare/v1.0.1...v1.0.2
[1.0.1]: https://github.com/starkat99/half-rs/compare/v1.0.0...v1.0.1
[1.0.0]: https://github.com/starkat99/half-rs/compare/v0.1.1...v1.0.0
[0.1.1]: https://github.com/starkat99/half-rs/compare/v0.1.0...v0.1.1

vendor/half/Cargo.lock generated vendored Normal file (+1020)

File diff suppressed because it is too large.

vendor/half/Cargo.toml vendored Normal file (+133)

@@ -0,0 +1,133 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2021"
rust-version = "1.81"
name = "half"
version = "2.6.0"
authors = ["Kathryn Long <squeeself@gmail.com>"]
build = false
exclude = [
".git*",
".editorconfig",
".circleci",
]
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Half-precision floating point f16 and bf16 types for Rust implementing the IEEE 754-2008 standard binary16 and bfloat16 types."
readme = "README.md"
keywords = [
"f16",
"bfloat16",
"no_std",
]
categories = [
"no-std",
"data-structures",
"encoding",
]
license = "MIT OR Apache-2.0"
repository = "https://github.com/VoidStarKat/half-rs"
[package.metadata.docs.rs]
all-features = true
rustdoc-args = [
"--cfg",
"docsrs",
]
[features]
alloc = []
default = ["std"]
rand_distr = [
"dep:rand",
"dep:rand_distr",
]
std = ["alloc"]
use-intrinsics = []
[lib]
name = "half"
path = "src/lib.rs"
[[bench]]
name = "convert"
path = "benches/convert.rs"
harness = false
[dependencies.arbitrary]
version = "1.4.1"
features = ["derive"]
optional = true
[dependencies.bytemuck]
version = "1.4.1"
features = ["derive"]
optional = true
default-features = false
[dependencies.cfg-if]
version = "1.0.0"
[dependencies.num-traits]
version = "0.2.16"
features = ["libm"]
optional = true
default-features = false
[dependencies.rand]
version = "0.9.0"
features = ["thread_rng"]
optional = true
default-features = false
[dependencies.rand_distr]
version = "0.5.0"
optional = true
default-features = false
[dependencies.rkyv]
version = "0.8.0"
optional = true
[dependencies.serde]
version = "1.0"
features = ["derive"]
optional = true
default-features = false
[dependencies.zerocopy]
version = "0.8.23"
features = ["derive"]
optional = true
default-features = false
[dev-dependencies.criterion]
version = "0.5"
[dev-dependencies.crunchy]
version = "0.2.2"
[dev-dependencies.quickcheck]
version = "1.0"
[dev-dependencies.quickcheck_macros]
version = "1.0"
[dev-dependencies.rand]
version = "0.9.0"
[target.'cfg(target_arch = "spirv")'.dependencies.crunchy]
version = "0.2.2"

vendor/half/LICENSE-APACHE vendored Normal file (+176)

@@ -0,0 +1,176 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS

vendor/half/LICENSE-MIT vendored Normal file (+19)

@@ -0,0 +1,19 @@
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

vendor/half/Makefile.toml vendored Normal file (+78)

@@ -0,0 +1,78 @@
[config]
min_version = "0.35.0"
[env]
CI_CARGO_TEST_FLAGS = { value = "--locked -- --nocapture", condition = { env_true = [
"CARGO_MAKE_CI",
] } }
CARGO_MAKE_CARGO_ALL_FEATURES = { source = "${CARGO_MAKE_RUST_CHANNEL}", default_value = "--all-features", mapping = { "nightly" = "--all-features" } }
CARGO_MAKE_CLIPPY_ARGS = { value = "${CARGO_MAKE_CLIPPY_ALL_FEATURES_WARN}", condition = { env_true = [
"CARGO_MAKE_CI",
] } }
# Override for CI flag additions
[tasks.test]
args = [
"test",
"@@remove-empty(CARGO_MAKE_CARGO_VERBOSE_FLAGS)",
"@@split(CARGO_MAKE_CARGO_BUILD_TEST_FLAGS, )",
"@@split(CI_CARGO_TEST_FLAGS, )",
]
# Let clippy run on non-nightly CI
[tasks.clippy-ci-flow]
condition = { env_set = ["CARGO_MAKE_RUN_CLIPPY"] }
# Let format check run on non-nightly CI
[tasks.check-format-ci-flow]
condition = { env_set = ["CARGO_MAKE_RUN_CHECK_FORMAT"] }
[tasks.check-docs]
description = "Checks docs for errors."
category = "Documentation"
install_crate = false
env = { RUSTDOCFLAGS = "-D warnings" }
command = "cargo"
args = [
"doc",
"--workspace",
"--no-deps",
"@@remove-empty(CARGO_MAKE_CARGO_VERBOSE_FLAGS)",
"${CARGO_MAKE_CARGO_ALL_FEATURES}",
]
# Build & Test with no features enabled
[tasks.post-ci-flow]
run_task = [
{ name = [
"check-docs",
"build-no-std",
"test-no-std",
"build-no-std-alloc",
"test-no-std-alloc",
] },
]
[tasks.build-no-std]
description = "Build without any features"
category = "Build"
env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features" }
run_task = "build"
[tasks.test-no-std]
description = "Run tests without any features"
category = "Test"
env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features" }
run_task = "test"
[tasks.build-no-std-alloc]
description = "Build without any features except alloc"
category = "Build"
env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features --features alloc" }
run_task = "build"
[tasks.test-no-std-alloc]
description = "Run tests without any features except alloc"
category = "Test"
env = { CARGO_MAKE_CARGO_BUILD_TEST_FLAGS = "--no-default-features --features alloc" }
run_task = "test"

vendor/half/README.md vendored Normal file (+90)

@@ -0,0 +1,90 @@
# `f16` and `bf16` floating point types for Rust
[![Crates.io](https://img.shields.io/crates/v/half.svg)](https://crates.io/crates/half/) [![Documentation](https://docs.rs/half/badge.svg)](https://docs.rs/half/) ![Crates.io](https://img.shields.io/crates/l/half) [![Build status](https://github.com/starkat99/half-rs/actions/workflows/rust.yml/badge.svg?branch=main&event=push)](https://github.com/starkat99/half-rs/actions/workflows/rust.yml) [![CircleCI](https://dl.circleci.com/status-badge/img/gh/starkat99/half-rs/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/starkat99/half-rs/tree/main)
This crate provides a half-precision floating point `f16` type for Rust implementing the IEEE
754-2008 standard [`binary16`](https://en.wikipedia.org/wiki/Half-precision_floating-point_format)
format, a.k.a. "half", as well as a `bf16` type implementing the
[`bfloat16`](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format) format.
## Usage
The `f16` and `bf16` types attempt to match existing Rust floating point type functionality where
possible, and provide both conversion operations (such as to/from `f32` and `f64`) and basic
arithmetic operations. Hardware support for these operations is used whenever it is available,
either through intrinsics or targeted assembly, although a nightly Rust toolchain may be required
for some hardware.
This crate provides [`no_std`](https://rust-embedded.github.io/book/intro/no-std.html) support, so it can easily be used in embedded code where a smaller float format is most useful.
*Requires Rust 1.81 or greater.* If you need support for older versions of Rust, use previous
versions of this crate.
See the [crate documentation](https://docs.rs/half/) for more details.
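As a quick illustration, here is a minimal sketch of the conversions and arithmetic described
above:

```rust
use half::{bf16, f16};

fn main() {
    // Lossy conversion down from f32, lossless widening back.
    let x = f16::from_f32(1.5);
    assert_eq!(x.to_f32(), 1.5);

    // Arithmetic operators are implemented (converting through f32 internally).
    let y = x + f16::from_f32(2.0);
    assert_eq!(y.to_f32(), 3.5);

    // bf16 keeps f32's exponent range with a shorter mantissa.
    let pi = bf16::from_f32(core::f32::consts::PI);
    println!("{pi}");
}
```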
### Optional Features
- **`alloc`** — Enable use of the [`alloc`](https://doc.rust-lang.org/alloc/) crate when not using
the `std` library.
This enables the `vec` module, which contains zero-copy conversions for the `Vec` type. This
allows fast conversion between raw `Vec<u16>` bits and `Vec<f16>` or `Vec<bf16>` arrays, and vice
versa (see the sketch after this list).
- **`std`** — Enable features that depend on the Rust `std` library, including everything in the
`alloc` feature.
Enabling the `std` feature enables runtime CPU feature detection of hardware support.
Without this feature detection, hardware instructions are used only when the compile target
enables them.
- **`serde`** — Implement `Serialize` and `Deserialize` traits for `f16` and `bf16`. This adds a
dependency on the [`serde`](https://crates.io/crates/serde) crate.
- **`num-traits`** — Enable `ToPrimitive`, `FromPrimitive`, `ToBytes`, `FromBytes`, `Num`, `Float`,
`FloatCore` and `Bounded` trait implementations from the
[`num-traits`](https://crates.io/crates/num-traits) crate.
- **`bytemuck`** — Enable `Zeroable` and `Pod` trait implementations from the
[`bytemuck`](https://crates.io/crates/bytemuck) crate.
- **`zerocopy`** — Enable `IntoBytes` and `FromBytes` trait implementations from the
[`zerocopy`](https://crates.io/crates/zerocopy) crate.
- **`rand_distr`** — Enable sampling from distributions like `StandardUniform` and `StandardNormal`
from the [`rand_distr`](https://crates.io/crates/rand_distr) crate.
- **`rkyv`** — Enable zero-copy deserialization with the [`rkyv`](https://crates.io/crates/rkyv) crate.
- **`arbitrary`** — Enable fuzzing support with the [`arbitrary`](https://crates.io/crates/arbitrary)
crate by implementing the `Arbitrary` trait.
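
Below is a minimal sketch of the slice and `Vec` conversions mentioned in the `alloc` bullet above
(assumes default features; source and destination buffers must be the same length):

```rust
use half::prelude::*;

fn main() {
    // Convert a float slice into an existing f16 buffer in place.
    let src = [0.0f32, 1.0, core::f32::consts::PI];
    let mut dst = [f16::ZERO; 3];
    dst.convert_from_f32_slice(&src);

    // Reinterpret a Vec<f16> as its raw Vec<u16> bits without copying.
    let bits: Vec<u16> = dst.to_vec().reinterpret_into();
    assert_eq!(bits.len(), 3);
}
```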
### Hardware support
The following table details hardware support for floating point types in this crate. When using the
`std` library, runtime CPU feature detection will be used. To get the most performance benefits,
compile for specific CPU features, which avoids the runtime overhead and works in a `no_std`
environment (see the sketch after the table).
| Architecture | CPU Target Feature | Notes |
| ------------ | ------------------ | ----- |
| `x86`/`x86_64` | `f16c` | This supports conversion to/from `f16` only (including vector SIMD) and does not support any `bf16` or arithmetic operations. |
| `aarch64` | `fp16` | This supports all operations on `f16` only. |
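
For reference, a minimal sketch of the kind of runtime check the crate performs internally on
x86/x86_64 when `std` is enabled:

```rust
fn main() {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        // Same detection the crate uses to select the F16C conversion path.
        if std::arch::is_x86_feature_detected!("f16c") {
            println!("hardware f16 conversions available");
        } else {
            println!("software conversion fallback in use");
        }
    }
}
```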
### More Documentation
- [Crate API Reference](https://docs.rs/half/)
- [Latest Changes](CHANGELOG.md)
## License
All files in this library are dual-licensed and distributed under the terms of either of:
* [MIT License](LICENSE-MIT)
([http://opensource.org/licenses/MIT](http://opensource.org/licenses/MIT))
* [Apache License, Version 2.0](LICENSE-APACHE)
([http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0))
at your option.
### Contributing
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the
work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any
additional terms or conditions.

vendor/half/benches/convert.rs vendored Normal file (+343)

@@ -0,0 +1,343 @@
use criterion::{black_box, criterion_group, criterion_main, Bencher, BenchmarkId, Criterion};
use half::prelude::*;
use std::{f32, f64, iter};
const SIMD_LARGE_BENCH_SLICE_LEN: usize = 1024;
fn bench_f32_to_f16(c: &mut Criterion) {
let mut group = c.benchmark_group("Convert f16 From f32");
for val in &[
0.,
-0.,
1.,
f32::MIN,
f32::MAX,
f32::MIN_POSITIVE,
f32::NEG_INFINITY,
f32::INFINITY,
f32::NAN,
f32::consts::E,
f32::consts::PI,
] {
group.bench_with_input(BenchmarkId::new("f16::from_f32", val), val, |b, i| {
b.iter(|| f16::from_f32(*i))
});
}
}
fn bench_f64_to_f16(c: &mut Criterion) {
let mut group = c.benchmark_group("Convert f16 From f64");
for val in &[
0.,
-0.,
1.,
f64::MIN,
f64::MAX,
f64::MIN_POSITIVE,
f64::NEG_INFINITY,
f64::INFINITY,
f64::NAN,
f64::consts::E,
f64::consts::PI,
] {
group.bench_with_input(BenchmarkId::new("f16::from_f64", val), val, |b, i| {
b.iter(|| f16::from_f64(*i))
});
}
}
fn bench_f16_to_f32(c: &mut Criterion) {
let mut group = c.benchmark_group("Convert f16 to f32");
for val in &[
f16::ZERO,
f16::NEG_ZERO,
f16::ONE,
f16::MIN,
f16::MAX,
f16::MIN_POSITIVE,
f16::NEG_INFINITY,
f16::INFINITY,
f16::NAN,
f16::E,
f16::PI,
] {
group.bench_with_input(BenchmarkId::new("f16::to_f32", val), val, |b, i| {
b.iter(|| i.to_f32())
});
}
}
fn bench_f16_to_f64(c: &mut Criterion) {
let mut group = c.benchmark_group("Convert f16 to f64");
for val in &[
f16::ZERO,
f16::NEG_ZERO,
f16::ONE,
f16::MIN,
f16::MAX,
f16::MIN_POSITIVE,
f16::NEG_INFINITY,
f16::INFINITY,
f16::NAN,
f16::E,
f16::PI,
] {
group.bench_with_input(BenchmarkId::new("f16::to_f64", val), val, |b, i| {
b.iter(|| i.to_f64())
});
}
}
criterion_group!(
f16_sisd,
bench_f32_to_f16,
bench_f64_to_f16,
bench_f16_to_f32,
bench_f16_to_f64
);
fn bench_slice_f32_to_f16(c: &mut Criterion) {
let mut constant_buffer = [f16::ZERO; 11];
let constants = [
0.,
-0.,
1.,
f32::MIN,
f32::MAX,
f32::MIN_POSITIVE,
f32::NEG_INFINITY,
f32::INFINITY,
f32::NAN,
f32::consts::E,
f32::consts::PI,
];
c.bench_function(
"HalfFloatSliceExt::convert_from_f32_slice/constants",
|b: &mut Bencher<'_>| {
b.iter(|| black_box(&mut constant_buffer).convert_from_f32_slice(black_box(&constants)))
},
);
let large: Vec<_> = iter::repeat(0)
.enumerate()
.map(|(i, _)| i as f32)
.take(SIMD_LARGE_BENCH_SLICE_LEN)
.collect();
let mut large_buffer = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN];
c.bench_function(
"HalfFloatSliceExt::convert_from_f32_slice/large",
|b: &mut Bencher<'_>| {
b.iter(|| black_box(&mut large_buffer).convert_from_f32_slice(black_box(&large)))
},
);
}
fn bench_slice_f64_to_f16(c: &mut Criterion) {
let mut constant_buffer = [f16::ZERO; 11];
let constants = [
0.,
-0.,
1.,
f64::MIN,
f64::MAX,
f64::MIN_POSITIVE,
f64::NEG_INFINITY,
f64::INFINITY,
f64::NAN,
f64::consts::E,
f64::consts::PI,
];
c.bench_function(
"HalfFloatSliceExt::convert_from_f64_slice/constants",
|b: &mut Bencher<'_>| {
b.iter(|| black_box(&mut constant_buffer).convert_from_f64_slice(black_box(&constants)))
},
);
let large: Vec<_> = iter::repeat(0)
.enumerate()
.map(|(i, _)| i as f64)
.take(SIMD_LARGE_BENCH_SLICE_LEN)
.collect();
let mut large_buffer = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN];
c.bench_function(
"HalfFloatSliceExt::convert_from_f64_slice/large",
|b: &mut Bencher<'_>| {
b.iter(|| black_box(&mut large_buffer).convert_from_f64_slice(black_box(&large)))
},
);
}
fn bench_slice_f16_to_f32(c: &mut Criterion) {
let mut constant_buffer = [0f32; 11];
let constants = [
f16::ZERO,
f16::NEG_ZERO,
f16::ONE,
f16::MIN,
f16::MAX,
f16::MIN_POSITIVE,
f16::NEG_INFINITY,
f16::INFINITY,
f16::NAN,
f16::E,
f16::PI,
];
c.bench_function(
"HalfFloatSliceExt::convert_to_f32_slice/constants",
|b: &mut Bencher<'_>| {
b.iter(|| black_box(&constants).convert_to_f32_slice(black_box(&mut constant_buffer)))
},
);
let large: Vec<_> = iter::repeat(0)
.enumerate()
.map(|(i, _)| f16::from_f32(i as f32))
.take(SIMD_LARGE_BENCH_SLICE_LEN)
.collect();
let mut large_buffer = [0f32; SIMD_LARGE_BENCH_SLICE_LEN];
c.bench_function(
"HalfFloatSliceExt::convert_to_f32_slice/large",
|b: &mut Bencher<'_>| {
b.iter(|| black_box(&large).convert_to_f32_slice(black_box(&mut large_buffer)))
},
);
}
fn bench_slice_f16_to_f64(c: &mut Criterion) {
let mut constant_buffer = [0f64; 11];
let constants = [
f16::ZERO,
f16::NEG_ZERO,
f16::ONE,
f16::MIN,
f16::MAX,
f16::MIN_POSITIVE,
f16::NEG_INFINITY,
f16::INFINITY,
f16::NAN,
f16::E,
f16::PI,
];
c.bench_function(
"HalfFloatSliceExt::convert_to_f64_slice/constants",
|b: &mut Bencher<'_>| {
b.iter(|| black_box(&constants).convert_to_f64_slice(black_box(&mut constant_buffer)))
},
);
let large: Vec<_> = iter::repeat(0)
.enumerate()
.map(|(i, _)| f16::from_f64(i as f64))
.take(SIMD_LARGE_BENCH_SLICE_LEN)
.collect();
let mut large_buffer = [0f64; SIMD_LARGE_BENCH_SLICE_LEN];
c.bench_function(
"HalfFloatSliceExt::convert_to_f64_slice/large",
|b: &mut Bencher<'_>| {
b.iter(|| black_box(&large).convert_to_f64_slice(black_box(&mut large_buffer)))
},
);
}
criterion_group!(
f16_simd,
bench_slice_f32_to_f16,
bench_slice_f64_to_f16,
bench_slice_f16_to_f32,
bench_slice_f16_to_f64
);
fn bench_f32_to_bf16(c: &mut Criterion) {
let mut group = c.benchmark_group("Convert bf16 From f32");
for val in &[
0.,
-0.,
1.,
f32::MIN,
f32::MAX,
f32::MIN_POSITIVE,
f32::NEG_INFINITY,
f32::INFINITY,
f32::NAN,
f32::consts::E,
f32::consts::PI,
] {
group.bench_with_input(BenchmarkId::new("bf16::from_f32", val), val, |b, i| {
b.iter(|| bf16::from_f32(*i))
});
}
}
fn bench_f64_to_bf16(c: &mut Criterion) {
let mut group = c.benchmark_group("Convert bf16 From f64");
for val in &[
0.,
-0.,
1.,
f64::MIN,
f64::MAX,
f64::MIN_POSITIVE,
f64::NEG_INFINITY,
f64::INFINITY,
f64::NAN,
f64::consts::E,
f64::consts::PI,
] {
group.bench_with_input(BenchmarkId::new("bf16::from_f64", val), val, |b, i| {
b.iter(|| bf16::from_f64(*i))
});
}
}
fn bench_bf16_to_f32(c: &mut Criterion) {
let mut group = c.benchmark_group("Convert bf16 to f32");
for val in &[
bf16::ZERO,
bf16::NEG_ZERO,
bf16::ONE,
bf16::MIN,
bf16::MAX,
bf16::MIN_POSITIVE,
bf16::NEG_INFINITY,
bf16::INFINITY,
bf16::NAN,
bf16::E,
bf16::PI,
] {
group.bench_with_input(BenchmarkId::new("bf16::to_f32", val), val, |b, i| {
b.iter(|| i.to_f32())
});
}
}
fn bench_bf16_to_f64(c: &mut Criterion) {
let mut group = c.benchmark_group("Convert bf16 to f64");
for val in &[
bf16::ZERO,
bf16::NEG_ZERO,
bf16::ONE,
bf16::MIN,
bf16::MAX,
bf16::MIN_POSITIVE,
bf16::NEG_INFINITY,
bf16::INFINITY,
bf16::NAN,
bf16::E,
bf16::PI,
] {
group.bench_with_input(BenchmarkId::new("bf16::to_f64", val), val, |b, i| {
b.iter(|| i.to_f64())
});
}
}
criterion_group!(
bf16_sisd,
bench_f32_to_bf16,
bench_f64_to_bf16,
bench_bf16_to_f32,
bench_bf16_to_f64
);
criterion_main!(f16_sisd, bf16_sisd, f16_simd);

vendor/half/src/bfloat.rs vendored Normal file (+1880)

File diff suppressed because it is too large.

vendor/half/src/bfloat/convert.rs vendored Normal file (+152)

@@ -0,0 +1,152 @@
use crate::leading_zeros::leading_zeros_u16;
use core::mem;
#[inline]
pub(crate) const fn f32_to_bf16(value: f32) -> u16 {
// TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized
// Convert to raw bytes
let x: u32 = unsafe { mem::transmute::<f32, u32>(value) };
// check for NaN
if x & 0x7FFF_FFFFu32 > 0x7F80_0000u32 {
// Keep high part of current mantissa but also set most significant mantissa bit
return ((x >> 16) | 0x0040u32) as u16;
}
// round and shift
let round_bit = 0x0000_8000u32;
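// Round to nearest, ties to even: round up only when the round bit is set and
// either a lower (sticky) bit or the LSB of the truncated result is set;
// `3 * round_bit - 1` masks exactly that LSB plus all bits below the round bit.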
if (x & round_bit) != 0 && (x & (3 * round_bit - 1)) != 0 {
(x >> 16) as u16 + 1
} else {
(x >> 16) as u16
}
}
#[inline]
pub(crate) const fn f64_to_bf16(value: f64) -> u16 {
// TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized
// Convert to raw bytes, truncating the last 32 bits of mantissa; that precision will always
// be lost on half-precision.
let val: u64 = unsafe { mem::transmute::<f64, u64>(value) };
let x = (val >> 32) as u32;
// Extract IEEE754 components
let sign = x & 0x8000_0000u32;
let exp = x & 0x7FF0_0000u32;
let man = x & 0x000F_FFFFu32;
// Check for all exponent bits being set, which is Infinity or NaN
if exp == 0x7FF0_0000u32 {
// Set mantissa MSB for NaN (and also keep shifted mantissa bits).
// We also have to check the last 32 bits.
let nan_bit = if man == 0 && (val as u32 == 0) {
0
} else {
0x0040u32
};
return ((sign >> 16) | 0x7F80u32 | nan_bit | (man >> 13)) as u16;
}
// The number is normalized, start assembling half precision version
let half_sign = sign >> 16;
// Unbias the exponent, then bias for bfloat16 precision
let unbiased_exp = ((exp >> 20) as i64) - 1023;
let half_exp = unbiased_exp + 127;
// Check for exponent overflow, return +infinity
if half_exp >= 0xFF {
return (half_sign | 0x7F80u32) as u16;
}
// Check for underflow
if half_exp <= 0 {
// Check mantissa for what we can do
if 7 - half_exp > 21 {
// No rounding possibility, so this is a full underflow, return signed zero
return half_sign as u16;
}
// Don't forget about hidden leading mantissa bit when assembling mantissa
let man = man | 0x0010_0000u32;
let mut half_man = man >> (14 - half_exp);
// Check for rounding
let round_bit = 1 << (13 - half_exp);
if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
half_man += 1;
}
// No exponent for subnormals
return (half_sign | half_man) as u16;
}
// Rebias the exponent
let half_exp = (half_exp as u32) << 7;
let half_man = man >> 13;
// Check for rounding
let round_bit = 0x0000_1000u32;
if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
// Round it
((half_sign | half_exp | half_man) + 1) as u16
} else {
(half_sign | half_exp | half_man) as u16
}
}
#[inline]
pub(crate) const fn bf16_to_f32(i: u16) -> f32 {
// TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized
// If NaN, keep current mantissa but also set most significant mantissa bit
if i & 0x7FFFu16 > 0x7F80u16 {
unsafe { mem::transmute::<u32, f32>((i as u32 | 0x0040u32) << 16) }
} else {
unsafe { mem::transmute::<u32, f32>((i as u32) << 16) }
}
}
#[inline]
pub(crate) const fn bf16_to_f64(i: u16) -> f64 {
// TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized
// Check for signed zero
if i & 0x7FFFu16 == 0 {
return unsafe { mem::transmute::<u64, f64>((i as u64) << 48) };
}
let half_sign = (i & 0x8000u16) as u64;
let half_exp = (i & 0x7F80u16) as u64;
let half_man = (i & 0x007Fu16) as u64;
// Check for an infinity or NaN when all exponent bits set
if half_exp == 0x7F80u64 {
// Check for signed infinity if mantissa is zero
if half_man == 0 {
return unsafe {
mem::transmute::<u64, f64>((half_sign << 48) | 0x7FF0_0000_0000_0000u64)
};
} else {
// NaN, keep current mantissa but also set most significant mantissa bit
return unsafe {
mem::transmute::<u64, f64>(
(half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 45),
)
};
}
}
// Calculate double-precision components with adjusted exponent
let sign = half_sign << 48;
// Unbias exponent
let unbiased_exp = ((half_exp as i64) >> 7) - 127;
// Check for subnormals, which will be normalized by adjusting exponent
if half_exp == 0 {
// Calculate how much to adjust the exponent by
let e = leading_zeros_u16(half_man as u16) - 9;
// Rebias and adjust exponent
let exp = ((1023 - 127 - e) as u64) << 52;
let man = (half_man << (46 + e)) & 0xF_FFFF_FFFF_FFFFu64;
return unsafe { mem::transmute::<u64, f64>(sign | exp | man) };
}
// Rebias exponent for a normal (normalized) value
let exp = ((unbiased_exp + 1023) as u64) << 52;
let man = (half_man & 0x007Fu64) << 45;
unsafe { mem::transmute::<u64, f64>(sign | exp | man) }
}

vendor/half/src/binary16.rs vendored Normal file (+1964)

File diff suppressed because it is too large.

vendor/half/src/binary16/arch.rs vendored Normal file (+847)

@@ -0,0 +1,847 @@
#![allow(dead_code, unused_imports)]
use crate::leading_zeros::leading_zeros_u16;
use core::mem;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86;
#[cfg(target_arch = "aarch64")]
mod aarch64;
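// Dispatch helper: each conversion uses a hardware intrinsic when the compile
// target guarantees the feature, runtime CPU feature detection when `std` is
// available, and the software fallback otherwise.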
macro_rules! convert_fn {
(if x86_feature("f16c") { $f16c:expr }
else if aarch64_feature("fp16") { $aarch64:expr }
else { $fallback:expr }) => {
cfg_if::cfg_if! {
// Use intrinsics directly when the compile target enables the feature (including no_std builds)
if #[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "f16c"
))] {
$f16c
}
else if #[cfg(all(
target_arch = "aarch64",
target_feature = "fp16"
))] {
$aarch64
}
// Use CPU feature detection if using std
else if #[cfg(all(
feature = "std",
any(target_arch = "x86", target_arch = "x86_64")
))] {
use std::arch::is_x86_feature_detected;
if is_x86_feature_detected!("f16c") {
$f16c
} else {
$fallback
}
}
else if #[cfg(all(
feature = "std",
target_arch = "aarch64",
))] {
use std::arch::is_aarch64_feature_detected;
if is_aarch64_feature_detected!("fp16") {
$aarch64
} else {
$fallback
}
}
// Fallback to software
else {
$fallback
}
}
};
}
#[inline]
pub(crate) fn f32_to_f16(f: f32) -> u16 {
convert_fn! {
if x86_feature("f16c") {
unsafe { x86::f32_to_f16_x86_f16c(f) }
} else if aarch64_feature("fp16") {
unsafe { aarch64::f32_to_f16_fp16(f) }
} else {
f32_to_f16_fallback(f)
}
}
}
#[inline]
pub(crate) fn f64_to_f16(f: f64) -> u16 {
convert_fn! {
if x86_feature("f16c") {
unsafe { x86::f32_to_f16_x86_f16c(f as f32) }
} else if aarch64_feature("fp16") {
unsafe { aarch64::f64_to_f16_fp16(f) }
} else {
f64_to_f16_fallback(f)
}
}
}
#[inline]
pub(crate) fn f16_to_f32(i: u16) -> f32 {
convert_fn! {
if x86_feature("f16c") {
unsafe { x86::f16_to_f32_x86_f16c(i) }
} else if aarch64_feature("fp16") {
unsafe { aarch64::f16_to_f32_fp16(i) }
} else {
f16_to_f32_fallback(i)
}
}
}
#[inline]
pub(crate) fn f16_to_f64(i: u16) -> f64 {
convert_fn! {
if x86_feature("f16c") {
unsafe { x86::f16_to_f32_x86_f16c(i) as f64 }
} else if aarch64_feature("fp16") {
unsafe { aarch64::f16_to_f64_fp16(i) }
} else {
f16_to_f64_fallback(i)
}
}
}
#[inline]
pub(crate) fn f32x4_to_f16x4(f: &[f32; 4]) -> [u16; 4] {
convert_fn! {
if x86_feature("f16c") {
unsafe { x86::f32x4_to_f16x4_x86_f16c(f) }
} else if aarch64_feature("fp16") {
unsafe { aarch64::f32x4_to_f16x4_fp16(f) }
} else {
f32x4_to_f16x4_fallback(f)
}
}
}
#[inline]
pub(crate) fn f16x4_to_f32x4(i: &[u16; 4]) -> [f32; 4] {
convert_fn! {
if x86_feature("f16c") {
unsafe { x86::f16x4_to_f32x4_x86_f16c(i) }
} else if aarch64_feature("fp16") {
unsafe { aarch64::f16x4_to_f32x4_fp16(i) }
} else {
f16x4_to_f32x4_fallback(i)
}
}
}
#[inline]
pub(crate) fn f64x4_to_f16x4(f: &[f64; 4]) -> [u16; 4] {
convert_fn! {
if x86_feature("f16c") {
unsafe { x86::f64x4_to_f16x4_x86_f16c(f) }
} else if aarch64_feature("fp16") {
unsafe { aarch64::f64x4_to_f16x4_fp16(f) }
} else {
f64x4_to_f16x4_fallback(f)
}
}
}
#[inline]
pub(crate) fn f16x4_to_f64x4(i: &[u16; 4]) -> [f64; 4] {
convert_fn! {
if x86_feature("f16c") {
unsafe { x86::f16x4_to_f64x4_x86_f16c(i) }
} else if aarch64_feature("fp16") {
unsafe { aarch64::f16x4_to_f64x4_fp16(i) }
} else {
f16x4_to_f64x4_fallback(i)
}
}
}
#[inline]
pub(crate) fn f32x8_to_f16x8(f: &[f32; 8]) -> [u16; 8] {
convert_fn! {
if x86_feature("f16c") {
unsafe { x86::f32x8_to_f16x8_x86_f16c(f) }
} else if aarch64_feature("fp16") {
{
let mut result = [0u16; 8];
convert_chunked_slice_4(f.as_slice(), result.as_mut_slice(),
aarch64::f32x4_to_f16x4_fp16);
result
}
} else {
f32x8_to_f16x8_fallback(f)
}
}
}
#[inline]
pub(crate) fn f16x8_to_f32x8(i: &[u16; 8]) -> [f32; 8] {
convert_fn! {
if x86_feature("f16c") {
unsafe { x86::f16x8_to_f32x8_x86_f16c(i) }
} else if aarch64_feature("fp16") {
{
let mut result = [0f32; 8];
convert_chunked_slice_4(i.as_slice(), result.as_mut_slice(),
aarch64::f16x4_to_f32x4_fp16);
result
}
} else {
f16x8_to_f32x8_fallback(i)
}
}
}
#[inline]
pub(crate) fn f64x8_to_f16x8(f: &[f64; 8]) -> [u16; 8] {
convert_fn! {
if x86_feature("f16c") {
unsafe { x86::f64x8_to_f16x8_x86_f16c(f) }
} else if aarch64_feature("fp16") {
{
let mut result = [0u16; 8];
convert_chunked_slice_4(f.as_slice(), result.as_mut_slice(),
aarch64::f64x4_to_f16x4_fp16);
result
}
} else {
f64x8_to_f16x8_fallback(f)
}
}
}
#[inline]
pub(crate) fn f16x8_to_f64x8(i: &[u16; 8]) -> [f64; 8] {
convert_fn! {
if x86_feature("f16c") {
unsafe { x86::f16x8_to_f64x8_x86_f16c(i) }
} else if aarch64_feature("fp16") {
{
let mut result = [0f64; 8];
convert_chunked_slice_4(i.as_slice(), result.as_mut_slice(),
aarch64::f16x4_to_f64x4_fp16);
result
}
} else {
f16x8_to_f64x8_fallback(i)
}
}
}
#[inline]
pub(crate) fn f32_to_f16_slice(src: &[f32], dst: &mut [u16]) {
convert_fn! {
if x86_feature("f16c") {
convert_chunked_slice_8(src, dst, x86::f32x8_to_f16x8_x86_f16c,
x86::f32x4_to_f16x4_x86_f16c)
} else if aarch64_feature("fp16") {
convert_chunked_slice_4(src, dst, aarch64::f32x4_to_f16x4_fp16)
} else {
slice_fallback(src, dst, f32_to_f16_fallback)
}
}
}
#[inline]
pub(crate) fn f16_to_f32_slice(src: &[u16], dst: &mut [f32]) {
convert_fn! {
if x86_feature("f16c") {
convert_chunked_slice_8(src, dst, x86::f16x8_to_f32x8_x86_f16c,
x86::f16x4_to_f32x4_x86_f16c)
} else if aarch64_feature("fp16") {
convert_chunked_slice_4(src, dst, aarch64::f16x4_to_f32x4_fp16)
} else {
slice_fallback(src, dst, f16_to_f32_fallback)
}
}
}
#[inline]
pub(crate) fn f64_to_f16_slice(src: &[f64], dst: &mut [u16]) {
convert_fn! {
if x86_feature("f16c") {
convert_chunked_slice_8(src, dst, x86::f64x8_to_f16x8_x86_f16c,
x86::f64x4_to_f16x4_x86_f16c)
} else if aarch64_feature("fp16") {
convert_chunked_slice_4(src, dst, aarch64::f64x4_to_f16x4_fp16)
} else {
slice_fallback(src, dst, f64_to_f16_fallback)
}
}
}
#[inline]
pub(crate) fn f16_to_f64_slice(src: &[u16], dst: &mut [f64]) {
convert_fn! {
if x86_feature("f16c") {
convert_chunked_slice_8(src, dst, x86::f16x8_to_f64x8_x86_f16c,
x86::f16x4_to_f64x4_x86_f16c)
} else if aarch64_feature("fp16") {
convert_chunked_slice_4(src, dst, aarch64::f16x4_to_f64x4_fp16)
} else {
slice_fallback(src, dst, f16_to_f64_fallback)
}
}
}
macro_rules! math_fn {
(if aarch64_feature("fp16") { $aarch64:expr }
else { $fallback:expr }) => {
cfg_if::cfg_if! {
// Use intrinsics directly when the compile target enables the feature or when building no_std
if #[cfg(all(
target_arch = "aarch64",
target_feature = "fp16"
))] {
$aarch64
}
// Use CPU feature detection if using std
else if #[cfg(all(
feature = "std",
target_arch = "aarch64",
not(target_feature = "fp16")
))] {
use std::arch::is_aarch64_feature_detected;
if is_aarch64_feature_detected!("fp16") {
$aarch64
} else {
$fallback
}
}
// Fallback to software
else {
$fallback
}
}
};
}
#[inline]
pub(crate) fn add_f16(a: u16, b: u16) -> u16 {
math_fn! {
if aarch64_feature("fp16") {
unsafe { aarch64::add_f16_fp16(a, b) }
} else {
add_f16_fallback(a, b)
}
}
}
#[inline]
pub(crate) fn subtract_f16(a: u16, b: u16) -> u16 {
math_fn! {
if aarch64_feature("fp16") {
unsafe { aarch64::subtract_f16_fp16(a, b) }
} else {
subtract_f16_fallback(a, b)
}
}
}
#[inline]
pub(crate) fn multiply_f16(a: u16, b: u16) -> u16 {
math_fn! {
if aarch64_feature("fp16") {
unsafe { aarch64::multiply_f16_fp16(a, b) }
} else {
multiply_f16_fallback(a, b)
}
}
}
#[inline]
pub(crate) fn divide_f16(a: u16, b: u16) -> u16 {
math_fn! {
if aarch64_feature("fp16") {
unsafe { aarch64::divide_f16_fp16(a, b) }
} else {
divide_f16_fallback(a, b)
}
}
}
#[inline]
pub(crate) fn remainder_f16(a: u16, b: u16) -> u16 {
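// No hardware path here: AArch64 has no floating-point remainder instruction, so
// the f32 round-trip fallback is always used.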
remainder_f16_fallback(a, b)
}
#[inline]
pub(crate) fn product_f16<I: Iterator<Item = u16>>(iter: I) -> u16 {
math_fn! {
if aarch64_feature("fp16") {
// Start from the f16 bit pattern of 1.0 (0x3C00), the multiplicative identity;
// folding from 0 would force every product to zero.
iter.fold(0x3C00, |acc, x| unsafe { aarch64::multiply_f16_fp16(acc, x) })
} else {
product_f16_fallback(iter)
}
}
}
#[inline]
pub(crate) fn sum_f16<I: Iterator<Item = u16>>(iter: I) -> u16 {
math_fn! {
if aarch64_feature("fp16") {
iter.fold(0, |acc, x| unsafe { aarch64::add_f16_fp16(acc, x) })
} else {
sum_f16_fallback(iter)
}
}
}
/// Converts a slice in 8-element chunks, using the 4-wide conversion for a short
/// remainder; partial chunks are zero-padded into a stack buffer first.
#[inline]
fn convert_chunked_slice_8<S: Copy + Default, D: Copy>(
src: &[S],
dst: &mut [D],
fn8: unsafe fn(&[S; 8]) -> [D; 8],
fn4: unsafe fn(&[S; 4]) -> [D; 4],
) {
assert_eq!(src.len(), dst.len());
// TODO: Can be further optimized with array_chunks when it becomes stabilized
let src_chunks = src.chunks_exact(8);
let mut dst_chunks = dst.chunks_exact_mut(8);
let src_remainder = src_chunks.remainder();
for (s, d) in src_chunks.zip(&mut dst_chunks) {
let chunk: &[S; 8] = s.try_into().unwrap();
d.copy_from_slice(unsafe { &fn8(chunk) });
}
// Process remainder
if src_remainder.len() > 4 {
let mut buf: [S; 8] = Default::default();
buf[..src_remainder.len()].copy_from_slice(src_remainder);
let vec = unsafe { fn8(&buf) };
let dst_remainder = dst_chunks.into_remainder();
dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]);
} else if !src_remainder.is_empty() {
let mut buf: [S; 4] = Default::default();
buf[..src_remainder.len()].copy_from_slice(src_remainder);
let vec = unsafe { fn4(&buf) };
let dst_remainder = dst_chunks.into_remainder();
dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]);
}
}
/// Converts a slice in 4-element chunks; a partial final chunk is zero-padded into a
/// stack buffer first.
#[inline]
fn convert_chunked_slice_4<S: Copy + Default, D: Copy>(
src: &[S],
dst: &mut [D],
f: unsafe fn(&[S; 4]) -> [D; 4],
) {
assert_eq!(src.len(), dst.len());
// TODO: Can be further optimized with array_chunks when it becomes stabilized
let src_chunks = src.chunks_exact(4);
let mut dst_chunks = dst.chunks_exact_mut(4);
let src_remainder = src_chunks.remainder();
for (s, d) in src_chunks.zip(&mut dst_chunks) {
let chunk: &[S; 4] = s.try_into().unwrap();
d.copy_from_slice(unsafe { &f(chunk) });
}
// Process remainder
if !src_remainder.is_empty() {
let mut buf: [S; 4] = Default::default();
buf[..src_remainder.len()].copy_from_slice(src_remainder);
let vec = unsafe { f(&buf) };
let dst_remainder = dst_chunks.into_remainder();
dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]);
}
}
/////////////// Fallbacks ////////////////
// In the below functions, round to nearest, with ties to even.
// Let us call the most significant bit that will be shifted out the round_bit.
//
// Round up if either
// a) Removed part > tie.
// (mantissa & round_bit) != 0 && (mantissa & (round_bit - 1)) != 0
// b) Removed part == tie, and retained part is odd.
// (mantissa & round_bit) != 0 && (mantissa & (2 * round_bit)) != 0
// (If removed part == tie and retained part is even, do not round up.)
// These two conditions can be combined into one:
// (mantissa & round_bit) != 0 && (mantissa & ((round_bit - 1) | (2 * round_bit))) != 0
// which can be simplified into
// (mantissa & round_bit) != 0 && (mantissa & (3 * round_bit - 1)) != 0
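//
// Worked example for the normalized f32 -> f16 path below (round_bit = 0x1000, so
// 3 * round_bit - 1 = 0x2FFF):
//   mantissa = 0x1000: removed part == tie, retained part even (bit 13 clear)
//     -> 0x1000 & 0x2FFF == 0, no round up.
//   mantissa = 0x3000: removed part == tie, retained part odd (bit 13 set)
//     -> 0x3000 & 0x2FFF == 0x2000 != 0, round up.
//   mantissa = 0x1001: removed part > tie
//     -> 0x1001 & 0x2FFF == 0x1001 != 0, round up.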
#[inline]
pub(crate) const fn f32_to_f16_fallback(value: f32) -> u16 {
// TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized
// Convert to raw bytes
let x: u32 = unsafe { mem::transmute::<f32, u32>(value) };
// Extract IEEE754 components
let sign = x & 0x8000_0000u32;
let exp = x & 0x7F80_0000u32;
let man = x & 0x007F_FFFFu32;
// Check for all exponent bits being set, which is Infinity or NaN
if exp == 0x7F80_0000u32 {
// Set mantissa MSB for NaN (and also keep shifted mantissa bits)
let nan_bit = if man == 0 { 0 } else { 0x0200u32 };
return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 13)) as u16;
}
// The number is normalized, start assembling half precision version
let half_sign = sign >> 16;
// Unbias the exponent, then bias for half precision
let unbiased_exp = ((exp >> 23) as i32) - 127;
let half_exp = unbiased_exp + 15;
// Check for exponent overflow, return +infinity
if half_exp >= 0x1F {
return (half_sign | 0x7C00u32) as u16;
}
// Check for underflow
if half_exp <= 0 {
// Check mantissa for what we can do
if 14 - half_exp > 24 {
// No rounding possibility, so this is a full underflow, return signed zero
return half_sign as u16;
}
// Don't forget about hidden leading mantissa bit when assembling mantissa
let man = man | 0x0080_0000u32;
let mut half_man = man >> (14 - half_exp);
// Check for rounding (see comment above functions)
let round_bit = 1 << (13 - half_exp);
if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
half_man += 1;
}
// No exponent for subnormals
return (half_sign | half_man) as u16;
}
// Rebias the exponent
let half_exp = (half_exp as u32) << 10;
let half_man = man >> 13;
// Check for rounding (see comment above functions)
let round_bit = 0x0000_1000u32;
if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
// Round it
((half_sign | half_exp | half_man) + 1) as u16
} else {
(half_sign | half_exp | half_man) as u16
}
}
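// Illustrative sanity check (a sketch, not part of the crate's tests): 1.0f32 is
// 0x3F80_0000, so sign == 0, unbiased exponent == 0, half exponent == 15
// (15 << 10 == 0x3C00), mantissa == 0, and no rounding occurs;
// f32_to_f16_fallback(1.0) therefore returns 0x3C00, the f16 bit pattern of 1.0.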
#[inline]
pub(crate) const fn f64_to_f16_fallback(value: f64) -> u16 {
// Convert to raw bytes, truncating the last 32 bits of mantissa; that precision will always
// be lost on half-precision.
// TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized
let val: u64 = unsafe { mem::transmute::<f64, u64>(value) };
let x = (val >> 32) as u32;
// Extract IEEE754 components
let sign = x & 0x8000_0000u32;
let exp = x & 0x7FF0_0000u32;
let man = x & 0x000F_FFFFu32;
// Check for all exponent bits being set, which is Infinity or NaN
if exp == 0x7FF0_0000u32 {
// Set mantissa MSB for NaN (and also keep shifted mantissa bits).
// We also have to check the last 32 bits.
let nan_bit = if man == 0 && (val as u32 == 0) {
0
} else {
0x0200u32
};
return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 10)) as u16;
}
// The number is normalized, start assembling half precision version
let half_sign = sign >> 16;
// Unbias the exponent, then bias for half precision
let unbiased_exp = ((exp >> 20) as i64) - 1023;
let half_exp = unbiased_exp + 15;
// Check for exponent overflow, return +infinity
if half_exp >= 0x1F {
return (half_sign | 0x7C00u32) as u16;
}
// Check for underflow
if half_exp <= 0 {
// Check mantissa for what we can do
if 10 - half_exp > 21 {
// No rounding possibility, so this is a full underflow, return signed zero
return half_sign as u16;
}
// Don't forget about hidden leading mantissa bit when assembling mantissa
let man = man | 0x0010_0000u32;
let mut half_man = man >> (11 - half_exp);
// Check for rounding (see comment above functions)
let round_bit = 1 << (10 - half_exp);
if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
half_man += 1;
}
// No exponent for subnormals
return (half_sign | half_man) as u16;
}
// Rebias the exponent
let half_exp = (half_exp as u32) << 10;
let half_man = man >> 10;
// Check for rounding (see comment above functions)
let round_bit = 0x0000_0200u32;
if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 {
// Round it
((half_sign | half_exp | half_man) + 1) as u16
} else {
(half_sign | half_exp | half_man) as u16
}
}
#[inline]
pub(crate) const fn f16_to_f32_fallback(i: u16) -> f32 {
// Check for signed zero
// TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized
if i & 0x7FFFu16 == 0 {
return unsafe { mem::transmute::<u32, f32>((i as u32) << 16) };
}
let half_sign = (i & 0x8000u16) as u32;
let half_exp = (i & 0x7C00u16) as u32;
let half_man = (i & 0x03FFu16) as u32;
// Check for an infinity or NaN when all exponent bits set
if half_exp == 0x7C00u32 {
// Check for signed infinity if mantissa is zero
if half_man == 0 {
return unsafe { mem::transmute::<u32, f32>((half_sign << 16) | 0x7F80_0000u32) };
} else {
// NaN, keep current mantissa but also set most significant mantissa bit
return unsafe {
mem::transmute::<u32, f32>((half_sign << 16) | 0x7FC0_0000u32 | (half_man << 13))
};
}
}
// Calculate single-precision components with adjusted exponent
let sign = half_sign << 16;
// Unbias exponent
let unbiased_exp = ((half_exp as i32) >> 10) - 15;
// Check for subnormals, which will be normalized by adjusting exponent
if half_exp == 0 {
// Calculate how much to adjust the exponent by
let e = leading_zeros_u16(half_man as u16) - 6;
// Rebias and adjust exponent
let exp = (127 - 15 - e) << 23;
let man = (half_man << (14 + e)) & 0x7F_FF_FFu32;
return unsafe { mem::transmute::<u32, f32>(sign | exp | man) };
}
// Rebias exponent for a normalized normal
let exp = ((unbiased_exp + 127) as u32) << 23;
let man = (half_man & 0x03FFu32) << 13;
unsafe { mem::transmute::<u32, f32>(sign | exp | man) }
}
#[inline]
pub(crate) const fn f16_to_f64_fallback(i: u16) -> f64 {
// Check for signed zero
// TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized
if i & 0x7FFFu16 == 0 {
return unsafe { mem::transmute::<u64, f64>((i as u64) << 48) };
}
let half_sign = (i & 0x8000u16) as u64;
let half_exp = (i & 0x7C00u16) as u64;
let half_man = (i & 0x03FFu16) as u64;
// Check for an infinity or NaN when all exponent bits set
if half_exp == 0x7C00u64 {
// Check for signed infinity if mantissa is zero
if half_man == 0 {
return unsafe {
mem::transmute::<u64, f64>((half_sign << 48) | 0x7FF0_0000_0000_0000u64)
};
} else {
// NaN, keep current mantissa but also set most significant mantissa bit
return unsafe {
mem::transmute::<u64, f64>(
(half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 42),
)
};
}
}
// Calculate double-precision components with adjusted exponent
let sign = half_sign << 48;
// Unbias exponent
let unbiased_exp = ((half_exp as i64) >> 10) - 15;
// Check for subnormals, which will be normalized by adjusting exponent
if half_exp == 0 {
// Calculate how much to adjust the exponent by
let e = leading_zeros_u16(half_man as u16) - 6;
// Rebias and adjust exponent
let exp = ((1023 - 15 - e) as u64) << 52;
let man = (half_man << (43 + e)) & 0xF_FFFF_FFFF_FFFFu64;
return unsafe { mem::transmute::<u64, f64>(sign | exp | man) };
}
// Rebias exponent for a normalized normal
let exp = ((unbiased_exp + 1023) as u64) << 52;
let man = (half_man & 0x03FFu64) << 42;
unsafe { mem::transmute::<u64, f64>(sign | exp | man) }
}
#[inline]
fn f16x4_to_f32x4_fallback(v: &[u16; 4]) -> [f32; 4] {
[
f16_to_f32_fallback(v[0]),
f16_to_f32_fallback(v[1]),
f16_to_f32_fallback(v[2]),
f16_to_f32_fallback(v[3]),
]
}
#[inline]
fn f32x4_to_f16x4_fallback(v: &[f32; 4]) -> [u16; 4] {
[
f32_to_f16_fallback(v[0]),
f32_to_f16_fallback(v[1]),
f32_to_f16_fallback(v[2]),
f32_to_f16_fallback(v[3]),
]
}
#[inline]
fn f16x4_to_f64x4_fallback(v: &[u16; 4]) -> [f64; 4] {
[
f16_to_f64_fallback(v[0]),
f16_to_f64_fallback(v[1]),
f16_to_f64_fallback(v[2]),
f16_to_f64_fallback(v[3]),
]
}
#[inline]
fn f64x4_to_f16x4_fallback(v: &[f64; 4]) -> [u16; 4] {
[
f64_to_f16_fallback(v[0]),
f64_to_f16_fallback(v[1]),
f64_to_f16_fallback(v[2]),
f64_to_f16_fallback(v[3]),
]
}
#[inline]
fn f16x8_to_f32x8_fallback(v: &[u16; 8]) -> [f32; 8] {
[
f16_to_f32_fallback(v[0]),
f16_to_f32_fallback(v[1]),
f16_to_f32_fallback(v[2]),
f16_to_f32_fallback(v[3]),
f16_to_f32_fallback(v[4]),
f16_to_f32_fallback(v[5]),
f16_to_f32_fallback(v[6]),
f16_to_f32_fallback(v[7]),
]
}
#[inline]
fn f32x8_to_f16x8_fallback(v: &[f32; 8]) -> [u16; 8] {
[
f32_to_f16_fallback(v[0]),
f32_to_f16_fallback(v[1]),
f32_to_f16_fallback(v[2]),
f32_to_f16_fallback(v[3]),
f32_to_f16_fallback(v[4]),
f32_to_f16_fallback(v[5]),
f32_to_f16_fallback(v[6]),
f32_to_f16_fallback(v[7]),
]
}
#[inline]
fn f16x8_to_f64x8_fallback(v: &[u16; 8]) -> [f64; 8] {
[
f16_to_f64_fallback(v[0]),
f16_to_f64_fallback(v[1]),
f16_to_f64_fallback(v[2]),
f16_to_f64_fallback(v[3]),
f16_to_f64_fallback(v[4]),
f16_to_f64_fallback(v[5]),
f16_to_f64_fallback(v[6]),
f16_to_f64_fallback(v[7]),
]
}
#[inline]
fn f64x8_to_f16x8_fallback(v: &[f64; 8]) -> [u16; 8] {
[
f64_to_f16_fallback(v[0]),
f64_to_f16_fallback(v[1]),
f64_to_f16_fallback(v[2]),
f64_to_f16_fallback(v[3]),
f64_to_f16_fallback(v[4]),
f64_to_f16_fallback(v[5]),
f64_to_f16_fallback(v[6]),
f64_to_f16_fallback(v[7]),
]
}
#[inline]
fn slice_fallback<S: Copy, D>(src: &[S], dst: &mut [D], f: fn(S) -> D) {
assert_eq!(src.len(), dst.len());
for (s, d) in src.iter().copied().zip(dst.iter_mut()) {
*d = f(s);
}
}
#[inline]
fn add_f16_fallback(a: u16, b: u16) -> u16 {
f32_to_f16(f16_to_f32(a) + f16_to_f32(b))
}
#[inline]
fn subtract_f16_fallback(a: u16, b: u16) -> u16 {
f32_to_f16(f16_to_f32(a) - f16_to_f32(b))
}
#[inline]
fn multiply_f16_fallback(a: u16, b: u16) -> u16 {
f32_to_f16(f16_to_f32(a) * f16_to_f32(b))
}
#[inline]
fn divide_f16_fallback(a: u16, b: u16) -> u16 {
f32_to_f16(f16_to_f32(a) / f16_to_f32(b))
}
#[inline]
fn remainder_f16_fallback(a: u16, b: u16) -> u16 {
f32_to_f16(f16_to_f32(a) % f16_to_f32(b))
}
#[inline]
fn product_f16_fallback<I: Iterator<Item = u16>>(iter: I) -> u16 {
f32_to_f16(iter.map(f16_to_f32).product())
}
#[inline]
fn sum_f16_fallback<I: Iterator<Item = u16>>(iter: I) -> u16 {
f32_to_f16(iter.map(f16_to_f32).sum())
}
// TODO SIMD arithmetic

175
vendor/half/src/binary16/arch/aarch64.rs vendored Normal file

@@ -0,0 +1,175 @@
use core::{
arch::{
aarch64::{float32x4_t, float64x2_t, uint16x4_t},
asm,
},
mem::MaybeUninit,
ptr,
};
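// The fp16 conversions below are written as inline asm, presumably because Rust's
// aarch64 half-precision intrinsics are not yet stable.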
#[target_feature(enable = "fp16")]
#[inline]
pub(super) unsafe fn f16_to_f32_fp16(i: u16) -> f32 {
let result: f32;
asm!(
"fcvt {0:s}, {1:h}",
out(vreg) result,
in(vreg) i,
options(pure, nomem, nostack, preserves_flags));
result
}
#[target_feature(enable = "fp16")]
#[inline]
pub(super) unsafe fn f16_to_f64_fp16(i: u16) -> f64 {
let result: f64;
asm!(
"fcvt {0:d}, {1:h}",
out(vreg) result,
in(vreg) i,
options(pure, nomem, nostack, preserves_flags));
result
}
#[target_feature(enable = "fp16")]
#[inline]
pub(super) unsafe fn f32_to_f16_fp16(f: f32) -> u16 {
let result: u16;
asm!(
"fcvt {0:h}, {1:s}",
out(vreg) result,
in(vreg) f,
options(pure, nomem, nostack, preserves_flags));
result
}
#[target_feature(enable = "fp16")]
#[inline]
pub(super) unsafe fn f64_to_f16_fp16(f: f64) -> u16 {
let result: u16;
asm!(
"fcvt {0:h}, {1:d}",
out(vreg) result,
in(vreg) f,
options(pure, nomem, nostack, preserves_flags));
result
}
#[target_feature(enable = "fp16")]
#[inline]
pub(super) unsafe fn f16x4_to_f32x4_fp16(v: &[u16; 4]) -> [f32; 4] {
let mut vec = MaybeUninit::<uint16x4_t>::uninit();
ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4);
let result: float32x4_t;
asm!(
"fcvtl {0:v}.4s, {1:v}.4h",
out(vreg) result,
in(vreg) vec.assume_init(),
options(pure, nomem, nostack));
*(&result as *const float32x4_t).cast()
}
#[target_feature(enable = "fp16")]
#[inline]
pub(super) unsafe fn f32x4_to_f16x4_fp16(v: &[f32; 4]) -> [u16; 4] {
let mut vec = MaybeUninit::<float32x4_t>::uninit();
ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4);
let result: uint16x4_t;
asm!(
"fcvtn {0:v}.4h, {1:v}.4s",
out(vreg) result,
in(vreg) vec.assume_init(),
options(pure, nomem, nostack));
*(&result as *const uint16x4_t).cast()
}
#[target_feature(enable = "fp16")]
#[inline]
pub(super) unsafe fn f16x4_to_f64x4_fp16(v: &[u16; 4]) -> [f64; 4] {
let mut vec = MaybeUninit::<uint16x4_t>::uninit();
ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4);
let low: float64x2_t;
let high: float64x2_t;
asm!(
"fcvtl {2:v}.4s, {3:v}.4h", // Convert to f32
"fcvtl {0:v}.2d, {2:v}.2s", // Convert low part to f64
"fcvtl2 {1:v}.2d, {2:v}.4s", // Convert high part to f64
lateout(vreg) low,
lateout(vreg) high,
out(vreg) _,
in(vreg) vec.assume_init(),
options(pure, nomem, nostack));
*[low, high].as_ptr().cast()
}
#[target_feature(enable = "fp16")]
#[inline]
pub(super) unsafe fn f64x4_to_f16x4_fp16(v: &[f64; 4]) -> [u16; 4] {
let mut low = MaybeUninit::<float64x2_t>::uninit();
let mut high = MaybeUninit::<float64x2_t>::uninit();
ptr::copy_nonoverlapping(v.as_ptr(), low.as_mut_ptr().cast(), 2);
ptr::copy_nonoverlapping(v[2..].as_ptr(), high.as_mut_ptr().cast(), 2);
let result: uint16x4_t;
asm!(
"fcvtn {1:v}.2s, {2:v}.2d", // Convert low to f32
"fcvtn2 {1:v}.4s, {3:v}.2d", // Convert high to f32
"fcvtn {0:v}.4h, {1:v}.4s", // Convert to f16
lateout(vreg) result,
out(vreg) _,
in(vreg) low.assume_init(),
in(vreg) high.assume_init(),
options(pure, nomem, nostack));
*(&result as *const uint16x4_t).cast()
}
#[target_feature(enable = "fp16")]
#[inline]
pub(super) unsafe fn add_f16_fp16(a: u16, b: u16) -> u16 {
let result: u16;
asm!(
"fadd {0:h}, {1:h}, {2:h}",
out(vreg) result,
in(vreg) a,
in(vreg) b,
options(pure, nomem, nostack));
result
}
#[target_feature(enable = "fp16")]
#[inline]
pub(super) unsafe fn subtract_f16_fp16(a: u16, b: u16) -> u16 {
let result: u16;
asm!(
"fsub {0:h}, {1:h}, {2:h}",
out(vreg) result,
in(vreg) a,
in(vreg) b,
options(pure, nomem, nostack));
result
}
#[target_feature(enable = "fp16")]
#[inline]
pub(super) unsafe fn multiply_f16_fp16(a: u16, b: u16) -> u16 {
let result: u16;
asm!(
"fmul {0:h}, {1:h}, {2:h}",
out(vreg) result,
in(vreg) a,
in(vreg) b,
options(pure, nomem, nostack));
result
}
#[target_feature(enable = "fp16")]
#[inline]
pub(super) unsafe fn divide_f16_fp16(a: u16, b: u16) -> u16 {
let result: u16;
asm!(
"fdiv {0:h}, {1:h}, {2:h}",
out(vreg) result,
in(vreg) a,
in(vreg) b,
options(pure, nomem, nostack));
result
}

132
vendor/half/src/binary16/arch/x86.rs vendored Normal file

@@ -0,0 +1,132 @@
use core::{mem::MaybeUninit, ptr};
#[cfg(target_arch = "x86")]
use core::arch::x86::{
__m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps,
_MM_FROUND_TO_NEAREST_INT,
};
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::{
__m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, _mm_cvtps_ph,
_MM_FROUND_TO_NEAREST_INT,
};
#[cfg(target_arch = "x86")]
use core::arch::x86::_mm_cvtps_ph;
use super::convert_chunked_slice_8;
/////////////// x86/x86_64 f16c ////////////////
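// Note on initialization below: `MaybeUninit::zeroed` is used whenever the copy does
// not fill the whole vector (e.g. a single u16 lane of an __m128i), since the
// conversion reads all lanes; fully written vectors can safely start from `uninit`.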
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f16_to_f32_x86_f16c(i: u16) -> f32 {
let mut vec = MaybeUninit::<__m128i>::zeroed();
vec.as_mut_ptr().cast::<u16>().write(i);
let retval = _mm_cvtph_ps(vec.assume_init());
*(&retval as *const __m128).cast()
}
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f32_to_f16_x86_f16c(f: f32) -> u16 {
let mut vec = MaybeUninit::<__m128>::zeroed();
vec.as_mut_ptr().cast::<f32>().write(f);
let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT);
*(&retval as *const __m128i).cast()
}
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f16x4_to_f32x4_x86_f16c(v: &[u16; 4]) -> [f32; 4] {
let mut vec = MaybeUninit::<__m128i>::zeroed();
ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4);
let retval = _mm_cvtph_ps(vec.assume_init());
*(&retval as *const __m128).cast()
}
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f32x4_to_f16x4_x86_f16c(v: &[f32; 4]) -> [u16; 4] {
let mut vec = MaybeUninit::<__m128>::uninit();
ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4);
let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT);
*(&retval as *const __m128i).cast()
}
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f16x4_to_f64x4_x86_f16c(v: &[u16; 4]) -> [f64; 4] {
let array = f16x4_to_f32x4_x86_f16c(v);
// Let compiler vectorize this regular cast for now.
// TODO: investigate auto-detecting sse2/avx convert features
[
array[0] as f64,
array[1] as f64,
array[2] as f64,
array[3] as f64,
]
}
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f64x4_to_f16x4_x86_f16c(v: &[f64; 4]) -> [u16; 4] {
// Let compiler vectorize this regular cast for now.
// TODO: investigate auto-detecting sse2/avx convert features
let v = [v[0] as f32, v[1] as f32, v[2] as f32, v[3] as f32];
f32x4_to_f16x4_x86_f16c(&v)
}
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f16x8_to_f32x8_x86_f16c(v: &[u16; 8]) -> [f32; 8] {
let mut vec = MaybeUninit::<__m128i>::zeroed();
ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 8);
let retval = _mm256_cvtph_ps(vec.assume_init());
*(&retval as *const __m256).cast()
}
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f32x8_to_f16x8_x86_f16c(v: &[f32; 8]) -> [u16; 8] {
let mut vec = MaybeUninit::<__m256>::uninit();
ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 8);
let retval = _mm256_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT);
*(&retval as *const __m128i).cast()
}
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f16x8_to_f64x8_x86_f16c(v: &[u16; 8]) -> [f64; 8] {
let array = f16x8_to_f32x8_x86_f16c(v);
// Let compiler vectorize this regular cast for now.
// TODO: investigate auto-detecting sse2/avx convert features
[
array[0] as f64,
array[1] as f64,
array[2] as f64,
array[3] as f64,
array[4] as f64,
array[5] as f64,
array[6] as f64,
array[7] as f64,
]
}
#[target_feature(enable = "f16c")]
#[inline]
pub(super) unsafe fn f64x8_to_f16x8_x86_f16c(v: &[f64; 8]) -> [u16; 8] {
// Let compiler vectorize this regular cast for now.
// TODO: investigate auto-detecting sse2/avx convert features
let v = [
v[0] as f32,
v[1] as f32,
v[2] as f32,
v[3] as f32,
v[4] as f32,
v[5] as f32,
v[6] as f32,
v[7] as f32,
];
f32x8_to_f16x8_x86_f16c(&v)
}

65
vendor/half/src/leading_zeros.rs vendored Normal file

@@ -0,0 +1,65 @@
// https://doc.rust-lang.org/std/primitive.u16.html#method.leading_zeros
#[cfg(not(any(all(
target_arch = "spirv",
not(all(
target_feature = "IntegerFunctions2INTEL",
target_feature = "SPV_INTEL_shader_integer_functions2"
))
))))]
#[inline]
pub(crate) const fn leading_zeros_u16(x: u16) -> u32 {
x.leading_zeros()
}
#[cfg(all(
target_arch = "spirv",
not(all(
target_feature = "IntegerFunctions2INTEL",
target_feature = "SPV_INTEL_shader_integer_functions2"
))
))]
#[inline]
pub(crate) const fn leading_zeros_u16(x: u16) -> u32 {
leading_zeros_u16_fallback(x)
}
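// Software bit scan for targets where `u16::leading_zeros` is unavailable (SPIR-V
// without the Intel integer-functions extension): count zeros from the MSB down,
// fully unrolled via `crunchy`.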
#[cfg(any(
test,
all(
target_arch = "spirv",
not(all(
target_feature = "IntegerFunctions2INTEL",
target_feature = "SPV_INTEL_shader_integer_functions2"
))
)
))]
#[inline]
const fn leading_zeros_u16_fallback(mut x: u16) -> u32 {
use crunchy::unroll;
let mut c = 0;
let msb = 1 << 15;
unroll! { for i in 0 .. 16 {
if x & msb == 0 {
c += 1;
} else {
return c;
}
#[allow(unused_assignments)]
if i < 15 {
x <<= 1;
}
}}
c
}
#[cfg(test)]
mod test {
#[test]
fn leading_zeros_u16_fallback() {
for x in [44, 97, 304, 1179, 23571] {
assert_eq!(super::leading_zeros_u16_fallback(x), x.leading_zeros());
}
}
}

271
vendor/half/src/lib.rs vendored Normal file

@@ -0,0 +1,271 @@
//! A crate that provides support for half-precision 16-bit floating point types.
//!
//! This crate provides the [`struct@f16`] type, which is an implementation of the IEEE 754-2008 standard
//! [`binary16`] a.k.a "half" floating point type. This 16-bit floating point type is intended for
//! efficient storage where the full range and precision of a larger floating point value is not
//! required. This is especially useful for image storage formats.
//!
//! This crate also provides a [`struct@bf16`] type, an alternative 16-bit floating point format. The
//! [`bfloat16`] format is a truncated IEEE 754 standard `binary32` float that preserves the
//! exponent to allow the same range as [`f32`] but with only 8 bits of precision (instead of 11
//! bits for [`struct@f16`]). See the [`struct@bf16`] type for details.
//!
//! Because [`struct@f16`] and [`struct@bf16`] are primarily for efficient storage, floating point operations such
//! as addition, multiplication, etc. are not always implemented by hardware. When hardware does not
//! support these operations, this crate emulates them by converting the value to
//! [`f32`] before performing the operation and then back afterward.
//!
//! Note that conversions from [`f32`]/[`f64`] to both [`struct@f16`] and [`struct@bf16`] are lossy operations, and
//! just as converting an [`f64`] to [`f32`] is lossy and has no `Into`/`From` trait
//! implementations, neither do these smaller types have those trait implementations.
//! Instead, use the `from_f32`/`from_f64` functions for the types in this crate. If you don't care
//! about lossy conversions and need trait conversions, use the appropriate [`num-traits`]
//! traits that are implemented.
//!
//! This crate also provides a [`slice`][mod@slice] module for zero-copy in-place conversions of
//! [`u16`] slices to both [`struct@f16`] and [`struct@bf16`], as well as efficient vectorized conversions of
//! larger buffers of floating point values to and from these half formats.
//!
//! The crate supports `#[no_std]` when the `std` cargo feature is not enabled, so can be used in
//! embedded environments without using the Rust [`std`] library. The `std` feature enables support
//! for the standard library and is enabled by default, see the [Cargo Features](#cargo-features)
//! section below.
//!
//! A [`prelude`] module is provided for easy importing of available utility traits.
//!
//! # Serialization
//!
//! When the `serde` feature is enabled, [`struct@f16`] and [`struct@bf16`] will be serialized as a newtype of
//! [`u16`] by default. In binary formats this is ideal, as it will generally use just two bytes for
//! storage. For string formats like JSON, however, this isn't as useful, and due to design
//! limitations of serde, it's not possible for the default `Serialize` implementation to support
//! different serialization for different formats.
//!
//! Instead, it's up to the container type of the floats to control how they are serialized. This
//! can easily be done when using the derive macros via `#[serde(serialize_with="")]`
//! attributes. For both [`struct@f16`] and [`struct@bf16`], `serialize_as_f32` and `serialize_as_string`
//! functions are provided for use with this attribute.
//!
//! Deserialization of both float types supports deserializing from the default serialization,
//! strings, and `f32`/`f64` values, so no additional work is required.
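//!
//! For example, a minimal sketch of the attribute described above on a hypothetical
//! struct (assuming the `serde` feature is enabled):
//!
//! ```rust
//! # #[cfg(feature = "serde")] {
//! use half::f16;
//! use serde::Serialize;
//!
//! #[derive(Serialize)]
//! struct Reading {
//!     // Hypothetical field; serialized as an f32 number instead of the default
//!     // u16 newtype
//!     #[serde(serialize_with = "f16::serialize_as_f32")]
//!     value: f16,
//! }
//! # }
//! ```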
//!
//! # Hardware support
//!
//! Hardware support for these conversions and arithmetic will be used
//! whenever hardware support is available—either through intrinsics or targeted assembly—although
//! a nightly Rust toolchain may be required for some hardware. When hardware supports it, the
//! functions and traits in the [`slice`][mod@slice] and [`vec`] modules will also use vectorized
//! SIMD instructions for increased efficiency.
//!
//! The following list details hardware support for floating point types in this crate. When using
//! the `std` cargo feature, runtime CPU target detection will be used. To get the most performance
//! benefits, compile for specific CPU features, which avoids the runtime overhead and works in a
//! `no_std` environment.
//!
//! | Architecture | CPU Target Feature | Notes |
//! | ------------ | ------------------ | ----- |
//! | `x86`/`x86_64` | `f16c` | This supports conversion to/from [`struct@f16`] only (including vector SIMD) and does not support any [`struct@bf16`] or arithmetic operations. |
//! | `aarch64` | `fp16` | This supports all operations on [`struct@f16`] only. |
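//!
//! For example, an illustrative way to opt into compile-time hardware support on an
//! `x86_64` target (adjust the target feature for your architecture):
//!
//! ```text
//! RUSTFLAGS="-C target-feature=+f16c" cargo build --release
//! ```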
//!
//! # Cargo Features
//!
//! This crate supports a number of optional cargo features. None of these features are enabled by
//! default, even `std`.
//!
//! - **`alloc`** — Enable use of the [`alloc`] crate when not using the `std` library.
//!
//! Among other functions, this enables the [`vec`] module, which contains zero-copy
//! conversions for the [`Vec`] type. This allows fast conversion between raw `Vec<u16>` bits and
//! `Vec<f16>` or `Vec<bf16>` arrays, and vice versa.
//!
//! - **`std`** — Enable features that depend on the Rust [`std`] library. This also enables the
//! `alloc` feature automatically.
//!
//!   Enabling the `std` feature enables runtime CPU feature detection of hardware support.
//!   Without this feature detection, hardware acceleration is only used when the compile target
//!   enables the relevant features.
//!
//! - **`serde`** — Adds support for the [`serde`] crate by implementing [`Serialize`] and
//! [`Deserialize`] traits for both [`struct@f16`] and [`struct@bf16`].
//!
//! - **`num-traits`** — Adds support for the [`num-traits`] crate by implementing [`ToPrimitive`],
//! [`FromPrimitive`], [`ToBytes`], `FromBytes`, [`AsPrimitive`], [`Num`], [`Float`],
//! [`FloatCore`], and [`Bounded`] traits for both [`struct@f16`] and [`struct@bf16`].
//!
//! - **`bytemuck`** — Adds support for the [`bytemuck`] crate by implementing [`Zeroable`] and
//! [`Pod`] traits for both [`struct@f16`] and [`struct@bf16`].
//!
//! - **`zerocopy`** — Adds support for the [`zerocopy`] crate by implementing [`IntoBytes`] and
//! [`FromBytes`] traits for both [`struct@f16`] and [`struct@bf16`].
//!
//! - **`rand_distr`** — Adds support for the [`rand_distr`] crate by implementing [`Distribution`]
//! and other traits for both [`struct@f16`] and [`struct@bf16`].
//!
//! - **`rkyv`** -- Enable zero-copy deserialization with the [`rkyv`] crate.
//!
//! - **`arbitrary`** -- Enable fuzzing support with the [`arbitrary`] crate by implementing the
//!   [`Arbitrary`] trait.
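//!
//! For example, an illustrative `Cargo.toml` entry enabling a couple of these features:
//!
//! ```toml
//! [dependencies]
//! half = { version = "2", features = ["std", "serde"] }
//! ```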
//!
//! [`alloc`]: https://doc.rust-lang.org/alloc/
//! [`std`]: https://doc.rust-lang.org/std/
//! [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
//! [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
//! [`serde`]: https://crates.io/crates/serde
//! [`bytemuck`]: https://crates.io/crates/bytemuck
//! [`num-traits`]: https://crates.io/crates/num-traits
//! [`zerocopy`]: https://crates.io/crates/zerocopy
//! [`rand_distr`]: https://crates.io/crates/rand_distr
//! [`rkyv`]: https://crates.io/crates/rkyv
//! [`arbitrary`]: https://crates.io/crates/arbitrary
#![cfg_attr(
feature = "alloc",
doc = "
[`vec`]: mod@vec"
)]
#![cfg_attr(
not(feature = "alloc"),
doc = "
[`vec`]: #
[`Vec`]: https://docs.rust-lang.org/stable/alloc/vec/struct.Vec.html"
)]
#![cfg_attr(
feature = "serde",
doc = "
[`Serialize`]: serde::Serialize
[`Deserialize`]: serde::Deserialize"
)]
#![cfg_attr(
not(feature = "serde"),
doc = "
[`Serialize`]: https://docs.rs/serde/*/serde/trait.Serialize.html
[`Deserialize`]: https://docs.rs/serde/*/serde/trait.Deserialize.html"
)]
#![cfg_attr(
feature = "num-traits",
doc = "
[`ToPrimitive`]: ::num_traits::ToPrimitive
[`FromPrimitive`]: ::num_traits::FromPrimitive
[`ToBytes`]: ::num_traits::ToBytes
[`AsPrimitive`]: ::num_traits::AsPrimitive
[`Num`]: ::num_traits::Num
[`Float`]: ::num_traits::Float
[`FloatCore`]: ::num_traits::float::FloatCore
[`Bounded`]: ::num_traits::Bounded"
)]
#![cfg_attr(
not(feature = "num-traits"),
doc = "
[`ToPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.ToPrimitive.html
[`FromPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.FromPrimitive.html
[`ToBytes`]: https://docs.rs/num-traits/*/num_traits/ops/bytes/trait.ToBytes.html
[`AsPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.AsPrimitive.html
[`Num`]: https://docs.rs/num-traits/*/num_traits/trait.Num.html
[`Float`]: https://docs.rs/num-traits/*/num_traits/float/trait.Float.html
[`FloatCore`]: https://docs.rs/num-traits/*/num_traits/float/trait.FloatCore.html
[`Bounded`]: https://docs.rs/num-traits/*/num_traits/bounds/trait.Bounded.html"
)]
#![cfg_attr(
feature = "bytemuck",
doc = "
[`Zeroable`]: bytemuck::Zeroable
[`Pod`]: bytemuck::Pod"
)]
#![cfg_attr(
not(feature = "bytemuck"),
doc = "
[`Zeroable`]: https://docs.rs/bytemuck/*/bytemuck/trait.Zeroable.html
[`Pod`]: https://docs.rs/bytemuck/*/bytemuck/trait.Pod.html"
)]
#![cfg_attr(
feature = "zerocopy",
doc = "
[`IntoBytes`]: zerocopy::IntoBytes
[`FromBytes`]: zerocopy::FromBytes"
)]
#![cfg_attr(
not(feature = "zerocopy"),
doc = "
[`IntoBytes`]: https://docs.rs/zerocopy/*/zerocopy/trait.IntoBytes.html
[`FromBytes`]: https://docs.rs/zerocopy/*/zerocopy/trait.FromBytes.html"
)]
#![cfg_attr(
feature = "rand_distr",
doc = "
[`Distribution`]: rand::distr::Distribution"
)]
#![cfg_attr(
not(feature = "rand_distr"),
doc = "
[`Distribution`]: https://docs.rs/rand/*/rand/distr/trait.Distribution.html"
)]
#![cfg_attr(
feature = "arbitrary",
doc = "
[`Arbitrary`]: arbitrary::Arbitrary"
)]
#![cfg_attr(
not(feature = "arbitrary"),
doc = "
[`Arbitrary`]: https://docs.rs/arbitrary/*/arbitrary/trait.Arbitrary.html"
)]
#![warn(
missing_docs,
missing_copy_implementations,
trivial_numeric_casts,
future_incompatible
)]
#![cfg_attr(not(target_arch = "spirv"), warn(missing_debug_implementations))]
#![allow(clippy::verbose_bit_mask, clippy::cast_lossless, unexpected_cfgs)]
#![cfg_attr(not(feature = "std"), no_std)]
#![doc(html_root_url = "https://docs.rs/half/2.6.0")]
#![doc(test(attr(deny(warnings), allow(unused))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#[cfg(feature = "alloc")]
extern crate alloc;
mod bfloat;
mod binary16;
mod leading_zeros;
#[cfg(feature = "num-traits")]
mod num_traits;
#[cfg(not(target_arch = "spirv"))]
pub mod slice;
#[cfg(feature = "alloc")]
pub mod vec;
pub use bfloat::bf16;
pub use binary16::f16;
#[cfg(feature = "rand_distr")]
mod rand_distr;
/// A collection of the most used items and traits in this crate for easy importing.
///
/// # Examples
///
/// ```rust
/// use half::prelude::*;
/// ```
pub mod prelude {
#[doc(no_inline)]
pub use crate::{bf16, f16};
#[cfg(not(target_arch = "spirv"))]
#[doc(no_inline)]
pub use crate::slice::{HalfBitsSliceExt, HalfFloatSliceExt};
#[cfg(feature = "alloc")]
#[doc(no_inline)]
pub use crate::vec::{HalfBitsVecExt, HalfFloatVecExt};
}
// Keep this module private to crate
mod private {
use crate::{bf16, f16};
pub trait SealedHalf {}
impl SealedHalf for f16 {}
impl SealedHalf for bf16 {}
}

1550
vendor/half/src/num_traits.rs vendored Normal file

File diff suppressed because it is too large

125
vendor/half/src/rand_distr.rs vendored Normal file

@@ -0,0 +1,125 @@
use crate::{bf16, f16};
use rand::{distr::Distribution, Rng};
use rand_distr::uniform::UniformFloat;
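// None of these distributions have a native half-precision implementation, so each
// one is implemented by sampling an f32 and converting the result down.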
macro_rules! impl_distribution_via_f32 {
($Ty:ty, $Distr:ty) => {
impl Distribution<$Ty> for $Distr {
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $Ty {
<$Ty>::from_f32(<Self as Distribution<f32>>::sample(self, rng))
}
}
};
}
impl_distribution_via_f32!(f16, rand_distr::StandardUniform);
impl_distribution_via_f32!(f16, rand_distr::StandardNormal);
impl_distribution_via_f32!(f16, rand_distr::Exp1);
impl_distribution_via_f32!(f16, rand_distr::Open01);
impl_distribution_via_f32!(f16, rand_distr::OpenClosed01);
impl_distribution_via_f32!(bf16, rand_distr::StandardUniform);
impl_distribution_via_f32!(bf16, rand_distr::StandardNormal);
impl_distribution_via_f32!(bf16, rand_distr::Exp1);
impl_distribution_via_f32!(bf16, rand_distr::Open01);
impl_distribution_via_f32!(bf16, rand_distr::OpenClosed01);
#[derive(Debug, Clone, Copy)]
pub struct Float16Sampler(UniformFloat<f32>);
impl rand_distr::uniform::SampleUniform for f16 {
type Sampler = Float16Sampler;
}
impl rand_distr::uniform::UniformSampler for Float16Sampler {
type X = f16;
fn new<B1, B2>(low: B1, high: B2) -> Result<Self, rand_distr::uniform::Error>
where
B1: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
B2: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
{
Ok(Self(UniformFloat::new(
low.borrow().to_f32(),
high.borrow().to_f32(),
)?))
}
fn new_inclusive<B1, B2>(low: B1, high: B2) -> Result<Self, rand_distr::uniform::Error>
where
B1: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
B2: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
{
Ok(Self(UniformFloat::new_inclusive(
low.borrow().to_f32(),
high.borrow().to_f32(),
)?))
}
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
f16::from_f32(self.0.sample(rng))
}
}
#[derive(Debug, Clone, Copy)]
pub struct BFloat16Sampler(UniformFloat<f32>);
impl rand_distr::uniform::SampleUniform for bf16 {
type Sampler = BFloat16Sampler;
}
impl rand_distr::uniform::UniformSampler for BFloat16Sampler {
type X = bf16;
fn new<B1, B2>(low: B1, high: B2) -> Result<Self, rand_distr::uniform::Error>
where
B1: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
B2: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
{
Ok(Self(UniformFloat::new(
low.borrow().to_f32(),
high.borrow().to_f32(),
)?))
}
fn new_inclusive<B1, B2>(low: B1, high: B2) -> Result<Self, rand_distr::uniform::Error>
where
B1: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
B2: rand_distr::uniform::SampleBorrow<Self::X> + Sized,
{
Ok(Self(UniformFloat::new_inclusive(
low.borrow().to_f32(),
high.borrow().to_f32(),
)?))
}
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
bf16::from_f32(self.0.sample(rng))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[allow(unused_imports)]
use rand::{rng, Rng};
use rand_distr::{StandardNormal, StandardUniform, Uniform};
#[test]
fn test_sample_f16() {
let mut rng = rng();
let _: f16 = rng.sample(StandardUniform);
let _: f16 = rng.sample(StandardNormal);
let _: f16 = rng.sample(Uniform::new(f16::from_f32(0.0), f16::from_f32(1.0)).unwrap());
#[cfg(feature = "num-traits")]
let _: f16 =
rng.sample(rand_distr::Normal::new(f16::from_f32(0.0), f16::from_f32(1.0)).unwrap());
}
#[test]
fn test_sample_bf16() {
let mut rng = rng();
let _: bf16 = rng.sample(StandardUniform);
let _: bf16 = rng.sample(StandardNormal);
let _: bf16 = rng.sample(Uniform::new(bf16::from_f32(0.0), bf16::from_f32(1.0)).unwrap());
#[cfg(feature = "num-traits")]
let _: bf16 =
rng.sample(rand_distr::Normal::new(bf16::from_f32(0.0), bf16::from_f32(1.0)).unwrap());
}
}

845
vendor/half/src/slice.rs vendored Normal file

@@ -0,0 +1,845 @@
//! Contains utility functions and traits to convert between slices of [`u16`] bits and [`struct@f16`] or
//! [`struct@bf16`] numbers.
//!
//! The utility [`HalfBitsSliceExt`] sealed extension trait is implemented for `[u16]` slices,
//! while the utility [`HalfFloatSliceExt`] sealed extension trait is implemented for both `[f16]`
//! and `[bf16]` slices. These traits provide efficient conversions and reinterpret casting of
//! larger buffers of floating point values, and are automatically included in the
//! [`prelude`][crate::prelude] module.
use crate::{bf16, binary16::arch, f16};
#[cfg(feature = "alloc")]
#[allow(unused_imports)]
use alloc::{vec, vec::Vec};
use core::slice;
/// Extensions to `[f16]` and `[bf16]` slices to support conversion and reinterpret operations.
///
/// This trait is sealed and cannot be implemented outside of this crate.
pub trait HalfFloatSliceExt: private::SealedHalfFloatSlice {
/// Reinterprets a slice of [`struct@f16`] or [`struct@bf16`] numbers as a slice of [`u16`] bits.
///
/// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory
/// location as `self`.
///
/// # Examples
///
/// ```rust
/// # use half::prelude::*;
/// let float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)];
/// let int_buffer = float_buffer.reinterpret_cast();
///
/// assert_eq!(int_buffer, [float_buffer[0].to_bits(), float_buffer[1].to_bits(), float_buffer[2].to_bits()]);
/// ```
#[must_use]
fn reinterpret_cast(&self) -> &[u16];
/// Reinterprets a mutable slice of [`struct@f16`] or [`struct@bf16`] numbers as a mutable slice of [`u16`]
/// bits.
///
/// This is a zero-copy operation. The transmuted slice has the same lifetime as the original,
/// which prevents mutating `self` as long as the returned `&mut [u16]` is borrowed.
///
/// # Examples
///
/// ```rust
/// # use half::prelude::*;
/// let mut float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)];
///
/// {
/// let int_buffer = float_buffer.reinterpret_cast_mut();
///
/// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]);
///
/// // Mutating the u16 slice will also mutate the original
/// int_buffer[0] = 0;
/// }
///
/// // Note that we need to drop int_buffer before using float_buffer again or we will get a borrow error.
/// assert_eq!(float_buffer, [f16::from_f32(0.), f16::from_f32(2.), f16::from_f32(3.)]);
/// ```
#[must_use]
fn reinterpret_cast_mut(&mut self) -> &mut [u16];
/// Converts all of the elements of a `[f32]` slice into [`struct@f16`] or [`struct@bf16`] values in `self`.
///
/// The length of `src` must be the same as `self`.
///
/// The conversion operation is vectorized over the slice, meaning the conversion may be more
/// efficient than converting individual elements on some hardware that supports SIMD
/// conversions. See [crate documentation](crate) for more information on hardware conversion
/// support.
///
/// # Panics
///
/// This function will panic if the two slices have different lengths.
///
/// # Examples
/// ```rust
/// # use half::prelude::*;
/// // Initialize an empty buffer
/// let mut buffer = [0u16; 4];
/// let buffer = buffer.reinterpret_cast_mut::<f16>();
///
/// let float_values = [1., 2., 3., 4.];
///
/// // Now convert
/// buffer.convert_from_f32_slice(&float_values);
///
/// assert_eq!(buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]);
/// ```
fn convert_from_f32_slice(&mut self, src: &[f32]);
/// Converts all of the elements of a `[f64]` slice into [`struct@f16`] or [`struct@bf16`] values in `self`.
///
/// The length of `src` must be the same as `self`.
///
/// The conversion operation is vectorized over the slice, meaning the conversion may be more
/// efficient than converting individual elements on some hardware that supports SIMD
/// conversions. See [crate documentation](crate) for more information on hardware conversion
/// support.
///
/// # Panics
///
/// This function will panic if the two slices have different lengths.
///
/// # Examples
/// ```rust
/// # use half::prelude::*;
/// // Initialize an empty buffer
/// let mut buffer = [0u16; 4];
/// let buffer = buffer.reinterpret_cast_mut::<f16>();
///
/// let float_values = [1., 2., 3., 4.];
///
/// // Now convert
/// buffer.convert_from_f64_slice(&float_values);
///
/// assert_eq!(buffer, [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]);
/// ```
fn convert_from_f64_slice(&mut self, src: &[f64]);
/// Converts all of the [`struct@f16`] or [`struct@bf16`] elements of `self` into [`f32`] values in `dst`.
///
/// The length of `dst` must be the same as `self`.
///
/// The conversion operation is vectorized over the slice, meaning the conversion may be more
/// efficient than converting individual elements on some hardware that supports SIMD
/// conversions. See [crate documentation](crate) for more information on hardware conversion
/// support.
///
/// # Panics
///
/// This function will panic if the two slices have different lengths.
///
/// # Examples
/// ```rust
/// # use half::prelude::*;
/// // Initialize an empty buffer
/// let mut buffer = [0f32; 4];
///
/// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)];
///
/// // Now convert
/// half_values.convert_to_f32_slice(&mut buffer);
///
/// assert_eq!(buffer, [1., 2., 3., 4.]);
/// ```
fn convert_to_f32_slice(&self, dst: &mut [f32]);
/// Converts all of the [`struct@f16`] or [`struct@bf16`] elements of `self` into [`f64`] values in `dst`.
///
/// The length of `dst` must be the same as `self`.
///
/// The conversion operation is vectorized over the slice, meaning the conversion may be more
/// efficient than converting individual elements on some hardware that supports SIMD
/// conversions. See [crate documentation](crate) for more information on hardware conversion
/// support.
///
/// # Panics
///
/// This function will panic if the two slices have different lengths.
///
/// # Examples
/// ```rust
/// # use half::prelude::*;
/// // Initialize an empty buffer
/// let mut buffer = [0f64; 4];
///
/// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)];
///
/// // Now convert
/// half_values.convert_to_f64_slice(&mut buffer);
///
/// assert_eq!(buffer, [1., 2., 3., 4.]);
/// ```
fn convert_to_f64_slice(&self, dst: &mut [f64]);
// Because trait is sealed, we can get away with different interfaces between features.
/// Converts all of the [`struct@f16`] or [`struct@bf16`] elements of `self` into [`f32`] values in a new
/// vector
///
/// The conversion operation is vectorized over the slice, meaning the conversion may be more
/// efficient than converting individual elements on some hardware that supports SIMD
/// conversions. See [crate documentation](crate) for more information on hardware conversion
/// support.
///
/// This method is only available with the `std` or `alloc` feature.
///
/// # Examples
/// ```rust
/// # use half::prelude::*;
/// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)];
/// let vec = half_values.to_f32_vec();
///
/// assert_eq!(vec, vec![1., 2., 3., 4.]);
/// ```
#[cfg(any(feature = "alloc", feature = "std"))]
#[must_use]
fn to_f32_vec(&self) -> Vec<f32>;
/// Converts all of the [`struct@f16`] or [`struct@bf16`] elements of `self` into [`f64`] values in a new
/// vector.
///
/// The conversion operation is vectorized over the slice, meaning the conversion may be more
/// efficient than converting individual elements on some hardware that supports SIMD
/// conversions. See [crate documentation](crate) for more information on hardware conversion
/// support.
///
/// This method is only available with the `std` or `alloc` feature.
///
/// # Examples
/// ```rust
/// # use half::prelude::*;
/// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)];
/// let vec = half_values.to_f64_vec();
///
/// assert_eq!(vec, vec![1., 2., 3., 4.]);
/// ```
#[cfg(feature = "alloc")]
#[must_use]
fn to_f64_vec(&self) -> Vec<f64>;
}
/// Extensions to `[u16]` slices to support reinterpret operations.
///
/// This trait is sealed and cannot be implemented outside of this crate.
pub trait HalfBitsSliceExt: private::SealedHalfBitsSlice {
/// Reinterprets a slice of [`u16`] bits as a slice of [`struct@f16`] or [`struct@bf16`] numbers.
///
/// `H` is the type to cast to, and must be either the [`struct@f16`] or [`struct@bf16`] type.
///
/// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory
/// location as `self`.
///
/// # Examples
///
/// ```rust
/// # use half::prelude::*;
/// let int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()];
/// let float_buffer: &[f16] = int_buffer.reinterpret_cast();
///
/// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]);
///
/// // You may have to specify the cast type directly if the compiler can't infer the type.
/// // The following is also valid in Rust.
/// let typed_buffer = int_buffer.reinterpret_cast::<f16>();
/// ```
#[must_use]
fn reinterpret_cast<H>(&self) -> &[H]
where
H: crate::private::SealedHalf;
/// Reinterprets a mutable slice of [`u16`] bits as a mutable slice of [`struct@f16`] or [`struct@bf16`]
/// numbers.
///
/// `H` is the type to cast to, and must be either the [`struct@f16`] or [`struct@bf16`] type.
///
/// This is a zero-copy operation. The transmuted slice has the same lifetime as the original,
/// which prevents mutating `self` as long as the returned `&mut [f16]` is borrowed.
///
/// # Examples
///
/// ```rust
/// # use half::prelude::*;
/// let mut int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()];
///
/// {
/// let float_buffer: &mut [f16] = int_buffer.reinterpret_cast_mut();
///
/// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]);
///
/// // Mutating the f16 slice will also mutate the original
/// float_buffer[0] = f16::from_f32(0.);
/// }
///
/// // Note that we need to drop float_buffer before using int_buffer again or we will get a borrow error.
/// assert_eq!(int_buffer, [f16::from_f32(0.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]);
///
/// // You may have to specify the cast type directly if the compiler can't infer the type.
/// // The following is also valid in Rust.
/// let typed_buffer = int_buffer.reinterpret_cast_mut::<f16>();
/// ```
#[must_use]
fn reinterpret_cast_mut<H>(&mut self) -> &mut [H]
where
H: crate::private::SealedHalf;
}
mod private {
use crate::{bf16, f16};
pub trait SealedHalfFloatSlice {}
impl SealedHalfFloatSlice for [f16] {}
impl SealedHalfFloatSlice for [bf16] {}
pub trait SealedHalfBitsSlice {}
impl SealedHalfBitsSlice for [u16] {}
}
impl HalfFloatSliceExt for [f16] {
#[inline]
fn reinterpret_cast(&self) -> &[u16] {
let pointer = self.as_ptr() as *const u16;
let length = self.len();
// SAFETY: We are reconstructing full length of original slice, using its same lifetime,
// and the size of elements are identical
unsafe { slice::from_raw_parts(pointer, length) }
}
#[inline]
fn reinterpret_cast_mut(&mut self) -> &mut [u16] {
let pointer = self.as_mut_ptr().cast::<u16>();
let length = self.len();
// SAFETY: We are reconstructing full length of original slice, using its same lifetime,
// and the size of elements are identical
unsafe { slice::from_raw_parts_mut(pointer, length) }
}
#[inline]
fn convert_from_f32_slice(&mut self, src: &[f32]) {
assert_eq!(
self.len(),
src.len(),
"destination and source slices have different lengths"
);
arch::f32_to_f16_slice(src, self.reinterpret_cast_mut())
}
#[inline]
fn convert_from_f64_slice(&mut self, src: &[f64]) {
assert_eq!(
self.len(),
src.len(),
"destination and source slices have different lengths"
);
arch::f64_to_f16_slice(src, self.reinterpret_cast_mut())
}
#[inline]
fn convert_to_f32_slice(&self, dst: &mut [f32]) {
assert_eq!(
self.len(),
dst.len(),
"destination and source slices have different lengths"
);
arch::f16_to_f32_slice(self.reinterpret_cast(), dst)
}
#[inline]
fn convert_to_f64_slice(&self, dst: &mut [f64]) {
assert_eq!(
self.len(),
dst.len(),
"destination and source slices have different lengths"
);
arch::f16_to_f64_slice(self.reinterpret_cast(), dst)
}
#[cfg(any(feature = "alloc", feature = "std"))]
#[inline]
#[allow(clippy::uninit_vec)]
fn to_f32_vec(&self) -> Vec<f32> {
let mut vec = vec![0f32; self.len()];
self.convert_to_f32_slice(&mut vec);
vec
}
#[cfg(any(feature = "alloc", feature = "std"))]
#[inline]
#[allow(clippy::uninit_vec)]
fn to_f64_vec(&self) -> Vec<f64> {
let mut vec = vec![0f64; self.len()];
self.convert_to_f64_slice(&mut vec);
vec
}
}
impl HalfFloatSliceExt for [bf16] {
#[inline]
fn reinterpret_cast(&self) -> &[u16] {
let pointer = self.as_ptr() as *const u16;
let length = self.len();
// SAFETY: We are reconstructing full length of original slice, using its same lifetime,
// and the size of elements are identical
unsafe { slice::from_raw_parts(pointer, length) }
}
#[inline]
fn reinterpret_cast_mut(&mut self) -> &mut [u16] {
let pointer = self.as_mut_ptr().cast::<u16>();
let length = self.len();
// SAFETY: We are reconstructing full length of original slice, using its same lifetime,
// and the size of elements are identical
unsafe { slice::from_raw_parts_mut(pointer, length) }
}
#[inline]
fn convert_from_f32_slice(&mut self, src: &[f32]) {
assert_eq!(
self.len(),
src.len(),
"destination and source slices have different lengths"
);
// Just use regular loop here until there's any bf16 SIMD support.
for (i, f) in src.iter().enumerate() {
self[i] = bf16::from_f32(*f);
}
}
#[inline]
fn convert_from_f64_slice(&mut self, src: &[f64]) {
assert_eq!(
self.len(),
src.len(),
"destination and source slices have different lengths"
);
// Just use regular loop here until there's any bf16 SIMD support.
for (i, f) in src.iter().enumerate() {
self[i] = bf16::from_f64(*f);
}
}
#[inline]
fn convert_to_f32_slice(&self, dst: &mut [f32]) {
assert_eq!(
self.len(),
dst.len(),
"destination and source slices have different lengths"
);
// Just use regular loop here until there's any bf16 SIMD support.
for (i, f) in self.iter().enumerate() {
dst[i] = f.to_f32();
}
}
#[inline]
fn convert_to_f64_slice(&self, dst: &mut [f64]) {
assert_eq!(
self.len(),
dst.len(),
"destination and source slices have different lengths"
);
// Just use regular loop here until there's any bf16 SIMD support.
for (i, f) in self.iter().enumerate() {
dst[i] = f.to_f64();
}
}
#[cfg(any(feature = "alloc", feature = "std"))]
#[inline]
#[allow(clippy::uninit_vec)]
fn to_f32_vec(&self) -> Vec<f32> {
let mut vec = vec![0f32; self.len()];
self.convert_to_f32_slice(&mut vec);
vec
}
#[cfg(any(feature = "alloc", feature = "std"))]
#[inline]
#[allow(clippy::uninit_vec)]
fn to_f64_vec(&self) -> Vec<f64> {
let mut vec = vec![0f64; self.len()];
self.convert_to_f64_slice(&mut vec);
vec
}
}
impl HalfBitsSliceExt for [u16] {
// Since we sealed all the traits involved, these are safe.
#[inline]
fn reinterpret_cast<H>(&self) -> &[H]
where
H: crate::private::SealedHalf,
{
let pointer = self.as_ptr() as *const H;
let length = self.len();
// SAFETY: We are reconstructing full length of original slice, using its same lifetime,
// and the size of elements are identical
unsafe { slice::from_raw_parts(pointer, length) }
}
#[inline]
fn reinterpret_cast_mut<H>(&mut self) -> &mut [H]
where
H: crate::private::SealedHalf,
{
let pointer = self.as_mut_ptr() as *mut H;
let length = self.len();
// SAFETY: We are reconstructing the full length of the original slice, reusing its
// lifetime, and the half-float types have the same size and alignment as `u16`
unsafe { slice::from_raw_parts_mut(pointer, length) }
}
}
#[allow(clippy::float_cmp)]
#[cfg(test)]
mod test {
use super::{HalfBitsSliceExt, HalfFloatSliceExt};
use crate::{bf16, f16};
#[test]
fn test_slice_conversions_f16() {
let bits = &[
f16::E.to_bits(),
f16::PI.to_bits(),
f16::EPSILON.to_bits(),
f16::FRAC_1_SQRT_2.to_bits(),
];
let numbers = &[f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2];
// Convert from bits to numbers
let from_bits = bits.reinterpret_cast::<f16>();
assert_eq!(from_bits, numbers);
// Convert from numbers back to bits
let to_bits = from_bits.reinterpret_cast();
assert_eq!(to_bits, bits);
}
#[test]
fn test_mutability_f16() {
let mut bits_array = [f16::PI.to_bits()];
let bits = &mut bits_array[..];
{
// these braces end the mutable borrow so `bits` can be used again below
let numbers = bits.reinterpret_cast_mut();
numbers[0] = f16::E;
}
assert_eq!(bits, &[f16::E.to_bits()]);
bits[0] = f16::LN_2.to_bits();
assert_eq!(bits, &[f16::LN_2.to_bits()]);
}
#[test]
fn test_slice_conversions_bf16() {
let bits = &[
bf16::E.to_bits(),
bf16::PI.to_bits(),
bf16::EPSILON.to_bits(),
bf16::FRAC_1_SQRT_2.to_bits(),
];
let numbers = &[bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2];
// Convert from bits to numbers
let from_bits = bits.reinterpret_cast::<bf16>();
assert_eq!(from_bits, numbers);
// Convert from numbers back to bits
let to_bits = from_bits.reinterpret_cast();
assert_eq!(to_bits, bits);
}
#[test]
fn test_mutability_bf16() {
let mut bits_array = [bf16::PI.to_bits()];
let bits = &mut bits_array[..];
{
// these braces end the mutable borrow so `bits` can be used again below
let numbers = bits.reinterpret_cast_mut();
numbers[0] = bf16::E;
}
assert_eq!(bits, &[bf16::E.to_bits()]);
bits[0] = bf16::LN_2.to_bits();
assert_eq!(bits, &[bf16::LN_2.to_bits()]);
}
#[test]
fn slice_convert_f16_f32() {
// Exact chunks
let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.];
let vf16 = [
f16::from_f32(1.),
f16::from_f32(2.),
f16::from_f32(3.),
f16::from_f32(4.),
f16::from_f32(5.),
f16::from_f32(6.),
f16::from_f32(7.),
f16::from_f32(8.),
];
let mut buf32 = vf32;
let mut buf16 = vf16;
vf16.convert_to_f32_slice(&mut buf32);
assert_eq!(&vf32, &buf32);
buf16.convert_from_f32_slice(&vf32);
assert_eq!(&vf16, &buf16);
// Partial with chunks
let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.];
let vf16 = [
f16::from_f32(1.),
f16::from_f32(2.),
f16::from_f32(3.),
f16::from_f32(4.),
f16::from_f32(5.),
f16::from_f32(6.),
f16::from_f32(7.),
f16::from_f32(8.),
f16::from_f32(9.),
];
let mut buf32 = vf32;
let mut buf16 = vf16;
vf16.convert_to_f32_slice(&mut buf32);
assert_eq!(&vf32, &buf32);
buf16.convert_from_f32_slice(&vf32);
assert_eq!(&vf16, &buf16);
// Partial without chunks
let vf32 = [1., 2.];
let vf16 = [f16::from_f32(1.), f16::from_f32(2.)];
let mut buf32 = vf32;
let mut buf16 = vf16;
vf16.convert_to_f32_slice(&mut buf32);
assert_eq!(&vf32, &buf32);
buf16.convert_from_f32_slice(&vf32);
assert_eq!(&vf16, &buf16);
}
#[test]
fn slice_convert_bf16_f32() {
// Exact chunks
let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.];
let vf16 = [
bf16::from_f32(1.),
bf16::from_f32(2.),
bf16::from_f32(3.),
bf16::from_f32(4.),
bf16::from_f32(5.),
bf16::from_f32(6.),
bf16::from_f32(7.),
bf16::from_f32(8.),
];
let mut buf32 = vf32;
let mut buf16 = vf16;
vf16.convert_to_f32_slice(&mut buf32);
assert_eq!(&vf32, &buf32);
buf16.convert_from_f32_slice(&vf32);
assert_eq!(&vf16, &buf16);
// Partial with chunks
let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.];
let vf16 = [
bf16::from_f32(1.),
bf16::from_f32(2.),
bf16::from_f32(3.),
bf16::from_f32(4.),
bf16::from_f32(5.),
bf16::from_f32(6.),
bf16::from_f32(7.),
bf16::from_f32(8.),
bf16::from_f32(9.),
];
let mut buf32 = vf32;
let mut buf16 = vf16;
vf16.convert_to_f32_slice(&mut buf32);
assert_eq!(&vf32, &buf32);
buf16.convert_from_f32_slice(&vf32);
assert_eq!(&vf16, &buf16);
// Partial without chunks
let vf32 = [1., 2.];
let vf16 = [bf16::from_f32(1.), bf16::from_f32(2.)];
let mut buf32 = vf32;
let mut buf16 = vf16;
vf16.convert_to_f32_slice(&mut buf32);
assert_eq!(&vf32, &buf32);
buf16.convert_from_f32_slice(&vf32);
assert_eq!(&vf16, &buf16);
}
#[test]
fn slice_convert_f16_f64() {
// Exact chunks
let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.];
let vf16 = [
f16::from_f64(1.),
f16::from_f64(2.),
f16::from_f64(3.),
f16::from_f64(4.),
f16::from_f64(5.),
f16::from_f64(6.),
f16::from_f64(7.),
f16::from_f64(8.),
];
let mut buf64 = vf64;
let mut buf16 = vf16;
vf16.convert_to_f64_slice(&mut buf64);
assert_eq!(&vf64, &buf64);
buf16.convert_from_f64_slice(&vf64);
assert_eq!(&vf16, &buf16);
// Partial with chunks
let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.];
let vf16 = [
f16::from_f64(1.),
f16::from_f64(2.),
f16::from_f64(3.),
f16::from_f64(4.),
f16::from_f64(5.),
f16::from_f64(6.),
f16::from_f64(7.),
f16::from_f64(8.),
f16::from_f64(9.),
];
let mut buf64 = vf64;
let mut buf16 = vf16;
vf16.convert_to_f64_slice(&mut buf64);
assert_eq!(&vf64, &buf64);
buf16.convert_from_f64_slice(&vf64);
assert_eq!(&vf16, &buf16);
// Partial without chunks
let vf64 = [1., 2.];
let vf16 = [f16::from_f64(1.), f16::from_f64(2.)];
let mut buf64 = vf64;
let mut buf16 = vf16;
vf16.convert_to_f64_slice(&mut buf64);
assert_eq!(&vf64, &buf64);
buf16.convert_from_f64_slice(&vf64);
assert_eq!(&vf16, &buf16);
}
#[test]
fn slice_convert_bf16_f64() {
// Exact chunks
let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.];
let vf16 = [
bf16::from_f64(1.),
bf16::from_f64(2.),
bf16::from_f64(3.),
bf16::from_f64(4.),
bf16::from_f64(5.),
bf16::from_f64(6.),
bf16::from_f64(7.),
bf16::from_f64(8.),
];
let mut buf64 = vf64;
let mut buf16 = vf16;
vf16.convert_to_f64_slice(&mut buf64);
assert_eq!(&vf64, &buf64);
buf16.convert_from_f64_slice(&vf64);
assert_eq!(&vf16, &buf16);
// Partial with chunks
let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.];
let vf16 = [
bf16::from_f64(1.),
bf16::from_f64(2.),
bf16::from_f64(3.),
bf16::from_f64(4.),
bf16::from_f64(5.),
bf16::from_f64(6.),
bf16::from_f64(7.),
bf16::from_f64(8.),
bf16::from_f64(9.),
];
let mut buf64 = vf64;
let mut buf16 = vf16;
vf16.convert_to_f64_slice(&mut buf64);
assert_eq!(&vf64, &buf64);
buf16.convert_from_f64_slice(&vf64);
assert_eq!(&vf16, &buf16);
// Partial without chunks
let vf64 = [1., 2.];
let vf16 = [bf16::from_f64(1.), bf16::from_f64(2.)];
let mut buf64 = vf64;
let mut buf16 = vf16;
vf16.convert_to_f64_slice(&mut buf64);
assert_eq!(&vf64, &buf64);
buf16.convert_from_f64_slice(&vf64);
assert_eq!(&vf16, &buf16);
}
#[test]
#[should_panic]
fn convert_from_f32_slice_len_mismatch_panics() {
let mut slice1 = [f16::ZERO; 3];
let slice2 = [0f32; 4];
slice1.convert_from_f32_slice(&slice2);
}
#[test]
#[should_panic]
fn convert_from_f64_slice_len_mismatch_panics() {
let mut slice1 = [f16::ZERO; 3];
let slice2 = [0f64; 4];
slice1.convert_from_f64_slice(&slice2);
}
#[test]
#[should_panic]
fn convert_to_f32_slice_len_mismatch_panics() {
let slice1 = [f16::ZERO; 3];
let mut slice2 = [0f32; 4];
slice1.convert_to_f32_slice(&mut slice2);
}
#[test]
#[should_panic]
fn convert_to_f64_slice_len_mismatch_panics() {
let slice1 = [f16::ZERO; 3];
let mut slice2 = [0f64; 4];
slice1.convert_to_f64_slice(&mut slice2);
}
}

260
vendor/half/src/vec.rs vendored Normal file
View File

@@ -0,0 +1,260 @@
//! Contains utility functions and traits to convert between vectors of [`u16`] bits and [`struct@f16`] or
//! [`bf16`] vectors.
//!
//! The utility [`HalfBitsVecExt`] sealed extension trait is implemented for [`Vec<u16>`] vectors,
//! while the utility [`HalfFloatVecExt`] sealed extension trait is implemented for both
//! [`Vec<f16>`] and [`Vec<bf16>`] vectors. These traits provide efficient conversions and
//! reinterpret casting of larger buffers of floating point values, and are automatically included
//! in the [`prelude`][crate::prelude] module.
//!
//! This module is only available with the `std` or `alloc` feature.
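//!
//! # Examples
//!
//! A minimal round-trip sketch using the traits documented below (assuming the `std` or
//! `alloc` feature is enabled):
//!
//! ```rust
//! # use half::prelude::*;
//! // Convert a slice of f32 values into a new Vec<f16>...
//! let floats: Vec<f16> = Vec::from_f32_slice(&[1., 2., 3.]);
//! // ...view the same allocation as raw bits...
//! let bits: Vec<u16> = floats.reinterpret_into();
//! // ...and reinterpret it back, all without copying the buffer.
//! let floats: Vec<f16> = bits.reinterpret_into::<f16>();
//! assert_eq!(floats, vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]);
//! ```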
use super::{bf16, f16, slice::HalfFloatSliceExt};
#[cfg(feature = "alloc")]
#[allow(unused_imports)]
use alloc::{vec, vec::Vec};
use core::mem;
/// Extensions to [`Vec<f16>`] and [`Vec<bf16>`] to support reinterpret operations.
///
/// This trait is sealed and cannot be implemented outside of this crate.
pub trait HalfFloatVecExt: private::SealedHalfFloatVec {
/// Reinterprets a vector of [`struct@f16`] or [`bf16`] numbers as a vector of [`u16`] bits.
///
/// This is a zero-copy operation. The reinterpreted vector has the same memory location as
/// `self`.
///
/// # Examples
///
/// ```rust
/// # use half::prelude::*;
/// let float_buffer = vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)];
/// let int_buffer = float_buffer.reinterpret_into();
///
/// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]);
/// ```
#[must_use]
fn reinterpret_into(self) -> Vec<u16>;
/// Converts all of the elements of a `[f32]` slice into a new [`struct@f16`] or [`bf16`] vector.
///
/// The conversion operation is vectorized over the slice, meaning the conversion may be more
/// efficient than converting individual elements on some hardware that supports SIMD
/// conversions. See [crate documentation][crate] for more information on hardware conversion
/// support.
///
/// # Examples
/// ```rust
/// # use half::prelude::*;
/// let float_values = [1., 2., 3., 4.];
/// let vec: Vec<f16> = Vec::from_f32_slice(&float_values);
///
/// assert_eq!(vec, vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]);
/// ```
#[must_use]
fn from_f32_slice(slice: &[f32]) -> Self;
/// Converts all of the elements of a `[f64]` slice into a new [`struct@f16`] or [`bf16`] vector.
///
/// The conversion operation is vectorized over the slice, meaning the conversion may be more
/// efficient than converting individual elements on some hardware that supports SIMD
/// conversions. See [crate documentation][crate] for more information on hardware conversion
/// support.
///
/// # Examples
/// ```rust
/// # use half::prelude::*;
/// let float_values = [1., 2., 3., 4.];
/// let vec: Vec<f16> = Vec::from_f64_slice(&float_values);
///
/// assert_eq!(vec, vec![f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]);
/// ```
#[must_use]
fn from_f64_slice(slice: &[f64]) -> Self;
}
/// Extensions to [`Vec<u16>`] to support reinterpret operations.
///
/// This trait is sealed and cannot be implemented outside of this crate.
pub trait HalfBitsVecExt: private::SealedHalfBitsVec {
/// Reinterprets a vector of [`u16`] bits as a vector of [`struct@f16`] or [`bf16`] numbers.
///
/// `H` is the type to cast to, and must be either the [`struct@f16`] or [`bf16`] type.
///
/// This is a zero-copy operation. The reinterpreted vector has the same memory location as
/// `self`.
///
/// # Examples
///
/// ```rust
/// # use half::prelude::*;
/// let int_buffer = vec![f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()];
/// let float_buffer = int_buffer.reinterpret_into::<f16>();
///
/// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]);
/// ```
#[must_use]
fn reinterpret_into<H>(self) -> Vec<H>
where
H: crate::private::SealedHalf;
}
mod private {
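// The sealed-trait pattern: keeping these supertraits in a private module lets
// downstream crates name and use the extension traits while preventing them from
// adding their own impls.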
use crate::{bf16, f16};
#[cfg(feature = "alloc")]
#[allow(unused_imports)]
use alloc::vec::Vec;
pub trait SealedHalfFloatVec {}
impl SealedHalfFloatVec for Vec<f16> {}
impl SealedHalfFloatVec for Vec<bf16> {}
pub trait SealedHalfBitsVec {}
impl SealedHalfBitsVec for Vec<u16> {}
}
impl HalfFloatVecExt for Vec<f16> {
#[inline]
fn reinterpret_into(mut self) -> Vec<u16> {
// An f16 vector has the same length and capacity as a u16 vector
let length = self.len();
let capacity = self.capacity();
// Actually reinterpret the contents of the Vec<f16> as u16,
// knowing that structs are represented as only their members in memory,
// which is the u16 part of `f16(u16)`
let pointer = self.as_mut_ptr() as *mut u16;
// Prevent the destructor of the old Vec<f16> from running, so the allocation is not freed
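// Note: nightly's unstable `Vec::into_raw_parts` would bundle these steps; the manual
// `mem::forget` pattern keeps this on stable Rust.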
mem::forget(self);
// Finally, construct a new Vec<u16> from the raw parts
// SAFETY: We are reconstructing the full length and capacity of the original vector,
// reusing its original pointer, and the element size is identical.
unsafe { Vec::from_raw_parts(pointer, length, capacity) }
}
#[allow(clippy::uninit_vec)]
fn from_f32_slice(slice: &[f32]) -> Self {
let mut vec = vec![f16::from_bits(0); slice.len()];
vec.convert_from_f32_slice(slice);
vec
}
#[allow(clippy::uninit_vec)]
fn from_f64_slice(slice: &[f64]) -> Self {
let mut vec = vec![f16::from_bits(0); slice.len()];
vec.convert_from_f64_slice(slice);
vec
}
}
impl HalfFloatVecExt for Vec<bf16> {
#[inline]
fn reinterpret_into(mut self) -> Vec<u16> {
// A bf16 vector has the same length and capacity as a u16 vector
let length = self.len();
let capacity = self.capacity();
// Actually reinterpret the contents of the Vec<bf16> as u16,
// knowing that structs are represented as only their members in memory,
// which is the u16 part of `bf16(u16)`
let pointer = self.as_mut_ptr() as *mut u16;
// Prevent the destructor of the old Vec<bf16> from running, so the allocation is not freed
mem::forget(self);
// Finally, construct a new Vec<u16> from the raw parts
// SAFETY: We are reconstructing the full length and capacity of the original vector,
// reusing its original pointer, and the element size is identical.
unsafe { Vec::from_raw_parts(pointer, length, capacity) }
}
#[allow(clippy::uninit_vec)]
fn from_f32_slice(slice: &[f32]) -> Self {
let mut vec = vec![bf16::from_bits(0); slice.len()];
vec.convert_from_f32_slice(slice);
vec
}
#[allow(clippy::uninit_vec)]
fn from_f64_slice(slice: &[f64]) -> Self {
let mut vec = vec![bf16::from_bits(0); slice.len()];
vec.convert_from_f64_slice(slice);
vec
}
}
impl HalfBitsVecExt for Vec<u16> {
// This is safe because all the traits involved are sealed, so `H` can only be `f16` or `bf16`
#[inline]
fn reinterpret_into<H>(mut self) -> Vec<H>
where
H: crate::private::SealedHalf,
{
// A half-float vector has the same length and capacity as a u16 vector
let length = self.len();
let capacity = self.capacity();
// Actually reinterpret the contents of the Vec<u16> as H,
// knowing that the half-float structs are represented as only their
// u16 member in memory
let pointer = self.as_mut_ptr() as *mut H;
// Prevent the destructor of the old Vec<u16> from running, so the allocation is not freed
mem::forget(self);
// Finally, construct a new Vec<H> from the raw parts
// SAFETY: We are reconstructing the full length and capacity of the original vector,
// reusing its original pointer, and the element size is identical.
unsafe { Vec::from_raw_parts(pointer, length, capacity) }
}
}
#[cfg(test)]
mod test {
use super::{HalfBitsVecExt, HalfFloatVecExt};
use crate::{bf16, f16};
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::vec;
#[test]
fn test_vec_conversions_f16() {
let numbers = vec![f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2];
let bits = vec![
f16::E.to_bits(),
f16::PI.to_bits(),
f16::EPSILON.to_bits(),
f16::FRAC_1_SQRT_2.to_bits(),
];
let bits_cloned = bits.clone();
// Convert from bits to numbers
let from_bits = bits.reinterpret_into::<f16>();
assert_eq!(&from_bits[..], &numbers[..]);
// Convert from numbers back to bits
let to_bits = from_bits.reinterpret_into();
assert_eq!(&to_bits[..], &bits_cloned[..]);
}
#[test]
fn test_vec_conversions_bf16() {
let numbers = vec![bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2];
let bits = vec![
bf16::E.to_bits(),
bf16::PI.to_bits(),
bf16::EPSILON.to_bits(),
bf16::FRAC_1_SQRT_2.to_bits(),
];
let bits_cloned = bits.clone();
// Convert from bits to numbers
let from_bits = bits.reinterpret_into::<bf16>();
assert_eq!(&from_bits[..], &numbers[..]);
// Convert from numbers back to bits
let to_bits = from_bits.reinterpret_into();
assert_eq!(&to_bits[..], &bits_cloned[..]);
}
}