Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

vendor/const_soft_float/src/lib.rs

@@ -0,0 +1,159 @@
//! # Rust floating-point in a constant context
//!
//! Features:
//! * `no_std`
//! * `const_trait_impl`
//! * `const_mut_refs`
//!
//! Works on `stable`:
//! ```
//! # use const_soft_float::soft_f32::SoftF32;
//! const fn const_f32_add(a: f32, b: f32) -> f32 {
//! SoftF32(a).add(SoftF32(b)).to_f32()
//! }
//! ```
//!
//!
//! Usage with `const_trait_impl` (requires `nightly`):
//! ```
//! # cfg_if::cfg_if! {
//! # if #[cfg(nightly)] {
//! # #![feature(const_trait_impl)]
//! # use const_soft_float::soft_f32::SoftF32;
//! const fn const_f32_add(a: f32, b: f32) -> f32 {
//! (SoftF32(a) + SoftF32(b)).to_f32()
//! }
//! # }
//! # }
//! ```
//!
//! Usage with `const_mut_refs` (requires `nightly`):
//! ```
//! # cfg_if::cfg_if! {
//! # if #[cfg(nightly)] {
//! # #![feature(const_trait_impl)]
//! # #![feature(const_mut_refs)]
//! # use const_soft_float::soft_f32::SoftF32;
//! const fn const_f32_add(a: f32, b: f32) -> f32 {
//! let mut x = SoftF32(a);
//! x += SoftF32(b);
//! x.to_f32()
//! }
//! # }
//! # }
//! ```
//!
//!
#![cfg_attr(feature = "no_std", no_std)]
#![cfg_attr(feature = "const_trait_impl", feature(const_trait_impl))]
#![cfg_attr(feature = "const_mut_refs", feature(const_mut_refs))]
pub mod soft_f32;
pub mod soft_f64;
const fn abs_diff(a: i32, b: i32) -> u32 {
a.wrapping_sub(b).wrapping_abs() as u32
}
#[cfg(test)]
mod tests {
use crate::soft_f32::SoftF32;
use crate::soft_f64::SoftF64;
const RANGE: core::ops::Range<i32> = -1000..1000;
const F32_FACTOR: f32 = 10.0;
const F64_FACTOR: f64 = 1000.0;
#[test]
fn f32_add() {
for a in RANGE {
let a = a as f32 * F32_FACTOR;
for b in RANGE {
let b = b as f32 * F32_FACTOR;
assert_eq!(SoftF32(a).add(SoftF32(b)).0, a + b);
}
}
}
#[test]
fn f32_sub() {
for a in RANGE {
let a = a as f32 * F32_FACTOR;
for b in RANGE {
let b = b as f32 * F32_FACTOR;
assert_eq!(SoftF32(a).sub(SoftF32(b)).0, a - b);
}
}
}
#[test]
fn f32_mul() {
for a in RANGE {
let a = a as f32 * F32_FACTOR;
for b in RANGE {
let b = b as f32 * F32_FACTOR;
assert_eq!(SoftF32(a).mul(SoftF32(b)).0, a * b);
}
}
}
#[test]
fn f32_div() {
for a in RANGE {
let a = a as f32 * F32_FACTOR;
for b in RANGE {
let b = b as f32 * F32_FACTOR;
let x = SoftF32(a).div(SoftF32(b)).0;
let y = a / b;
assert!(x == y || x.is_nan() && y.is_nan())
}
}
}
#[test]
fn f64_add() {
for a in RANGE {
let a = a as f64 * F64_FACTOR;
for b in RANGE {
let b = b as f64 * F64_FACTOR;
assert_eq!(SoftF64(a).add(SoftF64(b)).0, a + b);
}
}
}
#[test]
fn f64_sub() {
for a in RANGE {
let a = a as f64 * F64_FACTOR;
for b in RANGE {
let b = b as f64 * F64_FACTOR;
assert_eq!(SoftF64(a).sub(SoftF64(b)).0, a - b);
}
}
}
#[test]
fn f64_mul() {
for a in RANGE {
let a = a as f64 * F64_FACTOR;
for b in RANGE {
let b = b as f64 * F64_FACTOR;
assert_eq!(SoftF64(a).mul(SoftF64(b)).0, a * b);
}
}
}
#[test]
fn f64_div() {
for a in RANGE {
let a = a as f64 * F64_FACTOR;
for b in RANGE {
let b = b as f64 * F64_FACTOR;
let x = SoftF64(a).div(SoftF64(b)).0;
let y = a / b;
assert!(x == y || x.is_nan() && y.is_nan())
}
}
}
}
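
As a usage illustration (not part of the vendored file), a minimal sketch of the const-context use the crate description above is aiming at, assuming `const_soft_float` is available as a dependency: building a small lookup table at compile time with `SoftF32` arithmetic on stable. The `scale` helper is hypothetical and only for illustration.

```
use const_soft_float::soft_f32::SoftF32;

// Hypothetical helper: halve and offset a value in a const context using
// the crate's const methods.
const fn scale(x: f32) -> f32 {
    SoftF32(x).mul(SoftF32(0.5)).add(SoftF32(1.0)).to_f32()
}

const TABLE: [f32; 4] = [scale(0.0), scale(1.0), scale(2.0), scale(3.0)];

fn main() {
    assert_eq!(TABLE, [1.0, 1.5, 2.0, 2.5]);
}
```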


@@ -0,0 +1,193 @@
use crate::soft_f32::SoftF32;
type F = SoftF32;
type FInt = u32;
pub(crate) const fn add(a: F, b: F) -> F {
let one: FInt = 1;
let zero: FInt = 0;
let bits = F::BITS as FInt;
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
let mut a_rep = a.repr();
let mut b_rep = b.repr();
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// Detect if a or b is zero, infinity, or NaN.
if a_abs.wrapping_sub(one) >= inf_rep - one || b_abs.wrapping_sub(one) >= inf_rep - one {
// NaN + anything = qNaN
if a_abs > inf_rep {
return F::from_repr(a_abs | quiet_bit);
}
// anything + NaN = qNaN
if b_abs > inf_rep {
return F::from_repr(b_abs | quiet_bit);
}
if a_abs == inf_rep {
// +/-infinity + -/+infinity = qNaN
if (a.repr() ^ b.repr()) == sign_bit {
return F::from_repr(qnan_rep);
} else {
// +/-infinity + anything remaining = +/- infinity
return a;
}
}
// anything remaining + +/-infinity = +/-infinity
if b_abs == inf_rep {
return b;
}
// zero + anything = anything
if a_abs == 0 {
// but we need to get the sign right for zero + zero
if b_abs == 0 {
return F::from_repr(a.repr() & b.repr());
} else {
return b;
}
}
// anything + zero = anything
if b_abs == 0 {
return a;
}
}
// Swap a and b if necessary so that a has the larger absolute value.
if b_abs > a_abs {
// Don't use mem::swap because it may generate references to memcpy in unoptimized code.
let tmp = a_rep;
a_rep = b_rep;
b_rep = tmp;
}
// Extract the exponent and significand from the (possibly swapped) a and b.
let mut a_exponent: i32 = ((a_rep & exponent_mask) >> significand_bits) as _;
let mut b_exponent: i32 = ((b_rep & exponent_mask) >> significand_bits) as _;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
// normalize any denormals, and adjust the exponent accordingly.
if a_exponent == 0 {
let (exponent, significand) = F::normalize(a_significand);
a_exponent = exponent;
a_significand = significand;
}
if b_exponent == 0 {
let (exponent, significand) = F::normalize(b_significand);
b_exponent = exponent;
b_significand = significand;
}
// The sign of the result is the sign of the larger operand, a. If they
// have opposite signs, we are performing a subtraction; otherwise addition.
let result_sign = a_rep & sign_bit;
let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero;
// Shift the significands to give us round, guard and sticky, and or in the
// implicit significand bit. (If we fell through from the denormal path it
// was already set by normalize(), but setting it twice won't hurt
// anything.)
a_significand = (a_significand | implicit_bit) << 3;
b_significand = (b_significand | implicit_bit) << 3;
// Shift the significand of b by the difference in exponents, with a sticky
// bottom bit to get rounding correct.
let align = a_exponent.wrapping_sub(b_exponent) as _;
if align != 0 {
if align < bits {
let sticky = (b_significand << bits.wrapping_sub(align) != 0) as FInt;
b_significand = (b_significand >> align) | sticky;
} else {
b_significand = one; // sticky; b is known to be non-zero.
}
}
if subtraction {
a_significand = a_significand.wrapping_sub(b_significand);
// If a == -b, return +zero.
if a_significand == 0 {
return F::from_repr(0);
}
// If partial cancellation occurred, we need to left-shift the result
// and adjust the exponent:
if a_significand < implicit_bit << 3 {
let shift =
a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32;
a_significand <<= shift;
a_exponent -= shift;
}
} else {
// addition
a_significand += b_significand;
// If the addition carried up, we need to right-shift the result and
// adjust the exponent:
if a_significand & implicit_bit << 4 != 0 {
let sticky = (a_significand & one != 0) as FInt;
a_significand = a_significand >> 1 | sticky;
a_exponent += 1;
}
}
// If we have overflowed the type, return +/- infinity:
if a_exponent >= max_exponent as i32 {
return F::from_repr(inf_rep | result_sign);
}
if a_exponent <= 0 {
// Result is denormal before rounding; the exponent is zero and we
// need to shift the significand.
let shift = (1 - a_exponent) as _;
let sticky = ((a_significand << bits.wrapping_sub(shift)) != 0) as FInt;
a_significand = a_significand >> shift | sticky;
a_exponent = 0;
}
// Low three bits are round, guard, and sticky.
let a_significand_i32: i32 = a_significand as _;
let round_guard_sticky: i32 = a_significand_i32 & 0x7;
// Shift the significand into place, and mask off the implicit bit.
let mut result = a_significand >> 3 & significand_mask;
// Insert the exponent and sign.
result |= (a_exponent as FInt) << significand_bits;
result |= result_sign;
// Final rounding. The result may overflow to infinity, but that is the
// correct result in that case.
if round_guard_sticky > 0x4 {
result += one;
}
if round_guard_sticky == 0x4 {
result += result & one;
}
F::from_repr(result)
}
#[cfg(test)]
mod test {
use crate::soft_f32::SoftF32;
#[test]
fn sanity_check() {
assert_eq!(SoftF32(1.0).add(SoftF32(1.0)).0, 2.0)
}
}
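
The last few lines of `add` round to nearest with ties to even, treating the low three bits of the extended significand as round/guard/sticky. A standalone sketch of just that decision (illustrative, not the vendored code):

```
// Round a significand given its three round/guard/sticky bits.
fn round_rgs(mut result: u32, rgs: u32) -> u32 {
    if rgs > 0b100 {
        result += 1; // more than half an ulp: round up
    } else if rgs == 0b100 {
        result += result & 1; // exactly half: round to even
    }
    result
}

fn main() {
    assert_eq!(round_rgs(10, 0b101), 11); // above halfway
    assert_eq!(round_rgs(10, 0b100), 10); // tie, already even
    assert_eq!(round_rgs(11, 0b100), 12); // tie, round up to even
    assert_eq!(round_rgs(10, 0b011), 10); // below halfway
}
```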


@@ -0,0 +1,63 @@
use crate::soft_f32::SoftF32;
use core::cmp::Ordering;
type F = SoftF32;
type FInt = u32;
type FSignedInt = i32;
const UNORDERED: Option<Ordering> = None;
const EQUAL: Option<Ordering> = Some(Ordering::Equal);
const GREATER: Option<Ordering> = Some(Ordering::Greater);
const LESS: Option<Ordering> = Some(Ordering::Less);
pub(crate) const fn cmp(a: F, b: F) -> Option<Ordering> {
let one: FInt = 1;
let zero: FInt = 0;
let szero: FSignedInt = 0;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let a_rep = a.repr();
let b_rep = b.repr();
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// If either a or b is NaN, they are unordered.
if a_abs > inf_rep || b_abs > inf_rep {
return UNORDERED;
}
// If a and b are both zeros, they are equal.
if a_abs | b_abs == zero {
return EQUAL;
}
let a_srep = a.signed_repr();
let b_srep = b.signed_repr();
// If at least one of a and b is positive, we get the same result comparing
// a and b as signed integers as we would with a floating-point compare.
if a_srep & b_srep >= szero {
if a_srep < b_srep {
LESS
} else if a_srep == b_srep {
EQUAL
} else {
GREATER
}
// Otherwise, both are negative, so we need to flip the sense of the
// comparison to get the correct result. (This assumes a twos- or ones-
// complement integer representation; if integers are represented in a
// sign-magnitude representation, then this flip is incorrect).
} else if a_srep > b_srep {
LESS
} else if a_srep == b_srep {
EQUAL
} else {
GREATER
}
}
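
The comparison trick above can be checked directly with native floats (illustrative sketch, not part of the vendored file): for finite non-negative values, ordering the IEEE 754 bit patterns as signed integers matches the floating-point ordering, and for two negative values the integer ordering is reversed.

```
fn main() {
    // Both non-negative: integer ordering of the bit patterns matches.
    let (a, b) = (1.5_f32, 2.25_f32);
    assert_eq!(a < b, (a.to_bits() as i32) < (b.to_bits() as i32));

    // Both negative: the integer ordering is reversed, which is why cmp()
    // flips the sense of the comparison in that branch.
    let (c, d) = (-2.25_f32, -1.5_f32);
    assert_eq!(c < d, (c.to_bits() as i32) > (d.to_bits() as i32));
}
```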


@@ -0,0 +1,84 @@
use crate::soft_f32::SoftF32;
type F = SoftF32;
impl const From<f32> for F {
fn from(value: f32) -> Self {
F::from_f32(value)
}
}
impl const PartialEq<Self> for F {
fn eq(&self, other: &Self) -> bool {
match self.cmp(*other) {
Some(core::cmp::Ordering::Equal) => true,
_ => false,
}
}
}
impl const PartialOrd for F {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
self.cmp(*other)
}
}
impl const core::ops::Add for F {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
F::add(self, rhs)
}
}
impl const core::ops::Sub for F {
type Output = Self;
fn sub(self, rhs: Self) -> Self::Output {
F::sub(self, rhs)
}
}
impl const core::ops::Mul for F {
type Output = Self;
fn mul(self, rhs: Self) -> Self::Output {
F::mul(self, rhs)
}
}
impl const core::ops::Div for F {
type Output = Self;
fn div(self, rhs: Self) -> Self::Output {
F::div(self, rhs)
}
}
#[cfg(feature = "const_mut_refs")]
impl const core::ops::AddAssign for F {
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
#[cfg(not(feature = "const_mut_refs"))]
impl core::ops::AddAssign for F {
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
#[cfg(feature = "const_mut_refs")]
impl const core::ops::SubAssign for F {
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}
#[cfg(not(feature = "const_mut_refs"))]
impl core::ops::SubAssign for F {
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}


@@ -0,0 +1,23 @@
use super::SoftF32;
/// Sign of Y, magnitude of X (SoftF32)
///
/// Constructs a number with the magnitude (absolute value) of its
/// first argument, `x`, and the sign of its second argument, `y`.
pub(crate) const fn copysign(x: SoftF32, y: SoftF32) -> SoftF32 {
let mut ux = x.to_bits();
let uy = y.to_bits();
ux &= 0x7fffffff;
ux |= uy & 0x80000000;
SoftF32::from_bits(ux)
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn sanity_check() {
assert_eq!(SoftF32(1.0).copysign(SoftF32(-0.0)).0, -1.0)
}
}
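
The same mask arithmetic can be spelled out with native `f32` bit operations (illustrative only): clear the sign bit of `x`, then OR in the sign bit of `y`.

```
fn main() {
    let (x, y) = (3.5_f32, -0.0_f32);
    let bits = (x.to_bits() & 0x7fff_ffff) | (y.to_bits() & 0x8000_0000);
    assert_eq!(f32::from_bits(bits), -3.5);
}
```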


@@ -0,0 +1,96 @@
/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
use core::f64::consts::FRAC_PI_2;
use crate::soft_f64::SoftF64;
use super::{
helpers::{k_cosf, k_sinf, rem_pio2f},
SoftF32,
};
/* Small multiples of pi/2 rounded to double precision. */
const C1_PIO2: SoftF64 = SoftF64(1.).mul(SoftF64(FRAC_PI_2)); /* 0x3FF921FB, 0x54442D18 */
const C2_PIO2: SoftF64 = SoftF64(2.).mul(SoftF64(FRAC_PI_2)); /* 0x400921FB, 0x54442D18 */
const C3_PIO2: SoftF64 = SoftF64(3.).mul(SoftF64(FRAC_PI_2)); /* 0x4012D97C, 0x7F3321D2 */
const C4_PIO2: SoftF64 = SoftF64(4.).mul(SoftF64(FRAC_PI_2)); /* 0x401921FB, 0x54442D18 */
pub const fn cos(x: SoftF32) -> SoftF32 {
let x64 = SoftF64(x.0 as f64);
let x1p120 = SoftF32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
let mut ix = x.to_bits();
let sign = (ix >> 31) != 0;
ix &= 0x7fffffff;
if ix <= 0x3f490fda {
/* |x| ~<= pi/4 */
if ix < 0x39800000 {
/* |x| < 2**-12 */
/* raise inexact if x != 0 */
let _ = x.add(x1p120);
return SoftF32(1.0);
}
return k_cosf(x64);
}
if ix <= 0x407b53d1 {
/* |x| ~<= 5*pi/4 */
if ix > 0x4016cbe3 {
/* |x| ~> 3*pi/4 */
return k_cosf(if sign {
x64.add(C2_PIO2)
} else {
x64.sub(C2_PIO2)
})
.neg();
} else if sign {
return k_sinf(x64.add(C1_PIO2));
} else {
return k_sinf(C1_PIO2.sub(x64));
}
}
if ix <= 0x40e231d5 {
/* |x| ~<= 9*pi/4 */
if ix > 0x40afeddf {
/* |x| ~> 7*pi/4 */
return k_cosf(if sign {
x64.add(C4_PIO2)
} else {
x64.sub(C4_PIO2)
});
} else if sign {
return k_sinf(x64.neg().sub(C3_PIO2));
} else {
return k_sinf(x64.sub(C3_PIO2));
}
}
/* cos(Inf or NaN) is NaN */
if ix >= 0x7f800000 {
return x.sub(x);
}
/* general argument reduction needed */
let (n, y) = rem_pio2f(x);
match n & 3 {
0 => k_cosf(y),
1 => k_sinf(y.neg()),
2 => k_cosf(y).neg(),
_ => k_sinf(y),
}
}


@@ -0,0 +1,444 @@
use crate::soft_f32::{u32_widen_mul, SoftF32};
type F = SoftF32;
type FInt = u32;
pub(crate) const fn div(a: F, b: F) -> F {
const NUMBER_OF_HALF_ITERATIONS: usize = 0;
const NUMBER_OF_FULL_ITERATIONS: usize = 3;
const USE_NATIVE_FULL_ITERATIONS: bool = true;
let one = 1;
let zero = 0;
let hw = F::BITS / 2;
let lo_mask = u32::MAX >> hw;
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let exponent_bias = F::EXPONENT_BIAS;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
#[inline(always)]
const fn negate_u32(a: u32) -> u32 {
(<i32>::wrapping_neg(a as i32)) as u32
}
let a_rep = a.repr();
let b_rep = b.repr();
let a_exponent = (a_rep >> significand_bits) & max_exponent;
let b_exponent = (b_rep >> significand_bits) & max_exponent;
let quotient_sign = (a_rep ^ b_rep) & sign_bit;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
let mut scale = 0;
// Detect if a or b is zero, denormal, infinity, or NaN.
if a_exponent.wrapping_sub(one) >= (max_exponent - 1)
|| b_exponent.wrapping_sub(one) >= (max_exponent - 1)
{
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// NaN / anything = qNaN
if a_abs > inf_rep {
return F::from_repr(a_rep | quiet_bit);
}
// anything / NaN = qNaN
if b_abs > inf_rep {
return F::from_repr(b_rep | quiet_bit);
}
if a_abs == inf_rep {
if b_abs == inf_rep {
// infinity / infinity = NaN
return F::from_repr(qnan_rep);
} else {
// infinity / anything else = +/- infinity
return F::from_repr(a_abs | quotient_sign);
}
}
// anything else / infinity = +/- 0
if b_abs == inf_rep {
return F::from_repr(quotient_sign);
}
if a_abs == zero {
if b_abs == zero {
// zero / zero = NaN
return F::from_repr(qnan_rep);
} else {
// zero / anything else = +/- zero
return F::from_repr(quotient_sign);
}
}
// anything else / zero = +/- infinity
if b_abs == zero {
return F::from_repr(inf_rep | quotient_sign);
}
// one or both of a or b is denormal, the other (if applicable) is a
// normal number. Renormalize one or both of a and b, and set scale to
// include the necessary exponent adjustment.
if a_abs < implicit_bit {
let (exponent, significand) = F::normalize(a_significand);
scale += exponent;
a_significand = significand;
}
if b_abs < implicit_bit {
let (exponent, significand) = F::normalize(b_significand);
scale -= exponent;
b_significand = significand;
}
}
// Set the implicit significand bit. If we fell through from the
// denormal path it was already set by normalize( ), but setting it twice
// won't hurt anything.
a_significand |= implicit_bit;
b_significand |= implicit_bit;
let written_exponent: i32 = (a_exponent
.wrapping_sub(b_exponent)
.wrapping_add(scale as u32))
.wrapping_add(exponent_bias) as i32;
let b_uq1 = b_significand << (F::BITS - significand_bits - 1);
// Align the significand of b as a UQ1.(n-1) fixed-point number in the range
// [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax
// polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2.
// The max error for this approximation is achieved at endpoints, so
// abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289...,
// which is about 4.5 bits.
// The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571...
// Then, refine the reciprocal estimate using a quadratically converging
// Newton-Raphson iteration:
// x_{n+1} = x_n * (2 - x_n * b)
//
// Let b be the original divisor considered "in infinite precision" and
// obtained from IEEE754 representation of function argument (with the
// implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in
// UQ1.(W-1).
//
// Let b_hw be an infinitely precise number obtained from the highest (HW-1)
// bits of divisor significand (with the implicit bit set). Corresponds to
// half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated**
// version of b_UQ1.
//
// Let e_n := x_n - 1/b_hw
// E_n := x_n - 1/b
// abs(E_n) <= abs(e_n) + (1/b_hw - 1/b)
// = abs(e_n) + (b - b_hw) / (b*b_hw)
// <= abs(e_n) + 2 * 2^-HW
// rep_t-sized iterations may be slower than the corresponding half-width
// variant depending on the hardware and whether single/double/quad precision
// is selected.
// NB: Using half-width iterations increases computation errors due to
// rounding, so error estimations have to be computed taking the selected
// mode into account!
#[allow(clippy::absurd_extreme_comparisons)]
let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
// Starting with (n-1) half-width iterations
let b_uq1_hw: u16 = (b_significand >> (significand_bits + 1 - hw)) as u16;
// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
// with W0 being either 16 or 32 and W0 <= HW.
// That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
// HW is at least 32. Shifting into the highest bits if needed.
let c_hw = (0x7504_u32 as u16).wrapping_shl(hw.wrapping_sub(32));
// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
// so x0 fits to UQ0.HW without wrapping.
let x_uq0_hw: u16 = {
let mut x_uq0_hw: u16 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
// An e_0 error is comprised of errors due to
// * x0 being an inherently imprecise first approximation of 1/b_hw
// * C_hw being some (irrational) number **truncated** to W0 bits
// Please note that e_0 is calculated against the infinitely precise
// reciprocal of b_hw (that is, **truncated** version of b).
//
// e_0 <= 3/4 - 1/sqrt(2) + 2^-W0
// By construction, 1 <= b < 2
// f(x) = x * (2 - b*x) = 2*x - b*x^2
// f'(x) = 2 * (1 - b*x)
//
// On the [0, 1] interval, f(0) = 0,
// then it increases until f(1/b) = 1 / b, maximum on (0, 1),
// then it decreases to f(1) = 2 - b
//
// Let g(x) = x - f(x) = b*x^2 - x.
// On (0, 1/b), g(x) < 0 <=> f(x) > x
// On (1/b, 1], g(x) > 0 <=> f(x) < x
//
// For half-width iterations, b_hw is used instead of b.
#[allow(clippy::reversed_empty_ranges)]
let mut idx = 0;
while idx < NUMBER_OF_HALF_ITERATIONS {
// corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp
// of corr_UQ1_hw.
// "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1).
// On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
// expected to be strictly positive because b_UQ1_hw has its highest bit set
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
let corr_uq1_hw: u16 = 0_u32
.wrapping_sub((x_uq0_hw as u32).wrapping_mul(b_uq1_hw as u32) >> hw)
as u16;
// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
// obtaining an UQ1.(HW-1) number and proving its highest bit could be
// considered to be 0 to be able to represent it in UQ0.HW.
// From the above analysis of f(x), if corr_UQ1_hw would be represented
// without any intermediate loss of precision (that is, in twice_rep_t)
// x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly
// less otherwise. On the other hand, to obtain [1.]000..., one have to pass
// 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due
// to 1.0 being not representable as UQ0.HW).
// The fact corr_UQ1_hw was virtually round up (due to result of
// multiplication being **first** truncated, then negated - to improve
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
x_uq0_hw = ((x_uq0_hw as u32).wrapping_mul(corr_uq1_hw as u32) >> (hw - 1)) as u16;
// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
// representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
// any number of iterations, so just subtract 2 from the reciprocal
// approximation after last iteration.
// In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW:
// corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1
// = 1 - e_n * b_hw + 2*eps1
// x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2
// = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2
// = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2
// e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2
// = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw
// \------ >0 -------/ \-- >0 ---/
// abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U)
idx += 1;
}
// For initial half-width iterations, U = 2^-HW
// Let abs(e_n) <= u_n * U,
// then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U)
// u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2)
// Account for possible overflow (see above). For an overflow to occur for the
// first time, for "ideal" corr_UQ1_hw (that is, without intermediate
// truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum
// value representable in UQ0.HW or less by 1. This means that 1/b_hw has to
// be at least that value (see g(x) above), so it is safe to decrement just
// once after the final iteration. On the other hand, an effective value of
// divisor changes after this point (from b_hw to b), so adjust here.
x_uq0_hw.wrapping_sub(1_u16)
};
// Error estimations for full-precision iterations are calculated just
// as above, but with U := 2^-W and taking extra decrementing into account.
// We need at least one such iteration.
// Simulating operations on a twice_rep_t to perform a single final full-width
// iteration. Using ad-hoc multiplication implementations to take advantage
// of particular structure of operands.
let blo: u32 = b_uq1 & lo_mask;
// x_UQ0 = x_UQ0_hw * 2^HW - 1
// x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
//
// <--- higher half ---><--- lower half --->
// [x_UQ0_hw * b_UQ1_hw]
// + [ x_UQ0_hw * blo ]
// - [ b_UQ1 ]
// = [ result ][.... discarded ...]
let corr_uq1 = negate_u32(
(x_uq0_hw as u32) * (b_uq1_hw as u32) + (((x_uq0_hw as u32) * (blo)) >> hw) - 1,
); // account for *possible* carry
let lo_corr = corr_uq1 & lo_mask;
let hi_corr = corr_uq1 >> hw;
// x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
let mut x_uq0 = (((x_uq0_hw as u32) * hi_corr) << 1)
.wrapping_add(((x_uq0_hw as u32) * lo_corr) >> (hw - 1))
.wrapping_sub(2);
// 1 to account for the highest bit of corr_UQ1 can be 1
// 1 to account for possible carry
// Just like the case of half-width iterations but with possibility
// of overflowing by one extra Ulp of x_UQ0.
x_uq0 -= one;
// ... and then traditional fixup by 2 should work
// On error estimation:
// abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW
// + (2^-HW + 2^-W))
// abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW
// Then like for the half-width iterations:
// With 0 <= eps1, eps2 < 2^-W
// E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b
// abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ]
// abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ]
x_uq0
} else {
// C is (3/4 + 1/sqrt(2)) - 1 truncated to 32 fractional bits as UQ0.n
let c = 0x7504F333_u32 << (F::BITS - 32);
let x_uq0 = c.wrapping_sub(b_uq1);
// E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-32
x_uq0
};
let mut x_uq0 = if USE_NATIVE_FULL_ITERATIONS {
let mut idx = 0;
while idx < NUMBER_OF_FULL_ITERATIONS {
let corr_uq1: u32 = 0_u64
.wrapping_sub(((x_uq0 as u64).wrapping_mul(b_uq1 as u64)).wrapping_shr(F::BITS))
as u32;
x_uq0 = (((x_uq0 as u64) * (corr_uq1 as u64)) >> (F::BITS - 1)) as u32;
idx += 1;
}
x_uq0
} else {
// not using native full iterations
x_uq0
};
// Finally, account for possible overflow, as explained above.
x_uq0 = x_uq0.wrapping_sub(2);
// u_n for different precisions (with N-1 half-width iterations):
// W0 is the precision of C
// u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW
// Estimated with bc:
// define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; }
// define half2(un) { return 2.0 * un / 2.0^hw + 2.0; }
// define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; }
// define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; }
// | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1)
// u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797
// u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440
// u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317
// u_3 | < 7.31 | | < 7.31 | < 27054456580
// u_4 | | | | < 80.4
// Final (U_N) | same as u_3 | < 72 | < 218 | < 13920
// Add 2 to U_N due to final decrement.
let reciprocal_precision: FInt = 10;
// Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W
let x_uq0 = x_uq0 - reciprocal_precision;
// Now 1/b - (2*P) * 2^-W < x < 1/b
// FIXME Is x_UQ0 still >= 0.5?
let mut quotient: FInt = u32_widen_mul(x_uq0, a_significand << 1).1;
// Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).
// quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
// adjust it to be in [1.0, 2.0) as UQ1.SB.
let (mut residual, written_exponent) = if quotient < (implicit_bit << 1) {
// Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB,
// effectively doubling its value as well as its error estimation.
let residual_lo = (a_significand << (significand_bits + 1))
.wrapping_sub(quotient.wrapping_mul(b_significand));
a_significand <<= 1;
(residual_lo, written_exponent.wrapping_sub(1))
} else {
// Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it
// to UQ1.SB by right shifting by 1. Least significant bit is omitted.
quotient >>= 1;
let residual_lo =
(a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand));
(residual_lo, written_exponent)
};
//drop mutability
let quotient = quotient;
// NB: residualLo is calculated above for the normal result case.
// It is re-computed on denormal path that is expected to be not so
// performance-sensitive.
// Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB
// Each NextAfter() increments the floating point value by at least 2^-SB
// (more, if exponent was incremented).
// Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint):
// q
// | | * | | | | |
// <---> 2^t
// | | | | | * | |
// q
// To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB.
// (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB
// (8*P) * 2^-W < 0.5 * 2^-SB
// P < 2^(W-4-SB)
// Generally, for at most R NextAfter() to be enough,
// P < (2*R - 1) * 2^(W-4-SB)
// For f32 (0+3): 10 < 32 (OK)
// For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required
// For f64: 220 < 256 (OK)
// For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required)
// If we have overflowed the exponent, return infinity
if written_exponent >= max_exponent as i32 {
return F::from_repr(inf_rep | quotient_sign);
}
// Now, quotient <= the correctly-rounded result
// and may need taking NextAfter() up to 3 times (see error estimates above)
// r = a - b * q
let abs_result = if written_exponent > 0 {
let mut ret = quotient & significand_mask;
ret |= ((written_exponent as u32) << significand_bits) as u32;
residual <<= 1;
ret
} else {
if (significand_bits as i32 + written_exponent) < 0 {
return F::from_repr(quotient_sign);
}
let ret = quotient.wrapping_shr(negate_u32(written_exponent as u32) + 1);
residual = (a_significand
.wrapping_shl(significand_bits.wrapping_add(written_exponent as u32))
as u32)
.wrapping_sub((ret.wrapping_mul(b_significand)) << 1);
ret
};
// Round
let abs_result = {
residual += abs_result & one; // tie to even
// The above line conditionally turns the below LT comparison into LTE
if residual > b_significand {
abs_result + one
} else {
abs_result
}
};
F::from_repr(abs_result | quotient_sign)
}
#[cfg(test)]
mod test {
use crate::soft_f32::SoftF32;
#[test]
fn sanity_check() {
assert_eq!(SoftF32(10.0).div(SoftF32(5.0)).0, 2.0)
}
}
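
The long comment block above describes a Newton-Raphson reciprocal refinement, x_{n+1} = x_n * (2 - b * x_n), carried out in fixed point. A plain-`f64` sketch of the same iteration (illustrative, not the vendored fixed-point code) makes the quadratic convergence visible:

```
fn main() {
    let b = 1.7_f64; // divisor scaled into [1.0, 2.0), as in div()
    // Initial estimate x0 = 3/4 + 1/sqrt(2) - b/2, good to about 4.5 bits.
    let mut x = 0.75 + 1.0 / 2.0_f64.sqrt() - b / 2.0;
    for _ in 0..4 {
        // Each step roughly doubles the number of correct bits.
        x = x * (2.0 - b * x);
    }
    assert!((x - 1.0 / b).abs() < 1e-12);
}
```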


@@ -0,0 +1,57 @@
use super::SoftF32;
/// Floor (SoftF32)
///
/// Finds the nearest integer less than or equal to `x`.
pub const fn floor(x: SoftF32) -> SoftF32 {
let mut ui = x.to_bits();
let e = (((ui >> 23) as i32) & 0xff) - 0x7f;
if e >= 23 {
return x;
}
if e >= 0 {
let m: u32 = 0x007fffff >> e;
if (ui & m) == 0 {
return x;
}
// force_eval!(x + SoftF32::from_bits(0x7b800000));
if ui >> 31 != 0 {
ui += m;
}
ui &= !m;
} else {
// force_eval!(x + SoftF32::from_bits(0x7b800000));
if ui >> 31 == 0 {
ui = 0;
} else if ui << 1 != 0 {
return SoftF32(-1.0);
}
}
SoftF32::from_bits(ui)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn sanity_check() {
assert_eq!(floor(SoftF32(0.5)).0, 0.0);
assert_eq!(floor(SoftF32(1.1)).0, 1.0);
assert_eq!(floor(SoftF32(2.9)).0, 2.0);
}
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor
#[test]
fn spec_tests() {
// Not Asserted: that the current rounding mode has no effect.
assert!(floor(SoftF32(f32::NAN)).0.is_nan());
for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY]
.iter()
.copied()
{
assert_eq!(SoftF32(f).floor().0, f);
}
}
}


@@ -0,0 +1,31 @@
/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Debugged and optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
use crate::{soft_f32::SoftF32, soft_f64::SoftF64};
/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */
const C0: SoftF64 = SoftF64(-0.499999997251031003120); /* -0x1ffffffd0c5e81.0p-54 */
const C1: SoftF64 = SoftF64(0.0416666233237390631894); /* 0x155553e1053a42.0p-57 */
const C2: SoftF64 = SoftF64(-0.00138867637746099294692); /* -0x16c087e80f1e27.0p-62 */
const C3: SoftF64 = SoftF64(0.0000243904487962774090654); /* 0x199342e0ee5069.0p-68 */
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub(crate) const fn k_cosf(x: SoftF64) -> SoftF32 {
let z = x.mul(x);
let w = z.mul(z);
let r = C2.add(z.mul(C3));
SoftF32((((SoftF64(1.0).add(z.mul(C0))).add(w.mul(C1))).add((w.mul(z)).mul(r))).0 as f32)
}
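
A quick numerical check of the polynomial above (plain `f64`, illustrative only): evaluated in the same Horner-like order, it tracks `cos(x)` well inside the |x| <= pi/4 range that the error-bound comment refers to.

```
fn main() {
    const C0: f64 = -0.499999997251031003120;
    const C1: f64 = 0.0416666233237390631894;
    const C2: f64 = -0.00138867637746099294692;
    const C3: f64 = 0.0000243904487962774090654;
    let x = 0.3_f64;
    let z = x * x;
    let w = z * z;
    let r = C2 + z * C3;
    let approx = (1.0 + z * C0) + w * C1 + (w * z) * r;
    assert!((approx - x.cos()).abs() < 1e-9);
}
```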


@@ -0,0 +1,14 @@
use crate::{soft_f32::SoftF32, soft_f64::SoftF64};
/// https://github.com/rust-lang/libm/blob/4c8a973741c014b11ce7f1477693a3e5d4ef9609/src/math/k_sinf.rs
pub(crate) const fn k_sinf(x: SoftF64) -> SoftF32 {
const S1: SoftF64 = SoftF64(-0.166666666416265235595); /* -0x15555554cbac77.0p-55 */
const S2: SoftF64 = SoftF64(0.0083333293858894631756); /* 0x111110896efbb2.0p-59 */
const S3: SoftF64 = SoftF64(-0.000198393348360966317347); /* -0x1a00f9e2cae774.0p-65 */
const S4: SoftF64 = SoftF64(0.0000027183114939898219064); /* 0x16cd878c3b46a7.0p-71 */
let z = x.mul(x);
let w = z.mul(z);
let r = S3.add(z.mul(S4));
let s = z.mul(x);
SoftF32((x.add(s.mul(S1.add(z.mul(S2))))).add(s.mul(w).mul(r)).0 as f32)
}


@@ -0,0 +1,8 @@
mod k_cosf;
mod k_sinf;
mod rem_pio2f;
pub(crate) use k_cosf::k_cosf;
pub(crate) use k_sinf::k_sinf;
pub(crate) use rem_pio2f::rem_pio2f;


@@ -0,0 +1,65 @@
/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2f.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Debugged and optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
use crate::{
soft_f32::SoftF32,
soft_f64::{helpers::rem_pio2_large, SoftF64},
};
const TOINT: SoftF64 = SoftF64(1.5).div(SoftF64(f64::EPSILON));
/// 53 bits of 2/pi
const INV_PIO2: SoftF64 = SoftF64(6.36619772367581382433e-01); /* 0x3FE45F30, 0x6DC9C883 */
/// first 25 bits of pi/2
const PIO2_1: SoftF64 = SoftF64(1.57079631090164184570e+00); /* 0x3FF921FB, 0x50000000 */
/// pi/2 - pio2_1
const PIO2_1T: SoftF64 = SoftF64(1.58932547735281966916e-08); /* 0x3E5110b4, 0x611A6263 */
/// Return the remainder of x rem pi/2 in *y
///
/// use double precision for everything except passing x
/// use __rem_pio2_large() for large x
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub(crate) const fn rem_pio2f(x: SoftF32) -> (i32, SoftF64) {
let x64 = SoftF64(x.0 as f64);
let mut tx: [SoftF64; 1] = [SoftF64(0.0)];
let ty: [SoftF64; 1] = [SoftF64(0.0)];
let ix = x.to_bits() & 0x7fffffff;
/* 25+53 bit pi is good enough for medium size */
if ix < 0x4dc90fdb {
/* |x| ~< 2^28*(pi/2), medium size */
/* Use a specialized rint() to get fn. Assume round-to-nearest. */
let tmp = x64.mul(INV_PIO2).add(TOINT);
let f_n = tmp.sub(TOINT);
return (f_n.0 as i32, x64.sub(f_n.mul(PIO2_1)).sub(f_n.mul(PIO2_1T)));
}
if ix >= 0x7f800000 {
/* x is inf or NaN */
return (0, x64.sub(x64));
}
/* scale x into [2^23, 2^24-1] */
let sign = (x.to_bits() >> 31) != 0;
let e0 = ((ix >> 23) - (0x7f + 23)) as i32; /* e0 = ilogb(|x|)-23, positive */
tx[0] = SoftF64(SoftF32::from_bits(ix - (e0 << 23) as u32).0 as f64);
let (n, ty) = rem_pio2_large(&tx, &ty, e0, 0);
if sign {
return (-n, ty[0].neg());
}
(n, ty[0])
}
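
The `TOINT` constant drives a classic round-to-nearest trick: adding `1.5 / EPSILON` (1.5 * 2^52) pushes the value into a range where the `f64` spacing is exactly 1.0, so the fractional bits are rounded away and the subtraction leaves the nearest integer. A plain-`f64` sketch (illustrative; like the vendored comment, it assumes the default round-to-nearest mode):

```
fn main() {
    let toint = 1.5_f64 / f64::EPSILON; // 1.5 * 2^52
    for (x, want) in [(2.3_f64, 2.0), (2.7, 3.0), (-1.4, -1.0), (-1.6, -2.0)] {
        assert_eq!((x + toint) - toint, want);
    }
}
```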


@@ -0,0 +1,68 @@
use crate::soft_f32::SoftF32;
type F = SoftF32;
impl From<f32> for F {
fn from(value: f32) -> Self {
F::from_f32(value)
}
}
impl PartialEq<Self> for F {
fn eq(&self, other: &Self) -> bool {
match self.cmp(*other) {
Some(core::cmp::Ordering::Equal) => true,
_ => false,
}
}
}
impl PartialOrd for F {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
self.cmp(*other)
}
}
impl core::ops::Add for F {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
F::add(self, rhs)
}
}
impl core::ops::Sub for F {
type Output = Self;
fn sub(self, rhs: Self) -> Self::Output {
F::sub(self, rhs)
}
}
impl core::ops::Mul for F {
type Output = Self;
fn mul(self, rhs: Self) -> Self::Output {
F::mul(self, rhs)
}
}
impl core::ops::Div for F {
type Output = Self;
fn div(self, rhs: Self) -> Self::Output {
F::div(self, rhs)
}
}
impl core::ops::AddAssign for F {
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
impl core::ops::SubAssign for F {
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}


@@ -0,0 +1,174 @@
mod helpers;
pub mod add;
pub mod cmp;
pub mod copysign;
pub mod cos;
pub mod div;
pub mod floor;
pub mod mul;
pub mod pow;
pub mod round;
pub mod sin;
pub mod sqrt;
pub mod trunc;
#[cfg(feature = "const_trait_impl")]
pub mod const_impl_trait;
#[cfg(feature = "const_trait_impl")]
pub use const_impl_trait as impl_trait;
#[cfg(not(feature = "const_trait_impl"))]
pub mod impl_trait;
#[derive(Default, Copy, Clone)]
#[repr(transparent)]
pub struct SoftF32(pub f32);
impl SoftF32 {
pub const fn from_f32(a: f32) -> Self {
Self(a)
}
pub const fn to_f32(self) -> f32 {
self.0
}
pub const fn from_bits(a: u32) -> Self {
Self(unsafe { core::mem::transmute(a) })
}
pub const fn to_bits(self) -> u32 {
unsafe { core::mem::transmute(self.0) }
}
pub const fn add(self, rhs: Self) -> Self {
add::add(self, rhs)
}
pub const fn mul(self, rhs: Self) -> Self {
mul::mul(self, rhs)
}
pub const fn div(self, rhs: Self) -> Self {
div::div(self, rhs)
}
pub const fn cmp(self, rhs: Self) -> Option<core::cmp::Ordering> {
cmp::cmp(self, rhs)
}
pub const fn neg(self) -> Self {
Self::from_repr(self.repr() ^ Self::SIGN_MASK)
}
pub const fn sub(self, rhs: Self) -> Self {
self.add(rhs.neg())
}
pub const fn sqrt(self) -> Self {
sqrt::sqrtf(self)
}
pub const fn powi(self, n: i32) -> Self {
pow::pow(self, n)
}
pub const fn copysign(self, other: Self) -> Self {
copysign::copysign(self, other)
}
pub const fn trunc(self) -> Self {
trunc::trunc(self)
}
pub const fn round(self) -> Self {
round::round(self)
}
pub const fn floor(self) -> Self {
floor::floor(self)
}
pub const fn sin(self) -> Self {
sin::sinf(self)
}
pub const fn cos(self) -> Self {
cos::cos(self)
}
}
type SelfInt = u32;
type SelfSignedInt = i32;
type SelfExpInt = i16;
#[allow(unused)]
impl SoftF32 {
const ZERO: Self = Self(0.0);
const ONE: Self = Self(1.0);
const BITS: u32 = 32;
const SIGNIFICAND_BITS: u32 = 23;
const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1;
const SIGN_MASK: SelfInt = 1 << (Self::BITS - 1);
const SIGNIFICAND_MASK: SelfInt = (1 << Self::SIGNIFICAND_BITS) - 1;
const IMPLICIT_BIT: SelfInt = 1 << Self::SIGNIFICAND_BITS;
const EXPONENT_MASK: SelfInt = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK);
const fn repr(self) -> SelfInt {
self.to_bits()
}
const fn signed_repr(self) -> SelfSignedInt {
self.to_bits() as SelfSignedInt
}
const fn sign(self) -> bool {
self.signed_repr() < 0
}
const fn exp(self) -> SelfExpInt {
((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as SelfExpInt
}
const fn frac(self) -> SelfInt {
self.to_bits() & Self::SIGNIFICAND_MASK
}
const fn imp_frac(self) -> SelfInt {
self.frac() | Self::IMPLICIT_BIT
}
const fn from_repr(a: SelfInt) -> Self {
Self::from_bits(a)
}
const fn from_parts(sign: bool, exponent: SelfInt, significand: SelfInt) -> Self {
Self::from_repr(
((sign as SelfInt) << (Self::BITS - 1))
| ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK)
| (significand & Self::SIGNIFICAND_MASK),
)
}
const fn normalize(significand: SelfInt) -> (i32, SelfInt) {
let shift = significand
.leading_zeros()
.wrapping_sub((1u32 << Self::SIGNIFICAND_BITS).leading_zeros());
(
1i32.wrapping_sub(shift as i32),
significand << shift as SelfInt,
)
}
const fn is_subnormal(self) -> bool {
(self.repr() & Self::EXPONENT_MASK) == 0
}
}
const fn u64_lo(x: u64) -> u32 {
x as u32
}
const fn u64_hi(x: u64) -> u32 {
(x >> 32) as u32
}
const fn u32_widen_mul(a: u32, b: u32) -> (u32, u32) {
let x = u64::wrapping_mul(a as _, b as _);
(u64_lo(x), u64_hi(x))
}
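
For a subnormal input, `normalize` shifts the significand until the implicit bit (bit 23) is set and reports how far the exponent must move to compensate. A standalone sketch of that computation (illustrative only):

```
fn main() {
    let significand: u32 = 0x0000_0400; // a subnormal payload, 2^10
    let shift = significand.leading_zeros() - (1u32 << 23).leading_zeros();
    let normalized = significand << shift;
    let exponent = 1i32 - shift as i32;
    assert_eq!(normalized, 1 << 23); // implicit bit is now set
    assert_eq!((exponent, shift), (-12, 13));
}
```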


@@ -0,0 +1,194 @@
use crate::soft_f32::{u32_widen_mul, SoftF32};
type F = SoftF32;
type FInt = u32;
const fn widen_mul(a: FInt, b: FInt) -> (FInt, FInt) {
u32_widen_mul(a, b)
}
pub(crate) const fn mul(a: F, b: F) -> F {
let one: FInt = 1;
let zero: FInt = 0;
let bits = F::BITS;
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let exponent_bias = F::EXPONENT_BIAS;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
let exponent_bits = F::EXPONENT_BITS;
let a_rep = a.repr();
let b_rep = b.repr();
let a_exponent = (a_rep >> significand_bits) & max_exponent as FInt;
let b_exponent = (b_rep >> significand_bits) & max_exponent as FInt;
let product_sign = (a_rep ^ b_rep) & sign_bit;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
let mut scale = 0;
// Detect if a or b is zero, denormal, infinity, or NaN.
if a_exponent.wrapping_sub(one) >= (max_exponent - 1) as FInt
|| b_exponent.wrapping_sub(one) >= (max_exponent - 1) as FInt
{
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// NaN + anything = qNaN
if a_abs > inf_rep {
return F::from_repr(a_rep | quiet_bit);
}
// anything + NaN = qNaN
if b_abs > inf_rep {
return F::from_repr(b_rep | quiet_bit);
}
if a_abs == inf_rep {
if b_abs != zero {
// infinity * non-zero = +/- infinity
return F::from_repr(a_abs | product_sign);
} else {
// infinity * zero = NaN
return F::from_repr(qnan_rep);
}
}
if b_abs == inf_rep {
if a_abs != zero {
// infinity * non-zero = +/- infinity
return F::from_repr(b_abs | product_sign);
} else {
// infinity * zero = NaN
return F::from_repr(qnan_rep);
}
}
// zero * anything = +/- zero
if a_abs == zero {
return F::from_repr(product_sign);
}
// anything * zero = +/- zero
if b_abs == zero {
return F::from_repr(product_sign);
}
// one or both of a or b is denormal, the other (if applicable) is a
// normal number. Renormalize one or both of a and b, and set scale to
// include the necessary exponent adjustment.
if a_abs < implicit_bit {
let (exponent, significand) = F::normalize(a_significand);
scale += exponent;
a_significand = significand;
}
if b_abs < implicit_bit {
let (exponent, significand) = F::normalize(b_significand);
scale += exponent;
b_significand = significand;
}
}
// Or in the implicit significand bit. (If we fell through from the
// denormal path it was already set by normalize( ), but setting it twice
// won't hurt anything.)
a_significand |= implicit_bit;
b_significand |= implicit_bit;
// Get the significand of a*b. Before multiplying the significands, shift
// one of them left to left-align it in the field. Thus, the product will
// have (exponentBits + 2) integral digits, all but two of which must be
// zero. Normalizing this result is just a conditional left-shift by one
// and bumping the exponent accordingly.
let (mut product_low, mut product_high) =
widen_mul(a_significand, b_significand << exponent_bits);
let a_exponent_i32: i32 = a_exponent as _;
let b_exponent_i32: i32 = b_exponent as _;
let mut product_exponent: i32 = a_exponent_i32
.wrapping_add(b_exponent_i32)
.wrapping_add(scale)
.wrapping_sub(exponent_bias as i32);
// Normalize the significand, adjust exponent if needed.
if (product_high & implicit_bit) != zero {
product_exponent = product_exponent.wrapping_add(1);
} else {
product_high = (product_high << 1) | (product_low >> (bits - 1));
product_low <<= 1;
}
// If we have overflowed the type, return +/- infinity.
if product_exponent >= max_exponent as i32 {
return F::from_repr(inf_rep | product_sign);
}
if product_exponent <= 0 {
// Result is denormal before rounding
//
// If the result is so small that it just underflows to zero, return
// a zero of the appropriate sign. Mathematically there is no need to
// handle this case separately, but we make it a special case to
// simplify the shift logic.
let shift = one.wrapping_sub(product_exponent as FInt) as u32;
if shift >= bits {
return F::from_repr(product_sign);
}
// Otherwise, shift the significand of the result so that the round
// bit is the high bit of productLo.
if shift < bits {
let sticky = product_low << (bits - shift);
product_low = product_high << (bits - shift) | product_low >> shift | sticky;
product_high >>= shift;
} else if shift < (2 * bits) {
let sticky = product_high << (2 * bits - shift) | product_low;
product_low = product_high >> (shift - bits) | sticky;
product_high = zero;
} else {
product_high = zero;
}
} else {
// Result is normal before rounding; insert the exponent.
product_high &= significand_mask;
product_high |= (product_exponent as FInt) << significand_bits;
}
// Insert the sign of the result:
product_high |= product_sign;
// Final rounding. The final result may overflow to infinity, or underflow
// to zero, but those are the correct results in those cases. We use the
// default IEEE-754 round-to-nearest, ties-to-even rounding mode.
if product_low > sign_bit {
product_high += one;
}
if product_low == sign_bit {
product_high += product_high & one;
}
F::from_repr(product_high)
}
#[cfg(test)]
mod test {
use crate::soft_f32::SoftF32;
#[test]
fn sanity_check() {
assert_eq!(SoftF32(2.0).mul(SoftF32(2.0)).0, 4.0)
}
}


@@ -0,0 +1,37 @@
use crate::abs_diff;
use crate::soft_f32::SoftF32;
type F = SoftF32;
pub(crate) const fn pow(a: F, b: i32) -> F {
let mut a = a;
let recip = b < 0;
let mut pow = abs_diff(b, 0);
let mut mul = F::ONE;
loop {
if (pow & 1) != 0 {
mul = mul.mul(a);
}
pow >>= 1;
if pow == 0 {
break;
}
a = a.mul(a);
}
if recip {
F::ONE.div(mul)
} else {
mul
}
}
#[cfg(test)]
mod test {
use crate::soft_f32::SoftF32;
#[test]
fn sanity_check() {
assert_eq!(SoftF32(2.0).powi(2).0, 4.0)
}
}
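
`pow` is square-and-multiply: scan the exponent's bits, folding the current power into the accumulator when a bit is set and squaring the base between bits, then take the reciprocal for negative exponents. A plain-`f32` sketch of the same loop (illustrative, not the SoftF32 path):

```
fn powi_sketch(mut a: f32, n: i32) -> f32 {
    let recip = n < 0;
    let mut pow = n.unsigned_abs();
    let mut acc = 1.0_f32;
    loop {
        if pow & 1 != 0 {
            acc *= a; // current bit set: fold this power of a in
        }
        pow >>= 1;
        if pow == 0 {
            break;
        }
        a *= a; // square the base for the next bit
    }
    if recip { 1.0 / acc } else { acc }
}

fn main() {
    assert_eq!(powi_sketch(2.0, 10), 1024.0);
    assert_eq!(powi_sketch(2.0, -2), 0.25);
}
```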


@@ -0,0 +1,31 @@
use super::SoftF32;
pub(crate) const fn round(x: SoftF32) -> SoftF32 {
SoftF32::trunc(x.add(SoftF32::copysign(
SoftF32(0.5).sub(SoftF32(0.25).mul(SoftF32(f32::EPSILON))),
x,
)))
}
#[cfg(test)]
mod tests {
use super::SoftF32;
#[test]
fn negative_zero() {
assert_eq!(
SoftF32::round(SoftF32(-0.0)).to_bits(),
SoftF32(-0.0).to_bits()
);
}
#[test]
fn sanity_check() {
assert_eq!((SoftF32(-1.0)).round().0, -1.0);
assert_eq!((SoftF32(2.8)).round().0, 3.0);
assert_eq!((SoftF32(-0.5)).round().0, -1.0);
assert_eq!((SoftF32(0.5)).round().0, 1.0);
assert_eq!((SoftF32(-1.5)).round().0, -2.0);
assert_eq!((SoftF32(1.5)).round().0, 2.0);
}
}
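
The constant added before `trunc` is `copysign(0.5 - 0.25 * EPSILON, x)` rather than a plain `0.5`: for the largest float just below 0.5, adding exactly 0.5 rounds up to 1.0 before `trunc` can see it, while the slightly smaller nudge keeps the sum below 1.0. A plain-`f32` sketch (illustrative only):

```
fn main() {
    let x = 0.5_f32 - f32::EPSILON / 4.0; // largest f32 below 0.5
    // Naive trunc(x + 0.5): the addition itself rounds up to 1.0.
    assert_eq!((x + 0.5).trunc(), 1.0);
    // The nudged constant keeps the sum at the largest f32 below 1.0.
    let nudge = 0.5_f32 - 0.25 * f32::EPSILON;
    assert_eq!((x + nudge).trunc(), 0.0);
}
```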


@@ -0,0 +1,115 @@
/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
use core::f64::consts::FRAC_PI_2;
use crate::soft_f64::SoftF64;
use super::{
helpers::{k_cosf, k_sinf, rem_pio2f},
SoftF32,
};
/* Small multiples of pi/2 rounded to double precision. */
const S1_PIO2: SoftF64 = SoftF64(1.).mul(SoftF64(FRAC_PI_2)); /* 0x3FF921FB, 0x54442D18 */
const S2_PIO2: SoftF64 = SoftF64(2.).mul(SoftF64(FRAC_PI_2)); /* 0x400921FB, 0x54442D18 */
const S3_PIO2: SoftF64 = SoftF64(3.).mul(SoftF64(FRAC_PI_2)); /* 0x4012D97C, 0x7F3321D2 */
const S4_PIO2: SoftF64 = SoftF64(4.).mul(SoftF64(FRAC_PI_2)); /* 0x401921FB, 0x54442D18 */
pub const fn sinf(x: SoftF32) -> SoftF32 {
let x64 = SoftF64(x.0 as f64);
let x1p120 = SoftF32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
let mut ix = x.to_bits();
let sign = (ix >> 31) != 0;
ix &= 0x7fffffff;
if ix <= 0x3f490fda {
/* |x| ~<= pi/4 */
if ix < 0x39800000 {
/* |x| < 2**-12 */
/* raise inexact if x!=0 and underflow if subnormal */
if ix < 0x00800000 {
let _ = x.div(x1p120);
} else {
let _ = x.add(x1p120);
};
return x;
}
return k_sinf(x64);
}
if ix <= 0x407b53d1 {
/* |x| ~<= 5*pi/4 */
if ix <= 0x4016cbe3 {
/* |x| ~<= 3pi/4 */
if sign {
return k_cosf(x64.add(S1_PIO2)).neg();
} else {
return k_cosf(x64.sub(S1_PIO2));
}
}
return k_sinf(if sign {
x64.add(S2_PIO2).neg()
} else {
x64.sub(S2_PIO2).neg()
});
}
if ix <= 0x40e231d5 {
/* |x| ~<= 9*pi/4 */
if ix <= 0x40afeddf {
/* |x| ~<= 7*pi/4 */
if sign {
return k_cosf(x64.add(S3_PIO2));
} else {
return k_cosf(x64.sub(S3_PIO2)).neg();
}
}
return k_sinf(if sign {
x64.add(S4_PIO2)
} else {
x64.sub(S4_PIO2)
});
}
/* sin(Inf or NaN) is NaN */
if ix >= 0x7f800000 {
return x.sub(x);
}
/* general argument reduction needed */
let (n, y) = rem_pio2f(x);
match n & 3 {
0 => k_sinf(y),
1 => k_cosf(y),
2 => k_sinf(y.neg()),
_ => k_cosf(y).neg(),
}
}
#[cfg(test)]
mod test {
use core::f32::consts::{FRAC_2_PI, FRAC_PI_2, FRAC_PI_3, PI};
use super::*;
#[test]
fn test_basic() {
for val in [0.0, FRAC_PI_3, FRAC_PI_2, PI, FRAC_2_PI] {
assert_eq!(SoftF32(val).sin().to_f32(), val.sin())
}
}
}


@@ -0,0 +1,137 @@
/* origin: Rust libm https://github.com/rust-lang/libm/blob/4c8a973741c014b11ce7f1477693a3e5d4ef9609/src/math/sqrtf.rs */
/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
use crate::soft_f32::SoftF32;
use core::cmp::Ordering;
pub(crate) const fn sqrtf(x: SoftF32) -> SoftF32 {
const TINY: SoftF32 = SoftF32(1.0e-30);
let mut z: SoftF32;
let sign: i32 = 0x80000000_u32 as i32;
let mut ix: i32;
let mut s: i32;
let mut q: i32;
let mut m: i32;
let mut t: i32;
let mut i: i32;
let mut r: u32;
ix = x.to_bits() as i32;
/* take care of Inf and NaN */
if (ix as u32 & 0x7f800000) == 0x7f800000 {
return x.mul(x).add(x); /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
}
/* take care of zero */
if ix <= 0 {
if (ix & !sign) == 0 {
return x; /* sqrt(+-0) = +-0 */
}
if ix < 0 {
return (x.sub(x)).div(x.sub(x)); /* sqrt(-ve) = sNaN */
}
}
/* normalize x */
m = ix >> 23;
if m == 0 {
/* subnormal x */
i = 0;
while ix & 0x00800000 == 0 {
ix <<= 1;
i = i + 1;
}
m -= i - 1;
}
m -= 127; /* unbias exponent */
ix = (ix & 0x007fffff) | 0x00800000;
if m & 1 == 1 {
/* odd m, double x to make it even */
ix += ix;
}
m >>= 1; /* m = [m/2] */
/* generate sqrt(x) bit by bit */
ix += ix;
q = 0;
s = 0;
r = 0x01000000; /* r = moving bit from right to left */
while r != 0 {
t = s + r as i32;
if t <= ix {
s = t + r as i32;
ix -= t;
q += r as i32;
}
ix += ix;
r >>= 1;
}
/* use floating add to find out rounding direction */
if ix != 0 {
z = SoftF32(1.0).sub(TINY); /* raise inexact flag */
if ge(z, 1.0) {
z = SoftF32(1.0).add(TINY);
if gt(z, 1.0) {
q += 2;
} else {
q += q & 1;
}
}
}
ix = (q >> 1) + 0x3f000000;
ix += m << 23;
SoftF32::from_bits(ix as u32)
}
const fn gt(l: SoftF32, r: f32) -> bool {
if let Some(ord) = l.cmp(SoftF32(r)) {
match ord {
Ordering::Greater => true,
_ => false,
}
} else {
panic!("Failed to compare values");
}
}
const fn ge(l: SoftF32, r: f32) -> bool {
if let Some(ord) = l.cmp(SoftF32(r)) {
match ord {
Ordering::Less => false,
_ => true,
}
} else {
panic!("Failed to compare values");
}
}
#[cfg(test)]
mod tests {
use super::*;
use core::f32::*;
#[test]
fn sanity_check() {
assert_eq!(sqrtf(SoftF32(100.0)).0, 10.0);
assert_eq!(sqrtf(SoftF32(4.0)).0, 2.0);
}
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
#[test]
fn spec_tests() {
// Not Asserted: FE_INVALID exception is raised if argument is negative.
assert!(sqrtf(SoftF32(-1.0)).0.is_nan());
assert!(sqrtf(SoftF32(NAN)).0.is_nan());
for f in [0.0, -0.0, INFINITY].iter().copied() {
assert_eq!(sqrtf(SoftF32(f)).0, f);
}
}
}
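
`sqrtf` builds the root one bit at a time against a running remainder. The same digit-by-digit idea for plain `u32` inputs (illustrative sketch, not the vendored float path):

```
fn isqrt(n: u32) -> u32 {
    let mut x = n; // running remainder
    let mut q = 0u32; // root built so far (shifted)
    let mut r = 1u32 << 30; // bit being tried, moving right two at a time
    while r > n {
        r >>= 2;
    }
    while r != 0 {
        if x >= q + r {
            x -= q + r;
            q = (q >> 1) + r;
        } else {
            q >>= 1;
        }
        r >>= 2;
    }
    q
}

fn main() {
    assert_eq!(isqrt(100), 10);
    assert_eq!(isqrt(26), 5); // floor of the square root for non-squares
}
```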


@@ -0,0 +1,30 @@
use super::SoftF32;
pub(crate) const fn trunc(x: SoftF32) -> SoftF32 {
let mut i: u32 = x.to_bits();
let mut e: i32 = (i >> 23 & 0xff) as i32 - 0x7f + 9;
let m: u32;
if e >= 23 + 9 {
return x;
}
if e < 9 {
e = 1;
}
m = -1i32 as u32 >> e;
if (i & m) == 0 {
return x;
}
i &= !m;
SoftF32::from_bits(i)
}
#[cfg(test)]
mod tests {
use crate::soft_f32::SoftF32;
#[test]
fn sanity_check() {
assert_eq!(super::trunc(SoftF32(1.1)).0, 1.0);
}
}


@@ -0,0 +1,193 @@
use crate::soft_f64::SoftF64;
type F = SoftF64;
type FInt = u64;
pub(crate) const fn add(a: F, b: F) -> F {
let one: FInt = 1;
let zero: FInt = 0;
let bits = F::BITS as FInt;
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
let mut a_rep = a.repr();
let mut b_rep = b.repr();
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// Detect if a or b is zero, infinity, or NaN.
if a_abs.wrapping_sub(one) >= inf_rep - one || b_abs.wrapping_sub(one) >= inf_rep - one {
// NaN + anything = qNaN
if a_abs > inf_rep {
return F::from_repr(a_abs | quiet_bit);
}
// anything + NaN = qNaN
if b_abs > inf_rep {
return F::from_repr(b_abs | quiet_bit);
}
if a_abs == inf_rep {
// +/-infinity + -/+infinity = qNaN
if (a.repr() ^ b.repr()) == sign_bit {
return F::from_repr(qnan_rep);
} else {
// +/-infinity + anything remaining = +/- infinity
return a;
}
}
// anything remaining + +/-infinity = +/-infinity
if b_abs == inf_rep {
return b;
}
// zero + anything = anything
if a_abs == 0 {
// but we need to get the sign right for zero + zero
if b_abs == 0 {
return F::from_repr(a.repr() & b.repr());
} else {
return b;
}
}
// anything + zero = anything
if b_abs == 0 {
return a;
}
}
// Swap a and b if necessary so that a has the larger absolute value.
if b_abs > a_abs {
// Don't use mem::swap because it may generate references to memcpy in unoptimized code.
let tmp = a_rep;
a_rep = b_rep;
b_rep = tmp;
}
// Extract the exponent and significand from the (possibly swapped) a and b.
let mut a_exponent: i32 = ((a_rep & exponent_mask) >> significand_bits) as _;
let mut b_exponent: i32 = ((b_rep & exponent_mask) >> significand_bits) as _;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
// normalize any denormals, and adjust the exponent accordingly.
if a_exponent == 0 {
let (exponent, significand) = F::normalize(a_significand);
a_exponent = exponent;
a_significand = significand;
}
if b_exponent == 0 {
let (exponent, significand) = F::normalize(b_significand);
b_exponent = exponent;
b_significand = significand;
}
// The sign of the result is the sign of the larger operand, a. If they
// have opposite signs, we are performing a subtraction; otherwise addition.
let result_sign = a_rep & sign_bit;
let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero;
// Shift the significands to give us round, guard and sticky, and or in the
// implicit significand bit. (If we fell through from the denormal path it
// was already set by normalize(), but setting it twice won't hurt
// anything.)
a_significand = (a_significand | implicit_bit) << 3;
b_significand = (b_significand | implicit_bit) << 3;
// Shift the significand of b by the difference in exponents, with a sticky
// bottom bit to get rounding correct.
let align = a_exponent.wrapping_sub(b_exponent) as _;
if align != 0 {
if align < bits {
let sticky = (b_significand << bits.wrapping_sub(align) != 0) as FInt;
b_significand = (b_significand >> align) | sticky;
} else {
b_significand = one; // sticky; b is known to be non-zero.
}
}
if subtraction {
a_significand = a_significand.wrapping_sub(b_significand);
// If a == -b, return +zero.
if a_significand == 0 {
return F::from_repr(0);
}
// If partial cancellation occurred, we need to left-shift the result
// and adjust the exponent:
if a_significand < implicit_bit << 3 {
let shift =
a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32;
a_significand <<= shift;
a_exponent -= shift;
}
} else {
// addition
a_significand += b_significand;
// If the addition carried up, we need to right-shift the result and
// adjust the exponent:
if a_significand & implicit_bit << 4 != 0 {
let sticky = (a_significand & one != 0) as FInt;
a_significand = a_significand >> 1 | sticky;
a_exponent += 1;
}
}
// If we have overflowed the type, return +/- infinity:
if a_exponent >= max_exponent as i32 {
return F::from_repr(inf_rep | result_sign);
}
if a_exponent <= 0 {
// Result is denormal before rounding; the exponent is zero and we
// need to shift the significand.
let shift = (1 - a_exponent) as _;
let sticky = ((a_significand << bits.wrapping_sub(shift)) != 0) as FInt;
a_significand = a_significand >> shift | sticky;
a_exponent = 0;
}
// Low three bits are round, guard, and sticky.
let a_significand_i32: i32 = a_significand as _;
let round_guard_sticky: i32 = a_significand_i32 & 0x7;
// Shift the significand into place, and mask off the implicit bit.
let mut result = a_significand >> 3 & significand_mask;
// Insert the exponent and sign.
result |= (a_exponent as FInt) << significand_bits;
result |= result_sign;
// Final rounding. The result may overflow to infinity, but that is the
// correct result in that case.
if round_guard_sticky > 0x4 {
result += one;
}
if round_guard_sticky == 0x4 {
result += result & one;
}
F::from_repr(result)
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn sanity_check() {
assert_eq!(SoftF64(1.0).add(SoftF64(1.0)).0, 2.0)
}
}
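
// Illustrative sketch, not part of the upstream crate: the round/guard/sticky
// logic above implements IEEE round-to-nearest-even, so a half-ulp tie rounds
// back to the even neighbour while anything past the midpoint rounds up by one
// ulp. (Assumes the soft add is correctly rounded, which the tests above also
// rely on when comparing against hardware addition.)
#[cfg(test)]
mod rounding_sketch {
    use super::*;

    #[test]
    fn ties_round_to_even() {
        // 1.0 + 0.5 ulp is an exact tie and rounds back down to 1.0 (even).
        assert_eq!(SoftF64(1.0).add(SoftF64(f64::EPSILON / 2.0)).0, 1.0);
        // 1.0 + 0.75 ulp is past the midpoint and rounds up by one ulp.
        assert_eq!(
            SoftF64(1.0).add(SoftF64(0.75 * f64::EPSILON)).0,
            1.0 + f64::EPSILON
        );
    }
}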

View File

@@ -0,0 +1,63 @@
use crate::soft_f64::SoftF64;
use core::cmp::Ordering;
type F = SoftF64;
type FInt = u64;
type FSignedInt = i64;
const UNORDERED: Option<Ordering> = None;
const EQUAL: Option<Ordering> = Some(Ordering::Equal);
const GREATER: Option<Ordering> = Some(Ordering::Greater);
const LESS: Option<Ordering> = Some(Ordering::Less);
pub(crate) const fn cmp(a: F, b: F) -> Option<Ordering> {
let one: FInt = 1;
let zero: FInt = 0;
let szero: FSignedInt = 0;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let a_rep = a.repr();
let b_rep = b.repr();
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// If either a or b is NaN, they are unordered.
if a_abs > inf_rep || b_abs > inf_rep {
return UNORDERED;
}
// If a and b are both zeros, they are equal.
if a_abs | b_abs == zero {
return EQUAL;
}
let a_srep = a.signed_repr();
let b_srep = b.signed_repr();
// If at least one of a and b is positive, we get the same result comparing
// a and b as signed integers as we would with a floating-point compare.
if a_srep & b_srep >= szero {
if a_srep < b_srep {
LESS
} else if a_srep == b_srep {
EQUAL
} else {
GREATER
}
// Otherwise, both are negative, so we need to flip the sense of the
// comparison to get the correct result. (This assumes a twos- or ones-
// complement integer representation; if integers are represented in a
// sign-magnitude representation, then this flip is incorrect).
} else if a_srep > b_srep {
LESS
} else if a_srep == b_srep {
EQUAL
} else {
GREATER
}
}
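
// Illustrative sketch, not part of the upstream crate: the rules spelled out
// above in action -- NaNs are unordered, +0 and -0 compare equal, and the
// flipped comparison for two negative operands still orders them correctly.
#[cfg(test)]
mod cmp_sketch {
    use super::*;

    #[test]
    fn nan_zeros_and_negatives() {
        assert_eq!(cmp(SoftF64(f64::NAN), SoftF64(1.0)), UNORDERED);
        assert_eq!(cmp(SoftF64(0.0), SoftF64(-0.0)), EQUAL);
        assert_eq!(cmp(SoftF64(-2.0), SoftF64(-1.0)), LESS);
    }
}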

View File

@@ -0,0 +1,84 @@
use crate::soft_f64::SoftF64;
type F = SoftF64;
impl const From<f64> for F {
fn from(value: f64) -> Self {
F::from_f64(value)
}
}
impl const PartialEq<Self> for F {
fn eq(&self, other: &Self) -> bool {
match self.cmp(*other) {
Some(core::cmp::Ordering::Equal) => true,
_ => false,
}
}
}
impl const PartialOrd for F {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
self.cmp(*other)
}
}
impl const core::ops::Add for F {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
F::add(self, rhs)
}
}
impl const core::ops::Sub for F {
type Output = Self;
fn sub(self, rhs: Self) -> Self::Output {
F::sub(self, rhs)
}
}
impl const core::ops::Mul for F {
type Output = Self;
fn mul(self, rhs: Self) -> Self::Output {
F::mul(self, rhs)
}
}
impl const core::ops::Div for F {
type Output = Self;
fn div(self, rhs: Self) -> Self::Output {
F::div(self, rhs)
}
}
#[cfg(feature = "const_mut_refs")]
impl const core::ops::AddAssign for F {
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
#[cfg(not(feature = "const_mut_refs"))]
impl core::ops::AddAssign for F {
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
#[cfg(feature = "const_mut_refs")]
impl const core::ops::SubAssign for F {
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}
#[cfg(not(feature = "const_mut_refs"))]
impl core::ops::SubAssign for F {
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}

View File

@@ -0,0 +1,13 @@
use super::SoftF64;
/// Sign of Y, magnitude of X (f64)
///
/// Constructs a number with the magnitude (absolute value) of its
/// first argument, `x`, and the sign of its second argument, `y`.
pub(crate) const fn copysign(x: SoftF64, y: SoftF64) -> SoftF64 {
let mut ux = x.to_bits();
let uy = y.to_bits();
ux &= (!0) >> 1;
ux |= uy & (1 << 63);
SoftF64::from_bits(ux)
}
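
// Illustrative sketch, not part of the upstream crate: the magnitude comes
// from the first argument, the sign bit from the second.
#[cfg(test)]
mod copysign_sketch {
    use super::*;

    #[test]
    fn sign_transfer() {
        assert_eq!(copysign(SoftF64(3.0), SoftF64(-0.5)).to_f64(), -3.0);
        assert_eq!(copysign(SoftF64(-3.0), SoftF64(2.0)).to_f64(), 3.0);
    }
}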

View File

@@ -0,0 +1,86 @@
// origin: FreeBSD /usr/src/lib/msun/src/s_cos.c
// https://github.com/rust-lang/libm/blob/4c8a973741c014b11ce7f1477693a3e5d4ef9609/src/math/cos.rs
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunPro, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
use super::{
helpers::{k_cos, k_sin, rem_pio2},
SoftF64,
};
// cos(x)
// Return cosine function of x.
//
// kernel function:
// k_sin ... sine function on [-pi/4,pi/4]
// k_cos ... cosine function on [-pi/4,pi/4]
// rem_pio2 ... argument reduction routine
//
// Method.
// Let S,C and T denote the sin, cos and tan respectively on
// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
// in [-pi/4 , +pi/4], and let n = k mod 4.
// We have
//
// n sin(x) cos(x) tan(x)
// ----------------------------------------------------------
// 0 S C T
// 1 C -S -1/T
// 2 -S -C T
// 3 -C S -1/T
// ----------------------------------------------------------
//
// Special cases:
// Let trig be any of sin, cos, or tan.
// trig(+-INF) is NaN, with signals;
// trig(NaN) is that NaN;
//
// Accuracy:
// TRIG(x) returns trig(x) nearly rounded
//
pub(crate) const fn cos(x: SoftF64) -> SoftF64 {
let ix = (SoftF64::to_bits(x) >> 32) as u32 & 0x7fffffff;
/* |x| ~< pi/4 */
if ix <= 0x3fe921fb {
if ix < 0x3e46a09e {
/* if x < 2**-27 * sqrt(2) */
/* raise inexact if x != 0 */
if x.0 as i32 == 0 {
return SoftF64::ONE;
}
}
return k_cos(x, SoftF64::ZERO);
}
/* cos(Inf or NaN) is NaN */
if ix >= 0x7ff00000 {
return x.sub(x);
}
/* argument reduction needed */
let (n, y0, y1) = rem_pio2(x);
match n & 3 {
0 => k_cos(y0, y1),
1 => k_sin(y0, y1, 1).neg(),
2 => k_cos(y0, y1).neg(),
_ => k_sin(y0, y1, 1),
}
}
#[cfg(test)]
mod test {
use crate::soft_f64::SoftF64;
#[test]
fn test_large_neg() {
assert_eq!(SoftF64(-1647101.0).cos().to_f64(), (-1647101.0_f64).cos())
}
}
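
// Illustrative sketch, not part of the upstream crate: the small-argument
// shortcut above returns exactly 1 for +/-0, before any reduction happens.
#[cfg(test)]
mod cos_sketch {
    use crate::soft_f64::SoftF64;

    #[test]
    fn zero_arguments() {
        assert_eq!(SoftF64(0.0).cos().to_f64(), 1.0);
        assert_eq!(SoftF64(-0.0).cos().to_f64(), 1.0);
    }
}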

View File

@@ -0,0 +1,439 @@
use crate::soft_f64::{u64_widen_mul, SoftF64};
type F = SoftF64;
type FInt = u64;
pub(crate) const fn div(a: F, b: F) -> F {
const NUMBER_OF_HALF_ITERATIONS: usize = 3;
const NUMBER_OF_FULL_ITERATIONS: usize = 1;
const USE_NATIVE_FULL_ITERATIONS: bool = false;
let one = 1;
let zero = 0;
let hw = F::BITS / 2;
let lo_mask = u64::MAX >> hw;
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let exponent_bias = F::EXPONENT_BIAS;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let sign_bit = F::SIGN_MASK;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
#[inline(always)]
const fn negate_u64(a: u64) -> u64 {
(<i64>::wrapping_neg(a as i64)) as u64
}
let a_rep = a.repr();
let b_rep = b.repr();
let a_exponent = (a_rep >> significand_bits) & max_exponent as u64;
let b_exponent = (b_rep >> significand_bits) & max_exponent as u64;
let quotient_sign = (a_rep ^ b_rep) & sign_bit;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
let mut scale = 0;
// Detect if a or b is zero, denormal, infinity, or NaN.
if a_exponent.wrapping_sub(one) >= (max_exponent - 1) as u64
|| b_exponent.wrapping_sub(one) >= (max_exponent - 1) as u64
{
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// NaN / anything = qNaN
if a_abs > inf_rep {
return F::from_repr(a_rep | quiet_bit);
}
// anything / NaN = qNaN
if b_abs > inf_rep {
return F::from_repr(b_rep | quiet_bit);
}
if a_abs == inf_rep {
if b_abs == inf_rep {
// infinity / infinity = NaN
return F::from_repr(qnan_rep);
} else {
// infinity / anything else = +/- infinity
return F::from_repr(a_abs | quotient_sign);
}
}
// anything else / infinity = +/- 0
if b_abs == inf_rep {
return F::from_repr(quotient_sign);
}
if a_abs == zero {
if b_abs == zero {
// zero / zero = NaN
return F::from_repr(qnan_rep);
} else {
// zero / anything else = +/- zero
return F::from_repr(quotient_sign);
}
}
// anything else / zero = +/- infinity
if b_abs == zero {
return F::from_repr(inf_rep | quotient_sign);
}
// one or both of a or b is denormal, the other (if applicable) is a
// normal number. Renormalize one or both of a and b, and set scale to
// include the necessary exponent adjustment.
if a_abs < implicit_bit {
let (exponent, significand) = F::normalize(a_significand);
scale += exponent;
a_significand = significand;
}
if b_abs < implicit_bit {
let (exponent, significand) = F::normalize(b_significand);
scale -= exponent;
b_significand = significand;
}
}
// Set the implicit significand bit. If we fell through from the
// denormal path it was already set by normalize(), but setting it twice
// won't hurt anything.
a_significand |= implicit_bit;
b_significand |= implicit_bit;
let written_exponent: i64 = a_exponent
.wrapping_sub(b_exponent)
.wrapping_add(scale as u64)
.wrapping_add(exponent_bias as u64) as i64;
let b_uq1 = b_significand << (F::BITS - significand_bits - 1);
// Align the significand of b as a UQ1.(n-1) fixed-point number in the range
// [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax
// polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2.
// The max error for this approximation is achieved at endpoints, so
// abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289...,
// which is about 4.5 bits.
// The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571...
// Then, refine the reciprocal estimate using a quadratically converging
// Newton-Raphson iteration:
// x_{n+1} = x_n * (2 - x_n * b)
//
// Let b be the original divisor considered "in infinite precision" and
// obtained from IEEE754 representation of function argument (with the
// implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in
// UQ1.(W-1).
//
// Let b_hw be an infinitely precise number obtained from the highest (HW-1)
// bits of divisor significand (with the implicit bit set). Corresponds to
// half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated**
// version of b_UQ1.
//
// Let e_n := x_n - 1/b_hw
// E_n := x_n - 1/b
// abs(E_n) <= abs(e_n) + (1/b_hw - 1/b)
// = abs(e_n) + (b - b_hw) / (b*b_hw)
// <= abs(e_n) + 2 * 2^-HW
// rep_t-sized iterations may be slower than the corresponding half-width
// variant depending on the hardware and whether single/double/quad precision
// is selected.
// NB: Using half-width iterations increases computation errors due to
// rounding, so error estimations have to be computed taking the selected
// mode into account!
let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
// Starting with (n-1) half-width iterations
let b_uq1_hw: u32 = (b_significand >> (significand_bits + 1 - hw)) as u32;
// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
// with W0 being either 16 or 32 and W0 <= HW.
// That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
// HW is at least 32. Shifting into the highest bits if needed.
let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32));
// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
// so x0 fits to UQ0.HW without wrapping.
let x_uq0_hw: u32 = {
let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
// dbg!(x_uq0_hw);
// An e_0 error is comprised of errors due to
// * x0 being an inherently imprecise first approximation of 1/b_hw
// * C_hw being some (irrational) number **truncated** to W0 bits
// Please note that e_0 is calculated against the infinitely precise
// reciprocal of b_hw (that is, **truncated** version of b).
//
// e_0 <= 3/4 - 1/sqrt(2) + 2^-W0
// By construction, 1 <= b < 2
// f(x) = x * (2 - b*x) = 2*x - b*x^2
// f'(x) = 2 * (1 - b*x)
//
// On the [0, 1] interval, f(0) = 0,
// then it increases until f(1/b) = 1 / b, maximum on (0, 1),
// then it decreases to f(1) = 2 - b
//
// Let g(x) = x - f(x) = b*x^2 - x.
// On (0, 1/b), g(x) < 0 <=> f(x) > x
// On (1/b, 1], g(x) > 0 <=> f(x) < x
//
// For half-width iterations, b_hw is used instead of b.
let mut idx = 0;
while idx < NUMBER_OF_HALF_ITERATIONS {
// corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp
// of corr_UQ1_hw.
// "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1).
// On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
// expected to be strictly positive because b_UQ1_hw has its highest bit set
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
let corr_uq1_hw: u32 = 0_u64
.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw)
as u32;
// dbg!(corr_uq1_hw);
// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
// obtaining an UQ1.(HW-1) number and proving its highest bit could be
// considered to be 0 to be able to represent it in UQ0.HW.
// From the above analysis of f(x), if corr_UQ1_hw would be represented
// without any intermediate loss of precision (that is, in twice_rep_t)
// x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly
// less otherwise. On the other hand, to obtain [1.]000..., one has to pass
// 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due
// to 1.0 being not representable as UQ0.HW).
// The fact corr_UQ1_hw was virtually rounded up (due to the result of
// multiplication being **first** truncated, then negated - to improve
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32;
// dbg!(x_uq0_hw);
// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
// representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
// any number of iterations, so just subtract 2 from the reciprocal
// approximation after last iteration.
// In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW:
// corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1
// = 1 - e_n * b_hw + 2*eps1
// x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2
// = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2
// = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2
// e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2
// = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw
// \------ >0 -------/ \-- >0 ---/
// abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U)
idx += 1;
}
// For initial half-width iterations, U = 2^-HW
// Let abs(e_n) <= u_n * U,
// then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U)
// u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2)
// Account for possible overflow (see above). For an overflow to occur for the
// first time, for "ideal" corr_UQ1_hw (that is, without intermediate
// truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either the maximum
// value representable in UQ0.HW or one less than it. This means that 1/b_hw has to
// be no less than that value (see g(x) above), so it is safe to decrement just
// once after the final iteration. On the other hand, the effective value of the
// divisor changes after this point (from b_hw to b), so adjust here.
x_uq0_hw.wrapping_sub(1_u32)
};
// Error estimations for full-precision iterations are calculated just
// as above, but with U := 2^-W and taking extra decrementing into account.
// We need at least one such iteration.
// Simulating operations on a twice_rep_t to perform a single final full-width
// iteration. Using ad-hoc multiplication implementations to take advantage
// of particular structure of operands.
let blo: u64 = b_uq1 & lo_mask;
// x_UQ0 = x_UQ0_hw * 2^HW - 1
// x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
//
// <--- higher half ---><--- lower half --->
// [x_UQ0_hw * b_UQ1_hw]
// + [ x_UQ0_hw * blo ]
// - [ b_UQ1 ]
// = [ result ][.... discarded ...]
let corr_uq1 = negate_u64(
(x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1,
); // account for *possible* carry
let lo_corr = corr_uq1 & lo_mask;
let hi_corr = corr_uq1 >> hw;
// x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
let mut x_uq0: FInt = (((x_uq0_hw as u64) * hi_corr) << 1)
.wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1))
.wrapping_sub(2); // 1 to account for the highest bit of corr_UQ1 can be 1
// 1 to account for possible carry
// Just like the case of half-width iterations but with possibility
// of overflowing by one extra Ulp of x_UQ0.
x_uq0 -= one;
// ... and then traditional fixup by 2 should work
// On error estimation:
// abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW
// + (2^-HW + 2^-W))
// abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW
// Then like for the half-width iterations:
// With 0 <= eps1, eps2 < 2^-W
// E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b
// abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ]
// abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ]
x_uq0
} else {
// C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
let c: FInt = 0x7504F333 << (F::BITS - 32);
let x_uq0: FInt = c.wrapping_sub(b_uq1);
// E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
x_uq0
};
let mut x_uq0 = if USE_NATIVE_FULL_ITERATIONS {
let mut idx = 0;
while idx < NUMBER_OF_FULL_ITERATIONS {
let corr_uq1: u64 = 0_u64.wrapping_sub((x_uq0 * b_uq1) >> F::BITS);
x_uq0 = (((x_uq0 as u128) * (corr_uq1 as u128)) >> (F::BITS - 1)) as u64;
idx += 1;
}
x_uq0
} else {
// not using native full iterations
x_uq0
};
// Finally, account for possible overflow, as explained above.
x_uq0 = x_uq0.wrapping_sub(2);
// u_n for different precisions (with N-1 half-width iterations):
// W0 is the precision of C
// u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW
// Estimated with bc:
// define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; }
// define half2(un) { return 2.0 * un / 2.0^hw + 2.0; }
// define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; }
// define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; }
// | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1)
// u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797
// u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440
// u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317
// u_3 | < 7.31 | | < 7.31 | < 27054456580
// u_4 | | | | < 80.4
// Final (U_N) | same as u_3 | < 72 | < 218 | < 13920
// Add 2 to U_N due to final decrement.
let reciprocal_precision: FInt = 220;
// Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W
let x_uq0 = x_uq0 - reciprocal_precision;
// Now 1/b - (2*P) * 2^-W < x < 1/b
// FIXME Is x_UQ0 still >= 0.5?
let mut quotient: FInt = u64_widen_mul(x_uq0, a_significand << 1).1;
// Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).
// quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
// adjust it to be in [1.0, 2.0) as UQ1.SB.
let (mut residual, written_exponent) = if quotient < (implicit_bit << 1) {
// Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB,
// effectively doubling its value as well as its error estimation.
let residual_lo = (a_significand << (significand_bits + 1))
.wrapping_sub(quotient.wrapping_mul(b_significand));
a_significand <<= 1;
(residual_lo, written_exponent.wrapping_sub(1))
} else {
// Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it
// to UQ1.SB by right shifting by 1. Least significant bit is omitted.
quotient >>= 1;
let residual_lo =
(a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand));
(residual_lo, written_exponent)
};
// drop mutability
let quotient = quotient;
// NB: residualLo is calculated above for the normal result case.
// It is re-computed on denormal path that is expected to be not so
// performance-sensitive.
// Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB
// Each NextAfter() increments the floating point value by at least 2^-SB
// (more, if exponent was incremented).
// Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint):
// q
// | | * | | | | |
// <---> 2^t
// | | | | | * | |
// q
// To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB.
// (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB
// (8*P) * 2^-W < 0.5 * 2^-SB
// P < 2^(W-4-SB)
// Generally, for at most R NextAfter() to be enough,
// P < (2*R - 1) * 2^(W-4-SB)
// For f32 (0+3): 10 < 32 (OK)
// For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required
// For f64: 220 < 256 (OK)
// For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required)
// If we have overflowed the exponent, return infinity
if written_exponent >= max_exponent as i64 {
return F::from_repr(inf_rep | quotient_sign);
}
// Now, quotient <= the correctly-rounded result
// and may need taking NextAfter() up to 3 times (see error estimates above)
// r = a - b * q
let abs_result = if written_exponent > 0 {
let mut ret = quotient & significand_mask;
ret |= (written_exponent as u64) << significand_bits;
residual <<= 1;
ret
} else {
if (significand_bits as i64 + written_exponent) < 0 {
return F::from_repr(quotient_sign);
}
let ret = quotient.wrapping_shr((negate_u64(written_exponent as u64) + 1) as u32);
residual = a_significand
.wrapping_shl(significand_bits.wrapping_add(written_exponent as u32))
.wrapping_sub(((ret).wrapping_mul(b_significand)) << 1);
ret
};
// Round
let abs_result = {
residual += abs_result & one; // tie to even
// conditionally turns the below LT comparison into LTE
if residual > b_significand {
abs_result + one
} else {
abs_result
}
};
F::from_repr(abs_result | quotient_sign)
}
#[cfg(test)]
mod test {
use super::SoftF64;
#[test]
fn sanity_check() {
assert_eq!(SoftF64(10.0).div(SoftF64(5.0)).0, 2.0)
}
}
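
// Illustrative sketch, not part of the upstream crate: the Newton-Raphson
// refinement x_{n+1} = x_n * (2 - b * x_n) described in the comments above,
// written out in plain f64 with the same minimax seed x0 = 3/4 + 1/sqrt(2) - b/2.
// The error roughly squares on every step, so a handful of iterations reach
// double precision for any significand b in [1.0, 2.0).
#[cfg(test)]
mod reciprocal_sketch {
    #[test]
    fn newton_raphson_converges() {
        let b = 1.638_f64; // arbitrary divisor significand in [1.0, 2.0)
        let mut x = 0.75 + core::f64::consts::FRAC_1_SQRT_2 - b / 2.0;
        for _ in 0..5 {
            x = x * (2.0 - b * x); // quadratically converging step
        }
        let err = x - 1.0 / b;
        assert!(err > -1e-13 && err < 1e-13);
    }
}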

View File

@@ -0,0 +1,61 @@
use super::{
helpers::{eq, gt},
SoftF64,
};
const TOINT: SoftF64 = SoftF64(1.0).div(SoftF64(f64::EPSILON));
/// Floor (f64)
///
/// Finds the nearest integer less than or equal to `x`.
pub const fn floor(x: SoftF64) -> SoftF64 {
let ui = x.to_bits();
let e = ((ui >> 52) & 0x7ff) as i32;
if (e >= 0x3ff + 52) || eq(x, SoftF64::ZERO) {
return x;
}
/* y = int(x) - x, where int(x) is an integer neighbor of x */
let y = if (ui >> 63) != 0 {
x.sub(TOINT).add(TOINT).sub(x)
} else {
x.add(TOINT).sub(TOINT).sub(x)
};
/* special case because of non-nearest rounding modes */
if e < 0x3ff {
return if (ui >> 63) != 0 {
SoftF64(-1.0)
} else {
SoftF64::ZERO
};
}
if gt(y, SoftF64::ZERO) {
x.add(y).sub(SoftF64::ONE)
} else {
x.add(y)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn sanity_check() {
assert_eq!(floor(SoftF64(1.1)).0, 1.0);
assert_eq!(floor(SoftF64(2.9)).0, 2.0);
}
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor
#[test]
fn spec_tests() {
// Not Asserted: that the current rounding mode has no effect.
assert!(floor(SoftF64(f64::NAN)).0.is_nan());
for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY]
.iter()
.copied()
{
assert_eq!(floor(SoftF64(f)).0, f);
}
}
}
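
// Illustrative sketch, not part of the upstream crate: the TOINT trick above
// must round toward negative infinity for negative inputs, including values
// below the e < 0x3ff early-return threshold.
#[cfg(test)]
mod negative_inputs_sketch {
    use super::*;

    #[test]
    fn floors_toward_negative_infinity() {
        assert_eq!(floor(SoftF64(-0.5)).0, -1.0);
        assert_eq!(floor(SoftF64(-1.1)).0, -2.0);
    }
}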

View File

@@ -0,0 +1,36 @@
use core::cmp::Ordering;
use crate::soft_f64::SoftF64;
pub(crate) const fn eq(l: SoftF64, r: SoftF64) -> bool {
if let Some(ord) = l.cmp(r) {
match ord {
Ordering::Equal => true,
_ => false,
}
} else {
panic!("Failed to compare values");
}
}
pub(crate) const fn gt(l: SoftF64, r: SoftF64) -> bool {
if let Some(ord) = l.cmp(r) {
match ord {
Ordering::Greater => true,
_ => false,
}
} else {
panic!("Failed to compare values");
}
}
pub(crate) const fn ge(l: SoftF64, r: SoftF64) -> bool {
if let Some(ord) = l.cmp(r) {
match ord {
Ordering::Less => false,
_ => true,
}
} else {
panic!("Failed to compare values");
}
}

View File

@@ -0,0 +1,66 @@
// origin: FreeBSD /usr/src/lib/msun/src/k_cos.c,
// https://github.com/rust-lang/libm/blob/4c8a973741c014b11ce7f1477693a3e5d4ef9609/src/math/k_cos.rs
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunSoft, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
use crate::soft_f64::SoftF64;
const C1: SoftF64 = SoftF64(4.16666666666666019037e-02); /* 0x3FA55555, 0x5555554C */
const C2: SoftF64 = SoftF64(-1.38888888888741095749e-03); /* 0xBF56C16C, 0x16C15177 */
const C3: SoftF64 = SoftF64(2.48015872894767294178e-05); /* 0x3EFA01A0, 0x19CB1590 */
const C4: SoftF64 = SoftF64(-2.75573143513906633035e-07); /* 0xBE927E4F, 0x809C52AD */
const C5: SoftF64 = SoftF64(2.08757232129817482790e-09); /* 0x3E21EE9E, 0xBDB4B1C4 */
const C6: SoftF64 = SoftF64(-1.13596475577881948265e-11); /* 0xBDA8FAE9, 0xBE8838D4 */
// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
// Input x is assumed to be bounded by ~pi/4 in magnitude.
// Input y is the tail of x.
//
// Algorithm
// 1. Since cos(-x) = cos(x), we need only to consider positive x.
// 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
// 3. cos(x) is approximated by a polynomial of degree 14 on
// [0,pi/4]
// 4 14
// cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
// where the remez error is
//
// | 2 4 6 8 10 12 14 | -58
// |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
// | |
//
// 4 6 8 10 12 14
// 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
// cos(x) ~ 1 - x*x/2 + r
// since cos(x+y) ~ cos(x) - sin(x)*y
// ~ cos(x) - x*y,
// a correction term is necessary in cos(x) and hence
// cos(x+y) = 1 - (x*x/2 - (r - x*y))
// For better accuracy, rearrange to
// cos(x+y) ~ w + (tmp + (r-x*y))
// where w = 1 - x*x/2 and tmp is a tiny correction term
// (1 - x*x/2 == w + tmp exactly in infinite precision).
// The exactness of w + tmp in infinite precision depends on w
// and tmp having the same precision as x. If they have extra
// precision due to compiler bugs, then the extra precision is
// only good provided it is retained in all terms of the final
// expression for cos(). Retention happens in all cases tested
// under FreeBSD, so don't pessimize things by forcibly clipping
// any extra precision in w.
pub(crate) const fn k_cos(x: SoftF64, y: SoftF64) -> SoftF64 {
let z = x.mul(x);
let w = z.mul(z);
let r = z
.mul(C1.add(z.mul(C2.add(z.mul(C3)))))
.add(w.mul(w.mul(C4.add(z.mul(C5.add(z.mul(C6)))))));
let hz = SoftF64(0.5).mul(z);
let w = SoftF64::ZERO.sub(hz);
w.add(((SoftF64::ONE.sub(w)).sub(hz)).add(z.mul(r).sub(x.mul(y))))
}
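
// Illustrative sketch, not part of the upstream crate: with x = y = 0 every
// polynomial term above vanishes and the kernel returns exactly 1; for a
// mid-range argument the degree-14 approximation stays within ~1 ulp of cos.
#[cfg(test)]
mod k_cos_sketch {
    use super::*;

    #[test]
    fn kernel_values() {
        assert_eq!(k_cos(SoftF64(0.0), SoftF64(0.0)).to_f64(), 1.0);
        // cos(0.5) = 0.8775825618903728..., checked with a loose bound to
        // avoid depending on the platform libm.
        let c = k_cos(SoftF64(0.5), SoftF64(0.0)).to_f64();
        assert!(c > 0.877582561890372 && c < 0.877582561890374);
    }
}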

View File

@@ -0,0 +1,62 @@
// origin: FreeBSD /usr/src/lib/msun/src/k_sin.c,
// https://github.com/rust-lang/libm/blob/4c8a973741c014b11ce7f1477693a3e5d4ef9609/src/math/k_sin.rs
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunSoft, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
use crate::soft_f64::SoftF64;
const S1: SoftF64 = SoftF64(-1.66666666666666324348e-01); /* 0xBFC55555, 0x55555549 */
const S2: SoftF64 = SoftF64(8.33333333332248946124e-03); /* 0x3F811111, 0x1110F8A6 */
const S3: SoftF64 = SoftF64(-1.98412698298579493134e-04); /* 0xBF2A01A0, 0x19C161D5 */
const S4: SoftF64 = SoftF64(2.75573137070700676789e-06); /* 0x3EC71DE3, 0x57B1FE7D */
const S5: SoftF64 = SoftF64(-2.50507602534068634195e-08); /* 0xBE5AE5E6, 0x8A2B9CEB */
const S6: SoftF64 = SoftF64(1.58969099521155010221e-10); /* 0x3DE5D93A, 0x5ACFD57C */
// kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
// Input x is assumed to be bounded by ~pi/4 in magnitude.
// Input y is the tail of x.
// Input iy indicates whether y is 0. (if iy=0, y is assumed to be 0).
//
// Algorithm
// 1. Since sin(-x) = -sin(x), we need only to consider positive x.
// 2. Callers must return sin(-0) = -0 without calling here since our
// odd polynomial is not evaluated in a way that preserves -0.
// Callers may do the optimization sin(x) ~ x for tiny x.
// 3. sin(x) is approximated by a polynomial of degree 13 on
// [0,pi/4]
// 3 13
// sin(x) ~ x + S1*x + ... + S6*x
// where
//
// |sin(x) 2 4 6 8 10 12 | -58
// |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
// | x |
//
// 4. sin(x+y) = sin(x) + sin'(x')*y
// ~ sin(x) + (1-x*x/2)*y
// For better accuracy, let
// 3 2 2 2 2
// r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
// then 3 2
// sin(x) = x + (S1*x + (x *(r-y/2)+y))
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub(crate) const fn k_sin(x: SoftF64, y: SoftF64, iy: i32) -> SoftF64 {
let z = x.mul(x);
let w = z.mul(z);
let r = S2
.add(z.mul(S3.add(z.mul(S4))))
.add(z.mul(w.mul(S5.add(z.mul(S6)))));
let v = z.mul(x);
if iy == 0 {
x.add(v.mul(S1.add(z.mul(r))))
} else {
x.sub((z.mul(SoftF64(0.5).mul(y).sub(v.mul(r))).sub(y)).sub(v.mul(S1)))
}
}
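
// Illustrative sketch, not part of the upstream crate: with x = y = 0 the odd
// polynomial above returns exactly 0; for a mid-range argument it stays within
// ~1 ulp of sin.
#[cfg(test)]
mod k_sin_sketch {
    use super::*;

    #[test]
    fn kernel_values() {
        assert_eq!(k_sin(SoftF64(0.0), SoftF64(0.0), 0).to_f64(), 0.0);
        // sin(0.5) = 0.4794255386042030..., checked with a loose bound to
        // avoid depending on the platform libm.
        let s = k_sin(SoftF64(0.5), SoftF64(0.0), 0).to_f64();
        assert!(s > 0.479425538604202 && s < 0.479425538604204);
    }
}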

View File

@@ -0,0 +1,13 @@
mod cmp;
mod k_cos;
mod k_sin;
mod rem_pio2;
mod rem_pio2_large;
mod scalbn;
pub(crate) use cmp::{eq, ge, gt};
pub(crate) use k_cos::k_cos;
pub(crate) use k_sin::k_sin;
pub(crate) use rem_pio2::rem_pio2;
pub(crate) use rem_pio2_large::rem_pio2_large;
pub(crate) use scalbn::scalbn;

View File

@@ -0,0 +1,241 @@
use crate::soft_f64::{helpers::eq, SoftF64};
// origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunPro, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
//
// Optimized by Bruce D. Evans.
use super::rem_pio2_large;
// #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
// #define EPS DBL_EPSILON
const EPS: SoftF64 = SoftF64(2.2204460492503131e-16);
// #elif FLT_EVAL_METHOD==2
// #define EPS LDBL_EPSILON
// #endif
// TODO: Support FLT_EVAL_METHOD?
const TO_INT: SoftF64 = SoftF64(1.5).div(EPS);
/// 53 bits of 2/pi
const INV_PIO2: SoftF64 = SoftF64(6.36619772367581382433e-01); /* 0x3FE45F30, 0x6DC9C883 */
/// first 33 bits of pi/2
const PIO2_1: SoftF64 = SoftF64(1.57079632673412561417e+00); /* 0x3FF921FB, 0x54400000 */
/// pi/2 - PIO2_1
const PIO2_1T: SoftF64 = SoftF64(6.07710050650619224932e-11); /* 0x3DD0B461, 0x1A626331 */
/// second 33 bits of pi/2
const PIO2_2: SoftF64 = SoftF64(6.07710050630396597660e-11); /* 0x3DD0B461, 0x1A600000 */
/// pi/2 - (PIO2_1+PIO2_2)
const PIO2_2T: SoftF64 = SoftF64(2.02226624879595063154e-21); /* 0x3BA3198A, 0x2E037073 */
/// third 33 bits of pi/2
const PIO2_3: SoftF64 = SoftF64(2.02226624871116645580e-21); /* 0x3BA3198A, 0x2E000000 */
/// pi/2 - (PIO2_1+PIO2_2+PIO2_3)
const PIO2_3T: SoftF64 = SoftF64(8.47842766036889956997e-32); /* 0x397B839A, 0x252049C1 */
// return the remainder of x rem pi/2 in y[0]+y[1]
// use rem_pio2_large() for large x
//
// caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
pub(crate) const fn rem_pio2(x: SoftF64) -> (i32, SoftF64, SoftF64) {
let x1p24 = SoftF64::from_bits(0x4170000000000000);
let sign = (SoftF64::to_bits(x) >> 63) as i32;
let ix = (SoftF64::to_bits(x) >> 32) as u32 & 0x7fffffff;
const fn medium(x: SoftF64, ix: u32) -> (i32, SoftF64, SoftF64) {
/* rint(x/(pi/2)), Assume round-to-nearest. */
let tmp = SoftF64(x.0 as f64).mul(INV_PIO2).add(TO_INT);
// force rounding of tmp to its storage format on x87 to avoid
// excess precision issues.
let f_n = tmp.sub(TO_INT);
let n = f_n.0 as i32;
let mut r = x.sub(f_n.mul(PIO2_1));
let mut w = f_n.mul(PIO2_1T); /* 1st round, good to 85 bits */
let mut y0 = r.sub(w);
let ui = SoftF64::to_bits(y0);
let ey = (ui >> 52) as i32 & 0x7ff;
let ex = (ix >> 20) as i32;
if ex - ey > 16 {
/* 2nd round, good to 118 bits */
let t = r;
w = f_n.mul(PIO2_2);
r = t.sub(w);
w = f_n.mul(PIO2_2T).sub((t.sub(r)).sub(w));
y0 = r.sub(w);
let ey = (SoftF64::to_bits(y0) >> 52) as i32 & 0x7ff;
if ex - ey > 49 {
/* 3rd round, good to 151 bits, covers all cases */
let t = r;
w = f_n.mul(PIO2_3);
r = t.sub(w);
w = f_n.mul(PIO2_3T).sub((t.sub(r)).sub(w));
y0 = r.sub(w);
}
}
let y1 = (r.sub(y0)).sub(w);
(n, y0, y1)
}
if ix <= 0x400f6a7a {
/* |x| ~<= 5pi/4 */
if (ix & 0xfffff) == 0x921fb {
/* |x| ~= pi/2 or 2pi/2 */
return medium(x, ix); /* cancellation -- use medium case */
}
if ix <= 0x4002d97c {
/* |x| ~<= 3pi/4 */
if sign == 0 {
let z = x.sub(PIO2_1); /* one round good to 85 bits */
let y0 = z.sub(PIO2_1T);
let y1 = (z.sub(y0)).sub(PIO2_1T);
return (1, y0, y1);
} else {
let z = x.add(PIO2_1);
let y0 = z.add(PIO2_1T);
let y1 = (z.sub(y0)).add(PIO2_1T);
return (-1, y0, y1);
}
} else if sign == 0 {
let z = x.sub(SoftF64(2.0).mul(PIO2_1));
let y0 = z.sub(SoftF64(2.0).mul(PIO2_1T));
let y1 = (z.sub(y0)).sub(SoftF64(2.0).mul(PIO2_1T));
return (2, y0, y1);
} else {
let z = x.add(SoftF64(2.0).mul(PIO2_1));
let y0 = z.add(SoftF64(2.0).mul(PIO2_1T));
let y1 = (z.sub(y0)).add(SoftF64(2.0).mul(PIO2_1T));
return (-2, y0, y1);
}
}
if ix <= 0x401c463b {
/* |x| ~<= 9pi/4 */
if ix <= 0x4015fdbc {
/* |x| ~<= 7pi/4 */
if ix == 0x4012d97c {
/* |x| ~= 3pi/2 */
return medium(x, ix);
}
if sign == 0 {
let z = x.sub(SoftF64(3.0).mul(PIO2_1));
let y0 = z.sub(SoftF64(3.0).mul(PIO2_1T));
let y1 = (z.sub(y0)).sub(SoftF64(3.0).mul(PIO2_1T));
return (3, y0, y1);
} else {
let z = x.add(SoftF64(3.0).mul(PIO2_1));
let y0 = z.add(SoftF64(3.0).mul(PIO2_1T));
let y1 = (z.sub(y0)).add(SoftF64(3.0).mul(PIO2_1T));
return (-3, y0, y1);
}
} else {
if ix == 0x401921fb {
/* |x| ~= 4pi/2 */
return medium(x, ix);
}
if sign == 0 {
let z = x.sub(SoftF64(4.0).mul(PIO2_1));
let y0 = z.sub(SoftF64(4.0).mul(PIO2_1T));
let y1 = (z.sub(y0)).sub(SoftF64(4.0).mul(PIO2_1T));
return (4, y0, y1);
} else {
let z = x.add(SoftF64(4.0).mul(PIO2_1));
let y0 = z.add(SoftF64(4.0).mul(PIO2_1T));
let y1 = (z.sub(y0)).add(SoftF64(4.0).mul(PIO2_1T));
return (-4, y0, y1);
}
}
}
if ix < 0x413921fb {
/* |x| ~< 2^20*(pi/2), medium size */
return medium(x, ix);
}
/*
* all other (large) arguments
*/
if ix >= 0x7ff00000 {
/* x is inf or NaN */
let y0 = x.sub(x);
let y1 = y0;
return (0, y0, y1);
}
/* set z = scalbn(|x|,-ilogb(x)+23) */
let mut ui = SoftF64::to_bits(x);
ui &= (!1) >> 12;
ui |= (0x3ff + 23) << 52;
let mut z = SoftF64::from_bits(ui);
let mut tx = [SoftF64::ZERO; 3];
{
let mut i = 0;
while i < 2 {
tx[i] = SoftF64(z.0 as i32 as f64);
z = (z.sub(tx[i])).mul(x1p24);
i += 1;
}
}
tx[2] = z;
/* skip zero terms, first term is non-zero */
let mut i = 2;
while i != 0 && eq(tx[i], SoftF64::ZERO) {
i -= 1;
}
let ty = [SoftF64::ZERO; 3];
let (n, ty) = match i {
2 => rem_pio2_large(&tx, &ty, ((ix as i32) >> 20) - (0x3ff + 23), 1),
1 => rem_pio2_large(&[tx[0], tx[1]], &ty, ((ix as i32) >> 20) - (0x3ff + 23), 1),
0 => rem_pio2_large(&[tx[0]], &ty, ((ix as i32) >> 20) - (0x3ff + 23), 1),
_ => panic!(),
};
if sign != 0 {
return (-n, ty[0].neg(), ty[1].neg());
}
(n, ty[0], ty[1])
}
#[cfg(test)]
mod tests {
use super::{rem_pio2, SoftF64};
#[test]
fn test_near_pi() {
let arg = SoftF64(3.141592025756836);
let (a, b, c) = rem_pio2(arg);
assert_eq!(
(a, b.0, c.0),
(2, -6.278329573009626e-7, -2.1125998133974653e-23)
);
let arg = SoftF64(3.141592033207416);
let (a, b, c) = rem_pio2(arg);
assert_eq!(
(a, b.0, c.0),
(2, -6.20382377148128e-7, -2.1125998133974653e-23)
);
let arg = SoftF64(3.141592144966125);
let (a, b, c) = rem_pio2(arg);
assert_eq!(
(a, b.0, c.0),
(2, -5.086236681942706e-7, -2.1125998133974653e-23)
);
let arg = SoftF64(3.141592979431152);
let (a, b, c) = rem_pio2(arg);
assert_eq!(
(a, b.0, c.0),
(2, 3.2584135866119817e-7, -2.1125998133974653e-23)
);
}
#[test]
fn test_overflow_b9b847() {
let _ = rem_pio2(SoftF64(-3054214.5490637687));
}
#[test]
fn test_overflow_4747b9() {
let _ = rem_pio2(SoftF64(917340800458.2274));
}
}
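
// Illustrative sketch, not part of the upstream crate: f64's PI sits just
// below 2 * (pi/2), so the reduction lands in quadrant 2 with a remainder on
// the order of the gap between f64's PI and the true pi (~1.22e-16).
#[cfg(test)]
mod quadrant_sketch {
    use super::{rem_pio2, SoftF64};

    #[test]
    fn reduces_pi_to_quadrant_two() {
        let (n, y0, _y1) = rem_pio2(SoftF64(core::f64::consts::PI));
        assert_eq!(n, 2);
        let r = y0.to_f64();
        assert!(r > -1e-15 && r < 1e-15);
    }
}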

View File

@@ -0,0 +1,534 @@
#![allow(unused_unsafe)]
/* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
use crate::soft_f64::SoftF64;
use super::{eq, ge, scalbn};
// initial value for jk
const INIT_JK: [usize; 4] = [3, 4, 4, 6];
// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
//
// integer array, contains the (24*i)-th to (24*i+23)-th
// bit of 2/pi after binary point. The corresponding
// floating value is
//
// ipio2[i] * 2^(-24(i+1)).
//
// NB: This table must have at least (e0-3)/24 + jk terms.
// For quad precision (e0 <= 16360, jk = 6), this is 686.
#[cfg(any(target_pointer_width = "32", target_pointer_width = "16"))]
const IPIO2: [i32; 66] = [
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163,
0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C,
0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292,
0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
0x73A8C9, 0x60E27B, 0xC08C6B,
];
#[cfg(target_pointer_width = "64")]
const IPIO2: [i32; 690] = [
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163,
0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C,
0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292,
0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
0x73A8C9, 0x60E27B, 0xC08C6B, 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6,
0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, 0xDE4F98, 0x327DBB, 0xC33D26,
0xEF6B1E, 0x5EF89F, 0x3A1F35, 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30,
0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, 0x467D86, 0x2D71E3, 0x9AC69B,
0x006233, 0x7CD2B4, 0x97A7B4, 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770,
0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, 0xCB2324, 0x778AD6, 0x23545A,
0xB91F00, 0x1B0AF1, 0xDFCE19, 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522,
0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, 0xDE3B58, 0x929BDE, 0x2822D2,
0xE88628, 0x4D58E2, 0x32CAC6, 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E,
0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, 0xD36710, 0xD8DDAA, 0x425FAE,
0xCE616A, 0xA4280A, 0xB499D3, 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF,
0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, 0x36D9CA, 0xD2A828, 0x8D61C2,
0x77C912, 0x142604, 0x9B4612, 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929,
0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, 0xC3E7B3, 0x28F8C7, 0x940593,
0x3E71C1, 0xB3092E, 0xF3450B, 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C,
0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, 0x9794E8, 0x84E6E2, 0x973199,
0x6BED88, 0x365F5F, 0x0EFDBB, 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC,
0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, 0x90AA47, 0x02E774, 0x24D6BD,
0xA67DF7, 0x72486E, 0xEF169F, 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5,
0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, 0x10D86D, 0x324832, 0x754C5B,
0xD4714E, 0x6E5445, 0xC1090B, 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA,
0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, 0x6AE290, 0x89D988, 0x50722C,
0xBEA404, 0x940777, 0x7030F3, 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3,
0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, 0x3BDF08, 0x2B3715, 0xA0805C,
0x93805A, 0x921110, 0xD8E80F, 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61,
0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, 0xAA140A, 0x2F2689, 0x768364,
0x333B09, 0x1A940E, 0xAA3A51, 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0,
0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, 0x5BC3D8, 0xC492F5, 0x4BADC6,
0xA5CA4E, 0xCD37A7, 0x36A9E6, 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC,
0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, 0x306529, 0xBF5657, 0x3AFF47,
0xB9F96A, 0xF3BE75, 0xDF9328, 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D,
0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, 0xA8654F, 0xA5C1D2, 0x0F3F0B,
0xCD785B, 0x76F923, 0x048B7B, 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4,
0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, 0xDA4886, 0xA05DF7, 0xF480C6,
0x2FF0AC, 0x9AECDD, 0xBC5C3F, 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD,
0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, 0x2A1216, 0x2DB7DC, 0xFDE5FA,
0xFEDB89, 0xFDBE89, 0x6C76E4, 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761,
0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, 0x48D784, 0x16DF30, 0x432DC7,
0x356125, 0xCE70C9, 0xB8CB30, 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262,
0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, 0xC4F133, 0x5F6E13, 0xE4305D,
0xA92E85, 0xC3B21D, 0x3632A1, 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C,
0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, 0xCBDA11, 0xD0BE7D, 0xC1DB9B,
0xBD17AB, 0x81A2CA, 0x5C6A08, 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196,
0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, 0x4F6A68, 0xA82A4A, 0x5AC44F,
0xBCF82D, 0x985AD7, 0x95C7F4, 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC,
0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, 0xD0C0B2, 0x485551, 0x0EFB1E,
0xC37295, 0x3B06A3, 0x3540C0, 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C,
0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, 0x3C3ABA, 0x461846, 0x5F7555,
0xF5BDD2, 0xC6926E, 0x5D2EAC, 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22,
0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, 0x745D7C, 0xB2AD6B, 0x9D6ECD,
0x7B723E, 0x6A11C6, 0xA9CFF7, 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5,
0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, 0xBEFDFD, 0xEF4556, 0x367ED9,
0x13D9EC, 0xB9BA8B, 0xFC97C4, 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF,
0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, 0x9C2A3E, 0xCC5F11, 0x4A0BFD,
0xFBF4E1, 0x6D3B8E, 0x2C86E2, 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138,
0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, 0xCC2254, 0xDC552A, 0xD6C6C0,
0x96190B, 0xB8701A, 0x649569, 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34,
0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, 0x9B5861, 0xBC57E1, 0xC68351,
0x103ED8, 0x4871DD, 0xDD1C2D, 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F,
0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, 0x382682, 0x9BE7CA, 0xA40D51,
0xB13399, 0x0ED7A9, 0x480569, 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B,
0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, 0x5FD45E, 0xA4677B, 0x7AACBA,
0xA2F655, 0x23882B, 0x55BA41, 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49,
0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, 0xAE5ADB, 0x86C547, 0x624385,
0x3B8621, 0x94792C, 0x876110, 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8,
0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, 0xB1933D, 0x0B7CBD, 0xDC51A4,
0x63DD27, 0xDDE169, 0x19949A, 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270,
0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, 0x4D7E6F, 0x5119A5, 0xABF9B5,
0xD6DF82, 0x61DD96, 0x023616, 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B,
0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0,
];
const PIO2: [SoftF64; 8] = [
SoftF64(1.57079625129699707031e+00), /* 0x3FF921FB, 0x40000000 */
SoftF64(7.54978941586159635335e-08), /* 0x3E74442D, 0x00000000 */
SoftF64(5.39030252995776476554e-15), /* 0x3CF84698, 0x80000000 */
SoftF64(3.28200341580791294123e-22), /* 0x3B78CC51, 0x60000000 */
SoftF64(1.27065575308067607349e-29), /* 0x39F01B83, 0x80000000 */
SoftF64(1.22933308981111328932e-36), /* 0x387A2520, 0x40000000 */
SoftF64(2.73370053816464559624e-44), /* 0x36E38222, 0x80000000 */
SoftF64(2.16741683877804819444e-51), /* 0x3569F31D, 0x00000000 */
];
//
// Input parameters:
// x[] The input value (must be positive) is broken into nx
// pieces of 24-bit integers in double precision format.
// x[i] will be the i-th 24 bit of x. The scaled exponent
// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
//              matches x up to 24 bits).
//
// Example of breaking a double positive z into x[0]+x[1]+x[2]:
// e0 = ilogb(z)-23
// z = scalbn(z,-e0)
// for i = 0,1,2
// x[i] = floor(z)
// z = (z-x[i])*2**24
//
// y[] output result in an array of double precision numbers.
// The dimension of y[] is:
// 24-bit precision 1
// 53-bit precision 2
// 64-bit precision 2
// 113-bit precision 3
// The actual value is the sum of them. Thus for 113-bit
// precision, one may have to do something like:
//
// long double t,w,r_head, r_tail;
// t = (long double)y[2] + (long double)y[1];
// w = (long double)y[0];
// r_head = t+w;
// r_tail = w - (r_head - t);
//
// e0 The exponent of x[0]. Must be <= 16360 or you need to
// expand the ipio2 table.
//
// prec an integer indicating the precision:
// 0 24 bits (single)
// 1 53 bits (double)
// 2 64 bits (extended)
// 3 113 bits (quad)
//
// Here is the description of some local variables:
//
// jk jk+1 is the initial number of terms of ipio2[] needed
// in the computation. The minimum and recommended value
// for jk is 3,4,4,6 for single, double, extended, and quad.
// jk+1 must be 2 larger than you might expect so that our
// recomputation test works. (Up to 24 bits in the integer
// part (the 24 bits of it that we compute) and 23 bits in
// the fraction part may be lost to cancelation before we
// recompute.)
//
// jz local integer variable indicating the number of
// terms of ipio2[] used.
//
// jx nx - 1
//
// jv index for pointing to the suitable ipio2[] for the
// computation. In general, we want
// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
// is an integer. Thus
// e0-3-24*jv >= 0 or (e0-3)/24 >= jv
// Hence jv = max(0,(e0-3)/24).
//
// jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
//
// q[] double array with integral value, representing the
// 24-bits chunk of the product of x and 2/pi.
//
// q0 the corresponding exponent of q[0]. Note that the
// exponent for q[i] would be q0-24*i.
//
// PIo2[] double precision array, obtained by cutting pi/2
// into 24 bits chunks.
//
// f[] ipio2[] in floating point
//
// iq[] integer array by breaking up q[] in 24-bits chunk.
//
// fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
//
// ih integer. If >0 it indicates q[] is >= 0.5, hence
// it also indicates the *sign* of the result.
/// Return the last three digits of N with y = x - N*pi/2
/// so that |y| < pi/2.
///
/// The method is to compute the integer (mod 8) and fraction parts of
/// (2/pi)*x without doing the full multiplication. In general we
/// skip the part of the product that is known to be a huge integer (
/// more accurately, = 0 mod 8 ). Thus the number of operations is
/// independent of the exponent of the input.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub(crate) const fn rem_pio2_large<const Y: usize>(
x: &[SoftF64],
y: &[SoftF64; Y],
e0: i32,
prec: usize,
) -> (i32, [SoftF64; Y]) {
let mut y: [SoftF64; Y] = *y;
let x1p24 = SoftF64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24
let x1p_24 = SoftF64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24)
let nx = x.len();
let mut fw: SoftF64;
let mut n: i32;
let mut ih: i32;
let mut z: SoftF64;
let mut f: [SoftF64; 20] = [SoftF64(0.); 20];
let mut fq: [SoftF64; 20] = [SoftF64(0.); 20];
let mut q: [SoftF64; 20] = [SoftF64(0.); 20];
let mut iq: [i32; 20] = [0; 20];
/* initialize jk*/
let jk = INIT_JK[prec];
let jp = jk;
/* determine jx,jv,q0, note that 3>q0 */
let jx = nx - 1;
let mut jv = (e0 - 3) / 24;
if jv < 0 {
jv = 0;
}
let mut q0 = e0 - 24 * (jv + 1);
let jv = jv as usize;
/* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
let mut j = (jv as i32) - (jx as i32);
let m = jx + jk;
{
let mut i = 0;
while i <= m {
if j < 0 {
f[i] = SoftF64::ZERO;
} else {
f[i] = SoftF64(IPIO2[j as usize] as f64);
}
j += 1;
i += 1;
}
}
/* compute q[0],q[1],...q[jk] */
{
let mut i = 0;
while i <= jk {
fw = SoftF64::ZERO;
let mut j = 0;
while j <= jx {
fw = fw.add(x[j].mul(f[jx + i - j]));
j += 1;
}
q[i] = fw;
i += 1;
}
}
let mut jz = jk;
'recompute: loop {
/* distill q[] into iq[] reversingly */
let mut i = 0i32;
z = q[jz];
{
let mut j = jz;
while j >= 1 {
fw = SoftF64((x1p_24.mul(z)).0 as i32 as f64);
iq[i as usize] = z.sub(x1p24.mul(fw)).0 as i32;
z = q[j - 1].add(fw);
i += 1;
j -= 1;
}
}
/* compute n */
z = scalbn(z, q0); /* actual value of z */
z = z.sub(SoftF64(8.0).mul(z.mul(SoftF64(0.125)).floor())); /* trim off integer >= 8 */
n = z.0 as i32;
z = z.sub(SoftF64(n as f64));
ih = 0;
if q0 > 0 {
/* need iq[jz-1] to determine n */
i = iq[jz - 1] >> (24 - q0);
n += i;
iq[jz - 1] = iq[jz - 1] - (i << (24 - q0));
ih = iq[jz - 1] >> (23 - q0);
} else if q0 == 0 {
ih = iq[jz - 1] >> 23;
} else if ge(z, SoftF64(0.5)) {
ih = 2;
}
if ih > 0 {
/* q > 0.5 */
n += 1;
let mut carry = 0i32;
{
let mut i = 0;
while i < jz {
/* compute 1-q */
let j = iq[i];
if carry == 0 {
if j != 0 {
carry = 1;
iq[i] = 0x1000000 - j;
}
} else {
iq[i] = 0xffffff - j;
}
i += 1;
}
}
if q0 > 0 {
/* rare case: chance is 1 in 12 */
match q0 {
1 => {
iq[jz - 1] &= 0x7fffff;
}
2 => {
iq[jz - 1] &= 0x3fffff;
}
_ => {}
}
}
if ih == 2 {
z = SoftF64::ONE.sub(z);
if carry != 0 {
z = z.sub(scalbn(SoftF64::ONE, q0));
}
}
}
/* check if recomputation is needed */
if eq(z, SoftF64::ZERO) {
let mut j = 0;
{
let mut i = jz - 1;
while i >= jk {
j |= iq[i];
i -= 1;
}
}
if j == 0 {
/* need recomputation */
let mut k = 1;
while iq[jk - k] == 0 {
k += 1; /* k = no. of terms needed */
}
{
let mut i = jz + 1;
while i <= jz + k {
/* add q[jz+1] to q[jz+k] */
f[jx + i] = SoftF64(IPIO2[jv + i] as f64);
fw = SoftF64::ZERO;
{
let mut j = 0;
while j <= jx {
fw = fw.add(x[j].mul(f[jx + i - j]));
j += 1;
}
}
q[i] = fw;
i += 1;
}
}
jz += k;
continue 'recompute;
}
}
break;
}
/* chop off zero terms */
if eq(z, SoftF64::ZERO) {
jz -= 1;
q0 -= 24;
while iq[jz] == 0 {
jz -= 1;
q0 -= 24;
}
} else {
/* break z into 24-bit if necessary */
z = scalbn(z, -q0);
if ge(z, x1p24) {
fw = SoftF64(x1p_24.mul(z).0 as i32 as f64);
iq[jz] = z.sub(x1p24.mul(fw)).0 as i32;
jz += 1;
q0 += 24;
iq[jz] = fw.0 as i32;
} else {
iq[jz] = z.0 as i32;
}
}
/* convert integer "bit" chunk to floating-point value */
fw = scalbn(SoftF64::ONE, q0);
{
let mut i = jz;
while i != usize::MAX {
q[i] = fw.mul(SoftF64(iq[i] as f64));
fw = fw.mul(x1p_24);
i = i.wrapping_sub(1);
}
}
/* compute PIo2[0,...,jp]*q[jz,...,0] */
{
let mut i = jz;
while i != usize::MAX {
fw = SoftF64::ZERO;
let mut k = 0;
while (k <= jp) && (k <= jz - i) {
fw = fw.add(PIO2[k].mul(q[i + k]));
k += 1;
}
fq[jz - i] = fw;
i = i.wrapping_sub(1);
}
}
/* compress fq[] into y[] */
match prec {
0 => {
fw = SoftF64::ZERO;
{
let mut i = jz;
while i != usize::MAX {
fw = fw.add(fq[i]);
i = i.wrapping_sub(1);
}
}
y[0] = if ih == 0 { fw } else { fw.neg() };
}
1 | 2 => {
fw = SoftF64::ZERO;
{
let mut i = jz;
while i != usize::MAX {
fw = fw.add(fq[i]);
i = i.wrapping_sub(1);
}
}
// TODO: drop excess precision here once double_t is used
fw = SoftF64(fw.0 as f64);
y[0] = if ih == 0 { fw } else { fw.neg() };
fw = fq[0].sub(fw);
{
let mut i = 1;
while i <= jz {
fw = fw.add(fq[i]);
i += 1;
}
}
y[1] = if ih == 0 { fw } else { fw.neg() };
}
3 => {
/* painful */
{
let mut i = jz;
while i >= 1 {
fw = fq[i - 1].add(fq[i]);
fq[i] = fq[i].add(fq[i - 1].sub(fw));
fq[i - 1] = fw;
i -= 1;
}
}
{
let mut i = jz;
while i >= 2 {
fw = fq[i - 1].add(fq[i]);
fq[i] = fq[i].add(fq[i - 1].sub(fw));
fq[i - 1] = fw;
i -= 1;
}
}
fw = SoftF64::ZERO;
let mut i = jz;
while i >= 2 {
fw = fw.add(fq[i]);
i -= 1;
}
if ih == 0 {
y[0] = fq[0];
y[1] = fq[1];
y[2] = fw;
} else {
y[0] = fq[0].neg();
y[1] = fq[1].neg();
y[2] = fw.neg();
}
}
_ => unreachable!(),
};
(n & 7, y)
}

View File

@@ -0,0 +1,34 @@
use crate::soft_f64::SoftF64;
pub(crate) const fn scalbn(x: SoftF64, mut n: i32) -> SoftF64 {
let x1p1023 = SoftF64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023
let x1p53 = SoftF64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53
let x1p_1022 = SoftF64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022)
let mut y = x;
if n > 1023 {
y = y.mul(x1p1023);
n -= 1023;
if n > 1023 {
y = y.mul(x1p1023);
n -= 1023;
if n > 1023 {
n = 1023;
}
}
} else if n < -1022 {
/* make sure final n < -53 to avoid double
rounding in the subnormal range */
y = y.mul(x1p_1022.mul(x1p53));
n += 1022 - 53;
if n < -1022 {
y = y.mul(x1p_1022.mul(x1p53));
n += 1022 - 53;
if n < -1022 {
n = -1022;
}
}
}
y.mul(SoftF64::from_bits(((0x3ff + n) as u64) << 52))
}
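
// Illustrative sketch, not part of the upstream crate: scalbn multiplies by
// 2^n through exponent manipulation, with the clamping above saturating huge
// n into an overflow to infinity.
#[cfg(test)]
mod scalbn_sketch {
    use super::*;

    #[test]
    fn scales_by_powers_of_two() {
        assert_eq!(scalbn(SoftF64(1.5), 4).to_f64(), 24.0);
        assert_eq!(scalbn(SoftF64(1.0), -1).to_f64(), 0.5);
        assert!(scalbn(SoftF64(1.0), 2000).to_f64().is_infinite());
    }
}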

View File

@@ -0,0 +1,68 @@
use crate::soft_f64::SoftF64;
type F = SoftF64;
impl From<f64> for F {
fn from(value: f64) -> Self {
F::from_f64(value)
}
}
impl PartialEq<Self> for F {
fn eq(&self, other: &Self) -> bool {
match self.cmp(*other) {
Some(core::cmp::Ordering::Equal) => true,
_ => false,
}
}
}
impl PartialOrd for F {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
self.cmp(*other)
}
}
impl core::ops::Add for F {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
F::add(self, rhs)
}
}
impl core::ops::Sub for F {
type Output = Self;
fn sub(self, rhs: Self) -> Self::Output {
F::sub(self, rhs)
}
}
impl core::ops::Mul for F {
type Output = Self;
fn mul(self, rhs: Self) -> Self::Output {
F::mul(self, rhs)
}
}
impl core::ops::Div for F {
type Output = Self;
fn div(self, rhs: Self) -> Self::Output {
F::div(self, rhs)
}
}
impl core::ops::AddAssign for F {
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
impl core::ops::SubAssign for F {
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}
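
// Illustrative sketch, not part of the upstream crate: with these impls the
// wrapper composes with ordinary operator syntax at runtime, delegating to
// the soft-float routines.
#[cfg(test)]
mod operator_sketch {
    use super::*;

    #[test]
    fn operators_delegate_to_soft_ops() {
        let mut x = SoftF64(1.0) + SoftF64(2.0);
        x -= SoftF64(0.5);
        assert!(x == SoftF64(2.5));
        assert!(SoftF64(1.0) < SoftF64(2.0));
    }
}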

View File

@@ -0,0 +1,178 @@
pub(crate) mod helpers;
pub mod add;
pub mod cmp;
pub mod copysign;
pub mod cos;
pub mod div;
pub mod floor;
pub mod mul;
pub mod pow;
pub mod round;
pub mod sin;
pub mod sqrt;
pub mod trunc;
#[cfg(feature = "const_trait_impl")]
pub mod const_impl_trait;
#[cfg(feature = "const_trait_impl")]
pub use const_impl_trait as impl_trait;
#[cfg(not(feature = "const_trait_impl"))]
pub mod impl_trait;
#[derive(Default, Copy, Clone)]
#[repr(transparent)]
pub struct SoftF64(pub f64);
impl SoftF64 {
pub const fn from_f64(a: f64) -> Self {
Self(a)
}
pub const fn to_f64(self) -> f64 {
self.0
}
pub const fn from_bits(a: u64) -> Self {
Self(unsafe { core::mem::transmute(a) })
}
pub const fn to_bits(self) -> u64 {
unsafe { core::mem::transmute(self.0) }
}
pub const fn add(self, rhs: Self) -> Self {
add::add(self, rhs)
}
pub const fn mul(self, rhs: Self) -> Self {
mul::mul(self, rhs)
}
pub const fn div(self, rhs: Self) -> Self {
div::div(self, rhs)
}
pub const fn cmp(self, rhs: Self) -> Option<core::cmp::Ordering> {
cmp::cmp(self, rhs)
}
pub const fn neg(self) -> Self {
Self::from_repr(self.repr() ^ Self::SIGN_MASK)
}
pub const fn sub(self, rhs: Self) -> Self {
self.add(rhs.neg())
}
pub const fn sqrt(self) -> Self {
sqrt::sqrt(self)
}
pub const fn powi(self, n: i32) -> Self {
pow::pow(self, n)
}
pub const fn copysign(self, other: Self) -> Self {
copysign::copysign(self, other)
}
pub const fn trunc(self) -> Self {
trunc::trunc(self)
}
pub const fn round(self) -> Self {
round::round(self)
}
pub const fn floor(self) -> Self {
floor::floor(self)
}
pub const fn sin(self) -> Self {
sin::sin(self)
}
pub const fn cos(self) -> Self {
cos::cos(self)
}
}
type SelfInt = u64;
type SelfSignedInt = i64;
type SelfExpInt = i16;
#[allow(unused)]
impl SoftF64 {
const ZERO: Self = Self(0.0);
const ONE: Self = Self(1.0);
const BITS: u32 = 64;
const SIGNIFICAND_BITS: u32 = 52;
const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1;
const SIGN_MASK: SelfInt = 1 << (Self::BITS - 1);
const SIGNIFICAND_MASK: SelfInt = (1 << Self::SIGNIFICAND_BITS) - 1;
const IMPLICIT_BIT: SelfInt = 1 << Self::SIGNIFICAND_BITS;
const EXPONENT_MASK: SelfInt = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK);
const fn repr(self) -> SelfInt {
self.to_bits()
}
const fn signed_repr(self) -> SelfSignedInt {
self.to_bits() as SelfSignedInt
}
const fn sign(self) -> bool {
self.signed_repr() < 0
}
const fn exp(self) -> SelfExpInt {
((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as SelfExpInt
}
const fn frac(self) -> SelfInt {
self.to_bits() & Self::SIGNIFICAND_MASK
}
const fn imp_frac(self) -> SelfInt {
self.frac() | Self::IMPLICIT_BIT
}
const fn from_repr(a: SelfInt) -> Self {
Self::from_bits(a)
}
const fn from_parts(sign: bool, exponent: SelfInt, significand: SelfInt) -> Self {
Self::from_repr(
((sign as SelfInt) << (Self::BITS - 1))
| ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK)
| (significand & Self::SIGNIFICAND_MASK),
)
}
const fn normalize(significand: SelfInt) -> (i32, SelfInt) {
let shift = significand
.leading_zeros()
.wrapping_sub((1u64 << Self::SIGNIFICAND_BITS).leading_zeros());
(
1i32.wrapping_sub(shift as i32),
significand << shift as SelfInt,
)
}
const fn is_subnormal(self) -> bool {
(self.repr() & Self::EXPONENT_MASK) == 0
}
const fn scalbn(self, n: i32) -> SoftF64 {
helpers::scalbn(self, n)
}
}
const fn u128_lo(x: u128) -> u64 {
x as u64
}
const fn u128_hi(x: u128) -> u64 {
(x >> 64) as u64
}
const fn u64_widen_mul(a: u64, b: u64) -> (u64, u64) {
let x = u128::wrapping_mul(a as _, b as _);
(u128_lo(x), u128_hi(x))
}
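// An illustrative sketch (not from the upstream source) showing the public
// API above in const context on stable Rust; it assumes add/mul/sub are exact
// for these small, exactly representable values.
#[cfg(test)]
mod const_context_tests {
use super::SoftF64;
// Evaluated entirely at compile time.
const SUM: SoftF64 = SoftF64(1.5).add(SoftF64(2.25));
const PRODUCT: SoftF64 = SUM.mul(SoftF64(2.0));
const DIFF: SoftF64 = PRODUCT.sub(SUM);
#[test]
fn const_evaluation() {
assert_eq!(SUM.to_f64(), 3.75);
assert_eq!(PRODUCT.to_f64(), 7.5);
assert_eq!(DIFF.to_f64(), 3.75);
}
}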

View File

@@ -0,0 +1,194 @@
use crate::soft_f64::{u64_widen_mul, SoftF64};
type F = SoftF64;
type FInt = u64;
const fn widen_mul(a: FInt, b: FInt) -> (FInt, FInt) {
u64_widen_mul(a, b)
}
pub(crate) const fn mul(a: F, b: F) -> F {
let one: FInt = 1;
let zero: FInt = 0;
let bits = F::BITS;
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let exponent_bias = F::EXPONENT_BIAS;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
let exponent_bits = F::EXPONENT_BITS;
let a_rep = a.repr();
let b_rep = b.repr();
let a_exponent = (a_rep >> significand_bits) & max_exponent as FInt;
let b_exponent = (b_rep >> significand_bits) & max_exponent as FInt;
let product_sign = (a_rep ^ b_rep) & sign_bit;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
let mut scale = 0;
// Detect if a or b is zero, denormal, infinity, or NaN.
if a_exponent.wrapping_sub(one) >= (max_exponent - 1) as FInt
|| b_exponent.wrapping_sub(one) >= (max_exponent - 1) as FInt
{
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// NaN * anything = qNaN
if a_abs > inf_rep {
return F::from_repr(a_rep | quiet_bit);
}
// anything * NaN = qNaN
if b_abs > inf_rep {
return F::from_repr(b_rep | quiet_bit);
}
if a_abs == inf_rep {
if b_abs != zero {
// infinity * non-zero = +/- infinity
return F::from_repr(a_abs | product_sign);
} else {
// infinity * zero = NaN
return F::from_repr(qnan_rep);
}
}
if b_abs == inf_rep {
if a_abs != zero {
// infinity * non-zero = +/- infinity
return F::from_repr(b_abs | product_sign);
} else {
// infinity * zero = NaN
return F::from_repr(qnan_rep);
}
}
// zero * anything = +/- zero
if a_abs == zero {
return F::from_repr(product_sign);
}
// anything * zero = +/- zero
if b_abs == zero {
return F::from_repr(product_sign);
}
// one or both of a or b is denormal, the other (if applicable) is a
// normal number. Renormalize one or both of a and b, and set scale to
// include the necessary exponent adjustment.
if a_abs < implicit_bit {
let (exponent, significand) = F::normalize(a_significand);
scale += exponent;
a_significand = significand;
}
if b_abs < implicit_bit {
let (exponent, significand) = F::normalize(b_significand);
scale += exponent;
b_significand = significand;
}
}
// Or in the implicit significand bit. (If we fell through from the
// denormal path it was already set by normalize(), but setting it twice
// won't hurt anything.)
a_significand |= implicit_bit;
b_significand |= implicit_bit;
// Get the significand of a*b. Before multiplying the significands, shift
// one of them left to left-align it in the field. Thus, the product will
// have (exponentBits + 2) integral digits, all but two of which must be
// zero. Normalizing this result is just a conditional left-shift by one
// and bumping the exponent accordingly.
let (mut product_low, mut product_high) =
widen_mul(a_significand, b_significand << exponent_bits);
let a_exponent_i32: i32 = a_exponent as _;
let b_exponent_i32: i32 = b_exponent as _;
let mut product_exponent: i32 = a_exponent_i32
.wrapping_add(b_exponent_i32)
.wrapping_add(scale)
.wrapping_sub(exponent_bias as i32);
// Normalize the significand, adjust exponent if needed.
if (product_high & implicit_bit) != zero {
product_exponent = product_exponent.wrapping_add(1);
} else {
product_high = (product_high << 1) | (product_low >> (bits - 1));
product_low <<= 1;
}
// If we have overflowed the type, return +/- infinity.
if product_exponent >= max_exponent as i32 {
return F::from_repr(inf_rep | product_sign);
}
if product_exponent <= 0 {
// Result is denormal before rounding
//
// If the result is so small that it just underflows to zero, return
// a zero of the appropriate sign. Mathematically there is no need to
// handle this case separately, but we make it a special case to
// simplify the shift logic.
let shift = one.wrapping_sub(product_exponent as FInt) as u32;
if shift >= bits {
return F::from_repr(product_sign);
}
// Otherwise, shift the significand of the result so that the round
// bit is the high bit of productLo.
if shift < bits {
let sticky = product_low << (bits - shift);
product_low = product_high << (bits - shift) | product_low >> shift | sticky;
product_high >>= shift;
} else if shift < (2 * bits) {
let sticky = product_high << (2 * bits - shift) | product_low;
product_low = product_high >> (shift - bits) | sticky;
product_high = zero;
} else {
product_high = zero;
}
} else {
// Result is normal before rounding; insert the exponent.
product_high &= significand_mask;
product_high |= (product_exponent as FInt) << significand_bits;
}
// Insert the sign of the result:
product_high |= product_sign;
// Final rounding. The final result may overflow to infinity, or underflow
// to zero, but those are the correct results in those cases. We use the
// default IEEE-754 round-to-nearest, ties-to-even rounding mode.
if product_low > sign_bit {
product_high += one;
}
if product_low == sign_bit {
product_high += product_high & one;
}
F::from_repr(product_high)
}
#[cfg(test)]
mod test {
use crate::soft_f64::SoftF64;
#[test]
fn sanity_check() {
assert_eq!(SoftF64(2.0).mul(SoftF64(2.0)).0, 4.0)
}
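// An illustrative sketch (not from the upstream source): spot-checks the
// rounding and denormal paths above against native f64 multiplication,
// assuming the host multiply is IEEE-754 round-to-nearest, ties-to-even.
#[test]
fn matches_native_rounding() {
for (a, b) in [(0.1, 0.2), (1.0e-308, 0.5), (-3.5e300, 2.0e-5)]
.iter()
.copied()
{
assert_eq!(SoftF64(a).mul(SoftF64(b)).to_f64(), a * b);
}
}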
}

View File

@@ -0,0 +1,37 @@
use crate::abs_diff;
use crate::soft_f64::SoftF64;
type F = SoftF64;
pub(crate) const fn pow(a: F, b: i32) -> F {
let mut a = a;
let recip = b < 0;
let mut pow = abs_diff(b, 0);
let mut mul = F::ONE;
loop {
if (pow & 1) != 0 {
mul = mul.mul(a);
}
pow >>= 1;
if pow == 0 {
break;
}
a = a.mul(a);
}
if recip {
F::ONE.div(mul)
} else {
mul
}
}
#[cfg(test)]
mod test {
use crate::soft_f64::SoftF64;
#[test]
fn sanity_check() {
assert_eq!(SoftF64(2.0).powi(2).0, 4.0)
}
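// An illustrative sketch (not from the upstream source) exercising the
// exponentiation-by-squaring loop above, including the negative-exponent
// (reciprocal) branch; all expected values are exact in f64.
#[test]
fn squaring_loop_and_reciprocal() {
assert_eq!(SoftF64(3.0).powi(5).0, 243.0);
assert_eq!(SoftF64(2.0).powi(-3).0, 0.125);
assert_eq!(SoftF64(7.0).powi(0).0, 1.0);
}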
}

View File

@@ -0,0 +1,31 @@
use super::SoftF64;
pub(crate) const fn round(x: SoftF64) -> SoftF64 {
SoftF64::trunc(x.add(SoftF64::copysign(
SoftF64(0.5).sub(SoftF64(0.25).mul(SoftF64(f64::EPSILON))),
x,
)))
}
#[cfg(test)]
mod tests {
use super::SoftF64;
#[test]
fn negative_zero() {
assert_eq!(
SoftF64::round(SoftF64(-0.0)).to_bits(),
SoftF64(-0.0).to_bits()
);
}
#[test]
fn sanity_check() {
assert_eq!((SoftF64(-1.0)).round().0, -1.0);
assert_eq!((SoftF64(2.8)).round().0, 3.0);
assert_eq!((SoftF64(-0.5)).round().0, -1.0);
assert_eq!((SoftF64(0.5)).round().0, 1.0);
assert_eq!((SoftF64(-1.5)).round().0, -2.0);
assert_eq!((SoftF64(1.5)).round().0, 2.0);
}
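// An illustrative sketch (not from the upstream source): the 0.5 - 0.25*EPSILON
// nudge above keeps the largest f64 below 0.5 from rounding up, which a naive
// trunc(x + 0.5) would do; this assumes add and trunc are correctly rounded.
#[test]
fn no_double_rounding_just_below_half() {
// 0.49999999999999994 is the next f64 below 0.5.
assert_eq!(SoftF64(0.49999999999999994).round().0, 0.0);
// The naive formulation rounds the same input up to 1.0.
assert_eq!(SoftF64(0.49999999999999994).add(SoftF64(0.5)).trunc().0, 1.0);
}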
}

View File

@@ -0,0 +1,98 @@
// origin: FreeBSD /usr/src/lib/msun/src/s_sin.c, https://github.com/rust-lang/libm/blob/4c8a973741c014b11ce7f1477693a3e5d4ef9609/src/math/sin.rs
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunPro, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
use super::{
helpers::{k_cos, k_sin, rem_pio2},
SoftF64,
};
// sin(x)
// Return sine function of x.
//
// kernel function:
// k_sin ... sine function on [-pi/4,pi/4]
// k_cos ... cosine function on [-pi/4,pi/4]
// rem_pio2 ... argument reduction routine
//
// Method.
// Let S,C and T denote the sin, cos and tan respectively on
// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
// in [-pi/4 , +pi/4], and let n = k mod 4.
// We have
//
//        n        sin(x)      cos(x)        tan(x)
//   ----------------------------------------------------------
//        0          S            C             T
//        1          C           -S           -1/T
//        2         -S           -C             T
//        3         -C            S           -1/T
//   ----------------------------------------------------------
//
// Special cases:
// Let trig be any of sin, cos, or tan.
// trig(+-INF) is NaN, with signals;
// trig(NaN) is that NaN;
//
// Accuracy:
// TRIG(x) returns trig(x) nearly rounded
pub(crate) const fn sin(x: SoftF64) -> SoftF64 {
let x1p120 = SoftF64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
/* High word of x. */
let ix = (SoftF64::to_bits(x) >> 32) as u32 & 0x7fffffff;
/* |x| ~< pi/4 */
if ix <= 0x3fe921fb {
if ix < 0x3e500000 {
/* |x| < 2**-26 */
/* raise inexact if x != 0 and underflow if subnormal*/
if ix < 0x00100000 {
x.div(x1p120);
} else {
x.add(x1p120);
}
return x;
}
return k_sin(x, SoftF64::ZERO, 0);
}
/* sin(Inf or NaN) is NaN */
if ix >= 0x7ff00000 {
return x.sub(x);
}
/* argument reduction needed */
let (n, y0, y1) = rem_pio2(x);
match n & 3 {
0 => k_sin(y0, y1, 1),
1 => k_cos(y0, y1),
2 => k_sin(y0, y1, 1).neg(),
_ => k_cos(y0, y1).neg(),
}
}
#[cfg(test)]
mod test {
use crate::soft_f64::SoftF64;
#[test]
fn test_near_pi() {
let x = SoftF64::from_bits(0x400921fb000FD5DD); // 3.141592026217707
let sx = SoftF64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7
let result = x.sin().0;
assert_eq!(result, sx.0);
}
#[test]
fn test_large_neg() {
assert_eq!(SoftF64(-1647101.0).sin().to_f64(), (-1647101.0_f64).sin())
}
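// An illustrative sketch (not from the upstream source) covering the special
// cases documented above: tiny arguments are returned unchanged, and
// sin(+-inf) and sin(NaN) are NaN.
#[test]
fn special_cases() {
assert_eq!(SoftF64(0.0).sin().to_f64(), 0.0);
assert_eq!(SoftF64(1.0e-30).sin().to_f64(), 1.0e-30);
assert!(SoftF64(f64::NAN).sin().to_f64().is_nan());
assert!(SoftF64(f64::INFINITY).sin().to_f64().is_nan());
}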
}

View File

@@ -0,0 +1,239 @@
/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* sqrt(x)
* Return correctly rounded sqrt.
* ------------------------------------------
* | Use the hardware sqrt if you have one |
* ------------------------------------------
* Method:
* Bit by bit method using integer arithmetic. (Slow, but portable)
* 1. Normalization
* Scale x to y in [1,4) with even powers of 2:
* find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
* sqrt(x) = 2^k * sqrt(y)
* 2. Bit by bit computation
 * Let q_i = sqrt(y) truncated to i bits after the binary point (q_0 = 1),
 * s_i = 2*q_i, and y_(i+1) = 2^(i+1) * ( y - q_i^2 ).              (1)
 *
 * To compute q_(i+1) from q_i, one checks whether
 *
 *     ( q_i + 2^-(i+1) )^2 <= y.                                   (2)
 *
 * If (2) is false, then q_(i+1) = q_i; otherwise q_(i+1) = q_i + 2^-(i+1).
 *
 * With some algebraic manipulation, it is not difficult to see
 * that (2) is equivalent to
 *
 *     s_i + 2^-(i+1) <= y_i.                                       (3)
 *
 * The advantage of (3) is that s_i and y_i can be computed by
 * the following recurrence formula:
 *     if (3) is false:  s_(i+1) = s_i,         y_(i+1) = y_i;      (4)
 *     otherwise:        s_(i+1) = s_i + 2^-i,  y_(i+1) = y_i - s_i - 2^-(i+1).   (5)
*
* One may easily use induction to prove (4) and (5).
 * Note. Since the left hand side of (3) contains only i+2 bits,
 * it is not necessary to do a full (53-bit) comparison
* in (3).
* 3. Final rounding
* After generating the 53 bits result, we compute one more bit.
* Together with the remainder, we can decide whether the
* result is exact, bigger than 1/2ulp, or less than 1/2ulp
* (it will never equal to 1/2ulp).
* The rounding mode can be detected by checking whether
* huge + tiny is equal to huge, and whether huge - tiny is
* equal to huge for some floating point number "huge" and "tiny".
*
* Special cases:
* sqrt(+-0) = +-0 ... exact
* sqrt(inf) = inf
* sqrt(-ve) = NaN ... with invalid signal
* sqrt(NaN) = NaN ... with invalid signal for signaling NaN
*/
use crate::soft_f64::{
helpers::{ge, gt},
SoftF64,
};
type F = SoftF64;
pub(crate) const fn sqrt(x: F) -> F {
const TINY: F = SoftF64(1.0e-300);
let mut z: F;
let sign: u32 = 0x80000000;
let mut ix0: i32;
let mut s0: i32;
let mut q: i32;
let mut m: i32;
let mut t: i32;
let mut i: i32;
let mut r: u32;
let mut t1: u32;
let mut s1: u32;
let mut ix1: u32;
let mut q1: u32;
ix0 = (x.to_bits() >> 32) as i32;
ix1 = x.to_bits() as u32;
/* take care of Inf and NaN */
if (ix0 & 0x7ff00000) == 0x7ff00000 {
return x.mul(x).add(x); /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
}
/* take care of zero */
if ix0 <= 0 {
if ((ix0 & !(sign as i32)) | ix1 as i32) == 0 {
return x; /* sqrt(+-0) = +-0 */
}
if ix0 < 0 {
return (x.sub(x)).div(x.sub(x)); /* sqrt(-ve) = sNaN */
}
}
/* normalize x */
m = ix0 >> 20;
if m == 0 {
/* subnormal x */
while ix0 == 0 {
m -= 21;
ix0 |= (ix1 >> 11) as i32;
ix1 <<= 21;
}
i = 0;
while (ix0 & 0x00100000) == 0 {
i += 1;
ix0 <<= 1;
}
m -= i - 1;
ix0 |= (ix1 as usize >> (32 - i) as usize) as i32;
ix1 = ix1 << i as usize;
}
m -= 1023; /* unbias exponent */
ix0 = (ix0 & 0x000fffff) | 0x00100000;
if (m & 1) == 1 {
/* odd m, double x to make it even */
ix0 += ix0 + ((ix1 & sign) >> 31) as i32;
ix1 = ix1.wrapping_add(ix1);
}
m >>= 1; /* m = [m/2] */
/* generate sqrt(x) bit by bit */
ix0 += ix0 + ((ix1 & sign) >> 31) as i32;
ix1 = ix1.wrapping_add(ix1);
q = 0; /* [q,q1] = sqrt(x) */
q1 = 0;
s0 = 0;
s1 = 0;
r = 0x00200000; /* r = moving bit from right to left */
while r != 0 {
t = s0 + r as i32;
if t <= ix0 {
s0 = t + r as i32;
ix0 -= t;
q += r as i32;
}
ix0 += ix0 + ((ix1 & sign) >> 31) as i32;
ix1 = ix1.wrapping_add(ix1);
r >>= 1;
}
r = sign;
while r != 0 {
t1 = s1.wrapping_add(r);
t = s0;
if t < ix0 || (t == ix0 && t1 <= ix1) {
s1 = t1.wrapping_add(r);
if (t1 & sign) == sign && (s1 & sign) == 0 {
s0 += 1;
}
ix0 -= t;
if ix1 < t1 {
ix0 -= 1;
}
ix1 = ix1.wrapping_sub(t1);
q1 += r;
}
ix0 += ix0 + ((ix1 & sign) >> 31) as i32;
ix1 = ix1.wrapping_add(ix1);
r >>= 1;
}
/* use floating add to find out rounding direction */
if (ix0 as u32 | ix1) != 0 {
z = SoftF64(1.0).sub(TINY); /* raise inexact flag */
if ge(z, SoftF64::ONE) {
z = SoftF64::ONE.add(TINY);
if q1 == 0xffffffff {
q1 = 0;
q += 1;
} else if gt(z, SoftF64::ONE) {
if q1 == 0xfffffffe {
q += 1;
}
q1 = q1.wrapping_add(2);
} else {
q1 += q1 & 1;
}
}
}
ix0 = (q >> 1) + 0x3fe00000;
ix1 = q1 >> 1;
if (q & 1) == 1 {
ix1 |= sign;
}
ix0 += m << 20;
SoftF64::from_bits((ix0 as u64) << 32 | ix1 as u64)
}
#[cfg(test)]
mod tests {
use super::*;
// use core::f64::*;
#[test]
fn sanity_check() {
const SQRT_100: SoftF64 = sqrt(SoftF64(100.0));
assert_eq!(SQRT_100.0, 10.0);
const SQRT_4: SoftF64 = sqrt(SoftF64(4.0));
assert_eq!(SQRT_4.0, 2.0);
}
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
#[test]
fn spec_tests() {
// Not Asserted: FE_INVALID exception is raised if argument is negative.
assert!(sqrt(SoftF64(-1.0)).0.is_nan());
assert!(sqrt(SoftF64(f64::NAN)).0.is_nan());
for f in [0.0, -0.0, f64::INFINITY].iter().copied() {
assert_eq!(sqrt(SoftF64(f)).0, f);
}
}
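// An illustrative sketch (not from the upstream source): the square root of a
// perfect square is exact, so the bit-by-bit loop above should reproduce the
// integer root; it also evaluates in const context.
#[test]
fn perfect_squares() {
const SQRT_144: SoftF64 = sqrt(SoftF64(144.0));
assert_eq!(SQRT_144.0, 12.0);
let mut i = 1u32;
while i <= 30 {
let f = i as f64;
assert_eq!(sqrt(SoftF64(f * f)).0, f);
i += 1;
}
}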
}

View File

@@ -0,0 +1,29 @@
use super::SoftF64;
pub(crate) const fn trunc(x: SoftF64) -> SoftF64 {
let mut i: u64 = x.to_bits();
let mut e: i64 = (i >> 52 & 0x7ff) as i64 - 0x3ff + 12;
if e >= 52 + 12 {
return x;
}
if e < 12 {
e = 1;
}
let m = -1i64 as u64 >> e;
if (i & m) == 0 {
return x;
}
i &= !m;
SoftF64::from_bits(i)
}
#[cfg(test)]
mod tests {
use crate::soft_f64::SoftF64;
#[test]
fn sanity_check() {
assert_eq!(super::trunc(SoftF64(1.1)).0, 1.0);
}
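// An illustrative sketch (not from the upstream source): truncation goes
// toward zero for negative inputs, and values with no fractional bits are
// returned unchanged.
#[test]
fn toward_zero_and_large_values() {
assert_eq!(super::trunc(SoftF64(-1.9)).0, -1.0);
assert_eq!(super::trunc(SoftF64(0.3)).0, 0.0);
assert_eq!(super::trunc(SoftF64(1.0e300)).0, 1.0e300);
}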
}