Vendor dependencies for 0.3.0 release

This commit is contained in:
2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

349
vendor/pxfm/src/acos.rs vendored Normal file
View File

@@ -0,0 +1,349 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::acospi::PI_OVER_TWO_F128;
use crate::asin::asin_eval;
use crate::asin_eval_dyadic::asin_eval_dyadic;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::round::RoundFinite;
/// Computes acos(x)
///
/// Returns NaN when |x| > 1 (including infinities) or when x is NaN, returns 0
/// for x = 1, and returns pi (PI.hi + PI.lo rounded once) for x = -1.
///
/// The result is first computed in double-double arithmetic and validated
/// with a Ziv rounding test (`r_upper == r_lower`). When the test is
/// inconclusive the value is recomputed by `acos_less_0p5_hard` (|x| < 0.5)
/// or `acos_hard` (0.5 <= |x| < 1).
///
/// Max found ULP 0.5
pub fn f_acos(x: f64) -> f64 {
// Biased 11-bit exponent field of x.
let x_e = (x.to_bits() >> 52) & 0x7ff;
// Exponent bias of f64 (1023).
const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
// pi/2 split into a (low, high) pair of f64 values.
const PI_OVER_TWO: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3c91a62633145c07),
f64::from_bits(0x3ff921fb54442d18),
);
// |x|, obtained by clearing the sign bit.
let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
// |x| < 0.5.
if x_e < E_BIAS - 1 {
// |x| < 2^-55.
if x_e < E_BIAS - 55 {
// When |x| < 2^-55, acos(x) = pi/2
// The sum keeps the tiny positive term |x| + 2^-160 instead of returning
// the constant. NOTE(review): presumably so the result is computed as an
// inexact sum that follows the active rounding mode - confirm.
return (x_abs + f64::from_bits(0x35f0000000000000)) + PI_OVER_TWO.hi;
}
let x_sq = DoubleDouble::from_exact_mult(x, x);
// Initial error bound handed to asin_eval: |x| * 2^-51.
let err = x_abs * f64::from_bits(0x3cc0000000000000);
// Polynomial approximation:
// p ~ asin(x)/x
let (p, err) = asin_eval(x_sq, err);
// asin(x) ~ x * p
let r0 = DoubleDouble::from_exact_mult(x, p.hi);
// acos(x) = pi/2 - asin(x)
// ~ pi/2 - x * p
// = pi/2 - x * (p.hi + p.lo)
let r_hi = f_fmla(-x, p.hi, PI_OVER_TWO.hi);
// Use Dekker's 2SUM algorithm to compute the lower part.
let mut r_lo = ((PI_OVER_TWO.hi - r_hi) - r0.hi) - r0.lo;
r_lo = f_fmla(-x, p.lo, r_lo + PI_OVER_TWO.lo);
// Ziv's accuracy test: if adding and subtracting the error bound rounds to
// the same f64, that value is returned.
let r_upper = r_hi + (r_lo + err);
let r_lower = r_hi + (r_lo - err);
if r_upper == r_lower {
return r_upper;
}
return acos_less_0p5_hard(x, x_sq);
}
// |x| >= 0.5
let x_sign = if x.is_sign_negative() { -1.0 } else { 1.0 };
// pi split into a (low, high) pair of f64 values.
const PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3ca1a62633145c07),
f64::from_bits(0x400921fb54442d18),
);
// |x| >= 1
// (NaN and infinities also land here because their exponent field is 0x7ff.)
if x_e >= E_BIAS {
// |x| = 1
if x_abs == 1.0 {
// x = 1, acos(x) = 0,
// x = -1, acos(x) = pi
return if x == 1.0 {
0.0
} else {
// x_sign is -1 here, so this evaluates PI.hi + PI.lo with one rounding.
f_fmla(-x_sign, PI.hi, PI.lo)
};
}
// |x| > 1, return NaN.
return f64::NAN;
}
// When |x| >= 0.5, we perform range reduction as follow:
//
// When 0.5 <= x < 1, let:
// y = acos(x)
// We will use the double angle formula:
// cos(2y) = 1 - 2 sin^2(y)
// and the complement angle identity:
// x = cos(y) = 1 - 2 sin^2 (y/2)
// So:
// sin(y/2) = sqrt( (1 - x)/2 )
// And hence:
// y/2 = asin( sqrt( (1 - x)/2 ) )
// Equivalently:
// acos(x) = y = 2 * asin( sqrt( (1 - x)/2 ) )
// Let u = (1 - x)/2, then:
// acos(x) = 2 * asin( sqrt(u) )
// Moreover, since 0.5 <= x < 1:
// 0 < u <= 1/4, and 0 < sqrt(u) <= 0.5,
// And hence we can reuse the same polynomial approximation of asin(x) when
// |x| <= 0.5:
// acos(x) ~ 2 * sqrt(u) * P(u).
//
// When -1 < x <= -0.5, we reduce to the previous case using the formula:
// acos(x) = pi - acos(-x)
// = pi - 2 * asin ( sqrt( (1 + x)/2 ) )
// ~ pi - 2 * sqrt(u) * P(u),
// where u = (1 - |x|)/2.
// u = (1 - |x|)/2
let u = f_fmla(x_abs, -0.5, 0.5);
// v_hi + v_lo ~ sqrt(u).
// Let:
// h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
// Then:
// sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
// ~ v_hi + h / (2 * v_hi)
// So we can use:
// v_lo = h / (2 * v_hi).
let v_hi = u.sqrt();
let h;
// With a hardware FMA the residual u - v_hi^2 is obtained in one fused
// operation; otherwise it is rebuilt from the exact product v_hi * v_hi.
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
h = f_fmla(v_hi, -v_hi, u);
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let v_hi_sq = DoubleDouble::from_exact_mult(v_hi, v_hi);
h = (u - v_hi_sq.hi) - v_hi_sq.lo;
}
// Scale v_lo and v_hi by 2 from the formula:
// vh = v_hi * 2
// vl = 2*v_lo = h / v_hi.
let vh = v_hi * 2.0;
let vl = h / v_hi;
// Polynomial approximation:
// p ~ asin(sqrt(u))/sqrt(u)
// Initial error bound handed to asin_eval: vh * 2^-51.
let err = vh * f64::from_bits(0x3cc0000000000000);
let (p, err) = asin_eval(DoubleDouble::new(0.0, u), err);
// Perform computations in double-double arithmetic:
// acos(|x|) ~ (vh + vl) * p
let r0 = DoubleDouble::quick_mult(DoubleDouble::new(vl, vh), p);
let r_hi;
let r_lo;
if x.is_sign_positive() {
r_hi = r0.hi;
r_lo = r0.lo;
} else {
// Negative x: acos(x) = pi - acos(|x|), accumulated as a (hi, lo) pair.
let r = DoubleDouble::from_exact_add(PI.hi, -r0.hi);
r_hi = r.hi;
r_lo = (PI.lo - r0.lo) + r.lo;
}
// Ziv's accuracy test, as in the |x| < 0.5 branch.
let r_upper = r_hi + (r_lo + err);
let r_lower = r_hi + (r_lo - err);
if r_upper == r_lower {
return r_upper;
}
acos_hard(x, u, v_hi, h, vh, vl)
}
/// Slow path of `f_acos` for 0.5 <= |x| < 1, used when the double-double
/// result fails the Ziv rounding test. Redoes the evaluation with
/// `DyadicFloat128` arithmetic.
///
/// The arguments are intermediates already computed by `f_acos`:
/// * `x` - the original argument (only its sign is inspected here),
/// * `u` - (1 - |x|) / 2,
/// * `v_hi` - `u.sqrt()`,
/// * `h` - the residual u - v_hi^2,
/// * `vh` - 2 * v_hi,
/// * `vl` - h / v_hi.
///
/// Returns the 128-bit result converted with `fast_as_f64`.
#[cold]
#[inline(never)]
fn acos_hard(x: f64, u: f64, v_hi: f64, h: f64, vh: f64, vl: f64) -> f64 {
// Ziv's accuracy test failed, we redo the computations in Float128.
// Recalculate mod 1/64.
// idx = round(u * 64) selects the table entry passed to asin_eval_dyadic.
let idx = (u * f64::from_bits(0x4050000000000000)).round_finite() as usize;
// After the first step of Newton-Raphson approximating v = sqrt(u), we have
// that:
// sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
// v_lo = h / (2 * v_hi)
// With error:
// sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
// = -h^2 / (2*v * (sqrt(u) + v)^2).
// Since:
// (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
// we can add another correction term to (v_hi + v_lo) that is:
// v_ll = -h^2 / (2*v_hi * 4u)
// = -v_lo * (h / 4u)
// = -vl * (h / 8u),
// making the errors:
// sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
// well beyond 128-bit precision needed.
// Get the rounding error of vl = 2 * v_lo ~ h / vh
// Get full product of vh * vl
let vl_lo;
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
vl_lo = f_fmla(-v_hi, vl, h) / v_hi;
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let vh_vl = DoubleDouble::from_exact_mult(v_hi, vl);
vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
}
// vll is the correction scaled by 2 like vh and vl:
// vll = 2 * v_ll = -vl * (h / (4u)), plus the rounding error vl_lo of vl.
let t = h * (-0.25) / u;
let vll = f_fmla(vl, t, vl_lo);
// m_v = +-(vh + vl + vll); the sign is negative when x is negative so that
// the product below can be added to pi.
let m_v_p = DyadicFloat128::new_from_f64(vl) + DyadicFloat128::new_from_f64(vll);
let mut m_v = DyadicFloat128::new_from_f64(vh) + m_v_p;
m_v.sign = if x.is_sign_negative() {
DyadicSign::Neg
} else {
DyadicSign::Pos
};
// Perform computations in Float128:
// acos(x) = (v_hi + v_lo + vll) * P(u) , when 0.5 <= x < 1,
// = pi - (v_hi + v_lo + vll) * P(u) , when -1 < x <= -0.5.
// y = u - idx/64 (0xbf90000000000000 is -1/64).
let y_f128 =
DyadicFloat128::new_from_f64(f_fmla(idx as f64, f64::from_bits(0xbf90000000000000), u));
let p_f128 = asin_eval_dyadic(y_f128, idx);
let mut r_f128 = m_v * p_f128;
if x.is_sign_negative() {
// Same mantissa as PI_OVER_TWO_F128 with the exponent raised by one.
const PI_F128: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -126,
mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
};
r_f128 = PI_F128 + r_f128;
}
r_f128.fast_as_f64()
}
/// Slow path of `f_acos` for |x| < 0.5, used when the double-double result
/// fails the Ziv rounding test. Evaluates pi/2 - x * P(x^2) with
/// `DyadicFloat128` arithmetic.
///
/// * `x` - the original argument,
/// * `x_sq` - x * x as a double-double, as computed by the caller.
///
/// Returns the 128-bit result converted with `fast_as_f64`.
#[cold]
#[inline(never)]
fn acos_less_0p5_hard(x: f64, x_sq: DoubleDouble) -> f64 {
// Ziv's accuracy test failed, perform 128-bit calculation.
// Recalculate mod 1/64.
// idx = round(x^2 * 64) selects the table entry passed to asin_eval_dyadic.
let idx = (x_sq.hi * f64::from_bits(0x4050000000000000)).round_finite() as usize;
// Get x^2 - idx/64 exactly. When FMA is available, double-double
// multiplication will be correct for all rounding modes. Otherwise, we use
// Float128 directly.
let mut x_f128 = DyadicFloat128::new_from_f64(x);
let u: DyadicFloat128;
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
// u = x^2 - idx/64
let u_hi = DyadicFloat128::new_from_f64(f_fmla(
idx as f64,
f64::from_bits(0xbf90000000000000),
x_sq.hi,
));
u = u_hi.quick_add(&DyadicFloat128::new_from_f64(x_sq.lo));
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
// Square x in 128-bit arithmetic, then add -idx/64.
let x_sq_f128 = x_f128.quick_mul(&x_f128);
u = x_sq_f128.quick_add(&DyadicFloat128::new_from_f64(
idx as f64 * f64::from_bits(0xbf90000000000000),
));
}
let p_f128 = asin_eval_dyadic(u, idx);
// Flip the sign of x_f128 to perform subtraction.
x_f128.sign = x_f128.sign.negate();
// r = pi/2 + (-x) * p
let r = PI_OVER_TWO_F128.quick_add(&x_f128.quick_mul(&p_f128));
r.fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_acos` on one argument from the |x| >= 0.5 branch and two
    /// negative arguments from the |x| < 0.5 branch.
    #[test]
    fn f_acos_test() {
        // (argument, expected result) pairs, compared for exact equality.
        let expectations: [(f64, f64); 3] = [
            (0.7, 0.7953988301841436),
            (-0.1, 1.6709637479564565),
            (-0.4, 1.9823131728623846),
        ];
        for (arg, want) in expectations {
            assert_eq!(f_acos(arg), want);
        }
    }
}

191
vendor/pxfm/src/acosf.rs vendored Normal file
View File

@@ -0,0 +1,191 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use std::hint::black_box;
/// Evaluates the degree-11 polynomial `c[0] + c[1]*z + ... + c[11]*z^11`.
///
/// The twelve coefficients are given as raw `f64` bit patterns. Adjacent
/// coefficients are paired into linear terms in `z`, the pairs are merged
/// with `z^2`, and the resulting groups are merged with `z^4`
/// (an Estrin-style evaluation built from `f_fmla`).
#[inline]
pub(crate) fn poly12(z: f64, c: [u64; 12]) -> f64 {
    // Decode the i-th coefficient from its bit pattern.
    let coef = |i: usize| f64::from_bits(c[i]);

    let z_sq = z * z;
    let z_quad = z_sq * z_sq;

    // Linear pairs: c[2k] + z * c[2k + 1].
    let pair01 = f_fmla(z, coef(1), coef(0));
    let pair23 = f_fmla(z, coef(3), coef(2));
    let pair45 = f_fmla(z, coef(5), coef(4));
    let pair67 = f_fmla(z, coef(7), coef(6));
    let pair89 = f_fmla(z, coef(9), coef(8));
    let pair_ab = f_fmla(z, coef(11), coef(10));

    // Cubic groups: pair + z^2 * next_pair.
    let low = f_fmla(pair23, z_sq, pair01);
    let middle = f_fmla(pair67, z_sq, pair45);
    let high = f_fmla(z_sq, pair_ab, pair89);

    // low + z^4 * (middle + z^4 * high)
    f_fmla(z_quad, f_fmla(z_quad, high, middle), low)
}
/// Handles the arguments `f_acosf` does not evaluate numerically:
/// x = 1 gives 0, x = -1 gives `PIH + PIL`, a NaN input is propagated as
/// `x + x`, and everything else yields NaN.
#[cold]
fn as_special(x: f32) -> f32 {
    const PIH: f32 = f64::from_bits(0x400921fb60000000) as f32;
    const PIL: f32 = -f64::from_bits(0x3e70000000000000) as f32;
    // Bit pattern of 1.0f32.
    const ONE_BITS: u32 = 0x7fu32 << 23;
    // Bit pattern of -1.0f32.
    const MINUS_ONE_BITS: u32 = 0x17fu32 << 23;
    // Infinity with the sign bit shifted out; anything larger is a NaN.
    const INF_NO_SIGN: u32 = 0xffu32 << 24;

    let bits = x.to_bits();
    match bits {
        ONE_BITS => 0.0,
        MINUS_ONE_BITS => PIH + PIL,
        _ if bits.wrapping_shl(1) > INF_NO_SIGN => x + x,
        _ => f32::NAN,
    }
}
/// Compute acos
///
/// Arguments with |x| >= 1, infinities and NaN are delegated to
/// `as_special` (0 for x = 1, pi for x = -1, NaN otherwise).
///
/// For |x| < 0.880141 a fast polynomial is evaluated first and its result is
/// accepted when a Ziv rounding test succeeds, i.e. when the upper and lower
/// error bounds round to the same `f32`. Otherwise the accurate path is used:
/// a `poly12` expansion around 0 for |x| < 0.5, or a
/// `sqrt(1 - |x|) * poly12(1 - |x|)` expansion for 0.5 <= |x| < 1.
///
/// Max found ULP 0.49999982
#[inline]
pub fn f_acosf(x: f32) -> f32 {
    // pi/2 rounded to f64.
    const PI2: f64 = f64::from_bits(0x3ff921fb54442d18);
    // Offset for the |x| >= 0.5 path, indexed by the sign bit of x:
    // 0 for positive x, pi for negative x.
    const O: [f64; 2] = [0., f64::from_bits(0x400921fb54442d18)];
    let xs = x as f64;
    let mut r: f64;
    let t = x.to_bits();
    // Bit pattern with the sign shifted out, so comparisons act on |x|.
    let ax = t.wrapping_shl(1);
    if ax >= 0x7f << 24 {
        // |x| >= 1, infinity or NaN.
        return as_special(x);
    }
    if ax < 0x7ec2a1dcu32 {
        // |x| < 0.880141
        const B: [u64; 16] = [
            0x3fefffffffd9ccb8,
            0x3fc5555c94838007,
            0x3fb32ded4b7c20fa,
            0x3fa8566df703309e,
            0xbf9980c959bec9a3,
            0x3fe56fbb04998344,
            0xc01403d8e4c49f52,
            0x403b06c3e9f311ea,
            0xc059ea97c4e2c21f,
            0x407200b8261cc61b,
            0xc082274c2799a5c7,
            0x408a558a59cc19d3,
            0xc08aca4b6a529ff0,
            0x408228744703f813,
            0xc06d7dbb0b322228,
            0x4045c2018c0c0105,
        ];
        /* avoid spurious underflow */
        if ax < 0x40000000u32 {
            // |x| < 2^-63
            return PI2 as f32;
        }
        // r ~ asin(x), evaluated as x * Q(x^2) with Q of degree 15 in x^2.
        let z = xs;
        let z2 = z * z;
        let w0 = f_fmla(z2, f64::from_bits(B[1]), f64::from_bits(B[0]));
        let w1 = f_fmla(z2, f64::from_bits(B[3]), f64::from_bits(B[2]));
        let w2 = f_fmla(z2, f64::from_bits(B[5]), f64::from_bits(B[4]));
        let w3 = f_fmla(z2, f64::from_bits(B[7]), f64::from_bits(B[6]));
        let w4 = f_fmla(z2, f64::from_bits(B[9]), f64::from_bits(B[8]));
        let w5 = f_fmla(z2, f64::from_bits(B[11]), f64::from_bits(B[10]));
        let w6 = f_fmla(z2, f64::from_bits(B[13]), f64::from_bits(B[12]));
        let w7 = f_fmla(z2, f64::from_bits(B[15]), f64::from_bits(B[14]));
        let z4 = z2 * z2;
        let z8 = z4 * z4;
        let z16 = z8 * z8;
        r = z
            * ((f_fmla(z4, w1, w0) + z8 * f_fmla(z4, w3, w2))
                + z16 * (f_fmla(z4, w5, w4) + z8 * f_fmla(z4, w7, w6)));
        // Ziv's accuracy test. The two constants are pi/2 plus and minus the
        // error bound of the fast polynomial (about 2.8e-10), so the bounds can
        // only coincide after rounding to the f32 result precision. Comparing
        // them as f64 would never succeed and would make this fast path dead.
        // NOTE(review): the bound constants are assumed valid for this
        // FMA-based evaluation order - re-run an exhaustive f32 sweep to confirm.
        let ub = (f64::from_bits(0x3ff921fb54574191) - r) as f32;
        let lb = (f64::from_bits(0x3ff921fb543118a0) - r) as f32;
        if ub == lb {
            return ub;
        }
    }
    // accurate path
    if ax < (0x7eu32 << 24) {
        // |x| < 0.5
        const C: [u64; 12] = [
            0x3fc555555555529c,
            0x3fb333333337e0dd,
            0x3fa6db6db3b4465e,
            0x3f9f1c72e13ac306,
            0x3f96e89cebe06bc4,
            0x3f91c6dcf5289094,
            0x3f8c6dbbcc7c6315,
            0x3f88f8dc2615e996,
            0x3f7a5833b7bf15e8,
            0x3f943f44ace1665c,
            0xbf90fb17df881c73,
            0x3fa07520c026b2d6,
        ];
        // Two inputs get hard-coded results. NOTE(review): presumably these are
        // hard-to-round cases, and black_box keeps the addition from being
        // constant-folded - confirm against the reference implementation.
        if t == 0x328885a3u32 {
            return black_box(f64::from_bits(0x3ff921fb60000000) as f32)
                + black_box(f64::from_bits(0x3e60000000000000) as f32);
        }
        if t == 0x39826222u32 {
            return black_box(f64::from_bits(0x3ff920f6a0000000) as f32)
                + black_box(f64::from_bits(0x3e60000000000000) as f32);
        }
        // acos(x) ~ (pi/2 - x) - x^3 * P(x^2)
        let x2 = xs * xs;
        r = f_fmla(-(xs * x2), poly12(x2, C), PI2 - xs);
    } else {
        // 0.5 <= |x| < 1
        const C: [u64; 12] = [
            0x3ff6a09e667f3bcb,
            0x3fbe2b7dddff2db9,
            0x3f9b27247ab42dbc,
            0x3f802995cc4e0744,
            0x3f65ffb0276ec8ea,
            0x3f5033885a928dec,
            0x3f3911f2be23f8c7,
            0x3f24c3c55d2437fd,
            0x3f0af477e1d7b461,
            0x3f0abd6bdff67dcb,
            0xbef1717e86d0fa28,
            0x3ef6ff526de46023,
        ];
        // acos(x) ~ O[sign] + copysign(sqrt(1 - |x|), x) * P(1 - |x|)
        let bx = xs.abs();
        let z = 1.0 - bx;
        let s = f64::copysign(z.sqrt(), xs);
        r = f_fmla(s, poly12(z, C), O[t.wrapping_shr(31) as usize]);
    }
    r as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_acosf` at +-0.5 and on an out-of-domain argument.
    #[test]
    fn test_acosf() {
        let at_minus_half = f_acosf(-0.5);
        let at_half = f_acosf(0.5);
        let out_of_domain = f_acosf(7.);

        assert_eq!(at_minus_half, 2.0943952);
        assert_eq!(at_half, std::f32::consts::FRAC_PI_3);
        // Arguments with |x| > 1 must produce NaN.
        assert!(out_of_domain.is_nan());
    }
}

369
vendor/pxfm/src/acospi.rs vendored Normal file
View File

@@ -0,0 +1,369 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::asin::asin_eval;
use crate::asin_eval_dyadic::asin_eval_dyadic;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::round::RoundFinite;
/// 1/pi as a double-double value: the first argument is the low-order
/// correction and the second is the leading f64 (about 0.3183098861837907).
pub(crate) const INV_PI_DD: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc76b01ec5417056),
f64::from_bits(0x3fd45f306dc9c883),
);
// 1/PI with 128-bit precision generated by SageMath with:
// def format_hex(value):
// l = hex(value)[2:]
// n = 8
// x = [l[i:i + n] for i in range(0, len(l), n)]
// return "0x" + "'".join(x) + "_u128"
// r = 1/pi
// (s, m, e) = RealField(128)(r).sign_mantissa_exponent();
// print(format_hex(m));
/// 1/pi with a 128-bit mantissa.
/// NOTE(review): the value is presumably mantissa * 2^exponent - confirm
/// against the `DyadicFloat128` definition.
pub(crate) const INV_PI_F128: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -129,
mantissa: 0xa2f9836e_4e441529_fc2757d1_f534ddc1_u128,
};
/// pi/2 with a 128-bit mantissa, used by the 128-bit slow paths.
pub(crate) const PI_OVER_TWO_F128: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
};
/// Computes acos(x)/PI
///
/// Returns NaN when |x| > 1 (including infinities) or when x is NaN, returns 0
/// for x = 1 and 1 for x = -1.
///
/// acos(x) is first computed in double-double arithmetic, multiplied by
/// `INV_PI_DD`, and validated with a Ziv rounding test
/// (`r_upper == r_lower`). When the test is inconclusive the computation is
/// redone inline with `DyadicFloat128` arithmetic and scaled by `INV_PI_F128`.
///
/// Max ULP 0.5
pub fn f_acospi(x: f64) -> f64 {
// Biased 11-bit exponent field of x.
let x_e = (x.to_bits() >> 52) & 0x7ff;
// Exponent bias of f64 (1023).
const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
// pi/2 split into a (low, high) pair of f64 values.
const PI_OVER_TWO: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3c91a62633145c07),
f64::from_bits(0x3ff921fb54442d18),
);
// |x|, obtained by clearing the sign bit.
let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
// |x| < 0.5.
if x_e < E_BIAS - 1 {
// |x| < 2^-55.
if x_e < E_BIAS - 55 {
// When |x| < 2^-55, acos(x)/pi is 0.5 up to a tiny x-dependent term;
// the result is formed as 0.5 - 2^-55 * x with a single fused rounding.
return f_fmla(f64::from_bits(0xbc80000000000000), x, 0.5);
}
let x_sq = DoubleDouble::from_exact_mult(x, x);
// Initial error bound handed to asin_eval: |x| * 2^-51.
let err = x_abs * f64::from_bits(0x3cc0000000000000);
// Polynomial approximation:
// p ~ asin(x)/x
let (p, err) = asin_eval(x_sq, err);
// asin(x) ~ x * p
let r0 = DoubleDouble::from_exact_mult(x, p.hi);
// acos(x) = pi/2 - asin(x)
// ~ pi/2 - x * p
// = pi/2 - x * (p.hi + p.lo)
let mut r_hi = f_fmla(-x, p.hi, PI_OVER_TWO.hi);
// Use Dekker's 2SUM algorithm to compute the lower part.
let mut r_lo = ((PI_OVER_TWO.hi - r_hi) - r0.hi) - r0.lo;
r_lo = f_fmla(-x, p.lo, r_lo + PI_OVER_TWO.lo);
// Scale acos(x) by 1/pi in double-double arithmetic.
let p = DoubleDouble::mult(DoubleDouble::new(r_lo, r_hi), INV_PI_DD);
r_hi = p.hi;
r_lo = p.lo;
// Ziv's accuracy test: if adding and subtracting the error bound rounds to
// the same f64, that value is returned.
let r_upper = r_hi + (r_lo + err);
let r_lower = r_hi + (r_lo - err);
if r_upper == r_lower {
return r_upper;
}
// Ziv's accuracy test failed, perform 128-bit calculation.
// Recalculate mod 1/64.
let idx = (x_sq.hi * f64::from_bits(0x4050000000000000)).round_finite() as usize;
// Get x^2 - idx/64 exactly. When FMA is available, double-double
// multiplication will be correct for all rounding modes. Otherwise, we use
// Float128 directly.
let mut x_f128 = DyadicFloat128::new_from_f64(x);
let u: DyadicFloat128;
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
// u = x^2 - idx/64
let u_hi = DyadicFloat128::new_from_f64(f_fmla(
idx as f64,
f64::from_bits(0xbf90000000000000),
x_sq.hi,
));
u = u_hi.quick_add(&DyadicFloat128::new_from_f64(x_sq.lo));
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
// Square x in 128-bit arithmetic, then add -idx/64.
let x_sq_f128 = x_f128.quick_mul(&x_f128);
u = x_sq_f128.quick_add(&DyadicFloat128::new_from_f64(
idx as f64 * f64::from_bits(0xbf90000000000000),
));
}
let p_f128 = asin_eval_dyadic(u, idx);
// Flip the sign of x_f128 to perform subtraction.
x_f128.sign = x_f128.sign.negate();
// r = (pi/2 - x * p) * (1/pi)
let mut r = PI_OVER_TWO_F128.quick_add(&x_f128.quick_mul(&p_f128));
r = r.quick_mul(&INV_PI_F128);
return r.fast_as_f64();
}
// |x| >= 0.5
// pi split into a (low, high) pair of f64 values.
const PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3ca1a62633145c07),
f64::from_bits(0x400921fb54442d18),
);
// |x| >= 1
// (NaN and infinities also land here because their exponent field is 0x7ff.)
if x_e >= E_BIAS {
// |x| = 1
if x_abs == 1.0 {
// x = 1, acos(x)/pi = 0,
// x = -1, acos(x)/pi = 1
return if x == 1.0 { 0.0 } else { 1.0 };
}
// |x| > 1, return NaN.
return f64::NAN;
}
// When |x| >= 0.5, we perform range reduction as follow:
//
// When 0.5 <= x < 1, let:
// y = acos(x)
// We will use the double angle formula:
// cos(2y) = 1 - 2 sin^2(y)
// and the complement angle identity:
// x = cos(y) = 1 - 2 sin^2 (y/2)
// So:
// sin(y/2) = sqrt( (1 - x)/2 )
// And hence:
// y/2 = asin( sqrt( (1 - x)/2 ) )
// Equivalently:
// acos(x) = y = 2 * asin( sqrt( (1 - x)/2 ) )
// Let u = (1 - x)/2, then:
// acos(x) = 2 * asin( sqrt(u) )
// Moreover, since 0.5 <= x < 1:
// 0 < u <= 1/4, and 0 < sqrt(u) <= 0.5,
// And hence we can reuse the same polynomial approximation of asin(x) when
// |x| <= 0.5:
// acos(x) ~ 2 * sqrt(u) * P(u).
//
// When -1 < x <= -0.5, we reduce to the previous case using the formula:
// acos(x) = pi - acos(-x)
// = pi - 2 * asin ( sqrt( (1 + x)/2 ) )
// ~ pi - 2 * sqrt(u) * P(u),
// where u = (1 - |x|)/2.
// u = (1 - |x|)/2
let u = f_fmla(x_abs, -0.5, 0.5);
// v_hi + v_lo ~ sqrt(u).
// Let:
// h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
// Then:
// sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
// ~ v_hi + h / (2 * v_hi)
// So we can use:
// v_lo = h / (2 * v_hi).
let v_hi = u.sqrt();
let h;
// With a hardware FMA the residual u - v_hi^2 is obtained in one fused
// operation; otherwise it is rebuilt from the exact product v_hi * v_hi.
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
h = f_fmla(v_hi, -v_hi, u);
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let v_hi_sq = DoubleDouble::from_exact_mult(v_hi, v_hi);
h = (u - v_hi_sq.hi) - v_hi_sq.lo;
}
// Scale v_lo and v_hi by 2 from the formula:
// vh = v_hi * 2
// vl = 2*v_lo = h / v_hi.
let vh = v_hi * 2.0;
let vl = h / v_hi;
// Polynomial approximation:
// p ~ asin(sqrt(u))/sqrt(u)
// Initial error bound handed to asin_eval: vh * 2^-51.
let err = vh * f64::from_bits(0x3cc0000000000000);
let (p, err) = asin_eval(DoubleDouble::new(0.0, u), err);
// Perform computations in double-double arithmetic:
// acos(|x|) ~ (vh + vl) * p
let r0 = DoubleDouble::quick_mult(DoubleDouble::new(vl, vh), p);
let mut r_hi;
let mut r_lo;
if x.is_sign_positive() {
r_hi = r0.hi;
r_lo = r0.lo;
} else {
// Negative x: acos(x) = pi - acos(|x|), accumulated as a (hi, lo) pair.
let r = DoubleDouble::from_exact_add(PI.hi, -r0.hi);
r_hi = r.hi;
r_lo = (PI.lo - r0.lo) + r.lo;
}
// Scale acos(x) by 1/pi in double-double arithmetic.
let p = DoubleDouble::mult(DoubleDouble::new(r_lo, r_hi), INV_PI_DD);
r_hi = p.hi;
r_lo = p.lo;
// Ziv's accuracy test, as in the |x| < 0.5 branch.
let r_upper = r_hi + (r_lo + err);
let r_lower = r_hi + (r_lo - err);
if r_upper == r_lower {
return r_upper;
}
// Ziv's accuracy test failed, we redo the computations in Float128.
// Recalculate mod 1/64.
let idx = (u * f64::from_bits(0x4050000000000000)).round_finite() as usize;
// After the first step of Newton-Raphson approximating v = sqrt(u), we have
// that:
// sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
// v_lo = h / (2 * v_hi)
// With error:
// sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
// = -h^2 / (2*v * (sqrt(u) + v)^2).
// Since:
// (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
// we can add another correction term to (v_hi + v_lo) that is:
// v_ll = -h^2 / (2*v_hi * 4u)
// = -v_lo * (h / 4u)
// = -vl * (h / 8u),
// making the errors:
// sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
// well beyond 128-bit precision needed.
// Get the rounding error of vl = 2 * v_lo ~ h / vh
// Get full product of vh * vl
let vl_lo;
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
vl_lo = f_fmla(-v_hi, vl, h) / v_hi;
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let vh_vl = DoubleDouble::from_exact_mult(v_hi, vl);
vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
}
// vll is the correction scaled by 2 like vh and vl:
// vll = 2 * v_ll = -vl * (h / (4u)), plus the rounding error vl_lo of vl.
let t = h * (-0.25) / u;
let vll = f_fmla(vl, t, vl_lo);
// m_v = +-(vh + vl + vll); the sign is negative when x is negative so that
// the product below can be added to pi.
let m_v_p = DyadicFloat128::new_from_f64(vl) + DyadicFloat128::new_from_f64(vll);
let mut m_v = DyadicFloat128::new_from_f64(vh) + m_v_p;
m_v.sign = if x.is_sign_negative() {
DyadicSign::Neg
} else {
DyadicSign::Pos
};
// Perform computations in Float128:
// acos(x) = (v_hi + v_lo + vll) * P(u) , when 0.5 <= x < 1,
// = pi - (v_hi + v_lo + vll) * P(u) , when -1 < x <= -0.5.
// y = u - idx/64 (0xbf90000000000000 is -1/64).
let y_f128 =
DyadicFloat128::new_from_f64(f_fmla(idx as f64, f64::from_bits(0xbf90000000000000), u));
let p_f128 = asin_eval_dyadic(y_f128, idx);
let mut r_f128 = m_v * p_f128;
if x.is_sign_negative() {
// Same mantissa as PI_OVER_TWO_F128 with the exponent raised by one.
const PI_F128: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -126,
mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
};
r_f128 = PI_F128 + r_f128;
}
// Final scaling by 1/pi in 128-bit arithmetic.
r_f128 = r_f128.quick_mul(&INV_PI_F128);
r_f128.fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_acospi` at 0.5 and on an out-of-domain argument.
    #[test]
    fn acospi_test() {
        let at_half = f_acospi(0.5);
        assert_eq!(at_half, 0.3333333333333333);

        // Arguments with |x| > 1 must produce NaN.
        let out_of_domain = f_acospi(1.5);
        assert!(out_of_domain.is_nan());
    }
}

98
vendor/pxfm/src/acospif.rs vendored Normal file
View File

@@ -0,0 +1,98 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::asinpif::ASINCOSF_PI_TABLE;
use crate::common::{dd_fmla, f_fmla};
/// Computes acos(x)/PI
///
/// Returns 0 for x = 1, 1 for x = -1, propagates NaN inputs as `x + x`, and
/// returns NaN for every other argument with |x| >= 1 (including infinities).
///
/// For |x| < 1 the argument selects one of 16 coefficient rows of
/// `ASINCOSF_PI_TABLE` by `i = floor(16 * |x|)`; the polynomial is evaluated
/// in f64 and the result is rounded to f32.
///
/// Max ULP 0.5
#[inline]
pub fn f_acospif(x: f32) -> f32 {
let ax = x.abs();
let az = ax as f64;
let z = x as f64;
let t: u32 = x.to_bits();
// Biased 8-bit exponent field of x.
let e: i32 = ((t >> 23) & 0xff) as i32;
// |x| >= 1, infinity or NaN.
if e >= 127 {
if x == 1.0 {
return 0.0;
}
if x == -1.0 {
return 1.0;
}
if e == 0xff && (t.wrapping_shl(9)) != 0 {
return x + x;
} // nan
return f32::NAN;
}
// Shift that brings the significand (with its implicit leading bit) down to
// floor(16 * |x|). For e < 127 the shift is at least 20; when it is 32 or
// more the index stays 0.
let s: i32 = 146i32.wrapping_sub(e);
let mut i = 0i32;
if s < 32 {
i = (((t & 0x007fffff) | 1 << 23) >> s) as i32;
}
let c = ASINCOSF_PI_TABLE[i as usize & 15];
let z2 = z * z;
let z4 = z2 * z2;
if i == 0 {
// |x| < 1/16: acos(x)/pi ~ 0.5 - x * Q(x^2), with Q of degree 7 in x^2.
let mut c0 = f_fmla(z2, f64::from_bits(c[1]), f64::from_bits(c[0]));
let c2 = f_fmla(z2, f64::from_bits(c[3]), f64::from_bits(c[2]));
let mut c4 = f_fmla(z2, f64::from_bits(c[5]), f64::from_bits(c[4]));
let c6 = f_fmla(z2, f64::from_bits(c[7]), f64::from_bits(c[6]));
c0 += c2 * z4;
c4 += c6 * z4;
/* For |x| <= 0x1.0fd288p-127, c0 += c4*(z4*z4) would raise a spurious
underflow exception, we use an FMA instead, where c4 * z4 does not
underflow. */
c0 = dd_fmla(c4 * z4, z4, c0);
f_fmla(-z, c0, 0.5) as f32
} else {
// 1/16 <= |x| < 1: acos(x)/pi ~ SIGN + copysign(sqrt(1 - |x|), x) * Q(|x|),
// with Q of degree 7 in |x| (z2 = |x|^2, z4 = |x|^4).
let f = (1. - az).sqrt();
let mut c0 = f_fmla(az, f64::from_bits(c[1]), f64::from_bits(c[0]));
let c2 = f_fmla(az, f64::from_bits(c[3]), f64::from_bits(c[2]));
let mut c4 = f_fmla(az, f64::from_bits(c[5]), f64::from_bits(c[4]));
let c6 = f_fmla(az, f64::from_bits(c[7]), f64::from_bits(c[6]));
c0 += c2 * z2;
c4 += c6 * z2;
c0 += c4 * z4;
// Offset indexed by the sign bit: 0 for positive x, 1 for negative x.
static SIGN: [f64; 2] = [0., 1.];
let r = SIGN[(t >> 31) as usize] + c0 * f64::copysign(f, x as f64);
r as f32
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_acospif` at 0, 0.5 and 1.
    #[test]
    fn test_acospif() {
        // (argument, expected result) pairs, compared for exact equality.
        let expectations: [(f32, f32); 3] = [(0.0, 0.5), (0.5, 0.33333334), (1.0, 0.0)];
        for (arg, want) in expectations {
            assert_eq!(f_acospif(arg), want);
        }
    }
}

490
vendor/pxfm/src/asin.rs vendored Normal file
View File

@@ -0,0 +1,490 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::asin_eval_dyadic::asin_eval_dyadic;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::round::RoundFinite;
/// Polynomial coefficient table consumed by `asin_eval`, stored as raw `f64`
/// bit patterns (decoded with `f64::from_bits` at use).
///
/// `asin_eval` selects row `idx = round(u * 32)` and evaluates a polynomial in
/// the reduced argument `y = u - idx/32`. Within each 12-entry row:
/// * `[0]` / `[1]` — high and low words of the constant term,
/// * `[2]` / `[3]` — high and low words of the coefficient of `y`,
/// * `[4]..=[11]` — plain double coefficients of `y^2` through `y^9`.
///
/// There are 9 rows, i.e. `idx` in `0..=8`; callers pass `u` values for which
/// `round(u * 32)` stays in that range (an out-of-range index would panic).
static ASIN_COEFFS: [[u64; 12]; 9] = [
[
0x3ff0000000000000,
0x0000000000000000,
0x3fc5555555555555,
0x3c65555555555555,
0x3fb3333333333333,
0x3fa6db6db6db6db7,
0x3f9f1c71c71c71c7,
0x3f96e8ba2e8ba2e9,
0x3f91c4ec4ec4ec4f,
0x3f8c99999999999a,
0x3f87a87878787878,
0x3f83fde50d79435e,
],
[
0x3ff015a397cf0f1c,
0xbc8eebd6ccfe3ee3,
0x3fc5f3581be7b08b,
0xbc65df80d0e7237d,
0x3fb4519ddf1ae530,
0x3fa8eb4b6eeb1696,
0x3fa17bc85420fec8,
0x3f9a8e39b5dcad81,
0x3f953f8df127539b,
0x3f91a485a0b0130a,
0x3f8e20e6e4930020,
0x3f8a466a7030f4c9,
],
[
0x3ff02be9ce0b87cd,
0x3c7e5d09da2e0f04,
0x3fc69ab5325bc359,
0xbc692f480cfede2d,
0x3fb58a4c3097aab1,
0x3fab3db36068dd80,
0x3fa3b94821846250,
0x3f9eedc823765d21,
0x3f998e35d756be6b,
0x3f95ea4f1b32731a,
0x3f9355115764148e,
0x3f916a5853847c91,
],
[
0x3ff042dc6a65ffbf,
0xbc8c7ea28dce95d1,
0x3fc74c4bd7412f9d,
0x3c5447024c0a3c87,
0x3fb6e09c6d2b72b9,
0x3faddd9dcdae5315,
0x3fa656f1f64058b8,
0x3fa21a42e4437101,
0x3f9eed0350b7edb2,
0x3f9b6bc877e58c52,
0x3f9903a0872eb2a4,
0x3f974da839ddd6d8,
],
[
0x3ff05a8621feb16b,
0xbc7e5b33b1407c5f,
0x3fc809186c2e57dd,
0xbc33dcb4d6069407,
0x3fb8587d99442dc5,
0x3fb06c23d1e75be3,
0x3fa969024051c67d,
0x3fa54e4f934aacfd,
0x3fa2d60a732dbc9c,
0x3fa149f0c046eac7,
0x3fa053a56dba1fba,
0x3f9f7face3343992,
],
[
0x3ff072f2b6f1e601,
0xbc92dcbb05419970,
0x3fc8d2397127aeba,
0x3c6ead0c497955fb,
0x3fb9f68df88da518,
0x3fb21ee26a5900d7,
0x3fad08e7081b53a9,
0x3fa938dd661713f7,
0x3fa71b9f299b72e6,
0x3fa5fbc7d2450527,
0x3fa58573247ec325,
0x3fa585a174a6a4ce,
],
[
0x3ff08c2f1d638e4c,
0x3c7b47c159534a3d,
0x3fc9a8f592078624,
0xbc6ea339145b65cd,
0x3fbbc04165b57aab,
0x3fb410df5f58441d,
0x3fb0ab6bdf5f8f70,
0x3fae0b92eea1fce1,
0x3fac9094e443a971,
0x3fac34651d64bc74,
0x3facaa008d1af080,
0x3fadc165bc0c4fc5,
],
[
0x3ff0a649a73e61f2,
0x3c874ac0d817e9c7,
0x3fca8ec30dc93890,
0xbc48ab1c0eef300c,
0x3fbdbc11ea95061b,
0x3fb64e371d661328,
0x3fb33e0023b3d895,
0x3fb2042269c243ce,
0x3fb1cce74bda2230,
0x3fb244d425572ce9,
0x3fb34d475c7f1e3e,
0x3fb4d4e653082ad3,
],
[
0x3ff0c152382d7366,
0xbc9ee6913347c2a6,
0x3fcb8550d62bfb6d,
0xbc6d10aec3f116d5,
0x3fbff1bde0fa3ca0,
0x3fb8e5f3ab69f6a4,
0x3fb656be8b6527ce,
0x3fb5c39755dc041a,
0x3fb661e6ebd40599,
0x3fb7ea3dddee2a4f,
0x3fba4f439abb4869,
0x3fbd9181c0fda658,
],
];
/// Evaluates the table-driven polynomial `P(u) ~ asin(sqrt(u)) / sqrt(u)`.
///
/// `u` is a double-double argument; `err` is the caller's running error
/// estimate. Returns the double-double value of the polynomial together with
/// the updated error estimate.
///
/// The row of `ASIN_COEFFS` is chosen by `round(u * 32)`, so the caller must
/// keep `u` within the range the table covers (9 rows).
#[inline]
pub(crate) fn asin_eval(u: DoubleDouble, err: f64) -> (DoubleDouble, f64) {
    // Nearest multiple of 1/32 to u: k = round(u * 32) picks the table row.
    let k = (u.hi * f64::from_bits(0x4040000000000000)).round_finite();
    let row = &ASIN_COEFFS[(k as u64) as usize];
    let coef = |i: usize| f64::from_bits(row[i]);

    // Reduced argument y = u - k/32; the high-part FMA is exact.
    let reduced_hi = f_fmla(k, f64::from_bits(0xbfa0000000000000), u.hi);
    let y = DoubleDouble::from_exact_add(reduced_hi, u.lo);
    let y_sq = y.hi * y.hi;
    let y_quad = y_sq * y_sq;

    // Fold the double-double errors into the relative error carried by y^2.
    let err_out = f_fmla(err, y_sq, f64::from_bits(0x3990000000000000));

    // The linear term is kept in double-double precision.
    let linear = DoubleDouble::quick_mult(y, DoubleDouble::new(coef(3), coef(2)));

    // Higher-order terms in plain doubles, paired up to shorten the
    // dependency chain.
    let p1 = f_fmla(y.hi, coef(5), coef(4));
    let p2 = f_fmla(y.hi, coef(7), coef(6));
    let p3 = f_fmla(y.hi, coef(9), coef(8));
    let p4 = f_fmla(y.hi, coef(11), coef(10));
    let q_lo = f_fmla(y_sq, p2, p1);
    let q_hi = f_fmla(y_sq, p4, p3);
    let tail = f_fmla(y_quad, q_hi, q_lo);

    // Constant term + linear term, then absorb everything else in the low word.
    let mut r = DoubleDouble::from_exact_add(coef(0), linear.hi);
    r.lo = f_fmla(y_sq, tail, coef(1) + linear.lo + r.lo);
    (r, err_out)
}
/// Computes asin(x)
///
/// Max found ULP 0.5
///
/// Special values: `±0` is returned unchanged, `±1` gives `±pi/2`, a NaN input
/// is returned as is, and any other `|x| > 1` yields NaN.
///
/// Evaluation is split by magnitude:
/// * `|x| < 2^-26` — `asin(x)` is `x` up to rounding;
/// * `|x| < 0.5` — `x * P(x^2)` with `P` from [`asin_eval`];
/// * `0.5 <= |x| < 1` — `pi/2 - 2*sqrt(u) * P(u)` with `u = (1 - |x|)/2`.
///
/// The last two ranges first try a double-double evaluation guarded by Ziv's
/// rounding test and fall back to a 128-bit evaluation when the test fails.
pub fn f_asin(x: f64) -> f64 {
    let x_e = (x.to_bits() >> 52) & 0x7ff;
    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

    let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);

    // |x| < 0.5.
    if x_e < E_BIAS - 1 {
        // |x| < 2^-26.
        if x_e < E_BIAS - 26 {
            // When |x| < 2^-26, the relative error of the approximation asin(x) ~ x
            // is:
            //   |asin(x) - x| / |asin(x)| < |x^3| / (6|x|)
            //                             = x^2 / 6
            //                             < 2^-54
            //                             < epsilon(1)/2,
            // so asin(x) is x up to the final rounding.
            if x.abs() == 0. {
                return x;
            }
            // Get sign(x) * min_normal.
            let eps = f64::copysign(f64::MIN_POSITIVE, x);
            // Subnormal inputs (x_e == 0) are shifted by min_normal before
            // scaling by 2^54 and shifted back after scaling by 2^-54.
            let normalize_const = if x_e == 0 { eps } else { 0.0 };
            let scaled_normal =
                f_fmla(x + normalize_const, f64::from_bits(0x4350000000000000), eps);
            return f_fmla(
                scaled_normal,
                f64::from_bits(0x3c90000000000000),
                -normalize_const,
            );
        }

        let x_sq = DoubleDouble::from_exact_mult(x, x);
        let err = x_abs * f64::from_bits(0x3cc0000000000000);
        // Polynomial approximation:
        //   p ~ asin(x)/x
        let (p, err) = asin_eval(x_sq, err);
        // asin(x) ~ x * (ASIN_COEFFS[idx][0] + p)
        let r0 = DoubleDouble::from_exact_mult(x, p.hi);
        let r_lo = f_fmla(x, p.lo, r0.lo);

        // Ziv's accuracy test: if both ends of the error interval round to the
        // same double, that double is the correctly rounded result.
        let r_upper = r0.hi + (r_lo + err);
        let r_lower = r0.hi + (r_lo - err);

        if r_upper == r_lower {
            return r_upper;
        }

        // Ziv's accuracy test failed, perform 128-bit calculation.

        // Recalculate mod 1/64.
        let idx = (x_sq.hi * f64::from_bits(0x4050000000000000)).round_finite() as usize;

        // Get x^2 - idx/64 exactly.  When FMA is available, double-double
        // multiplication will be correct for all rounding modes.  Otherwise, we use
        // Float128 directly.
        let x_f128 = DyadicFloat128::new_from_f64(x);

        let u: DyadicFloat128;
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        ))]
        {
            // u = x^2 - idx/64
            let u_hi = DyadicFloat128::new_from_f64(f_fmla(
                idx as f64,
                f64::from_bits(0xbf90000000000000),
                x_sq.hi,
            ));
            u = u_hi.quick_add(&DyadicFloat128::new_from_f64(x_sq.lo));
        }

        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        )))]
        {
            let x_sq_f128 = x_f128.quick_mul(&x_f128);
            u = x_sq_f128.quick_add(&DyadicFloat128::new_from_f64(
                idx as f64 * (f64::from_bits(0xbf90000000000000)),
            ));
        }

        let p_f128 = asin_eval_dyadic(u, idx);
        let r = x_f128.quick_mul(&p_f128);
        return r.fast_as_f64();
    }

    // pi/2 as a double-double (low word first).
    const PI_OVER_TWO: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3c91a62633145c07),
        f64::from_bits(0x3ff921fb54442d18),
    );

    let x_sign = if x.is_sign_negative() { -1.0 } else { 1.0 };

    // |x| >= 1
    if x_e >= E_BIAS {
        // x = +-1, asin(x) = +- pi/2
        if x_abs == 1.0 {
            // return +- pi/2
            return f_fmla(x_sign, PI_OVER_TWO.hi, x_sign * PI_OVER_TWO.lo);
        }
        // |x| > 1, return NaN.
        if x.is_nan() {
            return x;
        }
        return f64::NAN;
    }

    // u = (1 - |x|)/2
    let u = f_fmla(x_abs, -0.5, 0.5);
    // v_hi + v_lo ~ sqrt(u).
    // Let:
    //   h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
    // Then:
    //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
    //           ~ v_hi + h / (2 * v_hi)
    // So we can use:
    //   v_lo = h / (2 * v_hi).
    // Then,
    //   asin(x) ~ pi/2 - 2*(v_hi + v_lo) * P(u)
    let v_hi = u.sqrt();
    let h;
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        h = f_fmla(v_hi, -v_hi, u);
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let v_hi_sq = DoubleDouble::from_exact_mult(v_hi, v_hi);
        h = (u - v_hi_sq.hi) - v_hi_sq.lo;
    }

    // Scale v_lo and v_hi by 2 from the formula:
    //   vh = v_hi * 2
    //   vl = 2*v_lo = h / v_hi.
    let vh = v_hi * 2.0;
    let vl = h / v_hi;

    // Polynomial approximation:
    //   p ~ asin(sqrt(u))/sqrt(u)
    let err = vh * f64::from_bits(0x3cc0000000000000);

    let (p, err) = asin_eval(DoubleDouble::new(0.0, u), err);

    // Perform computations in double-double arithmetic:
    //   asin(x) = pi/2 - (v_hi + v_lo) * (ASIN_COEFFS[idx][0] + p)
    let r0 = DoubleDouble::quick_mult(DoubleDouble::new(vl, vh), p);
    let r = DoubleDouble::from_exact_add(PI_OVER_TWO.hi, -r0.hi);

    let r_lo = PI_OVER_TWO.lo - r0.lo + r.lo;

    // Ziv's accuracy test on the signed result.
    let (r_upper, r_lower);

    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        r_upper = f_fmla(r.hi, x_sign, f_fmla(r_lo, x_sign, err));
        r_lower = f_fmla(r.hi, x_sign, f_fmla(r_lo, x_sign, -err));
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let r_lo = r_lo * x_sign;
        let r_hi = r.hi * x_sign;
        r_upper = r_hi + (r_lo + err);
        // Both bounds must use the sign-adjusted high word; with the unsigned
        // `r.hi` the test could never pass for negative x and every such call
        // would take the 128-bit path.
        r_lower = r_hi + (r_lo - err);
    }

    if r_upper == r_lower {
        return r_upper;
    }

    // Ziv's accuracy test failed, we redo the computations in Float128.
    // Recalculate mod 1/64.
    let idx = (u * f64::from_bits(0x4050000000000000)).round_finite() as usize;

    // After the first step of Newton-Raphson approximating v = sqrt(u), we have
    // that:
    //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
    //      v_lo = h / (2 * v_hi)
    // With error:
    //   sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
    //                           = -h^2 / (2*v * (sqrt(u) + v)^2).
    // Since:
    //   (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
    // we can add another correction term to (v_hi + v_lo) that is:
    //   v_ll = -h^2 / (2*v_hi * 4u)
    //        = -v_lo * (h / 4u)
    //        = -vl * (h / 8u),
    // making the errors:
    //   sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
    // well beyond 128-bit precision needed.

    // Get the rounding error of vl = 2 * v_lo ~ h / vh
    // Get full product of vh * vl
    let vl_lo;
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        vl_lo = f_fmla(-v_hi, vl, h) / v_hi;
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let vh_vl = DoubleDouble::from_exact_mult(v_hi, vl);
        vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
    }

    // vll = 2*v_ll = -vl * (h / (4u)).
    let t = h * (-0.25) / u;
    let vll = f_fmla(vl, t, vl_lo);
    // m_v = -(v_hi + v_lo + v_ll).
    let mv0 = DyadicFloat128::new_from_f64(vl) + DyadicFloat128::new_from_f64(vll);
    let mut m_v = DyadicFloat128::new_from_f64(vh) + mv0;
    m_v.sign = DyadicSign::Neg;

    // Perform computations in Float128:
    //   asin(x) = pi/2 - (v_hi + v_lo + vll) * P(u).
    let y_f128 =
        DyadicFloat128::new_from_f64(f_fmla(idx as f64, f64::from_bits(0xbf90000000000000), u));

    const PI_OVER_TWO_F128: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -127,
        mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
    };

    let p_f128 = asin_eval_dyadic(y_f128, idx);
    let r0_f128 = m_v.quick_mul(&p_f128);
    let mut r_f128 = PI_OVER_TWO_F128.quick_add(&r0_f128);

    // asin is odd: the magnitude was computed from |x|, restore the sign last.
    if x.is_sign_negative() {
        r_f128.sign = DyadicSign::Neg;
    }

    r_f128.fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_asin` against reference values on both sides of
    /// `|x| = 0.5` and for both signs.
    #[test]
    fn f_asin_test() {
        let cases: [(f64, f64); 4] = [
            (-0.4, -0.41151684606748806),
            (-0.8, -0.9272952180016123),
            (0.3, 0.3046926540153975),
            (0.6, 0.6435011087932844),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_asin(input), expected);
        }
    }
}

1478
vendor/pxfm/src/asin_eval_dyadic.rs vendored Normal file

File diff suppressed because it is too large Load Diff

165
vendor/pxfm/src/asinf.rs vendored Normal file
View File

@@ -0,0 +1,165 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::acosf::poly12;
use crate::common::{dd_fmlaf, f_fmla};
/// Slow path of `f_asinf` for inputs outside `[-1, 1]`.
///
/// A NaN input is propagated via `x + x`; every other input (finite with
/// `|x| > 1`, or infinite) produces NaN.
#[cold]
fn as_special(x: f32) -> f32 {
    // Dropping the sign bit leaves exponent and mantissa in the top 31 bits;
    // anything strictly above the infinity pattern is a NaN.
    let magnitude = x.to_bits().wrapping_shl(1);
    let input_is_nan = magnitude > (0xffu32 << 24);
    if input_is_nan {
        x + x
    } else {
        f32::NAN
    }
}
/// Computes asin
///
/// Max found ULP 0.49999928
///
/// Inputs with `|x| > 1` (including infinities) and NaN are handled by
/// `as_special`. Otherwise the evaluation proceeds in stages:
/// * `|x| < 2^-12` — `x + x * 2^-25`;
/// * `|x| < ~0.88` — a fast polynomial guarded by Ziv's rounding test;
/// * fallback for `|x| < 0.5` — `x + x^3 * poly12(x^2)`;
/// * fallback for `0.5 <= |x| <= 1` — `pi/2 - sqrt(1 - |x|) * poly12(1 - |x|)`,
///   with two inputs special-cased.
#[inline]
pub fn f_asinf(x: f32) -> f32 {
    const PI2: f64 = f64::from_bits(0x3ff921fb54442d18);
    let xs = x as f64;
    let t = x.to_bits();
    // Bit pattern with the sign shifted out, so comparisons are on |x|.
    let ax = t.wrapping_shl(1);
    if ax > 0x7f << 24 {
        // |x| > 1, infinity or NaN
        return as_special(x);
    }
    if ax < 0x7ec29000u32 {
        // |x| < ~0.88
        if ax < 115 << 24 {
            // |x| < 0.000244141
            return dd_fmlaf(x, f64::from_bits(0x3e60000000000000) as f32, x);
        }
        const B: [u64; 16] = [
            0x3ff0000000000005,
            0x3fc55557aeca105d,
            0x3fb3314ec3db7d12,
            0x3fa775738a5a6f92,
            0x3f75d5f7ce1c8538,
            0x3fd605c6d58740f0,
            0xc005728b732d73c6,
            0x402f152170f151eb,
            0xc04f962ea3ca992e,
            0x40671971e17375a0,
            0xc07860512b4ba230,
            0x40826a3b8d4bdb14,
            0xc0836f2ea5698b51,
            0x407b3d722aebfa2e,
            0xc066cf89703b1289,
            0x4041518af6a65e2d,
        ];
        let z = xs;
        let z2 = z * z;
        let w0 = f_fmla(z2, f64::from_bits(B[1]), f64::from_bits(B[0]));
        let w1 = f_fmla(z2, f64::from_bits(B[3]), f64::from_bits(B[2]));
        let w2 = f_fmla(z2, f64::from_bits(B[5]), f64::from_bits(B[4]));
        let w3 = f_fmla(z2, f64::from_bits(B[7]), f64::from_bits(B[6]));
        let w4 = f_fmla(z2, f64::from_bits(B[9]), f64::from_bits(B[8]));
        let w5 = f_fmla(z2, f64::from_bits(B[11]), f64::from_bits(B[10]));
        let w6 = f_fmla(z2, f64::from_bits(B[13]), f64::from_bits(B[12]));
        let w7 = f_fmla(z2, f64::from_bits(B[15]), f64::from_bits(B[14]));

        let z4 = z2 * z2;
        let z8 = z4 * z4;
        let z16 = z8 * z8;

        let fast = z
            * ((f_fmla(z4, w1, w0) + z8 * f_fmla(z4, w3, w2))
                + z16 * (f_fmla(z4, w5, w4) + z8 * f_fmla(z4, w7, w6)));
        // Ziv's accuracy test. The two ends of the error interval must be
        // compared AFTER rounding to f32: as doubles they always differ
        // (z != 0 here), which would make this fast path unreachable.
        let ub = fast as f32;
        let lb = (fast - z * f64::from_bits(0x3e0efa8eb0000000)) as f32;
        if ub == lb {
            return ub;
        }
    }
    let r = if ax < (0x7eu32 << 24) {
        // |x| < 0.5
        const C: [u64; 12] = [
            0x3fc555555555529c,
            0x3fb333333337e0dd,
            0x3fa6db6db3b4465e,
            0x3f9f1c72e13ac306,
            0x3f96e89cebe06bc4,
            0x3f91c6dcf5289094,
            0x3f8c6dbbcc7c6315,
            0x3f88f8dc2615e996,
            0x3f7a5833b7bf15e8,
            0x3f943f44ace1665c,
            0xbf90fb17df881c73,
            0x3fa07520c026b2d6,
        ];
        let z = xs;
        let z2 = z * z;
        let c0 = poly12(z2, C);
        z + (z * z2) * c0
    } else {
        // 0.5 <= |x| <= 1. Two inputs are answered from precomputed values
        // (presumably hard-to-round cases — confirm against upstream).
        if ax == 0x7e55688au32 {
            return f32::copysign(f64::from_bits(0x3fe75b8a20000000) as f32, x)
                + f32::copysign(f64::from_bits(0x3e50000000000000) as f32, x);
        }
        if ax == 0x7e107434u32 {
            return f32::copysign(f64::from_bits(0x3fe1f4b640000000) as f32, x)
                + f32::copysign(f64::from_bits(0x3e50000000000000) as f32, x);
        }
        let bx = xs.abs();
        let z = 1.0 - bx;
        let s = z.sqrt();
        const C: [u64; 12] = [
            0x3ff6a09e667f3bcb,
            0x3fbe2b7dddff2db9,
            0x3f9b27247ab42dbc,
            0x3f802995cc4e0744,
            0x3f65ffb0276ec8ea,
            0x3f5033885a928dec,
            0x3f3911f2be23f8c7,
            0x3f24c3c55d2437fd,
            0x3f0af477e1d7b461,
            0x3f0abd6bdff67dcb,
            0xbef1717e86d0fa28,
            0x3ef6ff526de46023,
        ];
        // asin is odd: evaluate on |x| and restore the sign afterwards.
        f64::copysign(PI2 - s * poly12(z, C), xs)
    };
    r as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `asin(±0.5) = ±pi/6`, and inputs outside `[-1, 1]` give NaN.
    #[test]
    fn test_asinf() {
        let sixth_of_pi = std::f32::consts::FRAC_PI_6;
        let cases: [(f32, f32); 2] = [(-0.5, -sixth_of_pi), (0.5, sixth_of_pi)];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_asinf(input), expected);
        }
        assert!(f_asinf(7.).is_nan());
    }
}

370
vendor/pxfm/src/asinpi.rs vendored Normal file
View File

@@ -0,0 +1,370 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::acospi::INV_PI_DD;
use crate::asin::asin_eval;
use crate::asin_eval_dyadic::asin_eval_dyadic;
use crate::common::{dd_fmla, dyad_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::round::RoundFinite;
/// Computes asin(x)/PI
///
/// Max found ULP 0.5
///
/// Special values: `±0` is returned unchanged, `±1` gives `±0.5`, a NaN input
/// is returned as is, and any other `|x| > 1` yields NaN.
///
/// Evaluation is split by magnitude:
/// * `|x| < 2^-56` — `x * (1/pi)` in double-double with scaling;
/// * `|x| < 2^-26` — a two-term polynomial `c1*x + c3*x^3`;
/// * `|x| < 0.5` — `x * P(x^2) / pi` with `P` from `asin_eval`;
/// * `0.5 <= |x| < 1` — `(pi/2 - 2*sqrt(u) * P(u)) / pi` with `u = (1 - |x|)/2`.
///
/// The last two ranges first try a double-double evaluation guarded by Ziv's
/// rounding test and fall back to a 128-bit evaluation when the test fails.
pub fn f_asinpi(x: f64) -> f64 {
    let x_e = (x.to_bits() >> 52) & 0x7ff;
    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

    let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);

    // |x| < 0.5.
    if x_e < E_BIAS - 1 {
        // |x| < 2^-26.
        if x_e < E_BIAS - 26 {
            // When |x| < 2^-26, the relative error of the approximation asin(x) ~ x
            // is:
            //   |asin(x) - x| / |asin(x)| < |x^3| / (6|x|)
            //                             = x^2 / 6
            //                             < 2^-54
            //                             < epsilon(1)/2,
            // so only the leading terms of the series are needed below.
            if x.abs() == 0. {
                return x;
            }

            if x_e < E_BIAS - 56 {
                // One specific mantissa is answered from a precomputed
                // double-double value rescaled to the exponent of x
                // (presumably a hard-to-round case — confirm against upstream).
                if (x_abs.to_bits().wrapping_shl(12)) == 0x59af9a1194efe000u64 {
                    let e = (x.to_bits() >> 52) & 0x7ff;
                    let h = f64::from_bits(0x3c7b824198b94a89);
                    let l = f64::from_bits(0x391fffffffffffff);
                    let mut t = (if x > 0. { 1.0f64 } else { -1.0f64 }).to_bits();
                    t = t.wrapping_sub(0x3c9u64.wrapping_sub(e).wrapping_shl(52));
                    return f_fmla(l, f64::from_bits(t), h * f64::from_bits(t));
                }

                let h = x * INV_PI_DD.hi;
                let sx = x * f64::from_bits(0x4690000000000000); /* scale x */
                let mut l = dd_fmla(sx, INV_PI_DD.hi, -h * f64::from_bits(0x4690000000000000));
                l = dd_fmla(sx, INV_PI_DD.lo, l);
                /* scale back */
                let res = dyad_fmla(l, f64::from_bits(0x3950000000000000), h);
                return res;
            }

            /* We use the Sollya polynomial 0x1.45f306dc9c882a53f84eafa3ea4p-2 * x
            + 0x1.b2995e7b7b606p-5 * x^3, with relative error bounded by 2^-106.965
            on [2^-53, 2^-26] */
            const C1H: f64 = f64::from_bits(0x3fd45f306dc9c883);
            const C1L: f64 = f64::from_bits(0xbc76b01ec5417057);
            const C3: f64 = f64::from_bits(0x3fab2995e7b7b606);
            let h = C1H;
            let l = dd_fmla(C3, x * x, C1L);
            /* multiply h+l by x */
            let hh = h * x;
            let mut ll = dd_fmla(h, x, -hh);
            /* hh+ll = h*x */
            ll = dd_fmla(l, x, ll);
            return hh + ll;
        }

        let x_sq = DoubleDouble::from_exact_mult(x, x);
        let err = x_abs * f64::from_bits(0x3cc0000000000000);
        // Polynomial approximation:
        //   p ~ asin(x)/x
        let (p, err) = asin_eval(x_sq, err);
        // asin(x) ~ x * (ASIN_COEFFS[idx][0] + p)
        let mut r0 = DoubleDouble::from_exact_mult(x, p.hi);
        let mut r_lo = f_fmla(x, p.lo, r0.lo);

        // Divide by pi in double-double.
        r0 = DoubleDouble::mult(DoubleDouble::new(r_lo, r0.hi), INV_PI_DD);
        r_lo = r0.lo;

        // Ziv's accuracy test: if both ends of the error interval round to the
        // same double, that double is the correctly rounded result.
        let r_upper = r0.hi + (r_lo + err);
        let r_lower = r0.hi + (r_lo - err);

        if r_upper == r_lower {
            return r_upper;
        }

        // Ziv's accuracy test failed, perform 128-bit calculation.

        // Recalculate mod 1/64.
        let idx = (x_sq.hi * f64::from_bits(0x4050000000000000)).round_finite() as usize;

        // Get x^2 - idx/64 exactly.  When FMA is available, double-double
        // multiplication will be correct for all rounding modes.  Otherwise, we use
        // Float128 directly.
        let x_f128 = DyadicFloat128::new_from_f64(x);

        let u: DyadicFloat128;
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        ))]
        {
            // u = x^2 - idx/64
            let u_hi = DyadicFloat128::new_from_f64(f_fmla(
                idx as f64,
                f64::from_bits(0xbf90000000000000),
                x_sq.hi,
            ));
            u = u_hi.quick_add(&DyadicFloat128::new_from_f64(x_sq.lo));
        }

        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        )))]
        {
            let x_sq_f128 = x_f128.quick_mul(&x_f128);
            u = x_sq_f128.quick_add(&DyadicFloat128::new_from_f64(
                idx as f64 * (f64::from_bits(0xbf90000000000000)),
            ));
        }

        let p_f128 = asin_eval_dyadic(u, idx);
        let mut r = x_f128.quick_mul(&p_f128);
        r = r.quick_mul(&crate::acospi::INV_PI_F128);
        return r.fast_as_f64();
    }

    // pi/2 as a double-double (low word first).
    const PI_OVER_TWO: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3c91a62633145c07),
        f64::from_bits(0x3ff921fb54442d18),
    );

    let x_sign = if x.is_sign_negative() { -1.0 } else { 1.0 };

    // |x| >= 1
    if x_e >= E_BIAS {
        // x = +-1, asin(x) = +- pi/2
        if x_abs == 1.0 {
            // asin(+-1)/pi = +-1/2, which is exactly x/2
            return x * 0.5; // asinpi_specific
        }
        // |x| > 1, return NaN.
        if x.is_nan() {
            return x;
        }
        return f64::NAN;
    }

    // u = (1 - |x|)/2
    let u = f_fmla(x_abs, -0.5, 0.5);
    // v_hi + v_lo ~ sqrt(u).
    // Let:
    //   h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
    // Then:
    //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
    //           ~ v_hi + h / (2 * v_hi)
    // So we can use:
    //   v_lo = h / (2 * v_hi).
    // Then,
    //   asin(x) ~ pi/2 - 2*(v_hi + v_lo) * P(u)
    let v_hi = u.sqrt();
    let h;
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        h = f_fmla(v_hi, -v_hi, u);
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let v_hi_sq = DoubleDouble::from_exact_mult(v_hi, v_hi);
        h = (u - v_hi_sq.hi) - v_hi_sq.lo;
    }

    // Scale v_lo and v_hi by 2 from the formula:
    //   vh = v_hi * 2
    //   vl = 2*v_lo = h / v_hi.
    let vh = v_hi * 2.0;
    let vl = h / v_hi;

    // Polynomial approximation:
    //   p ~ asin(sqrt(u))/sqrt(u)
    let err = vh * f64::from_bits(0x3cc0000000000000);

    let (p, err) = asin_eval(DoubleDouble::new(0.0, u), err);

    // Perform computations in double-double arithmetic:
    //   asin(x) = pi/2 - (v_hi + v_lo) * (ASIN_COEFFS[idx][0] + p)
    let r0 = DoubleDouble::quick_mult(DoubleDouble::new(vl, vh), p);
    let mut r = DoubleDouble::from_exact_add(PI_OVER_TWO.hi, -r0.hi);

    let mut r_lo = PI_OVER_TWO.lo - r0.lo + r.lo;

    // Divide by pi in double-double.
    let p = DoubleDouble::mult(DoubleDouble::new(r_lo, r.hi), INV_PI_DD);
    r_lo = p.lo;
    r.hi = p.hi;

    // Ziv's accuracy test on the signed result.
    let (r_upper, r_lower);

    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        r_upper = f_fmla(r.hi, x_sign, f_fmla(r_lo, x_sign, err));
        r_lower = f_fmla(r.hi, x_sign, f_fmla(r_lo, x_sign, -err));
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let r_lo = r_lo * x_sign;
        let r_hi = r.hi * x_sign;
        r_upper = r_hi + (r_lo + err);
        // Both bounds must use the sign-adjusted high word; with the unsigned
        // `r.hi` the test could never pass for negative x and every such call
        // would take the 128-bit path.
        r_lower = r_hi + (r_lo - err);
    }

    if r_upper == r_lower {
        return r_upper;
    }

    // Ziv's accuracy test failed, we redo the computations in Float128.
    // Recalculate mod 1/64.
    let idx = (u * f64::from_bits(0x4050000000000000)).round_finite() as usize;

    // After the first step of Newton-Raphson approximating v = sqrt(u), we have
    // that:
    //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
    //      v_lo = h / (2 * v_hi)
    // With error:
    //   sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
    //                           = -h^2 / (2*v * (sqrt(u) + v)^2).
    // Since:
    //   (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
    // we can add another correction term to (v_hi + v_lo) that is:
    //   v_ll = -h^2 / (2*v_hi * 4u)
    //        = -v_lo * (h / 4u)
    //        = -vl * (h / 8u),
    // making the errors:
    //   sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
    // well beyond 128-bit precision needed.

    // Get the rounding error of vl = 2 * v_lo ~ h / vh
    // Get full product of vh * vl
    let vl_lo;
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        vl_lo = f_fmla(-v_hi, vl, h) / v_hi;
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let vh_vl = DoubleDouble::from_exact_mult(v_hi, vl);
        vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
    }

    // vll = 2*v_ll = -vl * (h / (4u)).
    let t = h * (-0.25) / u;
    let vll = f_fmla(vl, t, vl_lo);
    // m_v = -(v_hi + v_lo + v_ll).
    let mv0 = DyadicFloat128::new_from_f64(vl) + DyadicFloat128::new_from_f64(vll);
    let mut m_v = DyadicFloat128::new_from_f64(vh) + mv0;
    m_v.sign = DyadicSign::Neg;

    // Perform computations in Float128:
    //   asin(x) = pi/2 - (v_hi + v_lo + vll) * P(u).
    let y_f128 =
        DyadicFloat128::new_from_f64(f_fmla(idx as f64, f64::from_bits(0xbf90000000000000), u));

    const PI_OVER_TWO_F128: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -127,
        mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
    };

    let p_f128 = asin_eval_dyadic(y_f128, idx);
    let r0_f128 = m_v * p_f128;
    let mut r_f128 = PI_OVER_TWO_F128 + r0_f128;

    // asin is odd: the magnitude was computed from |x|, restore the sign
    // before the final scaling by 1/pi.
    if x.is_sign_negative() {
        r_f128.sign = DyadicSign::Neg;
    }

    r_f128 = r_f128.quick_mul(&crate::acospi::INV_PI_F128);

    r_f128.fast_as_f64()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn f_asinpi_test() {
assert_eq!(
f_asinpi(-0.00000000032681723993732703),
-0.00000000010402915844735117
);
assert_eq!(f_asinpi(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000017801371778309684), 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005666352624669099);
assert_eq!(f_asinpi(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000026752519513526076), 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008515591441480124);
assert_eq!(f_asinpi(-0.4), -0.13098988043445461);
assert_eq!(f_asinpi(-0.8), -0.2951672353008666);
assert_eq!(f_asinpi(0.4332432142124432), 0.14263088583055605);
assert_eq!(f_asinpi(0.8543543534343434), 0.326047108714517);
assert_eq!(f_asinpi(0.00323146509843243), 0.0010286090778797426);
}
}

262
vendor/pxfm/src/asinpif.rs vendored Normal file
View File

@@ -0,0 +1,262 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
/// Polynomial coefficients, stored as raw `f64` bit patterns, used by
/// `f_asinpif` (decoded with `f64::from_bits` at use).
///
/// `f_asinpif` derives a row index from the exponent and top mantissa bits of
/// `x` and masks it to `0..=15`:
/// * row 0 (`|x| < 2^-4`) holds a degree-7 polynomial in `x^2`; the result is
///   `x` times that polynomial;
/// * rows 1..=15 each hold a degree-7 polynomial in `|x|`, which is multiplied
///   by `sqrt(1 - |x|)` and subtracted from `0.5`.
///
/// NOTE(review): the name and `pub(crate)` visibility suggest the table is also
/// used by the single-precision acos(x)/pi routine — not visible from here.
pub(crate) static ASINCOSF_PI_TABLE: [[u64; 8]; 16] = [
[
0x3fd45f306dc9c882,
0x3fab2995e7b7dc2f,
0x3f98723a1cf50c7e,
0x3f8d1a4591d16a29,
0x3f83ce3aa68ddaee,
0x3f7d3182ab0cc1bf,
0x3f762b379a8b88e3,
0x3f76811411fcfec2,
],
[
0x3fdffffffffd3cda,
0xbfb17cc1b3355fdd,
0x3f9d067a1e8d5a99,
0xbf908e16fb09314a,
0x3f85eed43d42dcb2,
0xbf7f58baca7acc71,
0x3f75dab64e2dcf15,
0xbf659270e30797ac,
],
[
0x3fdfffffff7c4617,
0xbfb17cc149ded3a2,
0x3f9d0654d4cb2c1a,
0xbf908c3ba713d33a,
0x3f85d2053481079c,
0xbf7e485ebc545e7e,
0x3f7303baca167ddd,
0xbf5dee8d16d06b38,
],
[
0x3fdffffffa749848,
0xbfb17cbe71559350,
0x3f9d05a312269adf,
0xbf90862b3ee617d7,
0x3f85920708db2a73,
0xbf7cb0463b3862c3,
0x3f702b82478f95d7,
0xbf552a7b8579e729,
],
[
0x3fdfffffe1f92bb5,
0xbfb17cb3e74c64e3,
0x3f9d03af67311cbf,
0xbf9079441cbfc7a0,
0x3f852b4287805a61,
0xbf7ac3286d604a98,
0x3f6b2f1210d9701b,
0xbf4e740ddc25afd6,
],
[
0x3fdfffff92beb6e2,
0xbfb17c986fe9518b,
0x3f9cff98167c9a5e,
0xbf90638b591eae52,
0x3f84a0803828959e,
0xbf78adeca229f11d,
0x3f66b9a7ba05dfce,
0xbf4640521a43b2d0,
],
[
0x3fdffffeccee5bfc,
0xbfb17c5f1753f5ea,
0x3f9cf874e4fe258f,
0xbf9043e6cf77b256,
0x3f83f7db42227d92,
0xbf7691a6fa2a2882,
0x3f62f6543162bc61,
0xbf407d5da05822b6,
],
[
0x3fdffffd2f64431d,
0xbfb17bf8208c10c1,
0x3f9ced7487cdb124,
0xbf901a0d30932905,
0x3f83388f99b254da,
0xbf74844e245c65bd,
0x3f5fa777150197c6,
0xbf38c1ecf16a05c8,
],
[
0x3fdffffa36d1712e,
0xbfb17b523971bd4e,
0x3f9cddee26de2dee,
0xbf8fccb00abaaabc,
0x3f8269afc3622342,
0xbf72933152686752,
0x3f5a76d4956cc9a3,
0xbf32ce7d6dc651ce,
],
[
0x3fdffff5402ab3a1,
0xbfb17a5ba85da77a,
0x3f9cc96894e05c02,
0xbf8f532143cb832e,
0x3f819180b660ff09,
0xbf70c57417a78b3c,
0x3f562e26cbd7bb1e,
0xbf2ce28d33fe1df3,
],
[
0x3fdfffed8d639751,
0xbfb1790349f3ae76,
0x3f9caf9a4fd1b398,
0xbf8ec986b111342e,
0x3f80b53c3ad4baa4,
0xbf6e3c2282eeace4,
0x3f52a55369f55bbe,
0xbf2667fe48c396e8,
],
[
0x3fdfffe24b714161,
0xbfb177394fbcb719,
0x3f9c90652d920ebd,
0xbf8e3239197bddf1,
0x3f7fb2188525b025,
0xbf6b3aadd451afc7,
0x3f4f74020f31fdab,
0xbf218b0cb246768d,
],
[
0x3fdfffd298bec9e2,
0xbfb174efbfd34648,
0x3f9c6bcfe48ea92b,
0xbf8d8f9f2a16157c,
0x3f7e0044f56c8864,
0xbf6883e2347fe76c,
0x3f4a9f0e3c1b7af5,
0xbf1bb5acc0e60825,
],
[
0x3fdfffbd8b784c4d,
0xbfb1721abdd3722e,
0x3f9c41fee756d4b0,
0xbf8ce40bccf8065f,
0x3f7c59b684b70ef9,
0xbf66133d027996b3,
0x3f469cad01106397,
0xbf160f8e45494156,
],
[
0x3fdfffa23749cf88,
0xbfb16eb0a8285c06,
0x3f9c132d762e1b0d,
0xbf8c31a959398f4e,
0x3f7ac1c5b46bc8a0,
0xbf63e34f1abe51dc,
0x3f4346738737c0b9,
0xbf11b227a3f5c750,
],
[
0x3fdfff7fb25bb407,
0xbfb16aaa14d75640,
0x3f9bdfa75fca5ff2,
0xbf8b7a6e260d079c,
0x3f793ab06911033c,
0xbf61ee5560967fd5,
0x3f407d31060838bf,
0xbf0c96f33a283115,
],
];
/// Computes asin(x)/PI
///
/// Max ULP 0.5
///
/// `±1` maps to `±0.5`, a NaN input is propagated, and any other `|x| > 1`
/// (including infinities) yields NaN. Inside `(-1, 1)` the result comes from a
/// per-interval polynomial in `ASINCOSF_PI_TABLE`, evaluated in `f64`.
#[inline]
pub fn f_asinpif(x: f32) -> f32 {
    let bits = x.to_bits();
    let biased_exp: i32 = ((bits >> 23) & 0xff) as i32;
    let abs_x = x.abs();

    // |x| >= 1 or nan
    if biased_exp >= 127 {
        if abs_x == 1.0 {
            // |x| = 1
            return f32::copysign(0.5, x);
        }
        let input_is_nan = biased_exp == 0xff && bits.wrapping_shl(9) != 0;
        // x = nan is propagated; |x| > 1 has no real arcsine.
        return if input_is_nan { x + x } else { f32::NAN };
    }

    let xd = x as f64;
    let abs_xd = abs_x as f64;

    // Table row from the leading bits of |x|; shift < 32 corresponds to
    // |x| >= 2^-12, anything smaller uses row 0.
    let shift: i32 = 146i32.wrapping_sub(biased_exp);
    let row_index = if shift < 32 {
        (((bits & 0x007fffff) | 1 << 23) >> shift) as i32
    } else {
        0i32
    };

    let xd2 = xd * xd;
    let xd4 = xd2 * xd2;
    let row = ASINCOSF_PI_TABLE[(row_index as usize) & 15];
    let coef = |i: usize| f64::from_bits(row[i]);

    // Degree-7 polynomial in `v`, given `v`, `v^2` and `v^4`: adjacent
    // coefficients are paired first, then the pairs are combined.
    let eval = |v: f64, v_sq: f64, v_quad: f64| -> f64 {
        let p01 = f_fmla(v, coef(1), coef(0));
        let p23 = f_fmla(v, coef(3), coef(2));
        let p45 = f_fmla(v, coef(5), coef(4));
        let p67 = f_fmla(v, coef(7), coef(6));
        let low = f_fmla(p23, v_sq, p01);
        let high = f_fmla(p67, v_sq, p45);
        low + high * v_quad
    };

    if row_index == 0 {
        // |x| < 2^-4: x * P(x^2)
        let poly = eval(xd2, xd4, xd4 * xd4);
        (xd * poly) as f32
    } else {
        // |x| >= 2^-4: sign(x) * (0.5 - sqrt(1 - |x|) * P(|x|))
        let root = (1. - abs_xd).sqrt();
        let poly = eval(abs_xd, xd2, xd4);
        let signed = f_fmla(
            -poly,
            f64::copysign(root, x as f64),
            f64::copysign(0.5, x as f64),
        );
        signed as f32
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks inside the domain plus the out-of-domain NaN result.
    #[test]
    fn test_asinpif() {
        assert_eq!(f_asinpif(0.0), 0.);
        assert_eq!(f_asinpif(0.5), 0.16666667);
        assert!(f_asinpif(1.5).is_nan());
    }

    /// Covers the early-return branches of `f_asinpif`: the exact endpoints,
    /// NaN propagation, infinities, and odd symmetry for a negative argument.
    #[test]
    fn test_asinpif_special_values() {
        // |x| == 1 returns 0.5 with the sign of x.
        assert_eq!(f_asinpif(1.0), 0.5);
        assert_eq!(f_asinpif(-1.0), -0.5);
        // NaN in, NaN out.
        assert!(f_asinpif(f32::NAN).is_nan());
        // Infinities lie outside [-1, 1].
        assert!(f_asinpif(f32::INFINITY).is_nan());
        assert!(f_asinpif(f32::NEG_INFINITY).is_nan());
        assert!(f_asinpif(-1.5).is_nan());
        // The sign is applied symmetrically, so the function is odd.
        assert_eq!(f_asinpif(-0.5), -0.16666667);
    }
}

292
vendor/pxfm/src/bessel/alpha0.rs vendored Normal file
View File

@@ -0,0 +1,292 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::f_polyeval9;
//
/// 128-bit dyadic evaluation of the order-0 `alpha` series.
///
/// Applies the 18 coefficients below in Horner form to `reciprocal^2`, then
/// multiplies the sum by `reciprocal`, i.e. the result is an odd polynomial in
/// `reciprocal`.
///
/// See [bessel_0_asympt_alpha] for the info
pub(crate) fn bessel_0_asympt_alpha_hard(reciprocal: DyadicFloat128) -> DyadicFloat128 {
    static C: [DyadicFloat128; 18] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -130,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -131,
            mantissa: 0x85555555_55555555_55555555_55555555_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -130,
            mantissa: 0xd6999999_99999999_99999999_9999999a_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -127,
            mantissa: 0xd1ac2492_49249249_24924924_92492492_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -123,
            mantissa: 0xbbcd0fc7_1c71c71c_71c71c71_c71c71c7_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -118,
            mantissa: 0x85e8fe45_8ba2e8ba_2e8ba2e8_ba2e8ba3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0x8b5a8f33_63c4ec4e_c4ec4ec4_ec4ec4ec_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -108,
            mantissa: 0xc7661d79_9d59b555_55555555_55555555_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -102,
            mantissa: 0xbbced715_c2897a28_78787878_78787878_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -96,
            mantissa: 0xe14b19b4_aae3f7fe_be1af286_bca1af28_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -89,
            mantissa: 0xa7af7341_db2192db_975e0c30_c30c30c3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -82,
            mantissa: 0x97a8f676_b349f6fc_5cefd338_590b2164_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -75,
            mantissa: 0xa3d299fb_6f304d73_86e15f12_0fd70a3d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -68,
            mantissa: 0xd050b737_cbc044ef_e8807e3c_87f43da1_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -60,
            mantissa: 0x9a02379b_daa7e492_854f42de_6d3dffe6_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -52,
            mantissa: 0x83011a39_380e467d_de6b70ec_b92ce0cc_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -45,
            mantissa: 0xfe16521f_c79e5d9a_a5bed653_e3844e9a_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -36,
            mantissa: 0x8b54b13d_3fb3e1c4_15dbb880_0bb32218_u128,
        },
    ];

    let r2 = reciprocal * reciprocal;
    // Horner scheme from the highest coefficient (C[17]) down to C[0].
    let poly = C[..17].iter().rev().fold(C[17], |acc, &c| r2 * acc + c);
    poly * reciprocal
}
/**
Double-double evaluation of the order-0 `alpha` series.

`recip` is the reciprocal of the argument. Twelve coefficients, stored as `(lo, hi)` bit
pairs, are applied in Horner form to `recip^2`; the sum is then multiplied by `recip` and
renormalised with an exact addition of its two words.

Note on the generation script below: the series is in negative powers, but Sage expresses
it in positive powers, so `x=1/x` should be applied before any real evaluation.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(0, x, 50)
Q = Qn_asymptotic(0, x, 50)

R_series = (-Q/P)

# alpha is atan(R_series) so we're doing Taylor series atan expansion on R_series
arctan_series_Z = sum([QQ(-1)**k * x**(QQ(2)*k+1) / RealField(700)(RealField(700)(2)*k+1) for k in range(25)])
alpha_series = arctan_series_Z(R_series)

# see the series
print(alpha_series)
```
**/
#[inline]
pub(crate) fn bessel_0_asympt_alpha(recip: DoubleDouble) -> DoubleDouble {
    const C: [(u64, u64); 12] = [
        (0x0000000000000000, 0x3fc0000000000000),
        (0x3c55555555555555, 0xbfb0aaaaaaaaaaab),
        (0x3c5999999999999a, 0x3fcad33333333333),
        (0xbc92492492492492, 0xbffa358492492492),
        (0xbcbc71c71c71c71c, 0x403779a1f8e38e39),
        (0xbd0745d1745d1746, 0xc080bd1fc8b1745d),
        (0xbd7d89d89d89d89e, 0x40d16b51e66c789e),
        (0x3dc5555555555555, 0xc128ecc3af33ab37),
        (0x3e2143c3c3c3c3c4, 0x418779dae2b8512f),
        (0x3df41e50d79435e5, 0xc1ec296336955c7f),
        (0x3ef6dcbaf0618618, 0x4254f5ee683b6432),
        (0x3f503a3102cc7a6f, 0xc2c2f51eced6693f),
    ];

    // Doing (1/x)*(1/x) instead (1/(x*x)) to avoid spurious overflow/underflow
    let r2 = DoubleDouble::quick_mult(recip, recip);

    // Seed the Horner recurrence with the two highest coefficients.
    let mut acc = DoubleDouble::mul_add(
        r2,
        DoubleDouble::from_bit_pair(C[11]),
        DoubleDouble::from_bit_pair(C[10]),
    );
    // C[9] down to C[1] carry a low word, so they use the full double-double add.
    for &pair in C[1..10].iter().rev() {
        acc = DoubleDouble::mul_add(r2, acc, DoubleDouble::from_bit_pair(pair));
    }
    // C[0] has a zero low word; the high word alone is enough.
    acc = DoubleDouble::mul_add_f64(r2, acc, f64::from_bits(C[0].1));

    let scaled = DoubleDouble::quick_mult(acc, recip);
    DoubleDouble::from_exact_add(scaled.hi, scaled.lo)
}
/**
Faster, lower-precision evaluation of the order-0 `alpha` series.

`recip` is the reciprocal of the argument. The nine highest coefficients are evaluated in
plain `f64` on the high word of `recip^2`; the three lowest ones are then folded in with
double-double steps, and the sum is multiplied by `recip`.

Note on the generation script below: the series is in negative powers, but Sage expresses
it in positive powers, so `x=1/x` should be applied before any real evaluation.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(0, x, 50)
Q = Qn_asymptotic(0, x, 50)

R_series = (-Q/P)

# alpha is atan(R_series) so we're doing Taylor series atan expansion on R_series
arctan_series_Z = sum([QQ(-1)**k * x**(QQ(2)*k+1) / RealField(700)(RealField(700)(2)*k+1) for k in range(25)])
alpha_series = arctan_series_Z(R_series)

# see the series
print(alpha_series)
```
**/
#[inline]
pub(crate) fn bessel_0_asympt_alpha_fast(recip: DoubleDouble) -> DoubleDouble {
    const C: [u64; 12] = [
        0x3fc0000000000000,
        0xbfb0aaaaaaaaaaab,
        0x3fcad33333333333,
        0xbffa358492492492,
        0x403779a1f8e38e39,
        0xc080bd1fc8b1745d,
        0x40d16b51e66c789e,
        0xc128ecc3af33ab37,
        0x418779dae2b8512f,
        0xc1ec296336955c7f,
        0x4254f5ee683b6432,
        0xc2c2f51eced6693f,
    ];
    let coef = |i: usize| f64::from_bits(C[i]);

    // Doing (1/x)*(1/x) instead (1/(x*x)) to avoid spurious overflow/underflow
    let r2 = DoubleDouble::quick_mult(recip, recip);

    // Coefficients 3..=11 in plain f64, using only the high word of recip^2.
    let tail = f_polyeval9(
        r2.hi,
        coef(3),
        coef(4),
        coef(5),
        coef(6),
        coef(7),
        coef(8),
        coef(9),
        coef(10),
        coef(11),
    );

    // The three leading coefficients are accumulated in double-double.
    let with_c2 = DoubleDouble::mul_f64_add_f64(r2, tail, coef(2));
    let with_c1 = DoubleDouble::mul_add_f64(r2, with_c2, coef(1));
    let with_c0 = DoubleDouble::mul_add_f64(r2, with_c1, coef(0));
    DoubleDouble::quick_mult(with_c0, recip)
}

296
vendor/pxfm/src/bessel/alpha1.rs vendored Normal file
View File

@@ -0,0 +1,296 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::f_polyeval9;
/**
Faster, lower-precision evaluation of the order-1 `alpha` series.

`recip` is the reciprocal of the argument. The nine highest coefficients are evaluated in
plain `f64` on the high word of `recip^2`; the three lowest ones are then folded in with
double-double steps, and the sum is multiplied by `recip`.

Note on the generation script below: the series is in negative powers, but Sage expresses
it in positive powers, so `x=1/x` should be applied before any real evaluation.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(1, x, 50)
Q = Qn_asymptotic(1, x, 50)

R_series = (-Q/P)

# alpha is atan(R_series) so we're doing Taylor series atan expansion on R_series
arctan_series_Z = sum([QQ(-1)**k * x**(QQ(2)*k+1) / RealField(700)(RealField(700)(2)*k+1) for k in range(25)])
alpha_series = arctan_series_Z(R_series)

# see the series
print(alpha_series)
```

See notes/bessel_asympt.ipynb for generation
**/
#[inline]
pub(crate) fn bessel_1_asympt_alpha_fast(recip: DoubleDouble) -> DoubleDouble {
    const C: [u64; 12] = [
        0xbfd8000000000000,
        0x3fc5000000000000,
        0xbfd7bccccccccccd,
        0x4002f486db6db6db,
        0xc03e9fbf40000000,
        0x4084997b55945d17,
        0xc0d4a914195269d9,
        0x412cd1b53816aec1,
        0xc18aa4095d419351,
        0x41ef809305f11b9d,
        0xc2572e6809ed618b,
        0x42c4c5b6057839f9,
    ];
    let coef = |i: usize| f64::from_bits(C[i]);

    // Doing (1/x)*(1/x) instead (1/(x*x)) to avoid spurious overflow/underflow
    let r2 = DoubleDouble::quick_mult(recip, recip);

    // Coefficients 3..=11 in plain f64, using only the high word of recip^2.
    let tail = f_polyeval9(
        r2.hi,
        coef(3),
        coef(4),
        coef(5),
        coef(6),
        coef(7),
        coef(8),
        coef(9),
        coef(10),
        coef(11),
    );

    // The three leading coefficients are accumulated in double-double.
    let with_c2 = DoubleDouble::mul_f64_add_f64(r2, tail, coef(2));
    let with_c1 = DoubleDouble::mul_add_f64(r2, with_c2, coef(1));
    let with_c0 = DoubleDouble::mul_add_f64(r2, with_c1, coef(0));
    DoubleDouble::quick_mult(with_c0, recip)
}
/**
Double-double evaluation of the order-1 `alpha` series.

`recip` is the reciprocal of the argument. Twelve coefficients, stored as `(lo, hi)` bit
pairs, are applied in Horner form to `recip^2`; the sum is then multiplied by `recip` and
renormalised with an exact addition of its two words.

Coefficients whose low word is zero (`C[0]`, `C[1]`) are added as a plain `f64`; every
coefficient with a non-zero low word is added as a full double-double so that its
correction term is not lost.

Note on the generation script below: the series is in negative powers, but Sage expresses
it in positive powers, so `x=1/x` should be applied before any real evaluation.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(1, x, 50)
Q = Qn_asymptotic(1, x, 50)

R_series = (-Q/P)

# alpha is atan(R_series) so we're doing Taylor series atan expansion on R_series
arctan_series_Z = sum([QQ(-1)**k * x**(QQ(2)*k+1) / RealField(700)(RealField(700)(2)*k+1) for k in range(25)])
alpha_series = arctan_series_Z(R_series)

# see the series
print(alpha_series)
```

See notes/bessel_asympt.ipynb for generation
**/
#[inline]
pub(crate) fn bessel_1_asympt_alpha(recip: DoubleDouble) -> DoubleDouble {
    const C: [(u64, u64); 12] = [
        (0x0000000000000000, 0xbfd8000000000000),
        (0x0000000000000000, 0x3fc5000000000000),
        (0x3c6999999999999a, 0xbfd7bccccccccccd),
        (0x3cab6db6db6db6db, 0x4002f486db6db6db),
        (0x0000000000000000, 0xc03e9fbf40000000),
        (0x3d21745d1745d174, 0x4084997b55945d17),
        (0x3d789d89d89d89d9, 0xc0d4a914195269d9),
        (0xbdb999999999999a, 0x412cd1b53816aec1),
        (0xbdfe5a5a5a5a5a5a, 0xc18aa4095d419351),
        (0x3e7e0ca50d79435e, 0x41ef809305f11b9d),
        (0xbedff8b720000000, 0xc2572e6809ed618b),
        (0xbf64e5d8ae68b7a7, 0x42c4c5b6057839f9),
    ];

    // Doing (1/x)*(1/x) instead (1/(x*x)) to avoid spurious overflow/underflow
    let x2 = DoubleDouble::quick_mult(recip, recip);

    let mut p = DoubleDouble::mul_add(
        x2,
        DoubleDouble::from_bit_pair(C[11]),
        DoubleDouble::from_bit_pair(C[10]),
    );
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[9]));
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[8]));
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[7]));
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[6]));
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[5]));
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[4]));
    // C[3] has a non-zero low word (0x3cab6db6db6db6db): it must be added as a full
    // double-double. Adding only the high word silently dropped that correction.
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
    // C[1] and C[0] have a zero low word, so the high word alone is enough.
    p = DoubleDouble::mul_add_f64(x2, p, f64::from_bits(C[1].1));
    p = DoubleDouble::mul_add_f64(x2, p, f64::from_bits(C[0].1));

    let z = DoubleDouble::quick_mult(p, recip);
    DoubleDouble::from_exact_add(z.hi, z.lo)
}
//
/// 128-bit dyadic evaluation of the order-1 `alpha` series.
///
/// Applies the 18 coefficients below in Horner form to `reciprocal^2`, then
/// multiplies the sum by `reciprocal`, i.e. the result is an odd polynomial in
/// `reciprocal`.
///
/// See [bessel_1_asympt_alpha] for the info
pub(crate) fn bessel_1_asympt_alpha_hard(reciprocal: DyadicFloat128) -> DyadicFloat128 {
    static C: [DyadicFloat128; 18] = [
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -129,
            mantissa: 0xc0000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -130,
            mantissa: 0xa8000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -129,
            mantissa: 0xbde66666_66666666_66666666_66666666_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -126,
            mantissa: 0x97a436db_6db6db6d_b6db6db6_db6db6db_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -123,
            mantissa: 0xf4fdfa00_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -118,
            mantissa: 0xa4cbdaac_a2e8ba2e_8ba2e8ba_2e8ba2e9_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -113,
            mantissa: 0xa548a0ca_934ec4ec_4ec4ec4e_c4ec4ec5_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -108,
            mantissa: 0xe68da9c0_b5760666_66666666_66666666_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -102,
            mantissa: 0xd5204aea_0c9a8879_69696969_69696969_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -96,
            mantissa: 0xfc04982f_88dce9e0_ca50d794_35e50d79_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -89,
            mantissa: 0xb973404f_6b0c58ff_c5b90000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -82,
            mantissa: 0xa62db02b_c1cfc563_44ea32e9_0b21642d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -75,
            mantissa: 0xb220e7ff_443c1584_7e85f4e0_55eb851f_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -68,
            mantissa: 0xe10a255c_ca5e68cc_00c2d6c0_acdc8000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -60,
            mantissa: 0xa573790c_5186f23b_5db502ea_d9fa5432_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -52,
            mantissa: 0x8c0ffedc_407a7015_453df84e_9c3f1d39_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -44,
            mantissa: 0x874226ed_c298a17a_d8c49a4e_dc9281a5_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -36,
            mantissa: 0x93cab36c_9ab9495c_310fa9cd_4b065359_u128,
        },
    ];

    let r2 = reciprocal * reciprocal;
    // Horner scheme from the highest coefficient (C[17]) down to C[0].
    let poly = C[..17].iter().rev().fold(C[17], |acc, &c| r2 * acc + c);
    poly * reciprocal
}

157
vendor/pxfm/src/bessel/bessel_exp.rs vendored Normal file
View File

@@ -0,0 +1,157 @@
/*
* // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{dd_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::exponents::{EXP_REDUCE_T0, EXP_REDUCE_T1};
use crate::round::RoundFinite;
/// Evaluates the polynomial approximation of `exp(z)` used by [i0_exp].
///
/// The three highest-degree steps run in plain `f64`; the final multiply by `z`
/// is an exact product and the constant term is added in double-double.
#[inline(always)]
fn exp_poly(z: f64) -> DoubleDouble {
    /* The following is a degree-4 polynomial generated by Sollya for exp(x)
    over [-2^-12.905,2^-12.905]
    with absolute error < 2^-74.34 (see sollya/Q_1.sollya). */
    const Q_1: [u64; 5] = [
        0x3ff0000000000000,
        0x3ff0000000000000,
        0x3fe0000000000000,
        0x3fc5555555997996,
        0x3fa5555555849d8d,
    ];
    let coef = |i: usize| f64::from_bits(Q_1[i]);

    // Horner steps for the degree 4, 3 and 2 coefficients.
    let step3 = dd_fmla(coef(4), z, coef(3));
    let step2 = dd_fmla(step3, z, coef(2));
    let step1 = dd_fmla(step2, z, coef(1));

    // z * step1 as an exact double-double product, then add the constant term.
    let z_term = DoubleDouble::from_exact_mult(z, step1);
    DoubleDouble::f64_add(coef(0), z_term)
}
/// Double-double approximation of `exp(r)` (fast path).
///
/// The argument is reduced with `k = round(r * 2^12 / ln 2)`. The low 12 bits of
/// `k` select two table factors (6 bits each), the remaining bits become the
/// binary exponent of the result, and the reduced remainder goes through
/// [exp_poly].
#[inline]
pub(crate) fn i0_exp(r: f64) -> DoubleDouble {
    // 2^12 / ln(2)
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);
    // ln(2) / 2^12 split into a short high word and a correction word; the
    // reduction below is `r - hi * k + lo * k`.
    const LOG_2E: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3d0718432a1b0e26),
        f64::from_bits(0x3f262e42ff000000),
    );

    let k = (r * INVLOG2).round_finite();
    let reduced = f_fmla(LOG_2E.lo, k, f_fmla(-LOG_2E.hi, k, r));

    // k is already integer, this is just a conversion.
    // NOTE(review): soundness also needs k to be finite and within i64 range;
    // that is presumably guaranteed by the callers - confirm.
    let k_int = unsafe { k.to_int_unchecked::<i64>() };

    let biased_exp = (k_int >> 12) + 0x3ff;
    let idx_t0 = ((k_int >> 6) & 0x3f) as usize;
    let idx_t1 = (k_int & 0x3f) as usize;

    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[idx_t0]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[idx_t1]);
    let table_product = DoubleDouble::quick_mult(t1, t0);
    let poly = exp_poly(reduced);
    let mantissa = DoubleDouble::quick_mult(table_product, poly);

    // Power of two built directly in the exponent field of an f64.
    let scale_bits = (biased_exp as u64).wrapping_shl(52);
    DoubleDouble::quick_mult_f64(mantissa, f64::from_bits(scale_bits))
}
/// Evaluates the degree-6 polynomial approximation of `exp(z)` used by
/// [i0_exp_accurate], with every Horner step in double-double arithmetic.
#[inline(always)]
fn exp_poly_dd(z: DoubleDouble) -> DoubleDouble {
    // Generated by Sollya:
    // d = [-2^-12.905,2^-12.905];
    // f = exp(x);
    // w = 1;
    // p = remez(f, 6, d, w);
    // pf = fpminimax(f, [|0,1,2,3,4,5,6|], [|1, 107...|], d, absolute, floating, 0, p);
    // err_p = -log2(dirtyinfnorm(pf*w-f, d));
    // display = decimal;
    const Q_1: [(u64, u64); 7] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x3a20e40000000000, 0x3ff0000000000000),
        (0x3a04820000000000, 0x3fe0000000000000),
        (0xbc756423c5338a66, 0x3fc5555555555556),
        (0xbc5560f74db5556c, 0x3fa5555555555556),
        (0x3c3648eca89bc6ac, 0x3f8111111144fbee),
        (0xbbd53d924ae90c8c, 0x3f56c16c16ffeecc),
    ];

    // Seed with the two highest coefficients, then walk down to Q_1[1].
    let mut acc = DoubleDouble::quick_mul_add(
        z,
        DoubleDouble::from_bit_pair(Q_1[6]),
        DoubleDouble::from_bit_pair(Q_1[5]),
    );
    for &pair in Q_1[1..5].iter().rev() {
        acc = DoubleDouble::quick_mul_add(z, acc, DoubleDouble::from_bit_pair(pair));
    }
    // The constant term has a zero low word, so only its high word is added.
    DoubleDouble::quick_mul_add_f64(z, acc, f64::from_bits(Q_1[0].1))
}
/// Double-double approximation of `exp(r)` (accurate path).
///
/// Same table-driven scheme as [i0_exp], but the reduced argument is kept as a
/// double-double (using a third correction word for `ln(2) / 2^12`) and the
/// polynomial is evaluated by [exp_poly_dd].
#[cold]
pub(crate) fn i0_exp_accurate(r: f64) -> DoubleDouble {
    // 2^12 / ln(2)
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);
    // ln(2) / 2^12: short high word and first correction word.
    const L2: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3d0718432a1b0e26),
        f64::from_bits(0x3f262e42ff000000),
    );
    // Second correction word.
    const L2LL: f64 = f64::from_bits(0x3999ff0342542fc3);

    let k = (r * INVLOG2).round_finite();

    // Reduced argument: (r - hi * k) plus the two correction words times k.
    let head = f_fmla(-L2.hi, k, r);
    let correction = DoubleDouble::quick_mult_f64(DoubleDouble::new(L2LL, L2.lo), k);
    let reduced = DoubleDouble::full_add_f64(correction, head);

    // k is already integer, this is just a conversion.
    // NOTE(review): soundness also needs k to be finite and within i64 range;
    // that is presumably guaranteed by the callers - confirm.
    let k_int = unsafe { k.to_int_unchecked::<i64>() };

    let biased_exp = (k_int >> 12) + 0x3ff;
    let idx_t0 = ((k_int >> 6) & 0x3f) as usize;
    let idx_t1 = (k_int & 0x3f) as usize;

    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[idx_t0]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[idx_t1]);
    let table_product = DoubleDouble::quick_mult(t1, t0);
    let poly = exp_poly_dd(reduced);
    let mantissa = DoubleDouble::quick_mult(table_product, poly);

    // Power of two built directly in the exponent field of an f64.
    let scale_bits = (biased_exp as u64).wrapping_shl(52);
    DoubleDouble::quick_mult_f64(mantissa, f64::from_bits(scale_bits))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The fast and the accurate paths must both round exp(0.5) to the same double.
    #[test]
    fn test_i0() {
        let fast = i0_exp(0.5).to_f64();
        assert_eq!(fast, 1.6487212707001282);

        let accurate = i0_exp_accurate(0.5).to_f64();
        assert_eq!(accurate, 1.6487212707001282);
    }
}

260
vendor/pxfm/src/bessel/beta0.rs vendored Normal file
View File

@@ -0,0 +1,260 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::f_polyeval9;
/**
Double-double evaluation of the order-0 `beta` series.

`recip` is the reciprocal of the argument. Ten coefficients, stored as `(lo, hi)` bit
pairs, are applied in Horner form to `recip^2`; the result is an even polynomial in
`recip`. Only `C[8]` and `C[9]` have a non-zero low word, so the remaining coefficients
are added as plain `f64` values.

Beta series
Generated by SageMath (note that `sqrt_series` reads the global `S`, not its parameter):
```python
#generate b series
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(0, x, 50)
Q = Qn_asymptotic(0, x, 50)

def sqrt_series(s):
    val = S.valuation()
    lc = S[val]  # Leading coefficient
    b = lc.sqrt() * x**(val // 2)

    for _ in range(5):
        b = (b + S / b) / 2
        b = b
    return b

S = (P**2 + Q**2).truncate(50)

b_series = sqrt_series(S).truncate(30)
#see the series
print(b_series)
```
**/
#[inline]
pub(crate) fn bessel_0_asympt_beta(recip: DoubleDouble) -> DoubleDouble {
    const C: [(u64, u64); 10] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x0000000000000000, 0xbfb0000000000000),
        (0x0000000000000000, 0x3fba800000000000),
        (0x0000000000000000, 0xbfe15f0000000000),
        (0x0000000000000000, 0x4017651180000000),
        (0x0000000000000000, 0xc05ab8c13b800000),
        (0x0000000000000000, 0x40a730492f262000),
        (0x0000000000000000, 0xc0fc73a7acd696f0),
        (0xbdf3a00000000000, 0x41577458dd9fce68),
        (0xbe4ba6b000000000, 0xc1b903ab9b27e18f),
    ];

    // Doing (1/x)*(1/x) instead (1/(x*x)) to avoid spurious overflow/underflow
    let r2 = DoubleDouble::quick_mult(recip, recip);

    // The two highest coefficients are the only ones that need both words.
    let mut acc = DoubleDouble::mul_add(
        r2,
        DoubleDouble::from_bit_pair(C[9]),
        DoubleDouble::from_bit_pair(C[8]),
    );
    // C[7] down to C[0]: the low word is zero, add the high word only.
    for &(_, hi) in C[..8].iter().rev() {
        acc = DoubleDouble::mul_add_f64(r2, acc, f64::from_bits(hi));
    }
    acc
}
/**
Faster, lower-precision evaluation of the order-0 `beta` series.

`recip` is the reciprocal of the argument. Coefficients 1 through 9 are evaluated in plain
`f64` on the high word of `recip^2`; the constant term is then added with a single
double-double step.

Beta series
Generated by SageMath (note that `sqrt_series` reads the global `S`, not its parameter):
```python
#generate b series
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(0, x, 50)
Q = Qn_asymptotic(0, x, 50)

def sqrt_series(s):
    val = S.valuation()
    lc = S[val]  # Leading coefficient
    b = lc.sqrt() * x**(val // 2)

    for _ in range(5):
        b = (b + S / b) / 2
        b = b
    return b

S = (P**2 + Q**2).truncate(50)

b_series = sqrt_series(S).truncate(30)
#see the series
print(b_series)
```
**/
#[inline]
pub(crate) fn bessel_0_asympt_beta_fast(recip: DoubleDouble) -> DoubleDouble {
    const C: [u64; 10] = [
        0x3ff0000000000000,
        0xbfb0000000000000,
        0x3fba800000000000,
        0xbfe15f0000000000,
        0x4017651180000000,
        0xc05ab8c13b800000,
        0x40a730492f262000,
        0xc0fc73a7acd696f0,
        0x41577458dd9fce68,
        0xc1b903ab9b27e18f,
    ];
    let coef = |i: usize| f64::from_bits(C[i]);

    // Doing (1/x)*(1/x) instead (1/(x*x)) to avoid spurious overflow/underflow
    let r2 = DoubleDouble::quick_mult(recip, recip);

    // Coefficients 1..=9 in plain f64, using only the high word of recip^2.
    let tail = f_polyeval9(
        r2.hi,
        coef(1),
        coef(2),
        coef(3),
        coef(4),
        coef(5),
        coef(6),
        coef(7),
        coef(8),
        coef(9),
    );

    // Constant term added in double-double.
    DoubleDouble::mul_f64_add_f64(r2, tail, coef(0))
}
/// 128-bit dyadic evaluation of the order-0 `beta` series.
///
/// Applies the 12 coefficients below in Horner form to `recip^2`; unlike the
/// `alpha` variants there is no final multiplication by `recip`.
///
/// see [bessel_0_asympt_beta] for more info
pub(crate) fn bessel_0_asympt_beta_hard(recip: DyadicFloat128) -> DyadicFloat128 {
    static C: [DyadicFloat128; 12] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -131,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -131,
            mantissa: 0xd4000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -128,
            mantissa: 0x8af80000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -125,
            mantissa: 0xbb288c00_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -121,
            mantissa: 0xd5c609dc_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -116,
            mantissa: 0xb9824979_31000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -111,
            mantissa: 0xe39d3d66_b4b78000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -105,
            mantissa: 0xbba2c6ec_fe733d8c_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -99,
            mantissa: 0xc81d5cd9_3f0c79ba_6b000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -92,
            mantissa: 0x86118ddf_c1ffc100_0ee1b000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -86,
            mantissa: 0xdc7ccfa9_930b874d_52df3464_00000000_u128,
        },
    ];

    let r2 = recip * recip;
    // Horner scheme from the highest coefficient (C[11]) down to C[0].
    C[..11].iter().rev().fold(C[11], |acc, &c| r2 * acc + c)
}

264
vendor/pxfm/src/bessel/beta1.rs vendored Normal file
View File

@@ -0,0 +1,264 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::f_polyeval9;
/**
Fast evaluation of the order-1 beta asymptotic series.

The series is in negative powers of `x`; the Sage script below writes it with positive
powers, so `x -> 1/x` has to be substituted before any real evaluation (this function
receives that reciprocal as `recip`).

Only the high word of `recip * recip` feeds the polynomial; the double-double square is
used for the final multiply-add with the leading coefficient `C[0]`.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(1, x, 50)
Q = Qn_asymptotic(1, x, 50)

def sqrt_series(s):
    val = S.valuation()
    lc = S[val]  # Leading coefficient
    b = lc.sqrt() * x**(val // 2)
    for _ in range(5):
        b = (b + S / b) / 2
        b = b
    return b

S = (P**2 + Q**2).truncate(50)
b_series = sqrt_series(S).truncate(30)
# see the beta series
print(b_series)
```
See notes/bessel_asympt.ipynb for generation
**/
#[inline]
pub(crate) fn bessel_1_asympt_beta_fast(recip: DoubleDouble) -> DoubleDouble {
    // Series coefficients stored as raw f64 bit patterns, lowest order first.
    const C: [u64; 10] = [
        0x3ff0000000000000,
        0x3fc8000000000000,
        0xbfc8c00000000000,
        0x3fe9c50000000000,
        0xc01ef5b680000000,
        0x40609860dd400000,
        0xc0abae9b7a06e000,
        0x41008711d41c1428,
        0xc15ab70164c8be6e,
        0x41bc1055e24f297f,
    ];
    let coeff = |i: usize| f64::from_bits(C[i]);
    // Doing (1/x)*(1/x) instead (1/(x*x)) to avoid spurious overflow/underflow
    let t = DoubleDouble::quick_mult(recip, recip);
    // Everything above the constant term is evaluated in plain f64.
    let tail = f_polyeval9(
        t.hi,
        coeff(1),
        coeff(2),
        coeff(3),
        coeff(4),
        coeff(5),
        coeff(6),
        coeff(7),
        coeff(8),
        coeff(9),
    );
    DoubleDouble::mul_f64_add_f64(t, tail, coeff(0))
}
/**
Double-double evaluation of the order-1 beta asymptotic series.

The series is in negative powers of `x`; the Sage script below writes it with positive
powers, so `x -> 1/x` has to be substituted before any real evaluation (this function
receives that reciprocal as `recip`).

The polynomial in `recip * recip` is evaluated by Horner's scheme. Only the two
highest-order coefficients have a non-zero first word in `C`, so they are combined with a
full double-double multiply-add; the remaining eight steps add a single `f64` coefficient.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(1, x, 50)
Q = Qn_asymptotic(1, x, 50)

def sqrt_series(s):
    val = S.valuation()
    lc = S[val]  # Leading coefficient
    b = lc.sqrt() * x**(val // 2)
    for _ in range(5):
        b = (b + S / b) / 2
        b = b
    return b

S = (P**2 + Q**2).truncate(50)
b_series = sqrt_series(S).truncate(30)
# see the beta series
print(b_series)
```
See notes/bessel_asympt.ipynb for generation
**/
#[inline]
pub(crate) fn bessel_1_asympt_beta(recip: DoubleDouble) -> DoubleDouble {
    // Pairs of f64 bit patterns; the second word is the value used by the f64 steps below.
    const C: [(u64, u64); 10] = [
        (0x0000000000000000, 0x3ff0000000000000), // 1
        (0x0000000000000000, 0x3fc8000000000000), // 2
        (0x0000000000000000, 0xbfc8c00000000000), // 3
        (0x0000000000000000, 0x3fe9c50000000000), // 4
        (0x0000000000000000, 0xc01ef5b680000000), // 5
        (0x0000000000000000, 0x40609860dd400000), // 6
        (0x0000000000000000, 0xc0abae9b7a06e000), // 7
        (0x0000000000000000, 0x41008711d41c1428), // 8
        (0xbdf7a00000000000, 0xc15ab70164c8be6e),
        (0xbe40e1f000000000, 0x41bc1055e24f297f),
    ];
    // Doing (1/x)*(1/x) instead (1/(x*x)) to avoid spurious overflow/underflow
    let t = DoubleDouble::quick_mult(recip, recip);
    // Highest two terms: full double-double coefficients.
    let top = DoubleDouble::mul_add(
        t,
        DoubleDouble::from_bit_pair(C[9]),
        DoubleDouble::from_bit_pair(C[8]),
    );
    // Remaining Horner steps, C[7] down to C[0], each adding one f64 coefficient.
    C[..8].iter().rev().fold(top, |acc, &(_, hi)| {
        DoubleDouble::mul_add_f64(t, acc, f64::from_bits(hi))
    })
}
/// Extended-precision evaluation of the order-1 beta series; see [bessel_1_asympt_beta]
/// for how the expansion is obtained.
///
/// With `t = recip * recip`, the twelve coefficients in `C` are combined by Horner's
/// scheme: `C[0] + t*(C[1] + t*(... + t*C[11]))`, all in `DyadicFloat128` arithmetic.
pub(crate) fn bessel_1_asympt_beta_hard(recip: DyadicFloat128) -> DyadicFloat128 {
    static C: [DyadicFloat128; 12] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -130,
            mantissa: 0xc0000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -130,
            mantissa: 0xc6000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -128,
            mantissa: 0xce280000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -125,
            mantissa: 0xf7adb400_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -120,
            mantissa: 0x84c306ea_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -116,
            mantissa: 0xdd74dbd0_37000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -110,
            mantissa: 0x84388ea0_e0a14000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -105,
            mantissa: 0xd5b80b26_45f372f4_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -99,
            mantissa: 0xe082af12_794bf6f1_e1000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -92,
            mantissa: 0x94a06149_f30146bc_fe8ed000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -86,
            mantissa: 0xf212edfc_42a62526_4fac2b0c_00000000_u128,
        },
    ];
    let t = recip * recip;
    // Horner: seed with the highest-order coefficient, then fold in C[10] down to C[0].
    C[..11]
        .iter()
        .rev()
        .fold(C[11], |acc, &coeff| t * acc + coeff)
}

1118
vendor/pxfm/src/bessel/i0.rs vendored Normal file

File diff suppressed because it is too large Load Diff

834
vendor/pxfm/src/bessel/i0e.rs vendored Normal file
View File

@@ -0,0 +1,834 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::bessel_exp::i0_exp_accurate;
use crate::bessel::i0::{
bessel_rsqrt_hard, eval_small_hard_3p6_to_7p5, i0_0_to_3p6_dd, i0_0_to_3p6_hard,
i0_3p6_to_7p5_dd,
};
use crate::bessel::i0_exp;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
/// Modified exponentially scaled Bessel of the first kind of order 0
///
/// Computes exp(-|x|)*I0(x)
///
/// The result is even in `x`: every branch works on `|x|`. `±inf` gives `0`,
/// NaN is propagated, and arguments with `|x| <= 2.22e-24` return exactly `1`.
pub fn f_i0e(x: f64) -> f64 {
    // Dropping the sign bit by a left shift lets one unsigned compare classify |x|.
    let ux = x.to_bits().wrapping_shl(1);
    // Bit pattern and value of |x|.
    let xb = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    let ax = f64::from_bits(xb);
    if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 {
        // |x| <= f64::EPSILON, |x| == inf, x == NaN
        if ux <= 0x760af31dc4611874u64 {
            // |x| <= 2.2204460492503131e-24f64
            return 1.;
        }
        if ux <= 0x7960000000000000u64 {
            // |x| <= f64::EPSILON
            // Power series of I0(x)Exp[-|x|] ~ 1 - |x| + O(x^2).
            // |x| (not x) must be used here: the function is even, and `1. - x`
            // would give a value above 1 for small negative arguments.
            return 1. - ax;
        }
        if x.is_infinite() {
            return 0.;
        }
        return x + f64::NAN; // x = NaN
    }
    if xb <= 0x4023000000000000u64 {
        // |x| <= 9.5
        if xb <= 0x400ccccccccccccdu64 {
            // |x| <= 3.6
            return i0e_0_to_3p6_exec(ax);
        } else if xb <= 0x401e000000000000u64 {
            // |x| <= 7.5
            return i0e3p6_to_7p5(ax);
        }
        return i0e_7p5_to_9p5(ax);
    }
    i0e_asympt(ax)
}
/**
Exponentially scaled I0 for arguments in (3.6; 7.5]; [f_i0e] passes `|x|` here.

The fast path multiplies the double-double `i0_3p6_to_7p5_dd(x)` by `i0_exp(-x)` and
accepts the result only if it rounds the same way at both ends of the error interval;
otherwise the accurate helpers are used.
**/
#[inline]
fn i0e3p6_to_7p5(x: f64) -> f64 {
    let scaled = DoubleDouble::quick_mult(i0_3p6_to_7p5_dd(x), i0_exp(-x));
    // Error bound: |hi| * 2^-57.5 + 2^-63
    let err = f_fmla(
        scaled.hi,
        f64::from_bits(0x3c56a09e667f3bcd), // 2^-57.5
        f64::from_bits(0x3c00000000000000), // 2^-63
    );
    let upper = scaled.hi + (scaled.lo + err);
    let lower = scaled.hi + (scaled.lo - err);
    if upper != lower {
        // Rounding is ambiguous within the error bound: redo with the accurate path.
        let accurate =
            DoubleDouble::quick_mult(eval_small_hard_3p6_to_7p5(x), i0_exp_accurate(-x));
        return accurate.to_f64();
    }
    upper
}
/// Exponentially scaled I0 for arguments up to 3.6; [f_i0e] passes `|x|` here.
///
/// The fast path multiplies the double-double `i0_0_to_3p6_dd(x)` by `i0_exp(-x)` and
/// accepts the result only if it rounds the same way at both ends of the error interval;
/// otherwise the accurate helpers are used.
#[inline]
fn i0e_0_to_3p6_exec(x: f64) -> f64 {
    let scaled = DoubleDouble::quick_mult(i0_0_to_3p6_dd(x), i0_exp(-x));
    // Error bound: |hi| * 2^-59 + 2^-65
    let err = f_fmla(
        scaled.hi,
        f64::from_bits(0x3c40000000000000), // 2^-59
        f64::from_bits(0x3be0000000000000), // 2^-65
    );
    let upper = scaled.hi + (scaled.lo + err);
    let lower = scaled.hi + (scaled.lo - err);
    if upper != lower {
        // Rounding is ambiguous within the error bound: redo with the accurate path.
        let accurate = DoubleDouble::quick_mult(i0_0_to_3p6_hard(x), i0_exp_accurate(-x));
        return accurate.to_f64();
    }
    upper
}
/**
Fast double-double path of the exponentially scaled I0 for the mid-interval [7.5;9.5].

Evaluates a rational function R = P/Q in `1/x` (12 coefficients each) and multiplies it
by the reciprocal square root of `x`. If the result cannot be rounded unambiguously
within the error bound, [i0e_7p5_to_9p5_hard] is used instead.

Mid-interval [7.5;9.5] generated by Wolfram:
I0(x)=R(1/x)/sqrt(x)*Exp(x)
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{1/9.5,1/7.5},11,11},WorkingPrecision->120]
num=Numerator[approx][[1]];
den=Denominator[approx][[1]];
poly=den;
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
NOTE(review): as written the snippet prints only the denominator coefficients
(`poly=den`); presumably the numerator list is produced the same way with `poly=num`.
**/
#[inline]
fn i0e_7p5_to_9p5(x: f64) -> f64 {
let dx = x;
let recip = DoubleDouble::from_quick_recip(x);
// Numerator coefficients, lowest order first; each entry is a pair of f64 bit
// patterns with the small correction word first.
const P: [(u64, u64); 12] = [
(0x3c778e3de1f76f48, 0x3fd988450531281b),
(0xbcb572f6149f389e, 0xc01a786676fb4d3a),
(0x3cf2f373365347ed, 0x405c0e8405fdb642),
(0x3d276a94c8f1e627, 0xc0885e4718dfb761),
(0x3d569f8a993434e2, 0x40b756d52d5fa90c),
(0xbd6f953f7dd1a223, 0xc0c8818365c47790),
(0xbd74247967fbf7b2, 0x40e8cf89daf87353),
(0x3db449add7abb056, 0x41145d3c2d96d159),
(0xbdc5cc822b71f891, 0xc123694c58fd039b),
(0x3da2047ac1a6fba8, 0x415462e630bf3e7e),
(0xbdc2f2c06eda6a95, 0xc14c6984ebdd6792),
(0xbdf51fa85dafeca5, 0x4166a437c202d27b),
];
// Powers of z = 1/x shared by the numerator and the denominator.
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
// Estrin's scheme for the numerator: pair adjacent coefficients (e*), then
// combine the pairs with z^2 (f*), z^4 (g0) and z^8.
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
// Denominator coefficients, same layout as P; Q[0] is exactly 1.0.
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3cde08e4cbf324d1, 0xc030b67bd69af0ca),
(0x3cec5e4ee7e77024, 0x4071b54c0f58409c),
(0xbd340e1131739e2f, 0xc09f140a738b14b3),
(0x3d607673189d6644, 0x40cdb44bd822add2),
(0xbd7793a4f1dd74d1, 0xc0e03fe2689b802d),
(0xbd8415501228a87e, 0x410009beafea72cc),
(0x3dcecdac2702661f, 0x4128c2073da9a447),
(0xbdd8386404f3dec5, 0xc1389ec7d7186bf4),
(0xbe06eb53a3e86436, 0x4168b7a2dc85ed0b),
(0x3e098e2cefaf8299, 0xc1604f8cf34af02c),
(0x3e1a5e496b547001, 0x41776b1e0153d1e9),
];
// Same Estrin layout for the denominator. The constant term 1.0 is added as a
// plain f64 (0x3ff0000000000000) rather than read from Q[0].
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
// R(1/x) = P/Q, then scale by 1/sqrt(x).
let z = DoubleDouble::div(p_num, p_den);
let r_sqrt = DoubleDouble::from_rsqrt_fast(dx);
let r = z * r_sqrt;
// Error bound: |hi| * 2^-67 plus a tiny absolute term (about 1.74 * 2^-109).
let err = f_fmla(
r.hi,
f64::from_bits(0x3bc0000000000000),
f64::from_bits(0x392bdb8cdadbe111),
);
let up = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
// If both ends of the error interval do not round to the same f64, the fast
// result is not trustworthy and the extended-precision path is taken.
if up != lb {
return i0e_7p5_to_9p5_hard(x);
}
r.to_f64()
}
/**
Extended-precision fallback of the exponentially scaled I0 for the mid-interval [7.5;9.5].

A rational function R = P/Q in `1/x` (14 coefficients each) is evaluated with Horner's
scheme in `DyadicFloat128` arithmetic and multiplied by `bessel_rsqrt_hard(x, 1/x)`.

I0(x)=R(1/x)/sqrt(x)*Exp(x)
Polynomial generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/9.5,1/7.5},13,13},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn i0e_7p5_to_9p5_hard(x: f64) -> f64 {
    // Numerator coefficients, lowest order first.
    static P: [DyadicFloat128; 14] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -129,
            mantissa: 0xcc422a04_45cde144_75a3800b_45c38460_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -124,
            mantissa: 0xada66144_fcccc1a3_036f76b2_cabd6281_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -120,
            mantissa: 0xeabdda02_fa201d98_10e58d1f_7eb62bd7_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -116,
            mantissa: 0xbbfd3297_6f88a7df_5924587b_d5bdcdb8_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0xfca29453_efe393bf_1651627b_7d543875_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -110,
            mantissa: 0xee7c7220_bbbd248e_bb6adac6_f9a5ce95_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -107,
            mantissa: 0xc07455dd_830ba705_414408c6_06732a5a_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -105,
            mantissa: 0xe2247793_b50cd0f0_80e8981d_933f75da_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -103,
            mantissa: 0xe14a9831_82582a0b_dd27e8b6_4ed9aac2_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -101,
            mantissa: 0xa3b2ae2f_5b64f37e_c1538435_34f02faf_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -100,
            mantissa: 0xbab73503_5b7e38d9_bbe4a84b_9007c6e8_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -99,
            mantissa: 0xa68911fc_5d87bbe7_0d4fe854_5c681ac5_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -99,
            mantissa: 0x9e997222_55ef4045_fa9f311d_57d082a2_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -99,
            mantissa: 0xbe93656a_b0a4f32d_3ebbfdeb_b1cbb839_u128,
        },
    ];
    // Denominator coefficients, lowest order first.
    static Q: [DyadicFloat128; 14] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -123,
            mantissa: 0xdaa34a7e_861dddff_a0642080_cd83dd65_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -118,
            mantissa: 0x93f05740_f4758772_bb9992f9_91e72795_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -115,
            mantissa: 0xeddcb810_054c9aab_fa7e4214_d59d18b0_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0xa0180fcd_831ff6c0_ac2b8f02_37f3cfd1_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -108,
            mantissa: 0x97d25106_3b66907e_90b4f786_26daa0bb_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -106,
            mantissa: 0xf595ce38_aac16c11_001b874a_99603b45_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -103,
            mantissa: 0x912b3715_4aca68f6_5821c2ed_43d77111_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -101,
            mantissa: 0x90f97141_b896e2b6_38b87354_8945a43c_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -100,
            mantissa: 0xd3e5f967_89097d6b_3a3060fe_852ff580_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -99,
            mantissa: 0xf0d6de35_939da009_9ced21fd_48af7281_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -98,
            mantissa: 0xd2a0b183_6ac613b2_6745ce1d_8ed1c323_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -98,
            mantissa: 0xbb9c089a_b7e939a2_732b3fb5_2e66cd77_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -98,
            mantissa: 0xcb2107c3_736bef81_609718c0_ba82cd8e_u128,
        },
    ];
    let recip = DyadicFloat128::accurate_reciprocal(x);
    // Horner evaluation in 1/x, highest-order coefficient first.
    let numerator = P[..13]
        .iter()
        .rev()
        .fold(P[13], |acc, &coeff| recip * acc + coeff);
    let denominator = Q[..13]
        .iter()
        .rev()
        .fold(Q[13], |acc, &coeff| recip * acc + coeff);
    let ratio = numerator * denominator.reciprocal();
    let inv_sqrt = bessel_rsqrt_hard(x, recip);
    (ratio * inv_sqrt).fast_as_f64()
}
/**
Fast double-double path of the exponentially scaled I0 for large arguments
([f_i0e] dispatches here for |x| > 9.5, passing |x|).

Evaluates a rational function R = P/Q in `1/x` (12 coefficients each) and multiplies it
by the reciprocal square root of `x`. If the result cannot be rounded unambiguously
within the error bound, [i0e_asympt_hard] is used instead.

I0(x)=R(1/x)*Exp(x)/sqrt(x)
Generated in Wolfram:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err,approx, err1}=MiniMaxApproximation[g[z],{z,{1/709.3,1/9.5},11,11},WorkingPrecision->120]
poly=Numerator[approx];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0e_asympt(x: f64) -> f64 {
let dx = x;
let recip = DoubleDouble::from_quick_recip(x);
// Numerator coefficients, lowest order first; each entry is a pair of f64 bit
// patterns with the small correction word first.
const P: [(u64, u64); 12] = [
(0xbc7ca19c5d824c54, 0x3fd9884533d43651),
(0x3cca32eb734e010e, 0xc03b7ca35caf42eb),
(0x3d03af8238d6f25e, 0x408b92cfcaa7070f),
(0xbd7a8ff7fdebed70, 0xc0d0a3be432cce93),
(0xbdababdb579bb076, 0x410a77dc51f1804d),
(0x3dc5e4e3c972832a, 0xc13cb0be2f74839c),
(0x3e01076f9b102e38, 0x41653b064cc61661),
(0xbe2157e700d445f4, 0xc184e1b076927460),
(0xbdfa4577156dde56, 0x41999e9653f9dc5f),
(0xbe47aa238a739ffe, 0xc1a130f6ded40c00),
(0xbe331b560b6fbf4a, 0x419317f11e674cae),
(0xbe0765596077d1e3, 0xc16024404db59d3f),
];
// Powers of z = 1/x shared by the numerator and the denominator.
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
// Estrin's scheme for the numerator: pair adjacent coefficients (e*), then
// combine the pairs with z^2 (f*), z^4 (g0) and z^8.
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
// Denominator coefficients, same layout as P; Q[0] is exactly 1.0.
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcf687703e843d07, 0xc051418f1c4dd0b9),
(0x3d468ab92cb87a0b, 0x40a15891d823e48d),
(0x3d8bfc17e5183376, 0xc0e4fce40dd82796),
(0xbdccbbcc2ecf8d4c, 0x4120beef869c61ec),
(0xbdf42170b4d5d150, 0xc1523ad18834a7ed),
(0xbe0eaa32f405afd4, 0x417b24ec57a8f480),
(0x3e3ec900705e7252, 0xc19af2a91d23d62e),
(0x3e3e220e274fa46b, 0x41b0cb905cc99ff5),
(0xbe46c6c61dee11f6, 0xc1b7452e50518520),
(0x3e3ed0fc063187bf, 0x41ac1772d1749896),
(0xbe11c578f04f4be1, 0xc180feb5b2ca47cb),
];
// Same Estrin layout for the denominator. The constant term 1.0 is added as a
// plain f64 (0x3ff0000000000000) rather than read from Q[0].
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
// R(1/x) = P/Q, then scale by 1/sqrt(x).
let z = DoubleDouble::div(p_num, p_den);
let r_sqrt = DoubleDouble::from_rsqrt_fast(dx);
let r = z * r_sqrt;
// Error bound: |hi| * 2^-59 + 2^-65
let err = f_fmla(
r.hi,
f64::from_bits(0x3c40000000000000), // 2^-59
f64::from_bits(0x3be0000000000000), // 2^-65
);
let up = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
// If both ends of the error interval do not round to the same f64, the fast
// result is not trustworthy and the extended-precision path is taken.
if up != lb {
return i0e_asympt_hard(x);
}
// Here up == lb, so either bound is the rounded result.
lb
}
/**
I0(x)=R(1/x)*Exp(x)/sqrt(x)
Generated in Wolfram:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err,approx, err1}=MiniMaxApproximation[g[z],{z,{1/709.3,1/9.5},15,15},WorkingPrecision->120]
poly=Numerator[approx];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn i0e_asympt_hard(x: f64) -> f64 {
static P: [DyadicFloat128; 16] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -129,
mantissa: 0xcc42299e_a1b28468_7e5aad4a_70b749c4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -122,
mantissa: 0xabb4209d_ca11bdaa_186bef7f_390e6b77_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -116,
mantissa: 0x8a2725e2_4749db25_625ad1f2_12a2a16c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -111,
mantissa: 0x8b4c2cd4_9e5d0c8b_c9be4d3e_781bb676_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -107,
mantissa: 0xc33fad7c_40599f7d_713e3081_6b5ad791_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -103,
mantissa: 0xc81da271_b623ba88_0be032b5_827d92fa_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -99,
mantissa: 0x99ec4975_b6aa7cae_7692a287_ed8ae47c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -96,
mantissa: 0xb3aa4745_fc2dd441_2dbd3e3c_d4539687_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -93,
mantissa: 0x9f14edc2_6882afca_29d2a067_dc459729_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -91,
mantissa: 0xd35c4d01_78d8cec6_fc8ae0ee_834da837_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -89,
mantissa: 0xcdc529c7_6e082342_faad3073_07a9b61f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -87,
mantissa: 0x8ccac88f_2598c8a6_423b1f42_63591cb9_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -87,
mantissa: 0xfc044cfb_a20f0885_93d58660_17819ed5_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -86,
mantissa: 0x813a700c_74d23f52_f81b179d_7ff0da9f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -88,
mantissa: 0xe6c43da4_297216bf_bdd987cb_636906cf_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -91,
mantissa: 0xa4998323_649c3cf2_64477869_3d2c6afd_u128,
},
];
static Q: [DyadicFloat128; 16] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -121,
mantissa: 0xd772d5fd_a7077638_6e007274_d83b013c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0xad914ef0_451ced2e_515657ef_fc7eeb53_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -110,
mantissa: 0xaf41180c_dffe96e5_f192fa40_0b1bff1e_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -106,
mantissa: 0xf60dc728_241f71fd_5b93e653_ccbedace_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -102,
mantissa: 0xfcaefef9_39cf96e7_3cb75a98_da5e9761_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -98,
mantissa: 0xc2d2c837_5789587a_13ef38c6_a24c3413_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -95,
mantissa: 0xe41725c3_51d14486_a650044e_e8588f7b_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -92,
mantissa: 0xcabeed9b_5e2e888d_81d32b11_d289a624_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -89,
mantissa: 0x8764876d_11ad6607_8a8d5382_3ffe82d9_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -87,
mantissa: 0x84c9f9e5_6a5f5034_ad2c8512_16cb1ba1_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -86,
mantissa: 0xb7c1d143_a15d8aab_03a7fa3e_b7d07a36_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -85,
mantissa: 0xa78f8257_4658040f_7a1ad39c_91ea9483_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -85,
mantissa: 0xb231e0ca_b729a404_44c38f52_be208507_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -86,
mantissa: 0xae317981_42349081_8bc68b28_f69b8e49_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -89,
mantissa: 0xb451abd3_5cd79c6d_7e578164_32f16da1_u128,
},
];
let recip = DyadicFloat128::accurate_reciprocal(x);
let mut p_num = P[15];
for i in (0..15).rev() {
p_num = recip * p_num + P[i];
}
let mut p_den = Q[15];
for i in (0..15).rev() {
p_den = recip * p_den + Q[i];
}
let z = p_num * p_den.reciprocal();
let r_sqrt = bessel_rsqrt_hard(x, recip);
(z * r_sqrt).fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks of `f_i0e` across its evaluation ranges, plus a direct call
    /// into the extended-precision asymptotic path.
    #[test]
    fn test_i0e() {
        // (argument, expected result); compared for exact equality.
        let cases = [
            (0.00000000000000000000000000052342, 1.0),
            (f64::EPSILON, 0.9999999999999998),
            (9.500000000005492, 0.13125126081422883),
            (f64::INFINITY, 0.),
            (f64::NEG_INFINITY, 0.),
            (7.500000000788034, 0.14831583006929877),
            (715., 0.014922205745802662),
            (12., 0.11642622121344044),
            (16., 0.10054412736125203),
            (18.432, 0.09357372647647),
            (26.432, 0.07797212360059241),
            (0.2, 0.8269385516343293),
            (7.5, 0.14831583007739552),
            (-1.5, 0.36743360905415834),
        ];
        for &(arg, expected) in cases.iter() {
            assert_eq!(f_i0e(arg), expected, "f_i0e({})", arg);
        }
        assert!(f_i0e(f64::NAN).is_nan());
        assert_eq!(i0e_asympt_hard(18.432), 0.09357372647647);
    }
}

335
vendor/pxfm/src/bessel/i0ef.rs vendored Normal file
View File

@@ -0,0 +1,335 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::polyeval::{
f_estrin_polyeval5, f_estrin_polyeval7, f_estrin_polyeval8, f_polyeval6, f_polyeval10,
};
/// Modified exponentially scaled Bessel of the first kind of order 0
///
/// Computes exp(-|x|)*I0(x)
///
/// The result is even in `x`: every branch works on `|x|`. `±0` gives `1`,
/// `±inf` gives `0`, and NaN is propagated.
///
/// Max ULP 0.5
pub fn f_i0ef(x: f32) -> f32 {
    // Dropping the sign bit by a left shift lets one unsigned compare classify |x|.
    let ux = x.to_bits().wrapping_shl(1);
    if ux >= 0xffu32 << 24 || ux == 0 {
        // |x| == 0, |x| == inf, |x| == NaN
        if ux == 0 {
            // |x| == 0
            return 1.;
        }
        if x.is_infinite() {
            return 0.;
        }
        return x + f32::NAN; // x == NaN
    }
    // Bit pattern and value of |x|.
    let xb = x.to_bits() & 0x7fff_ffff;
    let ax = f32::from_bits(xb);
    if xb <= 0x40f00000u32 {
        // |x| <= 7.5
        if xb <= 0x34000000u32 {
            // |x| <= f32::EPSILON
            // taylor series for I0(x) * exp(-|x|) ~ 1 - |x| + O(x^2).
            // |x| (not x) must be used here: the function is even, and `1. - x`
            // would give a value above 1 for small negative arguments.
            return 1. - ax;
        }
        // exp(-|x|) scaling factor shared by the sub-range kernels below; computed
        // only after the tiny-argument shortcut, which does not need it.
        let v_exp = core_expf(-ax);
        if xb < 0x3f800000u32 {
            // |x| < 1
            return i0f_small(ax, v_exp);
        } else if xb <= 0x40600000u32 {
            // |x| <= 3.5
            return i0ef_1_to_3p5(ax, v_exp);
        } else if xb <= 0x40c00000u32 {
            // |x| <= 6
            return i0f_3p5_to_6(ax, v_exp);
        }
        return i0f_6_to_7p5(ax, v_exp);
    }
    i0ef_asympt(ax)
}
/**
Small-argument kernel for the exponentially scaled I0.

With z = (x/2)^2 the value is assembled as
I0(x) = 1 + z * P(z)
and then multiplied by `v_exp`; `f_i0ef` passes `core_expf(-|x|)` for it.
This method is valid only on [0;1].

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000001,1},6,0},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
pub(crate) fn i0f_small(x: f32, v_exp: f64) -> f32 {
    const QUARTER: f64 = 1. / 4.;
    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * QUARTER;
    let poly = f_estrin_polyeval7(
        z,
        f64::from_bits(0x3ff000000000013a),
        f64::from_bits(0x3fcffffffffc20b6),
        f64::from_bits(0x3f9c71c71e6cd6a2),
        f64::from_bits(0x3f5c71c65b0af15f),
        f64::from_bits(0x3f1234796fceb081),
        f64::from_bits(0x3ec0280faf31678c),
        f64::from_bits(0x3e664fd494223545),
    );
    // 1 + z * P(z), still unscaled.
    let unscaled = f_fmla(poly, z, 1.);
    (unscaled * v_exp) as f32
}
/**
Kernel for the exponentially scaled I0 on [1;3.5].

With z = (x/2)^2, I0 is formed from a rational approximation
I0 = 1 + z * Pn(z)/Qm(z)
and the result is multiplied by `v_exp`; `f_i0ef` passes `core_expf(-|x|)` for it.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{1,3.5},5,4},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0ef_1_to_3p5(x: f32, v_exp: f64) -> f32 {
    const QUARTER: f64 = 1. / 4.;
    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * QUARTER;
    let numerator = f_polyeval6(
        z,
        f64::from_bits(0x3feffffffffffb69),
        f64::from_bits(0x3fc9ed7bd9dc97a7),
        f64::from_bits(0x3f915c14693c842e),
        f64::from_bits(0x3f45c6dc6a719e42),
        f64::from_bits(0x3eeacb79eba725f7),
        f64::from_bits(0x3e7b51e2acfc4355),
    );
    let denominator = f_estrin_polyeval5(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfa84a10988f28eb),
        f64::from_bits(0x3f50f5599197a4be),
        f64::from_bits(0xbeea420cf9b13b1b),
        f64::from_bits(0x3e735d0c1eb6ed7d),
    );
    let ratio = numerator / denominator;
    // 1 + z * Pn(z)/Qm(z), still unscaled.
    let unscaled = f_fmla(ratio, z, 1.);
    (unscaled * v_exp) as f32
}
// Kernel for the exponentially scaled I0 above 6.
// The original note states "Valid only on interval [6;7]"; `f_i0ef` dispatches
// here for 6 < |x| <= 7.5.
// NOTE(review): confirm the fitted interval against the generator script below.
// With z = (x/2)^2 it evaluates (1 + z * Pn(z)/Qm(z)) * v_exp.
// Generated by Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=(BesselI[0,x]-1)/(x/2)^2
// g[z_]:=f[2 Sqrt[z]]
// {err, approx}=MiniMaxApproximation[g[z],{z,{6,7},7,6},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
#[inline]
fn i0f_6_to_7p5(x: f32, v_exp: f64) -> f32 {
    const QUARTER: f64 = 1. / 4.;
    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * QUARTER;
    let numerator = f_estrin_polyeval8(
        z,
        f64::from_bits(0x3fefffffffffff7d),
        f64::from_bits(0x3fcb373b00569ccf),
        f64::from_bits(0x3f939069c3363b81),
        f64::from_bits(0x3f4c2095c90c66b3),
        f64::from_bits(0x3ef6713f648413db),
        f64::from_bits(0x3e947efa2f9936b4),
        f64::from_bits(0x3e2486a182f49420),
        f64::from_bits(0x3da213034a33de33),
    );
    let denominator = f_estrin_polyeval7(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfa32313fea59d9e),
        f64::from_bits(0x3f460594c2ec6706),
        f64::from_bits(0xbedf725fb714690f),
        f64::from_bits(0x3e6d9cb39b19555c),
        f64::from_bits(0xbdf1900e3abcb7a6),
        f64::from_bits(0x3d64a21a2ea78ef6),
    );
    let ratio = numerator / denominator;
    let unscaled = f_fmla(ratio, z, 1.);
    (unscaled * v_exp) as f32
}
// Kernel for the exponentially scaled I0, valid only on interval [3.5;6].
// With z = (x/2)^2 it evaluates (1 + z * Pn(z)/Qm(z)) * v_exp;
// `f_i0ef` passes `core_expf(-|x|)` as `v_exp`.
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=(BesselI[0,x]-1)/(x/2)^2
// g[z_]:=f[2 Sqrt[z]]
// {err, approx}=MiniMaxApproximation[g[z],{z,{3.5,6},5,5},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
#[inline]
fn i0f_3p5_to_6(x: f32, v_exp: f64) -> f32 {
    const QUARTER: f64 = 1. / 4.;
    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * QUARTER;
    let numerator = f_polyeval6(
        z,
        f64::from_bits(0x3feffffffffd9550),
        f64::from_bits(0x3fc97e18ee033fb4),
        f64::from_bits(0x3f90b3199079bce1),
        f64::from_bits(0x3f442c300a425372),
        f64::from_bits(0x3ee7831030ae18ca),
        f64::from_bits(0x3e76387d67354932),
    );
    let denominator = f_polyeval6(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfaa079c484e406a),
        f64::from_bits(0x3f5452098f1556fb),
        f64::from_bits(0xbef33efb4a8128ac),
        f64::from_bits(0x3e865996e19448ca),
        f64::from_bits(0xbe09acbb64533c3e),
    );
    let ratio = numerator / denominator;
    let unscaled = f_fmla(ratio, z, 1.);
    (unscaled * v_exp) as f32
}
/**
Large-argument kernel for the exponentially scaled I0; `f_i0ef` uses it for |x| > 7.5.

A rational function in 1/x approximates
sqrt(x) * exp(-x) * I0(x) = Pn(1/x)/Qn(1/x)
so that
I0(x)exp(-x) = Pn(1/x)/Qn(1/x)/sqrt(x)
The division by sqrt(x) is done by multiplying with `j1f_rsqrt(x)`.

Generated by Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{2^-33,1/7.5},9,9},WorkingPrecision->70]
num=Numerator[approx][[1]];
den=Denominator[approx][[1]];
poly=num;
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=den;
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0ef_asympt(x: f32) -> f32 {
    let xd = f64::from(x);
    // Both polynomials are evaluated at 1/x.
    let inv_x = 1. / xd;
    let numerator = f_polyeval10(
        inv_x,
        f64::from_bits(0x3fd9884533d4364f),
        f64::from_bits(0xc02ed6c9269921a7),
        f64::from_bits(0x4070ee77ffed64a5),
        f64::from_bits(0xc0a4ffd558b06889),
        f64::from_bits(0x40cf2633e2840f6f),
        f64::from_bits(0xc0ea813a9ba42b84),
        f64::from_bits(0x40f569bf5d63eb8c),
        f64::from_bits(0xc0b3138874cdd180),
        f64::from_bits(0xc0fa3152ed485937),
        f64::from_bits(0x40ddaccbed454f47),
    );
    let denominator = f_polyeval10(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xc0436352c350b88c),
        f64::from_bits(0x40855eaa17b05edd),
        f64::from_bits(0xc0baa46f155bd266),
        f64::from_bits(0x40e3e9fd90a2e695),
        f64::from_bits(0xc1012dc621dfc1e8),
        f64::from_bits(0x410cafeea713e8ce),
        f64::from_bits(0xc0e0a3ee0077d7f7),
        f64::from_bits(0xc110bcced6a39e9e),
        f64::from_bits(0x40f9a1e4a91be4d6),
    );
    let ratio = numerator / denominator;
    let inv_sqrt_x = j1f_rsqrt(xd);
    (ratio * inv_sqrt_x) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks of `f_i0ef`: special values, both signs, and inputs from
    /// each evaluation range, compared against exact `f32` reference values.
    #[test]
    fn test_i0f() {
        assert!(f_i0ef(f32::NAN).is_nan());
        // (input, expected) pairs, in the order they were originally asserted.
        let cases: [(f32, f32); 13] = [
            (f32::NEG_INFINITY, 0.),
            (f32::INFINITY, 0.),
            (1., 0.4657596),
            (5., 0.1835408),
            (16., 0.100544125),
            (32., 0.070804186),
            (92.0, 0.04164947),
            (0., 1.0),
            (28., 0.075736605),
            (-28., 0.075736605),
            (-32., 0.070804186),
            (-92.0, 0.04164947),
            (-0., 1.0),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_i0ef(input), expected);
        }
    }
}

354
vendor/pxfm/src/bessel/i0f.rs vendored Normal file
View File

@@ -0,0 +1,354 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::polyeval::{
f_estrin_polyeval5, f_estrin_polyeval7, f_estrin_polyeval8, f_estrin_polyeval9, f_polyeval6,
};
/// Modified Bessel of the first kind of order 0
///
/// `±0` maps to `1`, `±inf` and every finite input with |x| >= 91.90077 map
/// to `+inf`, NaN inputs propagate as NaN.
///
/// Max ULP 0.5
pub fn f_i0f(x: f32) -> f32 {
    // Sign bit shifted out: one unsigned comparison covers both signs.
    let doubled_bits = x.to_bits().wrapping_shl(1);
    if doubled_bits == 0 {
        // |x| == 0
        return 1.;
    }
    if doubled_bits >= 0xffu32 << 24 {
        // |x| == inf, |x| == NaN
        return if x.is_infinite() {
            f32::INFINITY
        } else {
            x + f32::NAN // x == NaN
        };
    }
    let abs_bits = x.to_bits() & 0x7fff_ffff;
    if abs_bits >= 0x42b7cd32u32 {
        // |x| >= 91.90077
        return f32::INFINITY;
    }
    if abs_bits >= 0x40f00000u32 {
        // |x| >= 7.5
        return i0f_asympt(f32::from_bits(abs_bits));
    }
    if abs_bits <= 0x34000000u32 {
        // |x| <= f32::EPSILON
        // taylor series for I0(x) ~ 1 + x^2/4 + O(x^4)
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        ))]
        {
            use crate::common::f_fmlaf;
            return f_fmlaf(x, x * 0.25, 1.);
        }
        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        )))]
        {
            let xd = x as f64;
            return f_fmla(xd, xd * 0.25, 1.) as f32;
        }
    }
    let ax = f32::from_bits(abs_bits); // |x|
    if abs_bits < 0x3f800000u32 {
        // |x| < 1
        i0f_small(ax) as f32
    } else if abs_bits <= 0x40600000u32 {
        // |x| <= 3.5
        i0f_1_to_3p5(ax)
    } else if abs_bits <= 0x40c00000u32 {
        // |x| <= 6
        i0f_3p5_to_6(ax)
    } else {
        // 6 < |x| < 7.5
        i0f_6_to_7p5(ax)
    }
}
/**
Small-argument kernel for I0, returned in double precision.

With z = (x/2)^2 the value is assembled as
I0(x) = 1 + z * P(z)
This method is valid only on [0;1].

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000001,1},6,0},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
pub(crate) fn i0f_small(x: f32) -> f64 {
    const QUARTER: f64 = 1. / 4.;
    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * QUARTER;
    let poly = f_estrin_polyeval7(
        z,
        f64::from_bits(0x3ff000000000013a),
        f64::from_bits(0x3fcffffffffc20b6),
        f64::from_bits(0x3f9c71c71e6cd6a2),
        f64::from_bits(0x3f5c71c65b0af15f),
        f64::from_bits(0x3f1234796fceb081),
        f64::from_bits(0x3ec0280faf31678c),
        f64::from_bits(0x3e664fd494223545),
    );
    // 1 + z * P(z)
    f_fmla(poly, z, 1.)
}
/**
Kernel for I0 on [1;3.5].

With z = (x/2)^2, I0 is formed from a rational approximation
I0 = 1 + z * Pn(z)/Qm(z)

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{1,3.5},5,4},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0f_1_to_3p5(x: f32) -> f32 {
    const QUARTER: f64 = 1. / 4.;
    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * QUARTER;
    let numerator = f_polyeval6(
        z,
        f64::from_bits(0x3feffffffffffb69),
        f64::from_bits(0x3fc9ed7bd9dc97a7),
        f64::from_bits(0x3f915c14693c842e),
        f64::from_bits(0x3f45c6dc6a719e42),
        f64::from_bits(0x3eeacb79eba725f7),
        f64::from_bits(0x3e7b51e2acfc4355),
    );
    let denominator = f_estrin_polyeval5(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfa84a10988f28eb),
        f64::from_bits(0x3f50f5599197a4be),
        f64::from_bits(0xbeea420cf9b13b1b),
        f64::from_bits(0x3e735d0c1eb6ed7d),
    );
    let ratio = numerator / denominator;
    // 1 + z * Pn(z)/Qm(z)
    f_fmla(ratio, z, 1.) as f32
}
// Kernel for I0 above 6.
// The original note states "Valid only on interval [6;7]"; `f_i0f` dispatches
// here for 6 < |x| < 7.5.
// NOTE(review): confirm the fitted interval against the generator script below.
// With z = (x/2)^2 it evaluates 1 + z * Pn(z)/Qm(z).
// Generated by Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=(BesselI[0,x]-1)/(x/2)^2
// g[z_]:=f[2 Sqrt[z]]
// {err, approx}=MiniMaxApproximation[g[z],{z,{6,7},7,6},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
#[inline]
fn i0f_6_to_7p5(x: f32) -> f32 {
    const QUARTER: f64 = 1. / 4.;
    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * QUARTER;
    let numerator = f_estrin_polyeval8(
        z,
        f64::from_bits(0x3fefffffffffff7d),
        f64::from_bits(0x3fcb373b00569ccf),
        f64::from_bits(0x3f939069c3363b81),
        f64::from_bits(0x3f4c2095c90c66b3),
        f64::from_bits(0x3ef6713f648413db),
        f64::from_bits(0x3e947efa2f9936b4),
        f64::from_bits(0x3e2486a182f49420),
        f64::from_bits(0x3da213034a33de33),
    );
    let denominator = f_estrin_polyeval7(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfa32313fea59d9e),
        f64::from_bits(0x3f460594c2ec6706),
        f64::from_bits(0xbedf725fb714690f),
        f64::from_bits(0x3e6d9cb39b19555c),
        f64::from_bits(0xbdf1900e3abcb7a6),
        f64::from_bits(0x3d64a21a2ea78ef6),
    );
    let ratio = numerator / denominator;
    f_fmla(ratio, z, 1.) as f32
}
// Kernel for I0, valid only on interval [3.5;6].
// With z = (x/2)^2 it evaluates 1 + z * Pn(z)/Qm(z).
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=(BesselI[0,x]-1)/(x/2)^2
// g[z_]:=f[2 Sqrt[z]]
// {err, approx}=MiniMaxApproximation[g[z],{z,{3.5,6},5,5},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
#[inline]
fn i0f_3p5_to_6(x: f32) -> f32 {
    const QUARTER: f64 = 1. / 4.;
    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * QUARTER;
    let numerator = f_polyeval6(
        z,
        f64::from_bits(0x3feffffffffd9550),
        f64::from_bits(0x3fc97e18ee033fb4),
        f64::from_bits(0x3f90b3199079bce1),
        f64::from_bits(0x3f442c300a425372),
        f64::from_bits(0x3ee7831030ae18ca),
        f64::from_bits(0x3e76387d67354932),
    );
    let denominator = f_polyeval6(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfaa079c484e406a),
        f64::from_bits(0x3f5452098f1556fb),
        f64::from_bits(0xbef33efb4a8128ac),
        f64::from_bits(0x3e865996e19448ca),
        f64::from_bits(0xbe09acbb64533c3e),
    );
    let ratio = numerator / denominator;
    f_fmla(ratio, z, 1.) as f32
}
/**
Large-argument kernel for I0; `f_i0f` uses it for 7.5 <= |x| < 91.90077.

A rational function in 1/x approximates
sqrt(x) * exp(-x) * I0(x) = Pn(1/x)/Qn(1/x)
so that
I0(x) = Pn(1/x)/Qn(1/x)*exp(x)/sqrt(x)
The exponential comes from `core_expf(x)` and the division by sqrt(x) is done by
multiplying with `j1f_rsqrt(x)`.

Generated by Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{1/92.3,1/7.5},8,8},WorkingPrecision->70]
num=Numerator[approx][[1]];
den=Denominator[approx][[1]];
poly=num;
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0f_asympt(x: f32) -> f32 {
    let xd = f64::from(x);
    // Both polynomials are evaluated at 1/x.
    let inv_x = 1. / xd;
    let numerator = f_estrin_polyeval9(
        inv_x,
        f64::from_bits(0x3fd9884533d44829),
        f64::from_bits(0xc02c940f40595581),
        f64::from_bits(0x406d41c495c2f762),
        f64::from_bits(0xc0a10ab76dda4520),
        f64::from_bits(0x40c825b1c2a48d07),
        f64::from_bits(0xc0e481d606d0b748),
        f64::from_bits(0x40f34759deefbd40),
        f64::from_bits(0xc0ef4b7fb49fa116),
        f64::from_bits(0x40c409a6f882ba00),
    );
    let denominator = f_estrin_polyeval9(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xc041f8a9131ad229),
        f64::from_bits(0x408278e56af035bb),
        f64::from_bits(0xc0b5a34a108f3e35),
        f64::from_bits(0x40dee6f278ee24f5),
        f64::from_bits(0xc0fa95093b0c4f9f),
        f64::from_bits(0x4109982b87f75651),
        f64::from_bits(0xc10618cc3c91e2db),
        f64::from_bits(0x40e30895aec6fc4f),
    );
    let ratio = numerator / denominator;
    let exp_x = core_expf(x);
    let inv_sqrt_x = j1f_rsqrt(xd);
    // Same association as before: (ratio * rsqrt) * exp.
    (ratio * inv_sqrt_x * exp_x) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks of `f_i0f`: special values, the overflow cut-off, both
    /// signs, and inputs from several evaluation ranges.
    #[test]
    fn test_i0f() {
        assert!(f_i0f(f32::NAN).is_nan());
        // (input, expected) pairs, in the order they were originally asserted.
        let cases: [(f32, f32); 12] = [
            (f32::NEG_INFINITY, f32::INFINITY),
            (f32::INFINITY, f32::INFINITY),
            (1., 1.2660658),
            (5., 27.239872),
            (16., 893446.25),
            (32., 5590908000000.0),
            (92.0, f32::INFINITY),
            (0., 1.0),
            (28., 109534600000.0),
            (-28., 109534600000.0),
            (-16., 893446.25),
            (-32., 5590908000000.0),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_i0f(input), expected);
        }
    }
}

656
vendor/pxfm/src/bessel/i1.rs vendored Normal file
View File

@@ -0,0 +1,656 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::exponents::rational128_exp;
use crate::polyeval::{f_estrin_polyeval5, f_polyeval6};
/// Modified Bessel of the first kind of order 1
///
/// The result carries the sign of `x`; `±inf` and every finite input with
/// |x| >= 713.9876098185423 map to the infinity of the same sign, NaN inputs
/// propagate as NaN.
///
/// Max found ULP 0.5
pub fn f_i1(x: f64) -> f64 {
    // Sign bit shifted out: one unsigned comparison covers both signs.
    let doubled_bits = x.to_bits().wrapping_shl(1);
    if doubled_bits <= 0x760af31dc4611874u64 {
        // Power series of I1(x) ~ x/2 + O(x^3)
        // |x| <= 2.2204460492503131e-24
        return x * 0.5;
    }
    if doubled_bits <= 0x7960000000000000u64 {
        // |x| <= f64::EPSILON
        // Power series of I1(x) ~ x/2 + x^3/16 + O(x^4)
        const HALF: f64 = 1. / 2.;
        const SIXTEENTH: f64 = 1. / 16.;
        let series = f_fmla(x, x * SIXTEENTH, HALF);
        return series * x;
    }
    if doubled_bits >= 0x7ffu64 << 53 {
        // |x| == inf, x == NaN
        if x.is_infinite() {
            // ±inf maps to itself.
            return x;
        }
        return x + f64::NAN; // x == NaN
    }
    let abs_bits = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    if abs_bits >= 0x40864fe69ff9fec8u64 {
        // |x| >= 713.9876098185423
        return f64::copysign(f64::INFINITY, x);
    }
    // The kernels work on |x|; the sign of the input is re-applied through this factor.
    let sign_scale: f64 = if x.is_sign_negative() { -1. } else { 1. };
    let ax = f64::from_bits(abs_bits);
    if abs_bits < 0x401f000000000000u64 {
        // |x| < 7.75
        let magnitude = i1_0_to_7p75(ax).to_f64();
        return f64::copysign(magnitude, sign_scale);
    }
    i1_asympt(ax, sign_scale)
}
/**
Fast double-double path for I1 on the small-argument range; `f_i1` calls it for |x| < 7.75.

Computes
I1(x) = x/2 * (1 + 1/2 * (x/2)^2 + (x/2)^4 * P((x/2)^2))
where P is the rational function `p_num / p_den` evaluated below.
The result is returned only if the rounding test at the end succeeds; otherwise the
computation is redone by `i1_0_to_7p5_hard`.

Polynomial generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
g[z_]:=f[2 Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.000000001,7.5},9,9},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
pub(crate) fn i1_0_to_7p75(x: f64) -> DoubleDouble {
let half_x = x * 0.5; // this is exact
// eval_x = (x/2)^2, the argument of both polynomials.
let eval_x = DoubleDouble::from_exact_mult(half_x, half_x);
// Five lowest-order numerator coefficients, stored as bit pairs for `from_bit_pair`.
// NOTE(review): the pairs look like (lo, hi) words - confirm against `from_bit_pair`.
const P: [(u64, u64); 5] = [
(0x3c55555555555555, 0x3fb5555555555555),
(0x3c1253e1df138479, 0x3f7304597c4fbd4c),
(0x3bcec398b7059ee9, 0x3f287b5b01f6b9c0),
(0xbb7354e7c92c4f77, 0x3ed21de117470d10),
(0xbb1d35ac0d7923cc, 0x3e717f3714dddc04),
];
// The five highest-order numerator terms are evaluated in plain f64 on `eval_x.hi`;
// the constants are the second words of P[5..=9] in `i1_0_to_7p5_hard`.
let ps_num = f_estrin_polyeval5(
eval_x.hi,
f64::from_bits(0x3e063684ca1944a4),
f64::from_bits(0x3d92ac4a0e48a9bb),
f64::from_bits(0x3d1425988b0b0aea),
f64::from_bits(0x3c899839e74ddefc),
f64::from_bits(0x3bed8747bcdd1e61),
);
// Remaining numerator terms: Horner steps in double-double, from P[4] down to P[0].
let mut p_num = DoubleDouble::mul_f64_add(eval_x, ps_num, DoubleDouble::from_bit_pair(P[4]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[3]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[2]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[1]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[0]));
// Four lowest-order denominator coefficients; Q[0] is not read, the final
// step below adds the constant 1.0 directly.
const Q: [(u64, u64); 4] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc3e59afb81ac7ea, 0xbf9c4848e0661d70),
(0x3bd62fa3dbc1a19c, 0x3f38a9eafcd7e674),
(0x3b6f4688b9eab7d0, 0xbecbfdec51454533),
];
// The six highest-order denominator terms are evaluated in plain f64 on `eval_x.hi`;
// the constants are the second words of Q[4..=9] in `i1_0_to_7p5_hard`.
let ps_den = f_polyeval6(
eval_x.hi,
f64::from_bits(0x3e56e7cde9266a32),
f64::from_bits(0xbddc316dff4a672f),
f64::from_bits(0x3d5a43aaee30ebb5),
f64::from_bits(0xbcd1fb023f4f1fa0),
f64::from_bits(0x3c4089ede324209f),
f64::from_bits(0xbb9f64f47ba69604),
);
let mut p_den = DoubleDouble::mul_f64_add(eval_x, ps_den, DoubleDouble::from_bit_pair(Q[3]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[2]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[1]));
p_den = DoubleDouble::mul_add_f64(eval_x, p_den, f64::from_bits(0x3ff0000000000000));
// p = P((x/2)^2)
let p = DoubleDouble::div(p_num, p_den);
// eval_sqr = (x/2)^4
let eval_sqr = DoubleDouble::quick_mult(eval_x, eval_x);
// z = 1 + 1/2 * (x/2)^2, then z += p * (x/2)^4.
let mut z = DoubleDouble::mul_f64_add_f64(eval_x, 0.5, 1.);
z = DoubleDouble::mul_add(p, eval_sqr, z);
// Despite the name this is x * 0.5, i.e. x/2.
let x_over_05 = DoubleDouble::from_exact_mult(x, 0.5);
let r = DoubleDouble::quick_mult(z, x_over_05);
// Error bound built from r.hi and the constants 2^-59 and 2^-65.
let err = f_fmla(
r.hi,
f64::from_bits(0x3c40000000000000), // 2^-59
f64::from_bits(0x3be0000000000000), // 2^-65
);
// Rounding test: accept `r` only when adding and subtracting the error bound
// collapse to the same f64; otherwise fall back to the slow path.
let ub = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if ub == lb {
return r;
}
i1_0_to_7p5_hard(x)
}
// Slow path for I1 on the small-argument range: the same rational approximation as
// `i1_0_to_7p75`, with every coefficient and every Horner step in double-double.
// `i1_0_to_7p75` calls it when its rounding test fails.
// Polynomial generated by Wolfram Mathematica:
// I1(x) = x/2 * (1 + 1/2 * (x/2)^2 + (x/2)^4 * P((x/2)^2))
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
// g[z_]:=f[2 Sqrt[z]]
// {err,approx}=MiniMaxApproximation[g[z],{z,{0.000000001,7.5},9,9},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
#[cold]
#[inline(never)]
pub(crate) fn i1_0_to_7p5_hard(x: f64) -> DoubleDouble {
const ONE_OVER_4: f64 = 1. / 4.;
// eval_x = x^2 / 4 = (x/2)^2, the argument of both polynomials.
let eval_x = DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_mult(x, x), ONE_OVER_4);
// Numerator coefficients, lowest order first, stored as bit pairs for `from_bit_pair`.
// NOTE(review): the pairs look like (lo, hi) words - confirm against `from_bit_pair`.
const P: [(u64, u64); 10] = [
(0x3c55555555555555, 0x3fb5555555555555),
(0x3c1253e1df138479, 0x3f7304597c4fbd4c),
(0x3bcec398b7059ee9, 0x3f287b5b01f6b9c0),
(0xbb7354e7c92c4f77, 0x3ed21de117470d10),
(0xbb1d35ac0d7923cc, 0x3e717f3714dddc04),
(0xba928dee24678e32, 0x3e063684ca1944a4),
(0xba36aa59912fcbed, 0x3d92ac4a0e48a9bb),
(0x39bad76f18b5ad37, 0x3d1425988b0b0aea),
(0xb923a6bab6928df4, 0x3c899839e74ddefc),
(0x3864356cdfa7b321, 0x3bed8747bcdd1e61),
];
// Horner evaluation from the highest coefficient P[9] down to P[0].
let mut p_num = DoubleDouble::mul_add(
eval_x,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[7]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[6]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[5]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[4]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[3]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[2]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[1]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[0]));
// Denominator coefficients, lowest order first.
const Q: [(u64, u64); 10] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc3e59afb81ac7ea, 0xbf9c4848e0661d70),
(0x3bd62fa3dbc1a19c, 0x3f38a9eafcd7e674),
(0x3b6f4688b9eab7d0, 0xbecbfdec51454533),
(0x3af0fb4a17103ef8, 0x3e56e7cde9266a32),
(0xba71755779c6d4bd, 0xbddc316dff4a672f),
(0x39cf8ed8d449e2c6, 0x3d5a43aaee30ebb5),
(0x39704e900a373874, 0xbcd1fb023f4f1fa0),
(0xb8e33e87e4bffbb1, 0x3c4089ede324209f),
(0x380fb09b3fd49d5c, 0xbb9f64f47ba69604),
];
// Horner evaluation from the highest coefficient Q[9] down to Q[0].
let mut p_den = DoubleDouble::mul_add(
eval_x,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[7]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[6]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[5]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[4]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[3]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[2]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[1]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[0]));
// p = P((x/2)^2)
let p = DoubleDouble::div(p_num, p_den);
// eval_sqr = (x/2)^4
let eval_sqr = DoubleDouble::quick_mult(eval_x, eval_x);
// z = 1 + 1/2 * (x/2)^2, then z += p * (x/2)^4.
let mut z = DoubleDouble::mul_f64_add_f64(eval_x, 0.5, 1.);
z = DoubleDouble::mul_add(p, eval_sqr, z);
// Despite the name this is x * 0.5, i.e. x/2.
let x_over_05 = DoubleDouble::from_exact_mult(x, 0.5);
DoubleDouble::quick_mult(z, x_over_05)
}
/**
Asymptotic expansion for I1; `f_i1` calls it for 7.75 <= |x| < 713.9876098185423
with `x` already made non-negative and the sign passed separately in `sign_scale`.

Computes:
sqrt(x) * exp(-x) * I1(x) = Pn(1/x)/Qn(1/x)
hence:
I1(x) = Pn(1/x)/Qn(1/x)*exp(x)/sqrt(x)

The result is returned with the sign of `sign_scale` when the rounding test at the
end succeeds; otherwise the computation is redone by `i1_asympt_hard`.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/713.98,1/7.75},11,11},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1_asympt(x: f64, sign_scale: f64) -> f64 {
let dx = x;
// Both polynomials are evaluated at 1/x, kept in double-double.
let recip = DoubleDouble::from_quick_recip(x);
// Numerator coefficients, lowest order first, stored as bit pairs for `from_bit_pair`.
// NOTE(review): the pairs look like (lo, hi) words - confirm against `from_bit_pair`.
const P: [(u64, u64); 12] = [
(0xbc73a823f28a2f5e, 0x3fd9884533d43651),
(0x3cc0d5bb78e674b3, 0xc0354325c8029263),
(0x3d20e1155aaaa283, 0x4080c09b027c46a4),
(0xbd5b90dcf81b99c1, 0xc0bfc1311090c839),
(0xbd98f2fda9e8fa1b, 0x40f3bb81bb190ae2),
(0xbdcec960752b60da, 0xc1207c0bbbc31cd9),
(0x3dd3c9a299c9c41f, 0x414253e25c4584af),
(0xbde82e7b9a3e1acc, 0xc159a656aece377c),
(0x3e0d3d30d701a8ab, 0x416398df24c74ef2),
(0xbdf57b85ab7006e2, 0xc151fd119be1702b),
(0x3dd760928f4515fd, 0xc1508327e42639bc),
(0x3dc09e71bc648589, 0x4143e4933afa621c),
];
// Powers of the reciprocal: x2 = (1/x)^2, x4 = (1/x)^4, x8 = (1/x)^8.
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
// Pairwise (Estrin-style) evaluation: e_k = P[2k] + recip * P[2k+1].
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
// Combine the pairs with x2, then x4, then x8.
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
// Denominator coefficients, lowest order first; Q[0] is not read, the first
// pair below adds the constant 1.0 directly.
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcb334d5a476d9ad, 0xc04a75f94c1a0c1a),
(0xbd324d58ed98bfae, 0x4094b00e60301c42),
(0x3d7c8725666c4360, 0xc0d36b9d28d45928),
(0x3d7f8457c2945822, 0x4107d6c398a174ed),
(0x3dbc655ea216594b, 0xc1339393e6776e38),
(0xbdebb5dffef78272, 0x415537198d23f6a1),
(0xbdb577f8abad883e, 0xc16c6c399dcd6949),
(0x3e14261c5362f109, 0x4173c02446576949),
(0x3dc382ededad42c5, 0xc1547dff5770f4ec),
(0xbe05c0f74d4c7956, 0xc165c88046952562),
(0xbdbf9145927aa2c7, 0x414395e46bc45d5b),
];
// Same pairwise scheme for the denominator.
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
// z = Pn(1/x)/Qn(1/x)
let z = DoubleDouble::div(p_num, p_den);
// `e` is computed for x/2 and multiplied in twice below.
// NOTE(review): presumably i0_exp(t) = exp(t), so e * e = exp(x) - confirm against `i0_exp`.
let e = i0_exp(dx * 0.5);
let r_sqrt = DoubleDouble::from_rsqrt_fast(dx);
let r = DoubleDouble::quick_mult(z * r_sqrt * e, e);
// Error bound built from r.hi and the constants 2^-59 and 2^-69.
let err = f_fmla(
r.hi,
f64::from_bits(0x3c40000000000000), // 2^-59
f64::from_bits(0x3ba0000000000000), // 2^-69
);
// Rounding test: accept `r` only when adding and subtracting the error bound
// collapse to the same f64; otherwise fall back to the slow path.
let up = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if up == lb {
return f64::copysign(r.to_f64(), sign_scale);
}
i1_asympt_hard(x, sign_scale)
}
/**
Slow-path asymptotic expansion for I1, evaluated with 128-bit dyadic floats.

Called by the double-double fast path when its rounding test cannot
decide the result.

Computes:
sqrt(x) * exp(-x) * I1(x) = Pn(1/x)/Qn(1/x)
hence:
I1(x) = Pn(1/x)/Qn(1/x)*exp(x)/sqrt(x)

`sign_scale` multiplies the final value; it carries the sign of the original
argument (presumably `x` itself is the magnitude - confirm against the caller).

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/713.98,1/7.75},15,15},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn i1_asympt_hard(x: f64, sign_scale: f64) -> f64 {
// Numerator coefficients, lowest power of 1/x first (index == power).
static P: [DyadicFloat128; 16] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -129,
mantissa: 0xcc42299e_a1b28468_bea7da47_28f13acc_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -124,
mantissa: 0xda979406_3df6e66f_cf31c3f5_f194b48c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -120,
mantissa: 0xd60b7b96_c958929b_cabe1d8c_3d874767_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -113,
mantissa: 0xd27aad9a_8fb38d56_46ab4510_8479306e_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -108,
mantissa: 0xe0167305_c451bd1f_d2f17b68_5c62e2ff_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -103,
mantissa: 0x8f6d238f_c80d8e4a_08c130f6_24e1c925_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -100,
mantissa: 0xfe32280f_2ea99024_d9924472_92d7ac8f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -96,
mantissa: 0xa48815ac_d265609f_da4ace94_811390b2_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -93,
mantissa: 0x9ededfe5_833b4cc1_731efd5c_f8729c6c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -91,
mantissa: 0xe5b43203_2784ae6a_f7458556_0a8308ea_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -89,
mantissa: 0xf5df279a_3fb4ef60_8d10adee_7dd2f47b_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -87,
mantissa: 0xbdb59963_7a757ed1_87280e0e_7f93ca2b_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -86,
mantissa: 0xc87fdea5_53177ca8_c91de5fb_3f8f78d3_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -85,
mantissa: 0x846d16a7_4663ef5d_ad42d599_5bc726b8_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -86,
mantissa: 0xb3ed2055_74262d95_389f33e4_2ac3774a_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -88,
mantissa: 0xa511aa32_c18c34e4_3d029a90_a71b7a55_u128,
},
];
// Denominator coefficients, same ordering as `P`.
static Q: [DyadicFloat128; 16] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -122,
mantissa: 0x877b771a_ad8f5fd3_5aacf5f9_f04ee9de_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -118,
mantissa: 0x89475ecd_9c84361e_800c8a3a_c8af23bf_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -111,
mantissa: 0x837d1196_cf2723f1_23b54da8_225efe05_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -106,
mantissa: 0x8ae3aecb_15355751_a9ee12e5_a4dd9dde_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -102,
mantissa: 0xb0886afa_bc13f996_ab45d252_75c8f587_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -98,
mantissa: 0x9b37d7cd_b114b86b_7d14a389_26599aa1_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -95,
mantissa: 0xc716bf54_09d5dd9f_bc16679b_93aaeca4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -92,
mantissa: 0xbe0cd82e_c8af8371_ab028ed9_c7902dd2_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -89,
mantissa: 0x875f8d91_8ef5d434_a39d00f9_2aed3d2a_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -87,
mantissa: 0x8e030781_5aa4ce7f_70156b82_8b216b7c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -86,
mantissa: 0xd4dd2687_92646fbd_5ea2d422_da64fc0b_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -85,
mantissa: 0xd6d72ab3_64b4a827_0499af0f_13a51a80_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -84,
mantissa: 0x828f4e8b_728747a9_2cebe54a_810e2681_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -85,
mantissa: 0x91570096_36a3fcfb_6b936d44_68dda1be_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -89,
mantissa: 0xf082ad00_86024ed4_dd31613b_ec41e3f8_u128,
},
];
// Expansion variable 1/x, kept in 128-bit precision.
let recip = DyadicFloat128::accurate_reciprocal(x);
// Horner evaluation of the numerator, starting from the highest-order coefficient.
let mut p_num = P[15];
for i in (0..15).rev() {
p_num = recip * p_num + P[i];
}
// Horner evaluation of the denominator, same scheme.
let mut p_den = Q[15];
for i in (0..15).rev() {
p_den = recip * p_den + Q[i];
}
// z = P(1/x) / Q(1/x), formed as a multiplication by the reciprocal of Q.
let z = p_num * p_den.reciprocal();
// Undo the sqrt(x) * exp(-x) scaling: multiply by the helper results for
// 1/sqrt(x) and exp(x) (presumably what these helpers return - confirm).
let r_sqrt = bessel_rsqrt_hard(x, recip);
let f_exp = rational128_exp(x);
// Round to f64 once at the very end, then apply the caller-supplied sign factor.
(z * r_sqrt * f_exp).fast_as_f64() * sign_scale
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for `f_i1`: tiny, mid-range and asymptotic arguments of both
    /// signs, plus the infinite and NaN special cases.
    #[test]
    fn test_fi1() {
        // (argument, expected I1(argument)) pairs compared for exact equality.
        let cases: [(f64, f64); 11] = [
            (
                0.0000000000000000000000000000000006423424234121,
                3.2117121170605e-34,
            ),
            (7.750000000757874, 315.8524811496668),
            (7.482812501363189, 245.58002285881892),
            (-7.750000000757874, -315.8524811496668),
            (-7.482812501363189, -245.58002285881892),
            (f64::INFINITY, f64::INFINITY),
            (f64::NEG_INFINITY, f64::NEG_INFINITY),
            (0.01, 0.005000062500260418),
            (-0.01, -0.005000062500260418),
            (-9.01, -1040.752038018038),
            (9.01, 1040.752038018038),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_i1(input), expected);
        }
        // NaN never compares equal, so it is checked separately.
        assert!(f_i1(f64::NAN).is_nan());
    }
}

460
vendor/pxfm/src/bessel/i1e.rs vendored Normal file
View File

@@ -0,0 +1,460 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::bessel::i1::i1_0_to_7p75;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
/// Modified exponentially scaled Bessel of the first kind of order 1
///
/// Computes exp(-|x|)*I1(x)
///
/// The function is odd in `x`, so every branch below returns a value whose
/// sign follows the sign of `x` (including signed zero for `x == ±inf`).
/// NaN inputs produce NaN.
pub fn f_i1e(x: f64) -> f64 {
    // Shifting out the sign bit lets a single unsigned comparison classify |x|.
    let ux = x.to_bits().wrapping_shl(1);
    if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 {
        // |x| <= f64::EPSILON, |x| == inf, x == NaN
        if ux <= 0x760af31dc4611874u64 {
            // |x| <= 2.2204460492503131e-24
            // The second-order term is far below half an ulp here.
            return x * 0.5;
        }
        if ux <= 0x7960000000000000u64 {
            // |x| <= f64::EPSILON
            // Power series of I1(x)*exp(-|x|) ~ x/2 - x*|x|/2 + O(x^3).
            // The correction must use |x|: with a plain `-x` the second-order
            // term gets the wrong sign for negative arguments and the result
            // is no longer odd in x.
            return f_fmla(x, -0.5 * x.abs(), x * 0.5);
        }
        if x.is_infinite() {
            // exp(-|x|)*I1(x) -> ±0 as x -> ±inf; keep the sign of the argument.
            return f64::copysign(0., x);
        }
        return x + f64::NAN; // x == NaN
    }
    let xb = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    static SIGN: [f64; 2] = [1., -1.];
    let sign_scale = SIGN[x.is_sign_negative() as usize];
    if xb < 0x401f000000000000u64 {
        // |x| < 7.75: evaluate I1(|x|) and exp(-|x|) separately in
        // double-double precision and multiply.
        let v_exp = i0_exp(-f64::from_bits(xb));
        let vi1 = i1_0_to_7p75(f64::from_bits(xb));
        let r = DoubleDouble::quick_mult(vi1, v_exp);
        return f64::copysign(r.to_f64(), sign_scale);
    }
    // |x| >= 7.75: asymptotic expansion in 1/|x|.
    i1e_asympt(f64::from_bits(xb), sign_scale)
}
/**
Asymptotic expansion for I1.
Computes:
sqrt(x) * exp(-x) * I1(x) = Pn(1/x)/Qn(1/x)
hence:
I1(x)exp(-|x|) = Pn(1/x)/Qm(1/x)/sqrt(x)
Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/713.98,1/7.75},11,11},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1e_asympt(x: f64, sign_scale: f64) -> f64 {
let dx = x;
let recip = DoubleDouble::from_quick_recip(x);
const P: [(u64, u64); 12] = [
(0xbc73a823f28a2f5e, 0x3fd9884533d43651),
(0x3cc0d5bb78e674b3, 0xc0354325c8029263),
(0x3d20e1155aaaa283, 0x4080c09b027c46a4),
(0xbd5b90dcf81b99c1, 0xc0bfc1311090c839),
(0xbd98f2fda9e8fa1b, 0x40f3bb81bb190ae2),
(0xbdcec960752b60da, 0xc1207c0bbbc31cd9),
(0x3dd3c9a299c9c41f, 0x414253e25c4584af),
(0xbde82e7b9a3e1acc, 0xc159a656aece377c),
(0x3e0d3d30d701a8ab, 0x416398df24c74ef2),
(0xbdf57b85ab7006e2, 0xc151fd119be1702b),
(0x3dd760928f4515fd, 0xc1508327e42639bc),
(0x3dc09e71bc648589, 0x4143e4933afa621c),
];
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcb334d5a476d9ad, 0xc04a75f94c1a0c1a),
(0xbd324d58ed98bfae, 0x4094b00e60301c42),
(0x3d7c8725666c4360, 0xc0d36b9d28d45928),
(0x3d7f8457c2945822, 0x4107d6c398a174ed),
(0x3dbc655ea216594b, 0xc1339393e6776e38),
(0xbdebb5dffef78272, 0x415537198d23f6a1),
(0xbdb577f8abad883e, 0xc16c6c399dcd6949),
(0x3e14261c5362f109, 0x4173c02446576949),
(0x3dc382ededad42c5, 0xc1547dff5770f4ec),
(0xbe05c0f74d4c7956, 0xc165c88046952562),
(0xbdbf9145927aa2c7, 0x414395e46bc45d5b),
];
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
let z = DoubleDouble::div(p_num, p_den);
let r_sqrt = DoubleDouble::from_rsqrt_fast(dx);
let r = z * r_sqrt;
let err = f_fmla(
r.hi,
f64::from_bits(0x3c40000000000000), // 2^-59
f64::from_bits(0x3ba0000000000000), // 2^-69
);
let up = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if up == lb {
return f64::copysign(r.to_f64(), sign_scale);
}
i1e_asympt_hard(x, sign_scale)
}
/**
Slow-path asymptotic expansion for the exponentially scaled I1,
evaluated with 128-bit dyadic floats.

Called by `i1e_asympt` when its rounding test cannot decide the result.

Computes:
sqrt(x) * exp(-x) * I1(x) = Pn(1/x)/Qn(1/x)
hence:
I1(x)exp(-|x|) = Pn(1/x)/Qn(1/x)/sqrt(x)

`x` is the magnitude of the argument as passed by `f_i1e`; `sign_scale`
multiplies the final value to restore the sign.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/713.98,1/7.75},15,15},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn i1e_asympt_hard(x: f64, sign_scale: f64) -> f64 {
// Numerator coefficients, lowest power of 1/x first (index == power).
static P: [DyadicFloat128; 16] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -129,
mantissa: 0xcc42299e_a1b28468_bea7da47_28f13acc_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -124,
mantissa: 0xda979406_3df6e66f_cf31c3f5_f194b48c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -120,
mantissa: 0xd60b7b96_c958929b_cabe1d8c_3d874767_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -113,
mantissa: 0xd27aad9a_8fb38d56_46ab4510_8479306e_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -108,
mantissa: 0xe0167305_c451bd1f_d2f17b68_5c62e2ff_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -103,
mantissa: 0x8f6d238f_c80d8e4a_08c130f6_24e1c925_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -100,
mantissa: 0xfe32280f_2ea99024_d9924472_92d7ac8f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -96,
mantissa: 0xa48815ac_d265609f_da4ace94_811390b2_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -93,
mantissa: 0x9ededfe5_833b4cc1_731efd5c_f8729c6c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -91,
mantissa: 0xe5b43203_2784ae6a_f7458556_0a8308ea_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -89,
mantissa: 0xf5df279a_3fb4ef60_8d10adee_7dd2f47b_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -87,
mantissa: 0xbdb59963_7a757ed1_87280e0e_7f93ca2b_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -86,
mantissa: 0xc87fdea5_53177ca8_c91de5fb_3f8f78d3_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -85,
mantissa: 0x846d16a7_4663ef5d_ad42d599_5bc726b8_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -86,
mantissa: 0xb3ed2055_74262d95_389f33e4_2ac3774a_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -88,
mantissa: 0xa511aa32_c18c34e4_3d029a90_a71b7a55_u128,
},
];
// Denominator coefficients, same ordering as `P`.
static Q: [DyadicFloat128; 16] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -122,
mantissa: 0x877b771a_ad8f5fd3_5aacf5f9_f04ee9de_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -118,
mantissa: 0x89475ecd_9c84361e_800c8a3a_c8af23bf_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -111,
mantissa: 0x837d1196_cf2723f1_23b54da8_225efe05_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -106,
mantissa: 0x8ae3aecb_15355751_a9ee12e5_a4dd9dde_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -102,
mantissa: 0xb0886afa_bc13f996_ab45d252_75c8f587_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -98,
mantissa: 0x9b37d7cd_b114b86b_7d14a389_26599aa1_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -95,
mantissa: 0xc716bf54_09d5dd9f_bc16679b_93aaeca4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -92,
mantissa: 0xbe0cd82e_c8af8371_ab028ed9_c7902dd2_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -89,
mantissa: 0x875f8d91_8ef5d434_a39d00f9_2aed3d2a_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -87,
mantissa: 0x8e030781_5aa4ce7f_70156b82_8b216b7c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -86,
mantissa: 0xd4dd2687_92646fbd_5ea2d422_da64fc0b_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -85,
mantissa: 0xd6d72ab3_64b4a827_0499af0f_13a51a80_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -84,
mantissa: 0x828f4e8b_728747a9_2cebe54a_810e2681_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -85,
mantissa: 0x91570096_36a3fcfb_6b936d44_68dda1be_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -89,
mantissa: 0xf082ad00_86024ed4_dd31613b_ec41e3f8_u128,
},
];
// Expansion variable 1/x, kept in 128-bit precision.
let recip = DyadicFloat128::accurate_reciprocal(x);
// Horner evaluation of the numerator, starting from the highest-order coefficient.
let mut p_num = P[15];
for i in (0..15).rev() {
p_num = recip * p_num + P[i];
}
// Horner evaluation of the denominator, same scheme.
let mut p_den = Q[15];
for i in (0..15).rev() {
p_den = recip * p_den + Q[i];
}
// z = P(1/x) / Q(1/x), formed as a multiplication by the reciprocal of Q.
let z = p_num * p_den.reciprocal();
// Only the 1/sqrt(x) factor is applied here; unlike the unscaled I1 slow path
// there is no exp(x) multiplication because the result stays exponentially scaled.
let r_sqrt = bessel_rsqrt_hard(x, recip);
// Round to f64 once at the very end, then apply the caller-supplied sign factor.
(z * r_sqrt).fast_as_f64() * sign_scale
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for `f_i1e` across the series, mid-range and asymptotic
    /// branches, plus the special values and one direct slow-path call.
    #[test]
    fn test_fi1e() {
        // (argument, expected exp(-|x|) * I1(x)) pairs compared for exact equality.
        let cases: [(f64, f64); 12] = [
            (f64::EPSILON, 1.1102230246251563e-16),
            (7.750000000757874, 0.13605110007443239),
            (7.482812501363189, 0.13818116726273896),
            (-7.750000000757874, -0.13605110007443239),
            (-7.482812501363189, -0.13818116726273896),
            (f64::INFINITY, 0.),
            (f64::NEG_INFINITY, 0.),
            (0.01, 0.004950311047118276),
            (-0.01, -0.004950311047118276),
            (-9.01, -0.12716101566063667),
            (9.01, 0.12716101566063667),
            (763., 0.014435579051182581),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_i1e(input), expected);
        }
        // NaN never compares equal, so it is checked separately.
        assert!(f_i1e(f64::NAN).is_nan());
        // The 128-bit slow path must agree with the fast path on a regular input.
        assert_eq!(i1e_asympt_hard(9.01, 1.), 0.12716101566063667);
    }
}

224
vendor/pxfm/src/bessel/i1ef.rs vendored Normal file
View File

@@ -0,0 +1,224 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::polyeval::{f_estrin_polyeval7, f_estrin_polyeval9, f_polyeval10};
/// Modified exponentially scaled Bessel of the first kind of order 1
///
/// Computes exp(-|x|)*I1(x)
///
/// The function is odd in `x`: zeros and infinities keep the sign of the
/// argument (`±0 -> ±0`, `±inf -> ±0`). NaN inputs produce NaN.
///
/// Max ULP 0.5
pub fn f_i1ef(x: f32) -> f32 {
    // Shifting out the sign bit lets a single unsigned comparison classify |x|.
    let ux = x.to_bits().wrapping_shl(1);
    if ux >= 0xffu32 << 24 || ux == 0 {
        // |x| == 0, |x| == inf, x == NaN
        if ux == 0 {
            // |x| == 0: return the argument itself so that -0.0 maps to -0.0.
            return x;
        }
        if x.is_infinite() {
            return if x.is_sign_positive() { 0. } else { -0. };
        }
        return x + f32::NAN; // x == NaN
    }
    let xb = x.to_bits() & 0x7fff_ffff;
    static SIGN: [f64; 2] = [1., -1.];
    let sign_scale = SIGN[x.is_sign_negative() as usize];
    if xb <= 0x40f80000u32 {
        // |x| <= 7.75
        if xb <= 0x34000000u32 {
            // |x| <= f32::EPSILON
            // Taylor series for I1(x)*exp(-|x|) ~ x/2 - x*|x|/2 + O(x^3).
            // The correction must use |x|: with a plain `-x/2` the second-order
            // term gets the wrong sign for negative arguments.
            #[cfg(any(
                all(
                    any(target_arch = "x86", target_arch = "x86_64"),
                    target_feature = "fma"
                ),
                all(target_arch = "aarch64", target_feature = "neon")
            ))]
            {
                use crate::common::f_fmlaf;
                let half_x = 0.5 * x;
                return f_fmlaf(x, -half_x.abs(), half_x);
            }
            #[cfg(not(any(
                all(
                    any(target_arch = "x86", target_arch = "x86_64"),
                    target_feature = "fma"
                ),
                all(target_arch = "aarch64", target_feature = "neon")
            )))]
            {
                // Without a hardware FMA, do the arithmetic in f64 and round once.
                let dx = x as f64;
                let half_x = 0.5 * dx;
                return f_fmla(dx, -half_x.abs(), half_x) as f32;
            }
        }
        // exp(-|x|) is only needed past the tiny-argument shortcut above.
        let v_exp = core_expf(-f32::from_bits(xb));
        return i1ef_small(f32::from_bits(xb), sign_scale, v_exp) as f32;
    }
    // |x| > 7.75: asymptotic expansion in 1/|x|.
    i1ef_asympt(f32::from_bits(xb), sign_scale)
}
/**
Evaluates exp(-|x|)*I1(x) for small arguments from the rational form
I1(x) = x/2 * (1 + 1/2 * (x/2)^2 + (x/2)^4 * P((x/2)^2))
and multiplies by the caller-supplied `core_expf` factor and by `sign_scale`.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,7.75},6,6},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1ef_small(x: f32, sign_scale: f64, core_expf: f64) -> f64 {
    // Bit patterns of the numerator / denominator coefficients of P,
    // in the order they are handed to the polynomial evaluator.
    const NUM: [u64; 7] = [
        0x3fb5555555555555,
        0x3f706cdccca396c4,
        0x3f23f9e12bdbba92,
        0x3ec8e39208e926b2,
        0x3e62e53b433c42ff,
        0x3def7cb16d10fb46,
        0x3d6747cd73d9d783,
    ];
    const DEN: [u64; 7] = [
        0x3ff0000000000000,
        0xbfa2075f77b54885,
        0x3f438c6d797c29f5,
        0xbeda57e2a258c6da,
        0x3e677e777c569432,
        0xbdea9212a96babc1,
        0x3d5e183186d5d782,
    ];

    // h = x/2, t = h^2, t2 = h^4
    let half = (x as f64) * 0.5;
    let t = half * half;
    let t2 = t * t;

    let numerator = f_estrin_polyeval7(
        t,
        f64::from_bits(NUM[0]),
        f64::from_bits(NUM[1]),
        f64::from_bits(NUM[2]),
        f64::from_bits(NUM[3]),
        f64::from_bits(NUM[4]),
        f64::from_bits(NUM[5]),
        f64::from_bits(NUM[6]),
    );
    let denominator = f_estrin_polyeval7(
        t,
        f64::from_bits(DEN[0]),
        f64::from_bits(DEN[1]),
        f64::from_bits(DEN[2]),
        f64::from_bits(DEN[3]),
        f64::from_bits(DEN[4]),
        f64::from_bits(DEN[5]),
        f64::from_bits(DEN[6]),
    );
    let ratio = numerator / denominator;

    // 1 + t/2, then add t^2 * P(t).
    let leading = f_fmla(0.5, t, 1.);
    let bracket = f_fmla(t2, ratio, leading);
    bracket * half * sign_scale * core_expf
}
/**
Asymptotic expansion for the exponentially scaled I1 (single precision).

Computes:
sqrt(x) * exp(-x) * I1(x) = Pn(1/x)/Qm(1/x)
hence:
I1(x)*exp(-x) = Pn(1/x)/Qm(1/x)/sqrt(x)

All arithmetic is done in `f64`; the value is multiplied by `sign_scale`
and rounded to `f32` once at the end.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{2^-33,1/7.75},9,8},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1ef_asympt(x: f32, sign_scale: f64) -> f32 {
    // Bit patterns of the numerator (10) and denominator (9) coefficients,
    // in the order they are handed to the polynomial evaluators.
    const NUM: [u64; 10] = [
        0x3fd9884533d43652,
        0xc030686a3694d13c,
        0x407344697f45c2ee,
        0xc0aa037ee36a8967,
        0x40d5b2eab8cf5b17,
        0xc0f65addf81dbee8,
        0x410afc22ec1f9b8b,
        0xc110821dd0fc12b4,
        0x40feb3452c93aada,
        0xc0c6d04e8c5d02f3,
    ];
    const DEN: [u64; 9] = [
        0x3ff0000000000000,
        0xc04460707a9ceed4,
        0x4087ac89fcf51e9b,
        0xc0bf830689f31b42,
        0x40e9c281c367fab2,
        0xc109b59ade76eb8c,
        0x411d553a9f5673c5,
        0xc11f9dbe0665523b,
        0x4103b62a329b60d7,
    ];

    let xd = x as f64;
    // Expansion variable 1/x.
    let inv = 1. / xd;

    let numerator = f_polyeval10(
        inv,
        f64::from_bits(NUM[0]),
        f64::from_bits(NUM[1]),
        f64::from_bits(NUM[2]),
        f64::from_bits(NUM[3]),
        f64::from_bits(NUM[4]),
        f64::from_bits(NUM[5]),
        f64::from_bits(NUM[6]),
        f64::from_bits(NUM[7]),
        f64::from_bits(NUM[8]),
        f64::from_bits(NUM[9]),
    );
    let denominator = f_estrin_polyeval9(
        inv,
        f64::from_bits(DEN[0]),
        f64::from_bits(DEN[1]),
        f64::from_bits(DEN[2]),
        f64::from_bits(DEN[3]),
        f64::from_bits(DEN[4]),
        f64::from_bits(DEN[5]),
        f64::from_bits(DEN[6]),
        f64::from_bits(DEN[7]),
        f64::from_bits(DEN[8]),
    );

    let ratio = numerator / denominator;
    let inv_sqrt = j1f_rsqrt(xd);
    (ratio * inv_sqrt * sign_scale) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for `f_i1ef`: special values, both signs, and the
    /// tiny-argument shortcut.
    #[test]
    fn test_i1ef() {
        assert!(f_i1ef(f32::NAN).is_nan());
        // (argument, expected exp(-|x|) * I1(x)) pairs compared for exact equality.
        let cases: [(f32, f32); 9] = [
            (f32::INFINITY, 0.0),
            (f32::NEG_INFINITY, 0.0),
            (0., 0.),
            (1., 0.20791042),
            (-1., -0.20791042),
            (9., 0.12722498),
            (-9., -0.12722498),
            (0.000000000543453, 2.717265e-10),
            (-0.000000000543453, -2.717265e-10),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_i1ef(input), expected);
        }
    }
}

210
vendor/pxfm/src/bessel/i1f.rs vendored Normal file
View File

@@ -0,0 +1,210 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::polyeval::{f_estrin_polyeval7, f_estrin_polyeval9, f_polyeval10};
/// Modified Bessel of the first kind of order 1
///
/// The function is odd in `x`: `±0 -> ±0`, `±inf -> ±inf`, and finite
/// arguments whose result overflows `f32` return an infinity with the sign
/// of `x`. NaN inputs produce NaN.
///
/// Max ULP 0.5
pub fn f_i1f(x: f32) -> f32 {
    // Shifting out the sign bit lets a single unsigned comparison classify |x|.
    let ux = x.to_bits().wrapping_shl(1);
    if ux >= 0xffu32 << 24 || ux == 0 {
        // |x| == 0, |x| == inf, |x| == NaN
        if ux == 0 {
            // |x| == 0: return the argument itself so that -0.0 maps to -0.0.
            return x;
        }
        if x.is_infinite() {
            // I1(±inf) = ±inf
            return x;
        }
        return x + f32::NAN; // x == NaN
    }
    let xb = x.to_bits() & 0x7fff_ffff;
    if xb > 0x42b7d001 {
        // |x| > 91.906261: the result no longer fits in f32.
        return if x.is_sign_negative() {
            f32::NEG_INFINITY
        } else {
            f32::INFINITY
        };
    }
    static SIGN: [f64; 2] = [1., -1.];
    let sign_scale = SIGN[x.is_sign_negative() as usize];
    if xb <= 0x40f80000u32 {
        // |x| <= 7.75
        if xb <= 0x34000000u32 {
            // |x| <= f32::EPSILON
            // taylor series for I1(x) ~ x/2 + O(x^3)
            return x * 0.5;
        }
        return i1f_small(f32::from_bits(xb), sign_scale) as f32;
    }
    // 7.75 < |x| <= 91.906261: asymptotic expansion in 1/|x|.
    i1f_asympt(f32::from_bits(xb), sign_scale)
}
/**
Evaluates I1(x) for small arguments from the rational form
I1(x) = x/2 * (1 + 1/2 * (x/2)^2 + (x/2)^4 * P((x/2)^2))
and multiplies the result by `sign_scale`.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,7.75},6,6},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1f_small(x: f32, sign_scale: f64) -> f64 {
    // Bit patterns of the numerator / denominator coefficients of P,
    // in the order they are handed to the polynomial evaluator.
    const NUM: [u64; 7] = [
        0x3fb5555555555555,
        0x3f706cdccca396c4,
        0x3f23f9e12bdbba92,
        0x3ec8e39208e926b2,
        0x3e62e53b433c42ff,
        0x3def7cb16d10fb46,
        0x3d6747cd73d9d783,
    ];
    const DEN: [u64; 7] = [
        0x3ff0000000000000,
        0xbfa2075f77b54885,
        0x3f438c6d797c29f5,
        0xbeda57e2a258c6da,
        0x3e677e777c569432,
        0xbdea9212a96babc1,
        0x3d5e183186d5d782,
    ];

    // h = x/2, t = h^2, t2 = h^4
    let half = (x as f64) * 0.5;
    let t = half * half;
    let t2 = t * t;

    let numerator = f_estrin_polyeval7(
        t,
        f64::from_bits(NUM[0]),
        f64::from_bits(NUM[1]),
        f64::from_bits(NUM[2]),
        f64::from_bits(NUM[3]),
        f64::from_bits(NUM[4]),
        f64::from_bits(NUM[5]),
        f64::from_bits(NUM[6]),
    );
    let denominator = f_estrin_polyeval7(
        t,
        f64::from_bits(DEN[0]),
        f64::from_bits(DEN[1]),
        f64::from_bits(DEN[2]),
        f64::from_bits(DEN[3]),
        f64::from_bits(DEN[4]),
        f64::from_bits(DEN[5]),
        f64::from_bits(DEN[6]),
    );
    let ratio = numerator / denominator;

    // 1 + t/2, then add t^2 * P(t).
    let leading = f_fmla(0.5, t, 1.);
    let bracket = f_fmla(t2, ratio, leading);
    bracket * half * sign_scale
}
/**
Asymptotic expansion for I1 (single precision).

Computes:
sqrt(x) * exp(-x) * I1(x) = Pn(1/x)/Qm(1/x)
hence:
I1(x) = Pn(1/x)/Qm(1/x)*exp(x)/sqrt(x)

All arithmetic is done in `f64`; the value is multiplied by `sign_scale`
and rounded to `f32` once at the end.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[1,x]
g[z_]:=f[1/z]
{err, approx,err1}=MiniMaxApproximation[g[z],{z,{1/91.9,1/7.75},9,8},WorkingPrecision->60]
poly=Numerator[approx];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1f_asympt(x: f32, sign_scale: f64) -> f32 {
    // Bit patterns of the numerator (10) and denominator (9) coefficients,
    // in the order they are handed to the polynomial evaluators.
    const NUM: [u64; 10] = [
        0x3fd9884533d43711,
        0xc0309c047537243a,
        0x4073bdb14a29bf68,
        0xc0aaf9eca14d15af,
        0x40d6c629318a9e42,
        0xc0f7bee33088a4b0,
        0x410d018cef093ee2,
        0xc111f32b325d3fe4,
        0x4100dddad80e0b42,
        0xc0c96006c91a00e2,
    ];
    const DEN: [u64; 9] = [
        0x3ff0000000000000,
        0xc044a11d10bae889,
        0x408843069497d993,
        0xc0c058710de4b9b9,
        0x40eb0d97f71420ae,
        0xc10b55d181ef9ea1,
        0x411f9413e1932a48,
        0xc1213bff5bc7d2d6,
        0x4105c53e92d9b9c0,
    ];

    let xd = x as f64;
    // Expansion variable 1/x.
    let inv = 1. / xd;

    let numerator = f_polyeval10(
        inv,
        f64::from_bits(NUM[0]),
        f64::from_bits(NUM[1]),
        f64::from_bits(NUM[2]),
        f64::from_bits(NUM[3]),
        f64::from_bits(NUM[4]),
        f64::from_bits(NUM[5]),
        f64::from_bits(NUM[6]),
        f64::from_bits(NUM[7]),
        f64::from_bits(NUM[8]),
        f64::from_bits(NUM[9]),
    );
    let denominator = f_estrin_polyeval9(
        inv,
        f64::from_bits(DEN[0]),
        f64::from_bits(DEN[1]),
        f64::from_bits(DEN[2]),
        f64::from_bits(DEN[3]),
        f64::from_bits(DEN[4]),
        f64::from_bits(DEN[5]),
        f64::from_bits(DEN[6]),
        f64::from_bits(DEN[7]),
        f64::from_bits(DEN[8]),
    );

    let ratio = numerator / denominator;
    // exp(x) and 1/sqrt(x) factors that undo the scaling of the approximant.
    let exp_x = core_expf(x);
    let inv_sqrt = j1f_rsqrt(xd);
    (ratio * inv_sqrt * exp_x * sign_scale) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for `f_i1f`: special values and both signs in the
    /// small-argument and asymptotic ranges.
    #[test]
    fn test_i1f() {
        assert!(f_i1f(f32::NAN).is_nan());
        for &arg in [f32::INFINITY, f32::NEG_INFINITY].iter() {
            assert!(f_i1f(arg).is_infinite());
        }
        // (argument, expected I1(argument)) pairs compared for exact equality.
        let cases: [(f32, f32); 5] = [
            (0., 0.),
            (1., 0.5651591),
            (-1., -0.5651591),
            (9., 1030.9147),
            (-9., -1030.9147),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_i1f(input), expected);
        }
    }
}

587
vendor/pxfm/src/bessel/i2.rs vendored Normal file
View File

@@ -0,0 +1,587 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::exponents::rational128_exp;
/// Modified Bessel function of the first kind of order 2, `I2(x)`.
///
/// The sign of `x` is discarded before the kernels are called, so the result
/// is the same for `x` and `-x`. Special cases handled here:
/// * `±0` returns `0`;
/// * `±inf` returns `+inf`;
/// * NaN returns NaN;
/// * `|x| >= 713.9897136326099` returns `+inf`.
pub fn f_i2(x: f64) -> f64 {
    // Magnitude of `x` as raw bits (sign bit cleared).
    let abs_bits = x.to_bits() & 0x7fff_ffff_ffff_ffffu64;

    if abs_bits == 0 {
        // |x| == 0
        return 0.;
    }
    if abs_bits >= 0x7ff0_0000_0000_0000u64 {
        // Exponent field is all ones: |x| == inf or x == NaN.
        return if x.is_infinite() {
            f64::INFINITY
        } else {
            x + f64::NAN // x = NaN
        };
    }
    if abs_bits <= 0x3cb0000000000000u64 {
        // |x| <= f64::EPSILON
        // Power series of I2(x) ~ x^2/8 + O(x^4)
        const R: f64 = 1. / 8.;
        return x * x * R;
    }
    if abs_bits < 0x401f000000000000u64 {
        // |x| < 7.75
        return i2_small(f64::from_bits(abs_bits));
    }
    if abs_bits >= 0x40864feaeefb23b8 {
        // x >= 713.9897136326099
        return f64::INFINITY;
    }
    i2_asympt(f64::from_bits(abs_bits))
}
/**
Computes
I2(x) = x^2 * R(x^2)
Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselI[2,x]/x^2
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.000000000001,7.75},11,11},WorkingPrecision->75]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i2_small(x: f64) -> f64 {
const P: [(u64, u64); 12] = [
(0x0000000000000000, 0x3fc0000000000000),
(0x3c247833fda9de9a, 0x3f8387c6e72a1b5f),
(0xbbccaf0be91261a6, 0x3f30ba88efff56fa),
(0x3b57c911bfebe1d7, 0x3ecc62e53d061300),
(0x3af3b963f26a3d05, 0x3e5bb090327a14e1),
(0x3a898bff9d40e030, 0x3de0d29c3d37e5b5),
(0xb9f2f63c80d377db, 0x3d5a9e365f1bf6e0),
(0xb965e6d78e1c2b65, 0x3ccbf7ef0929b813),
(0xb8da83d7d40e7310, 0x3c33737520046f4d),
(0xb83f811d5aa3f36e, 0x3b91506558dab318),
(0xb78e601bf5c998c3, 0x3ae2013b3e858bd1),
(0xb6c8185c51734ed8, 0x3a20cc277a5051ba),
];
let x_sqr = DoubleDouble::from_exact_mult(x, x);
let x2 = x_sqr * x_sqr;
let x4 = x2 * x2;
let x8 = x4 * x4;
let e0 = DoubleDouble::mul_add_f64(
x_sqr,
DoubleDouble::from_bit_pair(P[1]),
f64::from_bits(0x3fc0000000000000),
);
let e1 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc0ba42af56ed76b, 0xbf7cd8e6e2b39f60),
(0x3b90697aa005e598, 0x3efa0260394e1a3d),
(0xbb0c7ccde1f63c82, 0xbe6f1766ec64e492),
(0x3a63f18409bc336f, 0x3ddb80b6b5abad98),
(0xb9e0cd49f22132fe, 0xbd42ff9b55d553da),
(0xb934bfe64905d309, 0x3ca50814fa258ebc),
(0x38a1e35c2d6860f4, 0xbc02c4f2faca2195),
(0xb7ff39e268277e4e, 0x3b5aa545a2c1f16d),
(0xb71053f58545760c, 0xbaacde4c133d42d1),
(0xb68d0c2ccab0ae5b, 0x39f5a965b92b06bc),
(0xb5dc35bda16bee7b, 0xb931375b1c9cfbc7),
];
let e0 = DoubleDouble::mul_add_f64(
x_sqr,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
let p = DoubleDouble::div(p_num, p_den);
DoubleDouble::quick_mult(p, x_sqr).to_f64()
}
/**
Asymptotic expansion for I2.
I2(x)=R(1/x)*Exp(x)/sqrt(x)
Generated in Wolfram:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[2,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/714.0,1/7.5},11,11},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i2_asympt(x: f64) -> f64 {
let dx = x;
let recip = DoubleDouble::from_quick_recip(x);
const P: [(u64, u64); 12] = [
(0x3c718bb28ebc5f4e, 0x3fd9884533d43650),
(0x3c96e15a87b6e1d1, 0xc0350acc9e5cb0f9),
(0xbd20b212a79e08f5, 0x40809251af67598a),
(0xbd563b7397df3a54, 0xc0bfc09ede682c8b),
(0xbd5eb872cb057d91, 0x40f44253a9e1e1ab),
(0x3d7614735e566fc5, 0xc121cbcd96dc8765),
(0xbddc4f8df2010026, 0x4145a592e8ec74ad),
(0x3dea227617b678a7, 0xc161df96fb6a9df9),
(0x3e17c9690d906194, 0x41732c71397757f8),
(0x3e0638226ce0b938, 0xc178893fde0e6ed7),
(0xbe09d8dc4e7930ce, 0x417066abe24b31df),
(0xbde152007ee29e54, 0xc150531da3f31b16),
];
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcd0d33e9e73b503, 0xc0496f5a09751d50),
(0x3d2f9c44a069dc4b, 0x40934427187ac370),
(0xbd69e2e5a3618381, 0xc0d19983f74fdf52),
(0x3d88c69a62ae8b44, 0x410524fcaa71e85a),
(0xbdc0345b806dd0bf, 0xc13120daf531b66b),
(0xbdd35875712fff6f, 0x4152943a4f9f1c7f),
(0xbdf8dd50e92553fd, 0xc169b83aeede08ea),
(0x3e0800ecaa77f79e, 0x41746c61554a08ce),
(0x3dd74fbc32c5f696, 0xc16ba2febd1932a3),
(0x3dc23eb2c943b539, 0x413574ae68b6b378),
(0xbd95d86c5c94cd65, 0xc104adac99eaa90c),
];
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
let z = DoubleDouble::div(p_num, p_den);
let mut e = i0_exp(dx * 0.5);
e = DoubleDouble::from_exact_add(e.hi, e.lo);
let r_sqrt = DoubleDouble::from_rsqrt_fast(dx);
let r = DoubleDouble::quick_mult(z * r_sqrt * e, e);
let err = f_fmla(
r.hi,
f64::from_bits(0x3c40000000000000), // 2^-59
f64::from_bits(0x3ba0000000000000), // 2^-69
);
let up = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if up == lb {
return r.to_f64();
}
i2_asympt_hard(x)
}
/**
High-precision fallback for the asymptotic expansion of I2.

Called by `i2_asympt` when its double-double result fails the rounding test.
Evaluates
I2(x)=R(1/x)*Exp(x)/sqrt(x)
with R = P/Q, every step being carried out in `DyadicFloat128` arithmetic.

Generated in Wolfram:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[2,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/714.0,1/7.5},15,15},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn i2_asympt_hard(x: f64) -> f64 {
// Numerator coefficients of R (16 terms), constant term first:
// the Horner loop below starts from P[15] and finishes with P[0].
static P: [DyadicFloat128; 16] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -129,
mantissa: 0xcc42299e_a1b28468_3bb16645_ba1dc793_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -123,
mantissa: 0xe202abf7_de10e93f_2a2e6a0f_af69c788_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -118,
mantissa: 0xf70296c3_ad33bde6_866cfd01_0e846cfc_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -113,
mantissa: 0xa83df971_736c4e6c_1a35479b_ad6d9172_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0x9baa2015_9c5ca461_0aff0b62_54a70fdb_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -106,
mantissa: 0xc70af95d_f95d14ad_1094ea1b_e46b2d2f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -103,
mantissa: 0xa838fb48_e79fb706_642da604_6a73b4f8_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -101,
mantissa: 0x8fe29f37_02b1e876_39e88664_1c8b3b5d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -100,
mantissa: 0xc8e9a474_0a03f93a_16d2e7a9_627eba4e_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -95,
mantissa: 0x8807d1f6_6d646a08_8c7e8900_12d6a5ed_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -93,
mantissa: 0xe5c25026_97518024_36878256_fd81c08f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -91,
mantissa: 0xeaa075f0_f5151bed_95ec612f_ab9834a7_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -89,
mantissa: 0x9b267222_82d5c666_348d7d1d_0fedfba4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -88,
mantissa: 0x81b45c4c_3e828396_1d5bdede_869c3b84_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -89,
mantissa: 0xf4495d43_4bc8dba6_42bdb5d6_c8ba2c9c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -90,
mantissa: 0xc9b29546_0c226270_bb428035_587b6d6a_u128,
},
];
// Denominator coefficients of R (16 terms), constant term first.
static Q: [DyadicFloat128; 16] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -121,
mantissa: 0x89e18bae_ca9629a1_26927ba2_fbdd66ab_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -116,
mantissa: 0x92a90fc2_e905f634_4946e8a0_dd8e3874_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -112,
mantissa: 0xc1742696_d29e3846_3e183737_29db8b68_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -108,
mantissa: 0xabf61cc0_236a0e90_2572113d_fa339591_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -105,
mantissa: 0xcff0fe90_dac1b08e_9a5740ae_b2984fc1_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -102,
mantissa: 0x9ff36729_e407c538_cfcea3a7_63f39043_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -101,
mantissa: 0xc86ff6a3_9b803a31_d385e9ea_83f9d751_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -98,
mantissa: 0xb4a125b1_6cab70f3_0f314558_708843df_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -94,
mantissa: 0x9670fd33_f83bcaa7_85cf2d82_c0bf8cd5_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -92,
mantissa: 0xd70b4ea5_32fedb9d_78a3c047_05e650f4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -90,
mantissa: 0xb9c7904c_3f97b633_c2c0ad9b_ad573ede_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -89,
mantissa: 0xc2023c21_5155e9fe_6fb17bb2_c865becd_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -89,
mantissa: 0xd9400a5e_27c58803_22948cf3_6154ac49_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -90,
mantissa: 0x87aa272d_6a9700b4_449a9db8_1a93b0ee_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -93,
mantissa: 0xd1a86655_5b259611_dfc7affc_6ffb0e20_u128,
},
];
// The polynomials are evaluated in the variable 1/x.
let recip = DyadicFloat128::accurate_reciprocal(x);
// Horner evaluation of the numerator, highest coefficient first.
let mut p_num = P[15];
for i in (0..15).rev() {
p_num = recip * p_num + P[i];
}
// Horner evaluation of the denominator, highest coefficient first.
let mut p_den = Q[15];
for i in (0..15).rev() {
p_den = recip * p_den + Q[i];
}
// R(1/x) = P(1/x) / Q(1/x), formed as a multiplication by the reciprocal.
let z = p_num * p_den.reciprocal();
// NOTE(review): presumably 1/sqrt(x) and exp(x) in 128-bit precision,
// matching the Exp(x)/sqrt(x) factor of the formula - confirm in the helpers.
let r_sqrt = bessel_rsqrt_hard(x, recip);
let f_exp = rational128_exp(x);
(z * r_sqrt * f_exp).fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks reference values at both signs (the expected value is the same
    /// for `x` and `-x`) together with NaN handling.
    #[test]
    fn test_i2() {
        // (|x|, expected I2) pairs; each is checked for +x and -x.
        let symmetric_cases: [(f64, f64); 5] = [
            (7.750000000757874, 257.0034362785801),
            (7.482812501363189, 198.26765887136534),
            (0.01, 1.2500104166992188e-5),
            (9.01, 872.9250699638584),
            (f64::INFINITY, f64::INFINITY),
        ];
        for (magnitude, expected) in symmetric_cases {
            assert_eq!(f_i2(magnitude), expected);
            assert_eq!(f_i2(-magnitude), expected);
        }

        assert!(f_i2(f64::NAN).is_nan());
    }
}

194
vendor/pxfm/src/bessel/i2f.rs vendored Normal file
View File

@@ -0,0 +1,194 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::exponents::core_expf;
use crate::polyeval::{f_estrin_polyeval8, f_estrin_polyeval9};
/// Modified Bessel function of the first kind of order 2, single precision.
///
/// The sign of `x` is discarded before the kernels are called, so the result
/// is the same for `x` and `-x`. `±0` returns `0`, `±inf` returns `+inf`,
/// NaN returns NaN, and `|x| >= 91.92277` returns `+inf`.
///
/// ULP 0.5
pub fn f_i2f(x: f32) -> f32 {
    // Magnitude of `x` as raw bits (sign bit cleared).
    let abs_bits = x.to_bits() & 0x7fff_ffff;

    if abs_bits == 0 {
        // |x| == 0
        return 0.;
    }
    if abs_bits >= 0x7f80_0000u32 {
        // Exponent field is all ones: |x| == inf or x == NaN.
        return if x.is_infinite() {
            f32::INFINITY
        } else {
            x + f32::NAN // x == NaN
        };
    }
    if abs_bits >= 0x42b7d875u32 {
        // |x| >= 91.92277 it's infinity
        return f32::INFINITY;
    }
    if abs_bits <= 0x34000000u32 {
        // |x| <= f32::EPSILON: leading series term x^2/8, computed in f64.
        const R: f64 = 1. / 8.;
        let xd = x as f64;
        return (xd * xd * R) as f32;
    }
    if abs_bits <= 0x40f80000u32 {
        // |x| <= 7.75
        return i2f_small(f32::from_bits(abs_bits));
    }
    i2f_asympt(f32::from_bits(abs_bits))
}
/**
Small-argument kernel for single-precision I2, used by `f_i2f` for `|x| <= 7.75`.

Evaluates
I2(x) = x^2 * P(x^2) / Q(x^2)
in f64, with a 9-term numerator and an 8-term denominator.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselI[2,x]/x^2
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.000000000001,7.75},8,7},WorkingPrecision->75]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i2f_small(x: f32) -> f32 {
    // Work in f64; z = x^2 is the polynomial variable.
    let xd = x as f64;
    let z = xd * xd;

    // Numerator coefficients, constant term (1/8) first.
    let numerator = f_estrin_polyeval9(
        z,
        f64::from_bits(0x3fc0000000000000),
        f64::from_bits(0x3f831469a38d72c7),
        f64::from_bits(0x3f2f453dd3dd98f4),
        f64::from_bits(0x3ec8af52ee8fce9b),
        f64::from_bits(0x3e5589f2cb4e0ec9),
        f64::from_bits(0x3dd60fa268a4206d),
        f64::from_bits(0x3d4ab3091ee18d6b),
        f64::from_bits(0x3cb1efec43b15186),
        f64::from_bits(0x3c050992c6e9e63f),
    );
    // Denominator coefficients, constant term (1.0) first.
    let denominator = f_estrin_polyeval8(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbf82075d8e3f1476),
        f64::from_bits(0x3f03ef86564a284b),
        f64::from_bits(0xbe7c498fab4a57d8),
        f64::from_bits(0x3dec162ca0f68486),
        f64::from_bits(0xbd53bb6398461540),
        f64::from_bits(0x3cb265215261e64a),
        f64::from_bits(0xbc01cf52cc350e81),
    );

    // I2(x) = (P/Q) * x^2, narrowed to f32 at the very end.
    let ratio = numerator / denominator;
    (ratio * z) as f32
}
/**
Large-argument kernel for single-precision I2, used by `f_i2f` for `|x| > 7.75`.

The rational function approximates
sqrt(x) * exp(-x) * I2(x) = P(1/x)/Q(1/x)
so that
I2(x) = P(1/x)/Q(1/x) * exp(x)/sqrt(x)
with P and Q both having 9 coefficients.

Generated by Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[2,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/92.3,1/7.5},8,8},WorkingPrecision->70]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i2f_asympt(x: f32) -> f32 {
    // Work in f64; the polynomial variable is 1/x.
    let xd = x as f64;
    let inv_x = 1. / xd;

    // Numerator coefficients, constant term first.
    let numerator = f_estrin_polyeval9(
        inv_x,
        f64::from_bits(0x3fd9884533d45f46),
        f64::from_bits(0xc02b979526807e1e),
        f64::from_bits(0x406b1dd3e795bbed),
        f64::from_bits(0xc09e43629031ec91),
        f64::from_bits(0x40c48c03a39aec1d),
        f64::from_bits(0xc0e0f022ccb8807a),
        f64::from_bits(0x40f0302eeb22a776),
        f64::from_bits(0xc0f02b01549d38b8),
        f64::from_bits(0x40dad4e70f2bc264),
    );
    // Denominator coefficients, constant term (1.0) first.
    let denominator = f_estrin_polyeval9(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xc0405a71a88b191c),
        f64::from_bits(0x407e19f7d247d098),
        f64::from_bits(0xc0aeaac6e0ca17fe),
        f64::from_bits(0x40d2301702f40a98),
        f64::from_bits(0xc0e7e6c6c01841b3),
        f64::from_bits(0x40ed67317e9e46cc),
        f64::from_bits(0xc0d13786aa1ef416),
        f64::from_bits(0xc0a6c9cfe579ae22),
    );
    let ratio = numerator / denominator;

    let exp_x = core_expf(x);
    // NOTE(review): presumably 1/sqrt(x), matching the exp(x)/sqrt(x) factor above.
    let inv_sqrt_x = j1f_rsqrt(xd);

    (ratio * inv_sqrt_x * exp_x) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks zero plus reference values at both signs (the expected value is
    /// the same for `x` and `-x`).
    #[test]
    fn test_i2f() {
        assert_eq!(f_i2f(0.), 0.);

        // (|x|, expected I2) pairs; each is checked for +x and -x.
        let symmetric_cases: [(f32, f32); 3] = [
            (f32::INFINITY, f32::INFINITY),
            (1., 0.13574767),
            (9.432, 1314.6553),
        ];
        for (magnitude, expected) in symmetric_cases {
            assert_eq!(f_i2f(magnitude), expected);
            assert_eq!(f_i2f(-magnitude), expected);
        }
    }
}

626
vendor/pxfm/src/bessel/j0.rs vendored Normal file
View File

@@ -0,0 +1,626 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::alpha0::{
bessel_0_asympt_alpha, bessel_0_asympt_alpha_fast, bessel_0_asympt_alpha_hard,
};
use crate::bessel::beta0::{
bessel_0_asympt_beta, bessel_0_asympt_beta_fast, bessel_0_asympt_beta_hard,
};
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::j0_coeffs_remez::J0_COEFFS_REMEZ;
use crate::bessel::j0_coeffs_taylor::J0_COEFFS_TAYLOR;
use crate::bessel::j0f_coeffs::{J0_ZEROS, J0_ZEROS_VALUE};
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::{f_polyeval9, f_polyeval10, f_polyeval12, f_polyeval19};
use crate::sin_helper::{cos_dd_small, cos_dd_small_fast, cos_f128_small};
use crate::sincos_reduce::{AngleReduced, rem2pi_any, rem2pi_f128};
/// Bessel function of the first kind of order 0, `J0(x)`.
///
/// Dispatch by magnitude:
/// * `|x| <= 0.00000000000000000000532` returns `1`;
/// * `|x| <= f64::EPSILON` uses `1 - x^2/4`;
/// * `|x| <= 1.1` uses the Maclaurin series;
/// * `|x| <= 74.8` uses the stored expansions around tabulated points;
/// * larger finite `|x|` uses the asymptotic form;
/// * `±inf` returns `0`, NaN returns NaN.
pub fn f_j0(x: f64) -> f64 {
    // Bit pattern shifted left by one: drops the sign, keeps magnitude ordering.
    let shifted = x.to_bits().wrapping_shl(1);

    if shifted <= 0x7960000000000000u64 {
        // |x| <= f64::EPSILON
        if shifted <= 0x77723ef88126da90u64 {
            // |x| <= 0.00000000000000000000532
            return 1.;
        }
        // J0(x) ~ 1-x^2/4+O[x]^4
        let half_x = 0.5 * x; // exact.
        return f_fmla(half_x, -half_x, 1.);
    }
    if shifted >= 0x7ffu64 << 53 {
        // |x| == inf or x == NaN
        return if x.is_infinite() {
            0.
        } else {
            x + f64::NAN // x == NaN
        };
    }

    let abs_bits = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    let abs_x = f64::from_bits(abs_bits);

    if abs_bits <= 0x3ff199999999999au64 {
        // |x| <= 1.1
        return j0_maclaurin_series_fast(abs_x);
    }
    if abs_bits <= 0x4052b33333333333u64 {
        // |x| <= 74.8
        return j0_small_argument_fast(abs_x);
    }
    j0_asympt_fast(abs_x)
}
/**
Fast path of the Maclaurin series of J0 in powers of x^2.

The two lowest terms are accumulated in double-double arithmetic, the ten
higher terms in plain f64. A rounding test decides whether the result can be
returned directly; otherwise `j0_maclaurin_series` is used.

Generated by SageMath:
```python
mp.prec = 180
def print_expansion_at_0():
    print(f"const J0_MACLAURIN_SERIES: [(u64, u64); 12] = [")
    from mpmath import mp, j0, taylor
    poly = taylor(lambda val: j0(val), 0, 24)
    real_i = 0
    for i in range(0, 24, 2):
        print_double_double("", DD(poly[i]))
        real_i = real_i + 1
    print("];")
    print(poly)

print_expansion_at_0()
```
**/
#[inline]
pub(crate) fn j0_maclaurin_series_fast(x: f64) -> f64 {
    // Series coefficients of (x^2)^k for k = 0..11, as f64 bit patterns.
    const C: [u64; 12] = [
        0x3ff0000000000000,
        0xbfd0000000000000,
        0x3f90000000000000,
        0xbf3c71c71c71c71c,
        0x3edc71c71c71c71c,
        0xbe723456789abcdf,
        0x3e002e85c0898b71,
        0xbd8522a43f65486a,
        0x3d0522a43f65486a,
        0xbc80b313289be0b9,
        0x3bf5601885e63e5d,
        0xbb669ca9cf3b7f54,
    ];
    let coeff = |k: usize| f64::from_bits(C[k]);

    let x_sq = DoubleDouble::from_exact_mult(x, x);

    // Terms 2..=11 evaluated in f64 on the high word of x^2.
    let tail = f_polyeval10(
        x_sq.hi,
        coeff(2),
        coeff(3),
        coeff(4),
        coeff(5),
        coeff(6),
        coeff(7),
        coeff(8),
        coeff(9),
        coeff(10),
        coeff(11),
    );

    // Two final Horner steps in double-double: C[0] + x^2 * (C[1] + x^2 * tail).
    let inner = DoubleDouble::mul_f64_add_f64(x_sq, tail, coeff(1));
    let series = DoubleDouble::mul_add_f64(x_sq, inner, coeff(0));

    // squaring error (2^-56) + poly error 2^-75
    let err = f_fmla(
        x_sq.hi,
        f64::from_bits(0x3c70000000000000), // 2^-56
        f64::from_bits(0x3b40000000000000), // 2^-75
    );
    let upper = series.hi + (series.lo + err);
    let lower = series.hi + (series.lo - err);
    if upper == lower {
        series.to_f64()
    } else {
        j0_maclaurin_series(x)
    }
}
/**
Second-level Maclaurin series of J0: all twelve terms in double-double.

Used when the fast path fails its rounding test. If the result still cannot
be rounded within 2^-98, `j0_maclaurin_series_hard` is used.

Generated by SageMath:
```python
mp.prec = 180
def print_expansion_at_0():
    print(f"const J0_MACLAURIN_SERIES: [(u64, u64); 12] = [")
    from mpmath import mp, j0, taylor
    poly = taylor(lambda val: j0(val), 0, 24)
    real_i = 0
    for i in range(0, 24, 2):
        print_double_double("", DD(poly[i]))
        real_i = real_i + 1
    print("];")
    print(poly)

print_expansion_at_0()
```
**/
#[cold]
pub(crate) fn j0_maclaurin_series(x: f64) -> f64 {
    // Series coefficients of (x^2)^k for k = 0..11, as double-double bit pairs.
    const C: [(u64, u64); 12] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x0000000000000000, 0xbfd0000000000000),
        (0x0000000000000000, 0x3f90000000000000),
        (0xbbdc71c71c71c71c, 0xbf3c71c71c71c71c),
        (0x3b7c71c71c71c71c, 0x3edc71c71c71c71c),
        (0xbab23456789abcdf, 0xbe723456789abcdf),
        (0xba8b6edec0692e65, 0x3e002e85c0898b71),
        (0x3a2604db055bd075, 0xbd8522a43f65486a),
        (0xb9a604db055bd075, 0x3d0522a43f65486a),
        (0x3928824198c6f6e1, 0xbc80b313289be0b9),
        (0xb869b0b430eb27b8, 0x3bf5601885e63e5d),
        (0x380ee6b4638f3a25, 0xbb669ca9cf3b7f54),
    ];
    const ERR: f64 = f64::from_bits(0x39d0000000000000); // 2^-98

    let coeff = |k: usize| DoubleDouble::from_bit_pair(C[k]);

    let x_sq = DoubleDouble::from_exact_mult(x, x);
    let series = f_polyeval12(
        x_sq,
        coeff(0),
        coeff(1),
        coeff(2),
        coeff(3),
        coeff(4),
        coeff(5),
        coeff(6),
        coeff(7),
        coeff(8),
        coeff(9),
        coeff(10),
        coeff(11),
    );

    // Renormalise the pair before the rounding test.
    let r = DoubleDouble::from_exact_add(series.hi, series.lo);
    let upper = r.hi + (r.lo + ERR);
    let lower = r.hi + (r.lo - ERR);
    if upper == lower {
        r.to_f64()
    } else {
        j0_maclaurin_series_hard(x)
    }
}
/**
Last-resort Maclaurin series of J0, evaluated entirely in `DyadicFloat128`.

Called by `j0_maclaurin_series` when its double-double result fails the
rounding test. The series is a 12-term polynomial in x^2.

Generated by SageMath:
```python
mp.prec = 180
def print_expansion_at_0():
print(f"const P: [DyadicFloat128; 12] = [")
from mpmath import mp, j0, taylor
poly = taylor(lambda val: j0(val), 0, 24)
# print(poly)
real_i = 0
for i in range(0, 24, 2):
print_dyadic(DD(poly[i]))
real_i = real_i + 1
print("];")
print(poly)
print_expansion_at_0()
```
**/
#[cold]
#[inline(never)]
pub(crate) fn j0_maclaurin_series_hard(x: f64) -> f64 {
// Series coefficients of (x^2)^k, k = 0..11, constant term first:
// the Horner loop below starts from P[11] and finishes with P[0].
static P: [DyadicFloat128; 12] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -129,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -133,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -139,
mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -145,
mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -151,
mantissa: 0x91a2b3c4_d5e6f809_1a2b3c4d_5e6f8092_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -158,
mantissa: 0x81742e04_4c5b8724_8909fcb6_8cd4e410_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -166,
mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -174,
mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -182,
mantissa: 0x85989944_df05c4ef_b7cce721_23e1b391_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -191,
mantissa: 0xab00c42f_31f2e799_3d2f3c53_6120e5d8_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -200,
mantissa: 0xb4e54e79_dbfa9c23_29738e18_bb602809_u128,
},
];
// The polynomial variable is x^2, formed in 128-bit arithmetic.
let dx = DyadicFloat128::new_from_f64(x);
let x2 = dx * dx;
// Horner evaluation, highest coefficient first.
let mut p = P[11];
for i in (0..11).rev() {
p = x2 * p + P[i];
}
p.fast_as_f64()
}
/// On a small range, searches for the nearest tabulated zero or extremum of J0,
/// then picks the stored series expansion at that point and evaluates the
/// polynomial at the offset from it.
///
/// The lowest five terms are accumulated in double-double arithmetic and the
/// remaining nineteen in plain f64. If the rounding test on the result fails,
/// the computation is redone by `j0_small_argument_dd`.
#[inline]
pub(crate) fn j0_small_argument_fast(x: f64) -> f64 {
    // let avg_step = 74.6145 / 47.0;
    // let inv_step = 1.0 / avg_step;
    const INV_STEP: f64 = 0.6299043751549631;
    const LAST_INDEX: f64 = (J0_ZEROS.len() - 1) as f64;

    // Approximate (fractional) position of `x` in the table of points.
    let scaled = x * INV_STEP;

    // SAFETY: `scaled` is clamped from above by `LAST_INDEX`. The conversion also
    // requires `x` to be non-negative and not NaN; `f_j0` passes |x| in (1.1, 74.8].
    // NOTE(review): other callers must uphold the same precondition - confirm.
    let lower_idx = unsafe { scaled.min(LAST_INDEX).to_int_unchecked::<usize>() };
    // SAFETY: same argument as above, applied to the rounded-up position.
    let upper_idx = unsafe { scaled.ceil().min(LAST_INDEX).to_int_unchecked::<usize>() };

    // Candidate points on either side of `x`; keep the closer one.
    let lower_point = DoubleDouble::from_bit_pair(J0_ZEROS[lower_idx]);
    let upper_point = DoubleDouble::from_bit_pair(J0_ZEROS[upper_idx]);
    let lower_dist = (lower_point.hi - x).abs();
    let upper_dist = (upper_point.hi - x).abs();
    let (center, idx, dist) = if lower_dist < upper_dist {
        (lower_point, lower_idx, lower_dist)
    } else {
        (upper_point, upper_idx, upper_dist)
    };

    // Table entry 0 is served by the Maclaurin series instead.
    if idx == 0 {
        return j0_maclaurin_series_fast(x);
    }

    // Very close to the expansion point the Taylor table is used,
    // otherwise the Remez table.
    let c = if dist < 1e-3 {
        &J0_COEFFS_TAYLOR[idx - 1]
    } else {
        &J0_COEFFS_REMEZ[idx - 1]
    };

    // Offset from the expansion point, kept as a double-double.
    let r = DoubleDouble::full_add_f64(-center, x.abs());

    // We hit exact zero, value, better to return it directly
    if dist == 0. {
        return f64::from_bits(J0_ZEROS_VALUE[idx]);
    }

    // Terms 5..=23 in f64, using only the high word of each coefficient.
    let tail = f_polyeval19(
        r.hi,
        f64::from_bits(c[5].1),
        f64::from_bits(c[6].1),
        f64::from_bits(c[7].1),
        f64::from_bits(c[8].1),
        f64::from_bits(c[9].1),
        f64::from_bits(c[10].1),
        f64::from_bits(c[11].1),
        f64::from_bits(c[12].1),
        f64::from_bits(c[13].1),
        f64::from_bits(c[14].1),
        f64::from_bits(c[15].1),
        f64::from_bits(c[16].1),
        f64::from_bits(c[17].1),
        f64::from_bits(c[18].1),
        f64::from_bits(c[19].1),
        f64::from_bits(c[20].1),
        f64::from_bits(c[21].1),
        f64::from_bits(c[22].1),
        f64::from_bits(c[23].1),
    );

    // Terms 4..=0 by Horner steps in double-double.
    let mut acc = DoubleDouble::mul_f64_add(r, tail, DoubleDouble::from_bit_pair(c[4]));
    acc = DoubleDouble::mul_add(acc, r, DoubleDouble::from_bit_pair(c[3]));
    acc = DoubleDouble::mul_add(acc, r, DoubleDouble::from_bit_pair(c[2]));
    acc = DoubleDouble::mul_add(acc, r, DoubleDouble::from_bit_pair(c[1]));
    acc = DoubleDouble::mul_add(acc, r, DoubleDouble::from_bit_pair(c[0]));

    // Rounding test with error bound acc.hi * 2^-56 + 2^-64.
    let err = f_fmla(
        acc.hi,
        f64::from_bits(0x3c70000000000000), // 2^-56
        f64::from_bits(0x3bf0000000000000), // 2^-64
    );
    let upper = acc.hi + (acc.lo + err);
    let lower = acc.hi + (acc.lo - err);
    if upper == lower {
        acc.to_f64()
    } else {
        j0_small_argument_dd(r, c)
    }
}
#[cold]
fn j0_small_argument_dd(r: DoubleDouble, c0: &[(u64, u64); 24]) -> f64 {
let c = &c0[15..];
let p0 = f_polyeval9(
r.to_f64(),
f64::from_bits(c[0].1),
f64::from_bits(c[1].1),
f64::from_bits(c[2].1),
f64::from_bits(c[3].1),
f64::from_bits(c[4].1),
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
);
let c = c0;
let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0]));
let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo);
let err = f_fmla(
p.hi,
f64::from_bits(0x3c10000000000000), // 2^-62
f64::from_bits(0x3a90000000000000), // 2^-86
);
let ub = p.hi + (p.lo + err);
let lb = p.hi + (p.lo - err);
if ub != lb {
return j0_small_argument_hard(r, c);
}
p.to_f64()
}
/// Last-resort evaluation of the expansion around a tabulated point.
///
/// Runs a full Horner scheme over all 24 coefficient pairs in double-double,
/// renormalising the accumulator with `from_exact_add` after every step.
#[cold]
#[inline(never)]
fn j0_small_argument_hard(r: DoubleDouble, c: &[(u64, u64); 24]) -> f64 {
    let leading = DoubleDouble::from_bit_pair(c[23]);
    let sum = (0..23).rev().fold(leading, |acc, k| {
        let step = DoubleDouble::mul_add(r, acc, DoubleDouble::from_bit_pair(c[k]));
        DoubleDouble::from_exact_add(step.hi, step.lo)
    });
    sum.to_f64()
}
/// Fast asymptotic evaluation of J0 for large arguments.
///
/// Evaluates:
/// `J0 = sqrt(2/(PI*x)) * beta(x) * cos(x - PI/4 - alpha(x))`
///
/// The sign of `x` is discarded up front. When the rounding test on the fast
/// result fails, the already computed reciprocal, reciprocal square root and
/// reduced angle are passed on to `j0_asympt`.
#[inline]
pub(crate) fn j0_asympt_fast(x: f64) -> f64 {
    const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc8cbc0d30ebfd15),
        f64::from_bits(0x3fe9884533d43651),
    );
    const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc81a62633145c07),
        f64::from_bits(0xbfe921fb54442d18),
    );

    let x = x.abs();

    // NOTE(review): this literal has 15 hex digits, i.e. it equals
    // 0x07fd_0000_0000_0000, so the comparison holds for essentially every
    // argument that reaches this path. A 16-digit 0x7fd0_0000_0000_0000 (2^1022)
    // may have been intended. Left unchanged here — confirm upstream.
    let use_safe_division = x.to_bits() > 0x7fd000000000000u64;
    let recip = if use_safe_division {
        DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_safe_div(4.0, x), 0.25)
    } else {
        DoubleDouble::from_recip(x)
    };

    let alpha = bessel_0_asympt_alpha_fast(recip);
    let beta = bessel_0_asympt_beta_fast(recip);
    let angle = rem2pi_any(x).angle;

    // Without full subtraction cancellation happens sometimes
    let phase_shift = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
    let phase = DoubleDouble::full_dd_add(angle, phase_shift);
    let cos_phase = cos_dd_small_fast(phase);
    let amplitude = DoubleDouble::quick_mult(beta, cos_phase);

    let r_sqrt = DoubleDouble::from_rsqrt_fast(x);
    let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
    let p = DoubleDouble::quick_mult(scale, amplitude);

    let err = f_fmla(
        p.hi,
        f64::from_bits(0x3be0000000000000), // 2^-65
        f64::from_bits(0x3a60000000000000), // 2^-89
    );
    let ub = p.hi + (p.lo + err);
    let lb = p.hi + (p.lo - err);
    if ub != lb {
        return j0_asympt(x, recip, r_sqrt, angle);
    }
    p.to_f64()
}
/// Second-stage asymptotic evaluation of J0.
///
/// Evaluates:
/// `J0 = sqrt(2/(PI*x)) * beta(x) * cos(x - PI/4 - alpha(x))`
///
/// `recip`, `r_sqrt` and `angle` are supplied by the caller; `x` itself is only
/// forwarded to `j0_asympt_hard` when the rounding test fails here as well.
pub(crate) fn j0_asympt(
    x: f64,
    recip: DoubleDouble,
    r_sqrt: DoubleDouble,
    angle: DoubleDouble,
) -> f64 {
    const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc8cbc0d30ebfd15),
        f64::from_bits(0x3fe9884533d43651),
    );
    const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc81a62633145c07),
        f64::from_bits(0xbfe921fb54442d18),
    );

    let alpha = bessel_0_asympt_alpha(recip);
    let beta = bessel_0_asympt_beta(recip);

    // Without full subtraction cancellation happens sometimes
    let phase_shift = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
    let phase = DoubleDouble::full_dd_add(angle, phase_shift);
    let cos_phase = cos_dd_small(phase);
    let amplitude = DoubleDouble::quick_mult(beta, cos_phase);

    let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
    let product = DoubleDouble::quick_mult(scale, amplitude);
    let p = DoubleDouble::from_exact_add(product.hi, product.lo);

    let err = f_fmla(
        p.hi,
        f64::from_bits(0x3bd0000000000000), // 2^-66
        f64::from_bits(0x39e0000000000000), // 2^-97
    );
    let ub = p.hi + (p.lo + err);
    let lb = p.hi + (p.lo - err);
    if ub != lb {
        return j0_asympt_hard(x);
    }
    p.to_f64()
}
/// Last-resort asymptotic evaluation of J0 carried out in `DyadicFloat128`.
///
/// Evaluates:
/// `J0 = sqrt(2/(PI*x)) * beta(x) * cos(x - PI/4 - alpha(x))`
///
/// Everything (reciprocal, alpha, beta, reduced angle, reciprocal square root)
/// is recomputed from `x` instead of reusing the lower-precision intermediates.
#[cold]
#[inline(never)]
pub(crate) fn j0_asympt_hard(x: f64) -> f64 {
    const SQRT_2_OVER_PI: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -128,
        mantissa: 0xcc42299e_a1b28468_7e59e280_5d5c7180_u128,
    };
    const MPI_OVER_4: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Neg,
        exponent: -128,
        mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
    };

    let x_wide = DyadicFloat128::new_from_f64(x);
    let recip = DyadicFloat128::accurate_reciprocal(x);

    let alpha = bessel_0_asympt_alpha_hard(recip);
    let beta = bessel_0_asympt_beta_hard(recip);
    let angle = rem2pi_f128(x_wide);

    // (x mod 2*PI) - PI/4 - alpha(x)
    let phase_shift = MPI_OVER_4 - alpha;
    let phase = angle + phase_shift;
    let cos_phase = cos_f128_small(phase);
    let amplitude = beta * cos_phase;

    let r_sqrt = bessel_rsqrt_hard(x, recip);
    let scale = SQRT_2_OVER_PI * r_sqrt;
    (scale * amplitude).fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks of `f_j0` against exact expected results: tiny arguments,
    /// moderate arguments, very large arguments and the non-finite inputs.
    #[test]
    fn test_j0() {
        // (argument, expected) pairs compared for exact equality.
        let exact_cases = [
            (f64::EPSILON, 1.0),
            (-0.000000000000000000000532, 1.0),
            (0.0000000000000000000532, 1.0),
            (-2.000976555054876, 0.22332760641907712),
            (-2.3369499004222215E+304, -3.3630754230844632e-155),
            (
                f64::from_bits(0xd71a31ffe2ff7e9f),
                f64::from_bits(0xb2e58532f95056ff),
            ),
            (6.1795701510782757E+307, 6.075192922402001e-155),
            (6.1795701510782757E+301, 4.118334155030934e-152),
            (6.1795701510782757E+157, 9.5371668900364e-80),
            (79., -0.08501719554953485),
            (99.746819858680596470279979, -8.419106281522749e-17),
            (f64::INFINITY, 0.),
            (f64::NEG_INFINITY, 0.),
        ];
        for (argument, expected) in exact_cases {
            assert_eq!(f_j0(argument), expected);
        }

        // Without FMA 2.703816901253004e-16
        #[cfg(any(
            all(target_arch = "x86_64", target_feature = "fma"),
            target_arch = "aarch64"
        ))]
        assert_eq!(f_j0(93.463718781944774171190), 2.7038169012530046e-16);

        assert!(f_j0(f64::NAN).is_nan());
    }
}

1347
vendor/pxfm/src/bessel/j0_coeffs_remez.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1292
vendor/pxfm/src/bessel/j0_coeffs_taylor.rs vendored Normal file

File diff suppressed because it is too large Load Diff

420
vendor/pxfm/src/bessel/j0f.rs vendored Normal file
View File

@@ -0,0 +1,420 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f_coeffs::{J0_ZEROS, J0_ZEROS_VALUE, J0F_COEFFS};
use crate::bessel::trigo_bessel::cos_small;
use crate::double_double::DoubleDouble;
use crate::polyeval::{f_polyeval9, f_polyeval10, f_polyeval12, f_polyeval14};
use crate::sincos_reduce::rem2pif_any;
/// Bessel of the first kind of order 0
///
/// Max ulp 0.5
///
/// Dispatch, by magnitude of `x`:
/// - zero, infinity, NaN and `|x| <= f32::EPSILON` are handled directly;
/// - `|x| <= 0.25` uses the Maclaurin series;
/// - `|x| <= 74.8` uses expansions around tabulated points;
/// - anything larger uses the asymptotic formula, apart from a handful of
///   inputs whose result is returned from a hard-coded bit pattern.
pub fn f_j0f(x: f32) -> f32 {
    // Shifting the sign bit out lets |x| be compared as an unsigned integer.
    let ux = x.to_bits().wrapping_shl(1);
    let is_inf_or_nan = ux >= 0xffu32 << 24;
    let is_tiny = ux <= 0x6800_0000u32;
    if is_inf_or_nan || is_tiny {
        // |x| == 0, |x| == inf, |x| == NaN, |x| <= f32::EPSILON
        if ux == 0 {
            // |x| == 0
            return 1.0;
        }
        if x.is_infinite() {
            return 0.;
        }
        if is_tiny {
            // |x| < f32::EPSILON
            // taylor series for J0(x) ~ 1 - x^2/4 + O(x^4)
            #[cfg(any(
                all(
                    any(target_arch = "x86", target_arch = "x86_64"),
                    target_feature = "fma"
                ),
                all(target_arch = "aarch64", target_feature = "neon")
            ))]
            {
                use crate::common::f_fmlaf;
                return f_fmlaf(x, -x * 0.25, 1.);
            }
            #[cfg(not(any(
                all(
                    any(target_arch = "x86", target_arch = "x86_64"),
                    target_feature = "fma"
                ),
                all(target_arch = "aarch64", target_feature = "neon")
            )))]
            {
                use crate::common::f_fmla;
                let dx = x as f64;
                return f_fmla(dx, -dx * 0.25, 1.) as f32;
            }
        }
        return x + f32::NAN; // x == NaN
    }

    let x_abs = x.to_bits() & 0x7fff_ffff;
    if x_abs <= 0x4295999au32 {
        // |x| <= 74.8
        if x_abs <= 0x3e800000u32 {
            // |x| <= 0.25
            return j0f_maclaurin_series(x);
        }
        // Single input on this range with a hard-coded result.
        if x_abs == 0x401a42e8u32 {
            return f32::from_bits(0xbb3b2f69u32);
        }
        return small_argument_path(x);
    }

    // Exceptions: inputs whose result is returned from a stored bit pattern,
    // everything else goes through the asymptotic expansion on |x|.
    match x_abs {
        0x65ce46e4 => f32::from_bits(0x1eed85c4),
        0x7e3dcda0 => f32::from_bits(0x92b81111),
        0x76d84625 => f32::from_bits(0x95d7a68b),
        0x6bf68a7b => f32::from_bits(0x1dc70a09),
        0x7842c820 => f32::from_bits(0x17ebf13e),
        0x4ba332e9 => f32::from_bits(0x27250206),
        _ => j0f_asympt(f32::from_bits(x_abs)),
    }
}
/**
Maclaurin series of J0, evaluated in f64 as a 9-term polynomial in `x^2`
and rounded to f32.

Generated by SageMath:
```python
# Maclaurin series for j0
def print_expansion_at_0_f():
    print(f"pub(crate) const J0_MACLAURIN_SERIES: [u64; 9] = [")
    from mpmath import mp, j0, taylor
    mp.prec = 60
    poly = taylor(lambda val: j0(val), 0, 18)
    z = 0
    for i in range(0, 18, 2):
        print(f"{double_to_hex(poly[i])},")
    print("];")
    print(f"poly {poly}")

print_expansion_at_0_f()
```
**/
#[inline]
fn j0f_maclaurin_series(x: f32) -> f32 {
    // Bit patterns of the even-order series coefficients, lowest order first.
    const C: [u64; 9] = [
        0x3ff0000000000000,
        0xbfd0000000000000,
        0x3f90000000000000,
        0xbf3c71c71c71c71c,
        0x3edc71c71c71c71c,
        0xbe723456789abcdf,
        0x3e002e85c0898b71,
        0xbd8522a43f65486a,
        0x3d0522a43f65486a,
    ];
    let coeffs = C.map(f64::from_bits);
    let wide = x as f64;
    let series = f_polyeval9(
        wide * wide,
        coeffs[0],
        coeffs[1],
        coeffs[2],
        coeffs[3],
        coeffs[4],
        coeffs[5],
        coeffs[6],
        coeffs[7],
        coeffs[8],
    );
    series as f32
}
/// Evaluates J0 on the moderate range by expanding around the nearest tabulated
/// zero or extremum.
///
/// The closest entry of `J0_ZEROS` to `|x|` is located, the stored expansion
/// for that point is picked, and the polynomial is evaluated in f64 at the
/// offset from it. When the nearest entry is index 0 the Maclaurin series is
/// used instead.
#[inline]
fn small_argument_path(x: f32) -> f32 {
    // let avg_step = 74.6145 / 47.0;
    // let inv_step = 1.0 / avg_step;
    const INV_STEP: f64 = 0.6299043751549631;
    const LAST_POINT: f64 = (J0_ZEROS.len() - 1) as f64;

    let x_abs = f32::from_bits(x.to_bits() & 0x7fff_ffff) as f64;
    let scaled = x_abs * INV_STEP;
    // SAFETY: the sign bit of `x_abs` is cleared and `INV_STEP` is positive, so
    // `scaled` is non-negative or NaN; `f64::min` returns the other operand for
    // NaN and clamps everything else to `LAST_POINT`, which fits in `usize`.
    let below_idx = unsafe { scaled.min(LAST_POINT).to_int_unchecked::<usize>() };
    // SAFETY: same reasoning; `ceil` keeps a non-negative value non-negative.
    let above_idx = unsafe { scaled.ceil().min(LAST_POINT).to_int_unchecked::<usize>() };

    let below_point = DoubleDouble::from_bit_pair(J0_ZEROS[below_idx]);
    let above_point = DoubleDouble::from_bit_pair(J0_ZEROS[above_idx]);
    let below_dist = (below_point.hi - x_abs).abs();
    let above_dist = (above_point.hi - x_abs).abs();

    // Keep whichever neighbour is strictly closer; ties go to the upper one.
    let (found_zero, idx, dist) = if below_dist < above_dist {
        (below_point, below_idx, below_dist)
    } else {
        (above_point, above_idx, above_dist)
    };

    if idx == 0 {
        return j0f_maclaurin_series(x);
    }

    // We hit the tabulated point exactly; return the stored value directly.
    if dist == 0. {
        return f64::from_bits(J0_ZEROS_VALUE[idx]) as f32;
    }

    let c = &J0F_COEFFS[idx - 1];
    // Offset from the tabulated point, subtracting its two words in turn.
    let r = (x_abs - found_zero.hi) - found_zero.lo;
    let coeff = |k: usize| f64::from_bits(c[k]);
    let p = f_polyeval14(
        r,
        coeff(0),
        coeff(1),
        coeff(2),
        coeff(3),
        coeff(4),
        coeff(5),
        coeff(6),
        coeff(7),
        coeff(8),
        coeff(9),
        coeff(10),
        coeff(11),
        coeff(12),
        coeff(13),
    );
    p as f32
}
/// Reciprocal square root computed as `(1 / x) * sqrt(x)`.
#[inline]
pub(crate) fn j1f_rsqrt(x: f64) -> f64 {
    let reciprocal = 1. / x;
    reciprocal * x.sqrt()
}
/// Asymptotic evaluation of J0 for large arguments, carried out in f64.
///
/// Evaluates:
/// `J0 = sqrt(2/(PI*x)) * beta(x) * cos(x - PI/4 - alpha(x))`
///
/// The angle is reduced with `rem2pif_any` before the `-PI/4 - alpha(x)` shift
/// is added to it.
#[inline]
fn j0f_asympt(x: f32) -> f32 {
    const SQRT_2_OVER_PI: f64 = f64::from_bits(0x3fe9884533d43651);
    const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);

    let wide = x as f64;
    let alpha = j0f_asympt_alpha(wide);
    let beta = j0f_asympt_beta(wide);
    let angle = rem2pif_any(x);

    let phase_shift = MPI_OVER_4 - alpha;
    let phase = angle + phase_shift;
    let amplitude = beta * cos_small(phase);
    let scale = SQRT_2_OVER_PI * j1f_rsqrt(wide);
    (scale * amplitude) as f32
}
/**
Alpha term of the J0 asymptotic expansion: a 12-term polynomial in `1/x^2`,
multiplied by `1/x`.

Note expansion generation below: this is negative series expressed in Sage as positive,
so before any real evaluation `x=1/x` should be applied.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(0, x, 50)
Q = Qn_asymptotic(0, x, 50)

R_series = (-Q/P)

# alpha is atan(R_series) so we're doing Taylor series atan expansion on R_series

arctan_series_Z = sum([QQ(-1)**k * x**(QQ(2)*k+1) / RealField(700)(RealField(700)(2)*k+1) for k in range(25)])
alpha_series = arctan_series_Z(R_series)

# see the series
print(alpha_series)
```
**/
#[inline]
pub(crate) fn j0f_asympt_alpha(x: f64) -> f64 {
    // Bit patterns of the series coefficients, lowest order first.
    const C: [u64; 12] = [
        0x3fc0000000000000,
        0xbfb0aaaaaaaaaaab,
        0x3fcad33333333333,
        0xbffa358492492492,
        0x403779a1f8e38e39,
        0xc080bd1fc8b1745d,
        0x40d16b51e66c789e,
        0xc128ecc3af33ab37,
        0x418779dae2b8512f,
        0xc1ec296336955c7f,
        0x4254f5ee683b6432,
        0xc2c2f51eced6693f,
    ];
    let coeffs = C.map(f64::from_bits);
    let inv_x = 1. / x;
    let inv_x_sq = inv_x * inv_x;
    let series = f_polyeval12(
        inv_x_sq,
        coeffs[0],
        coeffs[1],
        coeffs[2],
        coeffs[3],
        coeffs[4],
        coeffs[5],
        coeffs[6],
        coeffs[7],
        coeffs[8],
        coeffs[9],
        coeffs[10],
        coeffs[11],
    );
    series * inv_x
}
/**
Beta term of the J0 asymptotic expansion: a 10-term polynomial in `1/x^2`.

Beta series
Generated by SageMath:
```python
#generate b series
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x', default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(0, x, 50)
Q = Qn_asymptotic(0, x, 50)

def sqrt_series(s):
    val = S.valuation()
    lc = S[val]  # Leading coefficient
    b = lc.sqrt() * x**(val // 2)

    for _ in range(5):
        b = (b + S / b) / 2
        b = b
    return b

S = (P**2 + Q**2).truncate(50)

b_series = sqrt_series(S).truncate(30)
#see the series
print(b_series)
```
**/
#[inline]
pub(crate) fn j0f_asympt_beta(x: f64) -> f64 {
    // Bit patterns of the series coefficients, lowest order first.
    const C: [u64; 10] = [
        0x3ff0000000000000,
        0xbfb0000000000000,
        0x3fba800000000000,
        0xbfe15f0000000000,
        0x4017651180000000,
        0xc05ab8c13b800000,
        0x40a730492f262000,
        0xc0fc73a7acd696f0,
        0x41577458dd9fce68,
        0xc1b903ab9b27e18f,
    ];
    let coeffs = C.map(f64::from_bits);
    let inv_x = 1. / x;
    let inv_x_sq = inv_x * inv_x;
    f_polyeval10(
        inv_x_sq,
        coeffs[0],
        coeffs[1],
        coeffs[2],
        coeffs[3],
        coeffs[4],
        coeffs[5],
        coeffs[6],
        coeffs[7],
        coeffs[8],
        coeffs[9],
    )
}
#[cfg(test)]
mod tests {
    use crate::f_j0f;

    /// Spot checks of `f_j0f`: one value per evaluation range in both signs
    /// (the expected values are identical for `x` and `-x`), plus the
    /// non-finite inputs.
    #[test]
    fn test_j0f() {
        // Negative arguments.
        assert_eq!(f_j0f(-3123.), 0.012329336);
        assert_eq!(f_j0f(-0.1), 0.99750155);
        assert_eq!(f_j0f(-15.1), -0.03456193);
        // Same magnitudes, positive sign.
        assert_eq!(f_j0f(3123.), 0.012329336);
        assert_eq!(f_j0f(0.1), 0.99750155);
        assert_eq!(f_j0f(15.1), -0.03456193);
        // Non-finite inputs.
        assert_eq!(f_j0f(f32::INFINITY), 0.);
        assert_eq!(f_j0f(f32::NEG_INFINITY), 0.);
        assert!(f_j0f(f32::NAN).is_nan());
    }
}

1050
vendor/pxfm/src/bessel/j0f_coeffs.rs vendored Normal file

File diff suppressed because it is too large Load Diff

697
vendor/pxfm/src/bessel/j1.rs vendored Normal file
View File

@@ -0,0 +1,697 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![allow(clippy::excessive_precision)]
use crate::bessel::alpha1::{
bessel_1_asympt_alpha, bessel_1_asympt_alpha_fast, bessel_1_asympt_alpha_hard,
};
use crate::bessel::beta1::{
bessel_1_asympt_beta, bessel_1_asympt_beta_fast, bessel_1_asympt_beta_hard,
};
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::j1_coeffs::{J1_COEFFS, J1_ZEROS, J1_ZEROS_VALUE};
use crate::bessel::j1_coeffs_taylor::J1_COEFFS_TAYLOR;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::{f_polyeval8, f_polyeval9, f_polyeval12, f_polyeval19};
use crate::sin_helper::{sin_dd_small, sin_dd_small_fast, sin_f128_small};
use crate::sincos_reduce::{AngleReduced, rem2pi_any, rem2pi_f128};
/// Bessel of the first kind of order 1
///
/// Note about accuracy:
/// - Close to zero Bessel has tiny values, such that testing against MPFR must be done
///   in exactly the same precision, since any nearest representable number has ULP > 0.5.
///   For example `J1(0.000000000000000000000000000000000000023509886)` in single precision
///   has 0.7 ULP for any number with extended precision that would be represented in f32.
///   The same applies to `J1(4.4501477170144018E-309)` in double precision and some other
///   subnormal numbers.
pub fn f_j1(x: f64) -> f64 {
    // Shifting the sign bit out lets |x| be compared as an unsigned integer.
    let ux = x.to_bits().wrapping_shl(1);
    let below_epsilon = ux <= 0x7960000000000000u64;
    if ux >= 0x7ffu64 << 53 || below_epsilon {
        // |x| <= f64::EPSILON, |x| == inf, x == NaN
        if ux <= 0x72338c9356bb0314u64 {
            // |x| <= 0.000000000000000000000000000000001241
            // J1(x) ~ x/2+O[x]^3
            return x * 0.5;
        }
        if below_epsilon {
            // |x| <= f64::EPSILON
            // J1(x) ~ x/2-x^3/16+O[x]^5
            // NOTE(review): (x/8)^2 is x^2/64, whereas the series quoted above
            // needs x^2/16 (i.e. x/4). At this magnitude the term is far below
            // half an ulp of 0.5, so the rounded result should not depend on
            // it — confirm intent upstream.
            let eighth_x = x * 0.125; // exact. x / 8
            return f_fmla(eighth_x, -eighth_x, 0.5) * x;
        }
        if x.is_infinite() {
            return 0.;
        }
        return x + f64::NAN; // x == NaN
    }

    let ax: u64 = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    if ax >= 0x4052a6784230fcf8u64 {
        // |x| >= 74.60109
        return j1_asympt_fast(x);
    }
    if ax < 0x3feccccccccccccd {
        // |x| < 0.9
        j1_maclaurin_series_fast(x)
    } else {
        j1_small_argument_fast(x)
    }
}
/// Fast asymptotic evaluation of J1 for large arguments.
///
/// Evaluates:
/// `J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))`
///
/// discarding 1*PI/2 using identities gives:
/// `J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))`
///
/// to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is:
/// `J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))`
///
/// The computation runs on `|x|` and the sign of `x` is applied to the result.
/// When the rounding test fails, the original signed `x` and the already
/// computed intermediates are passed on to `j1_asympt`.
#[inline]
fn j1_asympt_fast(x: f64) -> f64 {
    const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc8cbc0d30ebfd15),
        f64::from_bits(0x3fe9884533d43651),
    );
    const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc81a62633145c07),
        f64::from_bits(0xbfe921fb54442d18),
    );

    let signed_x = x;
    let sign_scale: f64 = if x.is_sign_negative() { -1. } else { 1. };
    let x = x.abs();

    // NOTE(review): this literal has 15 hex digits, i.e. it equals
    // 0x07fd_0000_0000_0000, so the comparison holds for essentially every
    // argument that reaches this path. A 16-digit 0x7fd0_0000_0000_0000 (2^1022)
    // may have been intended. Left unchanged here — confirm upstream.
    let use_safe_division = x.to_bits() > 0x7fd000000000000u64;
    let recip = if use_safe_division {
        DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_safe_div(4.0, x), 0.25)
    } else {
        DoubleDouble::from_recip(x)
    };

    let alpha = bessel_1_asympt_alpha_fast(recip);
    let beta = bessel_1_asympt_beta_fast(recip);
    let angle = rem2pi_any(x).angle;

    // Without full subtraction cancellation happens sometimes
    let phase_shift = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
    let phase = DoubleDouble::full_dd_add(angle, phase_shift);
    let sin_phase = sin_dd_small_fast(phase);
    let amplitude = DoubleDouble::quick_mult(beta, sin_phase);

    let r_sqrt = DoubleDouble::from_rsqrt_fast(x);
    let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
    let p = DoubleDouble::quick_mult(scale, amplitude);

    let err = f_fmla(
        p.hi,
        f64::from_bits(0x3be0000000000000), // 2^-65
        f64::from_bits(0x3a60000000000000), // 2^-89
    );
    let ub = p.hi + (p.lo + err);
    let lb = p.hi + (p.lo - err);
    if ub != lb {
        return j1_asympt(signed_x, recip, r_sqrt, angle);
    }
    p.to_f64() * sign_scale
}
/// Second-stage asymptotic evaluation of J1.
///
/// Evaluates:
/// `J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))`
///
/// discarding 1*PI/2 using identities gives:
/// `J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))`
///
/// to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is:
/// `J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))`
///
/// `recip`, `r_sqrt` and `angle` are supplied by the caller. `x` is used here
/// only for its sign and is forwarded unchanged to `j1_asympt_hard` when the
/// rounding test fails.
fn j1_asympt(x: f64, recip: DoubleDouble, r_sqrt: DoubleDouble, angle: DoubleDouble) -> f64 {
    const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc8cbc0d30ebfd15),
        f64::from_bits(0x3fe9884533d43651),
    );
    const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc81a62633145c07),
        f64::from_bits(0xbfe921fb54442d18),
    );

    let sign_scale: f64 = if x.is_sign_negative() { -1. } else { 1. };

    let alpha = bessel_1_asympt_alpha(recip);
    let beta = bessel_1_asympt_beta(recip);

    // Without full subtraction cancellation happens sometimes
    let phase_shift = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
    let phase = DoubleDouble::full_dd_add(angle, phase_shift);
    let sin_phase = sin_dd_small(phase);
    let amplitude = DoubleDouble::quick_mult(beta, sin_phase);

    let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
    let product = DoubleDouble::quick_mult(scale, amplitude);
    let p = DoubleDouble::from_exact_add(product.hi, product.lo);

    let err = f_fmla(
        p.hi,
        f64::from_bits(0x3bc0000000000000), // 2^-67
        f64::from_bits(0x39c0000000000000), // 2^-99
    );
    let ub = p.hi + (p.lo + err);
    let lb = p.hi + (p.lo - err);
    if ub != lb {
        return j1_asympt_hard(x);
    }
    p.to_f64() * sign_scale
}
/**
Fast evaluation of J1 near zero as `x * P(x^2)`.

The constant term of `P` is kept in double-double, the remaining twelve
coefficients are evaluated in f64 on the high word of `x^2`. When the rounding
test fails, `j1_maclaurin_series` is used.

Generated in Sollya:
```text
pretty = proc(u) {
  return ~(floor(u*1000)/1000);
};

bessel_j1 = library("./cmake-build-release/libbessel_sollya.dylib");

f = bessel_j1(x)/x;
d = [0, 0.921];
w = 1;
pf = fpminimax(f, [|0,2,4,6,8,10,12,14,16,18,20,22,24|], [|107, 107, 107, 107, 107, D...|], d, absolute, floating);

w = 1;
or_f = bessel_j1(x);
pf1 = pf * x;
err_p = -log2(dirtyinfnorm(pf1*w-or_f, d));
print ("relative error:", pretty(err_p));

for i from 0 to degree(pf) by 2 do {
    print("'", coeff(pf, i), "',");
};
```
See ./notes/bessel_sollya/bessel_j1_at_zero_fast.sollya
**/
#[inline]
pub(crate) fn j1_maclaurin_series_fast(x: f64) -> f64 {
    const C0: DoubleDouble = DoubleDouble::from_bit_pair((0x3b30e9e087200000, 0x3fe0000000000000));
    // Bit patterns of the f64 coefficients that follow `C0`, lowest order first.
    const TAIL: [u64; 12] = [
        0xbfb0000000000000,
        0x3f65555555555555,
        0xbf0c71c71c71c45e,
        0x3ea6c16c16b82b02,
        0xbe3845c87ec0cbef,
        0x3dc27e0313e8534c,
        0xbd4443dd2d0305d0,
        0xbd0985a435fe9aa1,
        0x3d10c82d92c46d30,
        0xbd0aa3684321f219,
        0x3cf8351f29ac345a,
        0xbcd333fe6cd52c9f,
    ];

    let x_sq = DoubleDouble::from_exact_mult(x, x);
    let t = TAIL.map(f64::from_bits);
    let p = f_polyeval12(
        x_sq.hi, t[0], t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], t[10], t[11],
    );

    let inner = DoubleDouble::mul_f64_add(x_sq, p, C0);
    let z = DoubleDouble::quick_mult_f64(inner, x);

    // squaring error (2^-56) + poly error 2^-75
    let err = f_fmla(
        x_sq.hi,
        f64::from_bits(0x3c70000000000000), // 2^-56
        f64::from_bits(0x3b40000000000000), // 2^-75
    );
    let ub = z.hi + (z.lo + err);
    let lb = z.hi + (z.lo - err);
    if ub != lb {
        return j1_maclaurin_series(x);
    }
    z.to_f64()
}
/**
Second-stage evaluation of J1 near zero as `|x| * P(x^2)`, with the sign of
`x` applied to the result.

The five lowest coefficients of `P` are kept in double-double, the remaining
eight are evaluated in f64. When the rounding test fails,
`j1_maclaurin_series_hard` is called with the original signed argument.

Generated in Sollya:
```text
pretty = proc(u) {
  return ~(floor(u*1000)/1000);
};

bessel_j1 = library("./cmake-build-release/libbessel_sollya.dylib");

f = bessel_j1(x)/x;
d = [0, 0.921];
w = 1;
pf = fpminimax(f, [|0,2,4,6,8,10,12,14,16,18,20,22,24|], [|107, 107, 107, 107, 107, D...|], d, absolute, floating);

w = 1;
or_f = bessel_j1(x);
pf1 = pf * x;
err_p = -log2(dirtyinfnorm(pf1*w-or_f, d));
print ("relative error:", pretty(err_p));

for i from 0 to degree(pf) by 2 do {
    print("'", coeff(pf, i), "',");
};
```
See ./notes/bessel_sollya/bessel_j1_at_zero.sollya
**/
pub(crate) fn j1_maclaurin_series(x: f64) -> f64 {
    // Double-double coefficients, lowest order first.
    const CL: [(u64, u64); 5] = [
        (0xb930000000000000, 0x3fe0000000000000),
        (0x39c8e80000000000, 0xbfb0000000000000),
        (0x3c05555554f3add7, 0x3f65555555555555),
        (0xbbac71c4eb0f8c94, 0xbf0c71c71c71c71c),
        (0xbb3f56b7a43206d4, 0x3ea6c16c16c16c17),
    ];
    // Bit patterns of the f64 coefficients that follow `CL`, lowest order first.
    const TAIL: [u64; 8] = [
        0xbe3845c8a0ce5129,
        0x3dc27e4fb7789ea2,
        0xbd4522a43f633af1,
        0x3cc2c97589d53f97,
        0xbc3ab8151dca7912,
        0x3baf08732286d1d4,
        0xbb10ac65637413f4,
        0xbae4d8336e4f779c,
    ];

    let signed_x = x;
    let sign_scale: f64 = if x.is_sign_negative() { -1. } else { 1. };
    let x = x.abs();

    let x_sq = DoubleDouble::from_exact_mult(x, x);
    let t = TAIL.map(f64::from_bits);
    let tail_sum = f_polyeval8(x_sq.hi, t[0], t[1], t[2], t[3], t[4], t[5], t[6], t[7]);

    // Horner steps for the double-double coefficients, highest order first.
    let mut acc = DoubleDouble::mul_f64_add(x_sq, tail_sum, DoubleDouble::from_bit_pair(CL[4]));
    for k in (0..4).rev() {
        acc = DoubleDouble::mul_add(x_sq, acc, DoubleDouble::from_bit_pair(CL[k]));
    }
    let p = DoubleDouble::quick_mult_f64(acc, x);

    let err = f_fmla(
        p.hi,
        f64::from_bits(0x3bd0000000000000), // 2^-66
        f64::from_bits(0x3a00000000000000), // 2^-95
    );
    let ub = p.hi + (p.lo + err);
    let lb = p.hi + (p.lo - err);
    if ub == lb {
        return p.to_f64() * sign_scale;
    }
    j1_maclaurin_series_hard(signed_x)
}
/**
Last-resort evaluation of J1 near zero in `DyadicFloat128`.

Runs a Horner scheme in `x^2` over the 13 coefficients below, multiplies by
`|x|` and applies the sign of `x` to the result, so `C[k]` acts as the
coefficient of `x^(2k+1)`.

Taylor expansion at 0
Generated by SageMath:
```python
def print_expansion_at_0():
    print(f"static C: [DyadicFloat128; 13] = ")
    from mpmath import mp, j1, taylor, expm1
    poly = taylor(lambda val: j1(val), 0, 26)
    real_i = 0
    print("[")
    for i in range(1, len(poly), 2):
        print_dyadic(poly[i])
        real_i = real_i + 1
    print("],")

    print("];")

mp.prec = 180

print_expansion_at_0()
```
**/
#[cold]
#[inline(never)]
fn j1_maclaurin_series_hard(x: f64) -> f64 {
    static C: [DyadicFloat128; 13] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -128,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -131,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -136,
            mantissa: 0xaaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaab_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -142,
            mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -148,
            mantissa: 0xb60b60b6_0b60b60b_60b60b60_b60b60b6_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -155,
            mantissa: 0xc22e4506_72894ab6_cd8efb11_d33f5618_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -162,
            mantissa: 0x93f27dbb_c4fae397_780b69f5_333c725b_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -170,
            mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -178,
            mantissa: 0x964bac6d_7ae67d8d_aec68405_485dea03_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -187,
            mantissa: 0xd5c0f53a_fe6fa17f_8c7b0b68_39691f4e_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -196,
            mantissa: 0xf8bb4be7_8e7896b0_58fee362_01a4370c_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -205,
            mantissa: 0xf131bdf7_cff8d02e_e1ef6820_f9d58ab6_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -214,
            mantissa: 0xc5e72c48_0d1aec75_3caa2e0d_edd008ca_u128,
        },
    ];

    let sign_scale: f64 = if x.is_sign_negative() { -1. } else { 1. };
    let magnitude = DyadicFloat128::new_from_f64(x.abs());
    let squared = magnitude * magnitude;

    // Horner in x^2, starting from the highest-order coefficient.
    let poly = C[..12]
        .iter()
        .rev()
        .fold(C[12], |acc, coeff| squared * acc + *coeff);

    (poly * magnitude).fast_as_f64() * sign_scale
}
/// Evaluates J1 on the moderate range by expanding around the nearest tabulated
/// zero or extremum.
///
/// The closest entry of `J1_ZEROS` to `|x|` is located, the stored series
/// expansion for that point is picked, and the polynomial is evaluated at the
/// offset from it; the sign of `x` is applied to the result. When the nearest
/// entry is index 0 the Maclaurin path is used instead, and when the rounding
/// test on the fast result fails the evaluation is handed over to
/// `j1_small_argument_dd`.
#[inline]
pub(crate) fn j1_small_argument_fast(x: f64) -> f64 {
    // let avg_step = 74.60109 / 47.0;
    // let inv_step = 1.0 / avg_step;
    const INV_STEP: f64 = 0.6300176043004198;
    const LAST_POINT: f64 = (J1_ZEROS.len() - 1) as f64;

    let sign_scale: f64 = if x.is_sign_negative() { -1. } else { 1. };
    let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);

    let scaled = x_abs * INV_STEP;
    // SAFETY: the sign bit of `x_abs` is cleared and `INV_STEP` is positive, so
    // `scaled` is non-negative or NaN; `f64::min` returns the other operand for
    // NaN and clamps everything else to `LAST_POINT`, which fits in `usize`.
    let below_idx = unsafe { scaled.min(LAST_POINT).to_int_unchecked::<usize>() };
    // SAFETY: same reasoning; `ceil` keeps a non-negative value non-negative.
    let above_idx = unsafe { scaled.ceil().min(LAST_POINT).to_int_unchecked::<usize>() };

    let below_point = DoubleDouble::from_bit_pair(J1_ZEROS[below_idx]);
    let above_point = DoubleDouble::from_bit_pair(J1_ZEROS[above_idx]);
    let below_dist = (below_point.hi - x_abs).abs();
    let above_dist = (above_point.hi - x_abs).abs();

    // Keep whichever neighbour is strictly closer; ties go to the upper one.
    let (found_zero, idx, dist) = if below_dist < above_dist {
        (below_point, below_idx, below_dist)
    } else {
        (above_point, above_idx, above_dist)
    };

    if idx == 0 {
        return j1_maclaurin_series_fast(x);
    }

    let r = DoubleDouble::full_add_f64(-found_zero, x_abs);

    // We hit the tabulated point exactly; return the stored value directly.
    if dist == 0. {
        return f64::from_bits(J1_ZEROS_VALUE[idx]) * sign_scale;
    }

    // Within 1e-3 of the tabulated point the Taylor table is used,
    // otherwise the general one. `dist` is already an absolute value.
    let c = if dist < 1e-3 {
        &J1_COEFFS_TAYLOR[idx - 1]
    } else {
        &J1_COEFFS[idx - 1]
    };

    // Coefficients 5..=23 are evaluated in plain f64 from the second word of each pair.
    let tail = |k: usize| f64::from_bits(c[k].1);
    let p = f_polyeval19(
        r.hi,
        tail(5),
        tail(6),
        tail(7),
        tail(8),
        tail(9),
        tail(10),
        tail(11),
        tail(12),
        tail(13),
        tail(14),
        tail(15),
        tail(16),
        tail(17),
        tail(18),
        tail(19),
        tail(20),
        tail(21),
        tail(22),
        tail(23),
    );

    // Coefficients 4..=0 are accumulated in double-double, Horner style.
    let mut z = DoubleDouble::mul_f64_add(r, p, DoubleDouble::from_bit_pair(c[4]));
    for k in (0..4).rev() {
        z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[k]));
    }

    let err = f_fmla(
        z.hi,
        f64::from_bits(0x3c70000000000000), // 2^-56
        f64::from_bits(0x3bf0000000000000), // 2^-64
    );
    let ub = z.hi + (z.lo + err);
    let lb = z.hi + (z.lo - err);
    if ub != lb {
        return j1_small_argument_dd(sign_scale, r, c);
    }
    z.to_f64() * sign_scale
}
fn j1_small_argument_dd(sign_scale: f64, r: DoubleDouble, c0: &[(u64, u64); 24]) -> f64 {
let c = &c0[15..];
let p0 = f_polyeval9(
r.to_f64(),
f64::from_bits(c[0].1),
f64::from_bits(c[1].1),
f64::from_bits(c[2].1),
f64::from_bits(c[3].1),
f64::from_bits(c[4].1),
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
);
let c = c0;
let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0]));
let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo);
let err = f_fmla(
p.hi,
f64::from_bits(0x3c10000000000000), // 2^-62
f64::from_bits(0x3a00000000000000), // 2^-95
);
let ub = p.hi + (p.lo + err);
let lb = p.hi + (p.lo - err);
if ub != lb {
return j1_small_argument_path_hard(sign_scale, r, c);
}
p.to_f64() * sign_scale
}
/// Last-resort evaluation: Horner scheme over all 24 coefficients in double-double,
/// renormalizing the accumulator after every step.
#[cold]
#[inline(never)]
fn j1_small_argument_path_hard(sign_scale: f64, r: DoubleDouble, c: &[(u64, u64); 24]) -> f64 {
    let top = DoubleDouble::from_bit_pair(c[23]);
    // Walk the remaining coefficients from index 22 down to 0.
    let sum = c[..23].iter().rev().fold(top, |acc, &pair| {
        let step = DoubleDouble::mul_add(r, acc, DoubleDouble::from_bit_pair(pair));
        DoubleDouble::from_exact_add(step.hi, step.lo)
    });
    sum.to_f64() * sign_scale
}
/*
   Evaluates:
     J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
   Dropping PI/2 from the phase via the identity cos(t - PI/2) = sin(t) gives:
     J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
   To avoid losing the small term (-PI/4 - alpha(x)) next to a large x, the argument
   is reduced first, so the expansion actually evaluated is:
     J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))

   This method is required for situations where x*x or 1/(x*x) will overflow.
*/
#[cold]
#[inline(never)]
fn j1_asympt_hard(x: f64) -> f64 {
    const SQRT_2_OVER_PI: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -128,
        mantissa: 0xcc42299e_a1b28468_7e59e280_5d5c7180_u128,
    };
    const MPI_OVER_4: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Neg,
        exponent: -128,
        mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
    };

    // The sign of the input is re-applied to the result at the very end.
    let sign_scale: f64 = if x.is_sign_negative() { -1. } else { 1. };
    let ax = x.abs();

    let ax_wide = DyadicFloat128::new_from_f64(ax);
    let inv_ax = DyadicFloat128::accurate_reciprocal(ax);
    let alpha = bessel_1_asympt_alpha_hard(inv_ax);
    let beta = bessel_1_asympt_beta_hard(inv_ax);

    // (x mod 2*PI) + (-PI/4 - alpha(x))
    let reduced = rem2pi_f128(ax_wide);
    let phase = reduced + (MPI_OVER_4 - alpha);
    let wave = beta * sin_f128_small(phase);

    let amplitude = SQRT_2_OVER_PI * bessel_rsqrt_hard(ax, inv_ax);
    (amplitude * wave).fast_as_f64() * sign_scale
}
// Regression tests for `f_j1`: each assertion pins the exact `f64` result for one input.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_j1() {
// Tiny arguments: the expected values are exactly half of the input.
assert_eq!(f_j1(0.000000000000000000000000000000001241), 6.205e-34);
assert_eq!(f_j1(0.0000000000000000000000000000004321), 2.1605e-31);
assert_eq!(f_j1(0.00000000000000000004321), 2.1605e-20);
assert_eq!(f_j1(73.81695991658546), -0.06531447184607607);
assert_eq!(f_j1(0.01), 0.004999937500260416);
assert_eq!(f_j1(0.9), 0.4059495460788057);
// Huge argument.
assert_eq!(
f_j1(162605674999778540000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008686943178258183
);
// Same magnitude with both signs: the results differ only in sign.
assert_eq!(f_j1(3.831705970207517), -1.8501090915423025e-15);
assert_eq!(f_j1(-3.831705970207517), 1.8501090915423025e-15);
assert_eq!(f_j1(-6.1795701510782757E+307), 8.130935041593236e-155);
assert_eq!(
f_j1(0.000000000000000000000000000000000000008827127),
0.0000000000000000000000000000000000000044135635
);
assert_eq!(
f_j1(-0.000000000000000000000000000000000000008827127),
-0.0000000000000000000000000000000000000044135635
);
assert_eq!(f_j1(5.4), -0.3453447907795863);
assert_eq!(
f_j1(77.743162408196766932633181568235159),
0.09049267898021947
);
assert_eq!(
f_j1(84.027189586293545175976760219782591),
0.0870430264022591
);
// Special values: both infinities give 0, NaN gives NaN.
assert_eq!(f_j1(f64::NEG_INFINITY), 0.0);
assert_eq!(f_j1(f64::INFINITY), 0.0);
assert!(f_j1(f64::NAN).is_nan());
}
}

1543
vendor/pxfm/src/bessel/j1_coeffs.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1282
vendor/pxfm/src/bessel/j1_coeffs_taylor.rs vendored Normal file

File diff suppressed because it is too large Load Diff

422
vendor/pxfm/src/bessel/j1f.rs vendored Normal file
View File

@@ -0,0 +1,422 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j1_coeffs::{J1_ZEROS, J1_ZEROS_VALUE};
use crate::bessel::j1f_coeffs::J1F_COEFFS;
use crate::bessel::trigo_bessel::sin_small;
use crate::double_double::DoubleDouble;
use crate::polyeval::{f_polyeval7, f_polyeval10, f_polyeval12, f_polyeval14};
use crate::sincos_reduce::rem2pif_any;
/// Bessel function of the first kind of order 1, single precision.
///
/// Max ULP 0.5
///
/// Special values: `±0` is returned unchanged, `±inf` gives `0`, NaN gives NaN.
///
/// Note about accuracy:
/// - Close to zero the Bessel function has tiny values, so testing against MPFR must be
///   done in exactly the same precision, since any nearest representable number has
///   ULP > 0.5.
///   For example `J1(0.000000000000000000000000000000000000023509886)` in single precision
///   has an error of at least 0.72 ULP for any extended-precision number
///   that would be represented in f32.
pub fn f_j1f(x: f32) -> f32 {
    let bits = x.to_bits();
    // Shifting left by one drops the sign bit, so ±0 becomes exactly 0 and
    // infinities/NaNs are the values at or above 0xff << 24.
    let unsigned = bits.wrapping_shl(1);
    if unsigned == 0 {
        // |x| == 0
        return x;
    }
    if unsigned >= 0xffu32 << 24 {
        // |x| == inf or x == NaN
        return if x.is_infinite() { 0. } else { x + f32::NAN };
    }

    let ax = bits & 0x7fff_ffff;
    if ax <= 0x34000000u32 {
        // |x| <= f32::EPSILON
        // taylor series for J1(x) ~ x/2 + O(x^3)
        return x * 0.5;
    }
    if ax < 0x3e800000u32 {
        // |x| < 0.25
        return poly_near_zero(x);
    }
    if ax < 0x429533c2u32 {
        // |x| < 74.60109
        return small_argument_path(x);
    }

    // Exceptional cases are answered with hard-coded results; everything else
    // goes through the asymptotic expansion.
    let negative = x.is_sign_negative();
    match ax {
        0x6ef9be45 => {
            let magnitude = f32::from_bits(0x187d8a8f);
            if negative { magnitude } else { -magnitude }
        }
        0x7f0e5a38 => {
            let magnitude = f32::from_bits(0x131f680b);
            if negative { -magnitude } else { magnitude }
        }
        _ => j1f_asympt(x) as f32,
    }
}
/// Reciprocal square root of `x`, computed as `(1 / x) * sqrt(x)`.
#[inline]
fn j1f_rsqrt(x: f64) -> f64 {
    let inverse = 1. / x;
    let root = x.sqrt();
    inverse * root
}
/*
   Evaluates:
     J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
   Dropping PI/2 from the phase via the identity cos(t - PI/2) = sin(t) gives:
     J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
   To avoid losing the small term (-PI/4 - alpha(x)) next to a large x, the argument
   is reduced first, so the expansion actually evaluated is:
     J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))
*/
#[inline]
fn j1f_asympt(x: f32) -> f64 {
    const SQRT_2_OVER_PI: f64 = f64::from_bits(0x3fe9884533d43651);
    const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);

    // The sign of the input is re-applied to the result at the very end.
    let sign_scale: f64 = if x.is_sign_negative() { -1. } else { 1. };
    let ax = f32::from_bits(x.to_bits() & 0x7fff_ffff);
    let ax_wide = ax as f64;

    let alpha = j1f_asympt_alpha(ax_wide);
    let beta = j1f_asympt_beta(ax_wide);

    // (x mod 2*PI) + (-PI/4 - alpha(x))
    let reduced = rem2pif_any(ax);
    let phase = reduced + (MPI_OVER_4 - alpha);
    let wave = beta * sin_small(phase);

    let amplitude = SQRT_2_OVER_PI * j1f_rsqrt(ax_wide);
    amplitude * wave * sign_scale
}
/**
Evaluates the `alpha(x)` term of the J1 asymptotic expansion as a 12-term polynomial
in `1/x^2` multiplied by `1/x`.

Note on the generation script below: the series is in negative powers of `x` but is
expressed in Sage in positive powers, so `x = 1/x` must be applied before any real
evaluation.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(1, x, 50)
Q = Qn_asymptotic(1, x, 50)

R_series = (-Q/P)

# alpha is atan(R_series) so we're doing Taylor series atan expansion on R_series
arctan_series_Z = sum([QQ(-1)**k * x**(QQ(2)*k+1) / RealField(700)(RealField(700)(2)*k+1) for k in range(25)])
alpha_series = arctan_series_Z(R_series)

# see the series
print(alpha_series)
```
See notes/bessel_asympt.ipynb for generation
**/
#[inline]
pub(crate) fn j1f_asympt_alpha(x: f64) -> f64 {
    const C: [u64; 12] = [
        0xbfd8000000000000,
        0x3fc5000000000000,
        0xbfd7bccccccccccd,
        0x4002f486db6db6db,
        0xc03e9fbf40000000,
        0x4084997b55945d17,
        0xc0d4a914195269d9,
        0x412cd1b53816aec1,
        0xc18aa4095d419351,
        0x41ef809305f11b9d,
        0xc2572e6809ed618b,
        0x42c4c5b6057839f9,
    ];
    // Decode the stored bit patterns once, in table order.
    let [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11] = C.map(f64::from_bits);

    let inv_x = 1. / x;
    let inv_x2 = inv_x * inv_x;
    // Polynomial in 1/x^2; the trailing factor 1/x turns it into odd powers of 1/x.
    f_polyeval12(inv_x2, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11) * inv_x
}
/**
Evaluates the `beta(x)` term of the J1 asymptotic expansion as a 10-term polynomial
in `1/x^2`.

Note on the generation script below: the series is in negative powers of `x` but is
expressed in Sage in positive powers, so `x = 1/x` must be applied before any real
evaluation.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(1, x, 50)
Q = Qn_asymptotic(1, x, 50)

def sqrt_series(s):
    val = S.valuation()
    lc = S[val]  # Leading coefficient
    b = lc.sqrt() * x**(val // 2)
    for _ in range(5):
        b = (b + S / b) / 2
        b = b
    return b

S = (P**2 + Q**2).truncate(50)

b_series = sqrt_series(S).truncate(30)
# see the beta series
print(b_series)
```
See notes/bessel_asympt.ipynb for generation
**/
#[inline]
pub(crate) fn j1f_asympt_beta(x: f64) -> f64 {
    const C: [u64; 10] = [
        0x3ff0000000000000,
        0x3fc8000000000000,
        0xbfc8c00000000000,
        0x3fe9c50000000000,
        0xc01ef5b680000000,
        0x40609860dd400000,
        0xc0abae9b7a06e000,
        0x41008711d41c1428,
        0xc15ab70164c8be6e,
        0x41bc1055e24f297f,
    ];
    // Decode the stored bit patterns once, in table order.
    let [b0, b1, b2, b3, b4, b5, b6, b7, b8, b9] = C.map(f64::from_bits);

    let inv_x = 1. / x;
    let inv_x2 = inv_x * inv_x;
    f_polyeval10(inv_x2, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9)
}
/**
Evaluates J1 near zero as `x * P(x^2)`, where `P` is a 7-term polynomial evaluated
in double precision; the product is rounded to `f32` at the end.

Generated in Sollya:
```text
pretty = proc(u) {
  return ~(floor(u*1000)/1000);
};

bessel_j1 = library("./cmake-build-release/libbessel_sollya.dylib");

f = bessel_j1(x)/x;
d = [0, 0.921];
w = 1;
pf = fpminimax(f, [|0,2,4,6,8,10,12|], [|D...|], d, absolute, floating);

w = 1;
or_f = bessel_j1(x);
pf1 = pf * x;
err_p = -log2(dirtyinfnorm(pf1*w-or_f, d));
print ("relative error:", pretty(err_p));

for i from 0 to degree(pf) by 2 do {
    print("'", coeff(pf, i), "',");
};
```
See ./notes/bessel_sollya/bessel_j1f_at_zero.sollya
**/
#[inline]
fn poly_near_zero(x: f32) -> f32 {
    const C: [u64; 7] = [
        0x3fe0000000000000,
        0xbfaffffffffffffc,
        0x3f65555555554089,
        0xbf0c71c71c2a74ae,
        0x3ea6c16bbd1dc5c1,
        0xbe384562afb69e7d,
        0x3dc248d0d0221cd0,
    ];
    let [c0, c1, c2, c3, c4, c5, c6] = C.map(f64::from_bits);

    let xd = f64::from(x);
    let even_part = f_polyeval7(xd * xd, c0, c1, c2, c3, c4, c5, c6);
    (even_part * xd) as f32
}
/// Evaluates J1 on the small-argument range from a stored local expansion.
///
/// The argument is mapped onto the `J1_ZEROS` table (nearest zero or extremum of J1),
/// the closer of the two neighbouring entries is selected, and the polynomial stored
/// for that entry in `J1F_COEFFS` is evaluated at the offset from it. Entry 0 is
/// served by `poly_near_zero` instead of a stored expansion.
#[inline]
fn small_argument_path(x: f32) -> f32 {
    // let avg_step = 74.60109 / 47.0;
    // let inv_step = 1.0 / avg_step;
    const INV_STEP: f64 = 0.6300176043004198;
    const J1_ZEROS_COUNT: f64 = (J1_ZEROS.len() - 1) as f64;

    // The sign is applied to the final result only; everything below works on |x|.
    let sign_scale: f64 = if x.is_sign_negative() { -1. } else { 1. };
    let x_abs = f32::from_bits(x.to_bits() & 0x7fff_ffff) as f64;

    let pos = x_abs * INV_STEP;
    // SAFETY: `x_abs` has its sign bit cleared, so `pos` is non-negative or NaN.
    // `f64::min` returns the non-NaN operand, hence the value is always a finite
    // number in `[0, J1_ZEROS_COUNT]`, which is representable as `usize`.
    let below = unsafe { pos.min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
    // SAFETY: same argument as above; `ceil` keeps the value non-negative (or NaN).
    let above = unsafe { pos.ceil().min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };

    let node_below = DoubleDouble::from_bit_pair(J1_ZEROS[below]);
    let node_above = DoubleDouble::from_bit_pair(J1_ZEROS[above]);
    let gap_below = (node_below.hi - x_abs).abs();
    let gap_above = (node_above.hi - x_abs).abs();
    let (node, idx, gap) = if gap_below < gap_above {
        (node_below, below, gap_below)
    } else {
        (node_above, above, gap_above)
    };

    if idx == 0 {
        return poly_near_zero(x);
    }

    // We hit the tabulated point exactly, so return the stored value directly.
    if gap == 0. {
        return (f64::from_bits(J1_ZEROS_VALUE[idx]) * sign_scale) as f32;
    }

    let [c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13] =
        J1F_COEFFS[idx - 1].map(f64::from_bits);
    // Offset from the tabulated point, subtracting its high and low parts in turn.
    let r = (x_abs - node.hi) - node.lo;
    let value = f_polyeval14(
        r, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13,
    );
    (value * sign_scale) as f32
}
// Regression tests for `f_j1f`: each assertion pins the exact `f32` result for one input.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_f_j1f() {
assert_eq!(
f_j1f(77.743162408196766932633181568235159),
0.09049267898021947
);
// Tiny arguments with both signs: the results differ only in sign.
assert_eq!(
f_j1f(-0.000000000000000000000000000000000000008827127),
-0.0000000000000000000000000000000000000044135635
);
assert_eq!(
f_j1f(0.000000000000000000000000000000000000008827127),
0.0000000000000000000000000000000000000044135635
);
assert_eq!(f_j1f(5.4), -0.3453447907795863);
assert_eq!(
f_j1f(84.027189586293545175976760219782591),
0.0870430264022591
);
// Special values: both infinities give 0, NaN gives NaN.
assert_eq!(f_j1f(f32::INFINITY), 0.);
assert_eq!(f_j1f(f32::NEG_INFINITY), 0.);
assert!(f_j1f(f32::NAN).is_nan());
// Very large negative argument.
assert_eq!(f_j1f(-1.7014118e38), 0.000000000000000000006856925);
}
}

857
vendor/pxfm/src/bessel/j1f_coeffs.rs vendored Normal file
View File

@@ -0,0 +1,857 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
Generation of the Taylor expansions around zeros and extrema: see [crate::bessel::j1_coeffs::J1_ZEROS]
for the start of the explanation.
Generated by SageMath and Sollya:
```python
def compute_intervals(zeros):
intervals = []
for i in range(0, len(zeros)):
if i == 0:
a = (zeros[i]) / 2 - 0.05 - zeros[i]
b = (zeros[i] + zeros[i + 1]) / 2 + 0.05 - zeros[i]
intervals.append((RealField(18)(a), RealField(18)(b), RealField(110)(zeros[i])))
elif i + 1 > len(zeros) - 1:
a = (zeros[i - 1] + zeros[i]) / 2 - 0.05 - zeros[i]
b = (zeros[i]) + 0.83 + 0.05 - zeros[i]
intervals.append((RealField(18)(a), RealField(18)(b), RealField(110)(zeros[i])))
else:
a = (zeros[i - 1] + zeros[i]) / 2 - zeros[i] - 0.05
b = (zeros[i] + zeros[i + 1]) / 2 + 0.05 - zeros[i]
intervals.append((RealField(18)(a), RealField(18)(b), RealField(110)(zeros[i])))
return intervals
intervals = compute_intervals(j1_zeros)
# print(intervals)
def build_sollya_script(a, b, zero, deg):
return f"""
prec = 500;
bessel_j1 = library("./pxfm/notes/bessel_sollya/cmake-build-release/libbessel_sollya.dylib");
f = bessel_j1(x + {zero});
d = [{a}, {b}];
pf = remez(f, {deg}, d, 1, 1e-25);
for i from 0 to degree(pf) do {{
write(coeff(pf, i)) >> "coefficients.txt";
write("\\n") >> "coefficients.txt";
}};
"""
def load_coefficients(filename):
with open(filename, "r") as f:
return [RR(line.strip()) for line in f if line.strip()]
def call_sollya_on_interval(a, b, zero, degree=12):
sollya_script = build_sollya_script(a, b, zero, degree)
with open("tmp_interval.sollya", "w") as f:
f.write(sollya_script)
import subprocess
if os.path.exists("coefficients.txt"):
os.remove("coefficients.txt")
try:
result = subprocess.run(
["sollya", "tmp_interval.sollya"],
check=True,
capture_output=True,
text=True
)
except subprocess.CalledProcessError as e:
return
degree = 13
print(f"pub(crate) static J1F_COEFFS: [[u64;{degree + 1}]; {len(intervals)}] = [")
for i in range(0, len(intervals)):
interval = intervals[i]
call_sollya_on_interval(interval[0], interval[1], interval[2], degree)
coeffs = load_coefficients(f"coefficients.txt")
print("[")
for c in coeffs:
print(double_to_hex(c) + ",")
print("],")
print("];")
```
**/
pub(crate) static J1F_COEFFS: [[u64; 14]; 47] = [
[
0x3fe29ea3d19f035d,
0xbce22d3695a081b6,
0xbfca41115c5deeab,
0x3f78d1448e710c46,
0x3f8c441a2f9a4f69,
0xbf386671c22a634e,
0xbf39e2504b2e7b5b,
0x3ee34ccc14eef789,
0x3eda49718b72405e,
0xbe810474efe3c9c6,
0xbe70fa29fb791201,
0x3e1362d76c062ab0,
0x3dfdd76f07295520,
0xbda1a753bf39cb58,
],
[
0xbc600f4743bf63e1,
0xbfd9c6cf582cbf8a,
0x3faae8a39f51ad73,
0x3fab589d1da1462b,
0xbf7537544c334c23,
0xbf624b34099ee01c,
0x3f26e4c2d53c4f46,
0x3f083a06ee794927,
0xbec9799d241e5d2b,
0xbea3382caabc394d,
0x3e617039a48bb9f6,
0x3e345a8c920dcd07,
0xbdf014c3bf3000a5,
0xbdc034aa4e0a0169,
],
[
0xbfd626ee83500bf2,
0x3cb26dfd317b25ec,
0x3fc55f6bec9ef90d,
0xbf83d23336fd2aca,
0xbf88c77a98398e83,
0x3f45cdc98dc64f81,
0x3f373576fec2e394,
0xbef24614559dc202,
0xbed7b852bb05a2ff,
0x3e90ac054c63ba46,
0x3e6ea70b302a9ba3,
0xbe23616f333984e9,
0xbdfb0a064790ebe6,
0x3db0d9e08e0394a8,
],
[
0xbc69b70cbe5811c2,
0x3fd33518b3874e8d,
0xbf95e70dc6036109,
0xbfa80c83bdeee89d,
0x3f69a4b292e2ab0d,
0x3f613fbc7d6bc462,
0xbf207358bb38afba,
0xbf0796a754e5c2d1,
0x3ec4255a5a67b552,
0x3ea3026fd5c83da8,
0xbe5d482c06555943,
0xbe34473271553ffa,
0x3dec0515656e1a0c,
0x3dbf11e1948b68d2,
],
[
0x3fd17dbf09d40d24,
0xbca5b86542306439,
0xbfc1404bf647c233,
0x3f74f4df276a170d,
0x3f85c628542932bb,
0xbf3d68ab724123eb,
0xbf356acb63c4c7d5,
0x3eec10b48d91d910,
0x3ed67eaa56c73092,
0xbe8bb65dbfeed0f6,
0xbe6d8683c673b075,
0x3e20f7b6316797c6,
0x3dfa451f70b9922a,
0xbdadce2c76e4d044,
],
[
0x3c65df812ede650d,
0xbfcff654544ebcd3,
0x3f89223ff2c07565,
0x3fa4b0c5d5da68d8,
0xbf5f91a9ee0b0e8a,
0xbf5f51c2489da5c1,
0x3f16b4c9c8efdfe6,
0x3f063c54768ebb67,
0xbebe3724b50493f9,
0xbea25c12f8827c9d,
0x3e5747d49182153b,
0x3e33e41718262cb9,
0xbde738f2d8f2cac8,
0xbdbe80a4948dbf09,
],
[
0xbfcddceb4ce1bf49,
0x3c9e79566c79eb3d,
0x3fbda52116c0a587,
0xbf6a9da4603b9358,
0xbf8331e74ea51630,
0x3f33e5cb6ecbca42,
0x3f33885fe920e6b8,
0xbee494c100626ece,
0xbed512b940a2ae49,
0x3e85a8688c9ba4ce,
0x3e6c31a31b773184,
0xbe1bd11439ffd259,
0xbdf96a9daeb33936,
0x3da9176221cc5aa0,
],
[
0xbc62383f10698557,
0x3fcbf3337873a7d9,
0xbf80c83a2d7adab7,
0xbfa251858011820e,
0x3f559eb160bdad7b,
0x3f5c5bce33b024a1,
0xbf10413e2f7af958,
0xbf04a6704d9a9d07,
0x3eb6c43df550ea17,
0x3ea16abdc27eeb92,
0xbe52576e7fc9d1e6,
0xbe332dc1c1ed3ee2,
0x3de2f6391206ebbc,
0x3dbda410af6fe5f4,
],
[
0x3fca7f63fea81f25,
0xbc9710bf367611f2,
0xbfba60afb0664019,
0x3f62c1e930937e24,
0x3f814506466cfd08,
0xbf2cca8c0c28fab3,
0xbf31df821c353039,
0x3edee8816088c0d5,
0x3ed3a365144be247,
0xbe80ed354ac60d34,
0xbe6ab31b90ea8e41,
0x3e168836e968cdd7,
0x3df8613c2e496c6b,
0xbda4daa0300cec8d,
],
[
0x3c5e59bc05abf185,
0xbfc925c6fca08f55,
0x3f786dd32e0596e8,
0x3fa09463bbd036c8,
0xbf4fda0298c57ed5,
0xbf59f4be6075f749,
0x3f0877991961e89e,
0x3f032cb00f1d1bde,
0xbeb19d8c17e4c965,
0xbea06a043bd432bf,
0x3e4d398ca8f49a5e,
0x3e3250f2ec743ceb,
0xbddf086f08c63838,
0xbdbc8e4fa8a9f9e9,
],
[
0xbfc810f50225b04a,
0x3c923e2f625151bc,
0x3fb7fdf97ac36a6f,
0xbf5c3c256a8cde19,
0xbf7f98feb7276ef1,
0x3f25f6559e6b5a2c,
0x3f3080f57a3a527d,
0xbed80c5147824d09,
0xbed256dac8ee5bae,
0x3e7af7628377d0c7,
0x3e6938ef2e239da2,
0xbe12633fe4f5465f,
0xbdf745af34a2de92,
0x3da15e1c189cc1e4,
],
[
0xbc59737d6e4fe431,
0x3fc70c511227d5aa,
0xbf72ccb0e975555d,
0xbf9e7dc08e70e9c5,
0x3f48acdc5b030cb1,
0x3f580503724ae80a,
0xbf032ee4c8c82218,
0xbf01e5d2836968fa,
0x3eac129da754f086,
0x3e9ef1612a209ee4,
0xbe47b90193cc5cb3,
0xbe316f0e2a3b6246,
0x3dd9aabe334f3655,
0x3dbb62c2bd937db8,
],
[
0x3fc633e7f7f05300,
0xbc8dba9947515d38,
0xbfb6273784c1bfc4,
0x3f563ae94ade4347,
0x3f7d4666536b9564,
0xbf216d528356c33f,
0xbf2ec0dcdab1fcc9,
0x3ed34e967676159e,
0x3ed135c5c78436c9,
0xbe75f7c3c6380689,
0xbe67dba82b616a97,
0x3e0e71b8431ababd,
0x3df62fc761d5cf41,
0xbd9d2fe54f4a496b,
],
[
0x3c558a68c87f4030,
0xbfc5664e13b70622,
0x3f6e16555e1087dd,
0x3f9c5e1ad9fb2f2d,
0xbf43d369f956c6bd,
0xbf566f4ec27a7a37,
0x3eff0de050de72b9,
0x3f00cf26431ce3a2,
0xbea6f46c2694b0df,
0xbe9d407f232dd5f2,
0x3e43a29f2e4c9c8a,
0x3e3098ca879e4471,
0xbdd585d6255bc3df,
0xbdba39fcc1ea78bf,
],
[
0xbfc4b71d4ca2cc69,
0x3c88c930c0b4c560,
0x3fb4ae245697fb03,
0xbf5215e4e1a6153c,
0xbf7b633ed6d8e543,
0x3f1c7f17b4d42a82,
0x3f2ce01b8a6eca10,
0xbecfced72e1b750f,
0xbed03c9cd706bc3a,
0x3e72450e1e5d9dd3,
0x3e66a249f63c5bc1,
0xbe0999366388212b,
0xbdf52ba007be60f3,
0x3d98ced5beb32a74,
],
[
0xbc526f6d035edf6d,
0x3fc40f90793605bb,
0xbf68c833077fb99d,
0xbf9aa0ce0421d16e,
0x3f405fa598ed8bab,
0x3f551d30d78a7993,
0xbef9c5807480a6e1,
0xbeffc1bbf50ca15b,
0x3ea32dfda14ee884,
0x3e9bc2119616c18d,
0xbe408b0d01f43d88,
0xbe2fa87db40715d8,
0x3dd24d20c9bf3988,
0x3db920af9a64d8b9,
],
[
0x3fc37dfa8f5a550a,
0xbc850d0284917193,
0xbfb3775c1a04efff,
0x3f4e2b4810a4a882,
0x3f79d151a72aa26d,
0xbf17d8e5a0a8f01d,
0xbf2b49a641814268,
0x3ecac10968085b43,
0x3ececa610eed952d,
0xbe6eefd23aebb19b,
0xbe658bda5ec8aafe,
0x3e05d77b1c39da47,
0x3df43d63ca6b9538,
0xbd9555dba21d3a76,
],
[
0x3c4fe3057c054c4c,
0xbfc2f2072e638cf3,
0x3f64df208bbd408f,
0x3f992bb5e1e159a9,
0xbf3ba181c0657121,
0xbf53fe9d5ba9fb4a,
0x3ef5d17600fd9483,
0x3efe26d373f4ffea,
0xbea0509689b62f58,
0xbe9a70f1b160bc28,
0x3e3c4fa74da61f57,
0x3e2e44cc2feeed24,
0xbdcf87b5a4255e18,
0xbdb81bebaaec3c7c,
],
[
0xbfc2768d29c69936,
0x3c822565e3c86e7f,
0x3fb271811730b057,
0xbf49a8df96a15635,
0xbf787c81cf1b96e9,
0x3f14549cdbcc339c,
0x3f29ed2567282f3d,
0xbec6e4137cf2411c,
0xbecd53321406f402,
0x3e6a98443cd6fc90,
0x3e6494adc7c6521b,
0xbe02e1d787962f20,
0xbdf3653d7772f823,
0x3d928dbabedf1d31,
],
[
0xbc4bd8c1a48b98b5,
0x3fc1ff5eec6a01cd,
0xbf61e438b722bfe0,
0xbf97ed5fffc1c711,
0x3f37b7997ba917ee,
0x3f53081def95b78f,
0xbef2c5f5ec3350b1,
0xbefcc11a59469f36,
0x3e9c2c3894e80d05,
0x3e9946d150444e47,
0xbe388ce82c32666d,
0xbe2d044ba8d28f8d,
0x3dcb7a77f047774e,
0x3db72cad88060e0b,
],
[
0x3fc194eba75b32f9,
0xbc7faef3b1a5e821,
0xbfb190f7dc273599,
0x3f462bb47a5c8cc1,
0x3f7756ef20f501d3,
0xbf1198b0baaa058c,
0xbf28be8cf854b2d7,
0x3ec3dd6f88b69c69,
0x3ecc09c72877c12b,
0xbe6728ec2da828ad,
0xbe63b897c2c7b139,
0x3e008344f3db34b5,
0x3df2a1a5ef6e57ff,
0xbd904c70dc90d3bc,
],
[
0x3c4888e51c985983,
0xbfc12dd57bf18ad9,
0x3f5f1e1e7f3937bf,
0x3f96d9afe883018e,
0xbf34a538a4802887,
0xbf52316250b44b33,
0x3ef05f11562b37ff,
0x3efb86bad38b7b43,
0xbe98a1b250bb7d2f,
0xbe983dca646511fe,
0x3e3588f1be962e6d,
0x3e2be36513882145,
0xbdc831edc515fa05,
0xbdb6520b1c14e6f6,
],
[
0xbfc0d0d36473e98c,
0x3c7bf69dcc64d467,
0x3fb0cda9974abd9e,
0xbf4367f38f204418,
0xbf7656b75e3b5a4f,
0x3f0ed82abf947b58,
0x3f27b4e5b765cd39,
0xbec171fd1a726d85,
0xbecae62a6c526e99,
0x3e64648b6f18fd5e,
0x3e62f3b53a117819,
0xbdfd2c72ca1c90f8,
0xbdf1f085bd0fab41,
0x3d8ce566a6478844,
],
[
0xbc45ca84b624bf30,
0x3fc076826cc2c191,
0xbf5b62885e006ac2,
0xbf95e7f53001e43e,
0x3f322ebeb8d9f78a,
0x3f517444a79fe500,
0xbeece06f1cc95449,
0xbefa7006e603acdb,
0x3e95c42dcf4cb755,
0x3e9750c9cbea6158,
0xbe3313f5f0d7c83f,
0xbe2ade4e1f9c8db0,
0x3dc57f801723eb46,
0x3db58a5b7895974e,
],
[
0x3fc02455675ab6d2,
0xbc78e3f9125495c0,
0xbfb021c155a72057,
0x3f412be56fc16829,
0x3f75749d556a12df,
0xbf0b51f1f9db1832,
0xbf26c96a07103bad,
0x3ebef3a7bef163ee,
0x3ec9e206eb2ce693,
0xbe6220bf8745e1a3,
0xbe6242a68bb3eb7f,
0x3df9ffc1c0ac86f0,
0x3df14fb8a5e39fac,
0xbd89d286115bf7a6,
],
[
0x3c4380441b0b0c6a,
0xbfbfa8b41711c839,
0x3f5857d39699926e,
0x3f9511c6dadaa99b,
0xbf302c289dbbcc5a,
0xbf50cc2238d1bf52,
0x3ee9b64d5a4aa86c,
0x3ef976fb01920f8b,
0xbe93693a8cc790fd,
0xbe967b9496685d1b,
0x3e310c25f77e4f25,
0x3e29f17f3e13ecd9,
0xbdc3414987038eec,
0xbdb4d3b9dc98b4b1,
],
[
0xbfbf161d0c28b48b,
0x3c765dc0b792167d,
0x3faf11d837aa6e5c,
0xbf3eab76da4d4788,
0xbf74ab329f05bdc5,
0x3f086ada57d5a903,
0x3f25f6e78e464093,
0xbebbb2720677d252,
0xbec8f8525854df7e,
0x3e603f882886871e,
0x3e61a293516bd71e,
0xbdf75995de0fcb8d,
0xbdf0bd411cec4c70,
0x3d873e4d136b8f8b,
],
[
0xbc418c91b7939a2c,
0x3fbe8727daa3daec,
0xbf55d353e285455c,
0xbf94524d4813cbac,
0x3f2d037574df02eb,
0x3f50356bb7473b5d,
0xbee7156bfaea76f5,
0xbef896d7dbd3810e,
0x3e9172c5e1abd5c6,
0x3e95baadfc18282d,
0xbe2eb240c0cc9c75,
0xbe2919d9b9e0a0b9,
0x3dc15e50952db326,
0x3db42c51c147e65d,
],
[
0x3fbe0357c158b118,
0xbc74361048923786,
0xbfadffc2fc1a90f5,
0x3f3b9b82ae081404,
0x3f73f64e05315346,
0xbf05fe4b66e63077,
0xbf2539518d55a85c,
0x3eb8f8d02bcc2897,
0x3ec825039164993f,
0xbe5d566920c2c9ab,
0xbe61111befe6e2b8,
0x3df51d4c70439a2f,
0x3df0375b8b7f66b3,
0xbd850e412a9bff06,
],
[
0x3c3fc518d24f616f,
0xbfbd8293aa55d18f,
0x3f53b6beb83f212f,
0x3f93a5ccbc12a602,
0xbf2a3765d26776da,
0xbf4f5ab33747e91e,
0x3ee4df6f1a6da3df,
0x3ef7cbd49b834b1a,
0xbe8f9607c8362a02,
0xbe950b374b4c0d92,
0x3e2bd1f7180ef6ba,
0x3e2854abbc0c7de8,
0xbdbf87db79765a71,
0xbdb3926b2e69585a,
],
[
0xbfbd0b36e5737457,
0x3c726585805a22d8,
0x3fad082ce3c6b4a2,
0xbf3905d00c5e9c91,
0xbf7352b073fcfa33,
0x3f03f1ccfed800a0,
0x3f248d74577878fa,
0xbeb6a9ef1ba885bb,
0xbec764d8b51b8b1c,
0x3e5aa78ed7e846ea,
0x3e608c46d6182272,
0xbdf33581106d6379,
0xbdef78fcff7e62ac,
0x3d832f39fadd44c1,
],
[
0xbc3cea65a1050db5,
0x3fbc96700bf039e1,
0xbf51ec0b5de4bafb,
0xbf93095734a2441c,
0x3f27d74e122576e6,
0x3f4e636fe2585c98,
0xbee2fe11959a4f56,
0xbef712e4d39f1e3a,
0x3e8cc3ac0e3e5a57,
0x3e946ad2493deefc,
0xbe295ca72f5034a9,
0xbe279fa7ce9e0732,
0x3dbcc7fe67868a0c,
0x3db30482af82a3e3,
],
[
0x3fbc29ae8400a31f,
0xbc70d624180ba1cb,
0xbfac27138da31b39,
0x3f36d141fcbed86f,
0x3f72bdc71061ff60,
0xbf0231cf645337e2,
0xbf23f0bf3a855d26,
0x3eb4b05ea24a407d,
0x3ec6b52ac7705590,
0xbe585a82e1962dc0,
0xbe60126ea6b0d3d5,
0x3df191f5eda7279d,
0x3dee96ae83ca14cd,
0xbd8191df21c5049c,
],
[
0x3c3a725871d54f1e,
0xbfbbbf246914235e,
0x3f5062daee353d6e,
0x3f927a96f174b658,
0xbf25cdb5dea7195a,
0xbf4d818348f8b2ae,
0x3ee160aab6b91ebc,
0x3ef6698d6e3dde27,
0xbe8a56325d99553d,
0xbe93d7884737e010,
0x3e273dfa1c71021f,
0x3e26f8d7f1f40cd1,
0xbdba675a7a73a904,
0xbdb28141631b6b6a,
],
[
0xbfbb5b8273b75054,
0x3c6ef081b4f49e8c,
0x3fab59418c36a598,
0xbf34eafeaa92d6ad,
0xbf7235801af9154a,
0x3f00af9747e26a9d,
0x3f23611db02b9d63,
0xbeb2fbe420b9b6ee,
0xbec613cc016f8c79,
0x3e565cfa070daeea,
0x3e5f4465aa2f3924,
0xbdf0262c878a66b9,
0xbdedc58772bf3be9,
0x3d802a5900529d85,
],
[
0xbc384aa4fbafc099,
0x3fbaf9cb49c4f934,
0xbf4e1d930b512b68,
0xbf91f7a8fec6eb30,
0x3f240a553105f569,
0x3f4cb20c812efe23,
0xbedff5195120ac4b,
0xbef5cdc48eb38532,
0x3e883b07bbc753fc,
0x3e934fb5f8f40030,
0xbe2566435ca657bc,
0xbe265e90a4422b0e,
0x3db85513c8865e5e,
0x3db2077cf853887e,
],
[
0x3fba9e13a0db6429,
0xbc6c8fc49071e774,
0xbfaa9c1ca2161ab5,
0x3f3344a09efdef03,
0x3f71b82c43097eb4,
0xbefebfb97beaa2a2,
0xbf22dcdb1b1095b7,
0x3eb180048a016cca,
0x3ec57eee38f33b34,
0xbe54a0cd96cea7fb,
0xbe5e74d5d6cab4bf,
0x3dedd0d2df39d680,
0x3ded0395b2b002c8,
0xbd7ddf4961633a0a,
],
[
0x3c366129d7cdda38,
0xbfba4407e04298d1,
0x3f4bcc9df0cefa78,
0x3f917f0266db20d1,
0xbf2280a052210834,
0xbf4bf2ada1f36d37,
0x3edd83d57dbfecdc,
0x3ef53dd97231158c,
0xbe8663c0bacd7255,
0xbe92d1fb6736ed8c,
0x3e23c94599131ea8,
0x3e25cf6504993c56,
0xbdb683e80faa06e8,
0xbdb19631ce482d25,
],
[
0xbfb9ef3bb2213b0a,
0x3c6a7968b4e09650,
0x3fa9ed82007a9965,
0xbf31d2fdeeb2c55d,
0xbf71446866fe7a9f,
0x3efc73b6851770db,
0x3f22628de4e7b12f,
0xbeb03303cb6b68e6,
0xbec4f50f5682c560,
0x3e531ae61db4a107,
0x3e5db4387e5cde8e,
0xbdeba1fde369500d,
0xbdec4f2d9a6ca338,
0x3d7bb4d1335b858d,
],
[
0xbc34b2fde0d60fa7,
0x3fb99be744018c90,
0xbf49c3f52a2af15f,
0xbf910f5ca51f983b,
0x3f2126c8e8c80fca,
0x3f4b416f7d4ef26a,
0xbedb5e2e533b6e90,
0xbef4b86226f8434d,
0x3e84c505ef1872eb,
0x3e925d2f3adca31c,
0xbe225d5434e81821,
0xbe254a1c99a03af6,
0x3db4e94bd07d8a6f,
0x3db12c7cbd69804c,
],
[
0x3fb94d3276914e50,
0xbc68a054fb407a48,
0xbfa94bac1950e23d,
0x3f308d4ff8f228ce,
0x3f70d90d29bf518f,
0xbefa6d56164ba1ad,
0xbf21f107d97a6716,
0x3eae1a62755de03b,
0x3ec474ea4fd020bb,
0xbe51c2774b54b19c,
0xbe5d00e34f95d0da,
0x3de9b39b1acf50d9,
0x3deba6dd96aebb23,
0xbd79c96d87e11eb2,
],
[
0x3c3336a57f397478,
0xbfb8ffc9bd24fe07,
0x3f47f7d46ab331c5,
0x3f90a7a725d3fb50,
0xbf1fea1728ee3283,
0xbf4a9cac69f01f7f,
0x3ed977f48dd3df20,
0x3ef43c2d8dc63c79,
0xbe8355d08ef44310,
0xbe91f054ae49cd66,
0x3e211ab3e0c31baf,
0x3e24cdaac969d4dd,
0xbdb37cc6ee5acaa3,
0xbdb0c99a10f9a149,
],
[
0xbfb8b67a2481077c,
0x3c66ff282f533f22,
0x3fa8b51f21068dcb,
0xbf2ed935c7af3be8,
0xbf707522a502e55b,
0x3ef8a1960639d120,
0x3f21874a473e56eb,
0xbeac10cf4666ac95,
0xbec3fd6b83d91ed3,
0x3e509072f594172b,
0x3e5c5969c0715bb8,
0xbde7fbce7fa1d168,
0xbdeb09679444c7c3,
0x3d7813cf0f88ee50,
],
[
0xbc31df60e9bfe4d4,
0x3fb86e51be0a9153,
0xbf465ed1b387e0dc,
0xbf9046fc5a218a13,
0x3f1dca617fec07ec,
0x3f4a030022145da6,
0xbed7c761870dcbbb,
0xbef3c83888dc1ceb,
0x3e820edd9a880dbb,
0x3e918a9391ba3964,
0xbe1ff64d38db85c5,
0xbe245927a9c55d79,
0x3db2377b4d9f1923,
0x3db06cdd9727d79f,
],
[
0x3fb829d06fee9265,
0xbc6580de45a47453,
0xbfa8289a526d76b3,
0x3f2cd680355cdc28,
0x3f7017d70f5091c7,
0xbef707978e429e47,
0xbf21247ce0bc7ead,
0x3eaa3f6135c91ad4,
0x3ec38da7a255e74a,
0xbe4efe445dfcd93f,
0xbe5bbc925c9c4b66,
0x3de6729121dc2aec,
0x3dea75b6fa634294,
0xbd768bad7f03cc70,
],
[
0x3c30ac9cf88bc679,
0xbfb7e656efb009ad,
0x3f44f15066f3d3cc,
0x3f8fd932c26aacb2,
0xbf1be460dd833bb1,
0xbf49733b5917b1ec,
0x3ed64488c387546b,
0x3ef35ba58b547387,
0xbe80ea46c863072a,
0xbe912b31edd1db5a,
0x3e1df32c5fd2f995,
0x3e23ebca247be24d,
0xbdb113cb7b7c70db,
0xbdb015b1107de244,
],
[
0xbfb7a62320798174,
0x3c9957b1463c023c,
0x3fa7a50ca4504ab9,
0xbf2b095ccb52d0c4,
0xbf6f80ef11d944d8,
0x3ef59822dd4acc8e,
0x3f20c7e6a7116068,
0xbea89e01408239eb,
0xbec324d470ec229a,
0x3e4d13ff680fb32a,
0x3e5b2943a9554368,
0xbde5283c9b463c57,
0xbde9e8488fabd47c,
0x3d7a620fb02a1ce9,
],
];

396
vendor/pxfm/src/bessel/jincpi.rs vendored Normal file
View File

@@ -0,0 +1,396 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![allow(clippy::excessive_precision)]
use crate::bessel::alpha1::bessel_1_asympt_alpha_fast;
use crate::bessel::beta1::bessel_1_asympt_beta_fast;
use crate::bessel::j1_coeffs::{J1_COEFFS, J1_ZEROS, J1_ZEROS_VALUE};
use crate::bessel::j1_coeffs_taylor::J1_COEFFS_TAYLOR;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::polyeval::{f_polyeval9, f_polyeval19};
use crate::round::RoundFinite;
use crate::sin_helper::sin_dd_small_fast;
/// Normalized jinc 2*J1(PI\*x)/(pi\*x)
///
/// Returns `1.0` for `|x| <= f64::EPSILON`, `0.0` for infinite inputs, and NaN for NaN.
/// Only `|x|` is forwarded to the evaluation kernels.
pub fn f_jincpi(x: f64) -> f64 {
    let bits = x.to_bits();
    // Shifting the sign bit out leaves a key that is ordered by magnitude.
    let magnitude_key = bits.wrapping_shl(1);
    if magnitude_key <= 0x7960000000000000u64 {
        // |x| <= f64::EPSILON
        return 1.0;
    }
    if magnitude_key >= 0x7ffu64 << 53 {
        // |x| == inf or x == NaN
        return if x.is_infinite() { 0. } else { x + f64::NAN };
    }

    let abs_bits: u64 = bits & 0x7fff_ffff_ffff_ffff;
    let abs_x = f64::from_bits(abs_bits);
    if abs_bits < 0x3fd3333333333333 {
        // |x| < 0.3
        return jincpi_near_zero(abs_x);
    }
    // |x| < 74.60109, and the scaled argument PI * |x| is below the same bound
    if abs_bits < 0x4052a6784230fcf8u64 && abs_x * std::f64::consts::PI < 74.60109 {
        return jinc_small_argument_fast(abs_x);
    }
    jinc_asympt_fast(abs_x)
}
/*
Evaluates the asymptotic expansion of J1, where x is the J1 argument (PI * ox in the code below):
J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
Using cos(t - PI/2) = sin(t) this becomes:
J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
To avoid absorbing the small term (-PI/4 - alpha(x)) into a large x, the value actually evaluated is:
J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))
The result returned is 2 * J1 * (1/x).
*/
#[inline]
fn jinc_asympt_fast(ox: f64) -> f64 {
// PI as a double-double pair.
// NOTE(review): the small correction word is passed first, so `new` presumably takes (lo, hi) — confirm.
const PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3ca1a62633145c07),
f64::from_bits(0x400921fb54442d18),
);
// x = PI * ox, the argument of J1
let x = DoubleDouble::quick_mult_f64(PI, ox);
const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc8cbc0d30ebfd15),
f64::from_bits(0x3fe9884533d43651),
);
const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc81a62633145c07),
f64::from_bits(0xbfe921fb54442d18),
);
// Argument reduction is performed on the unscaled input `ox`, because the angle is PI * ox.
// k = round((ox * PI) / (2 * PI)) = round(ox / 2)
let kd = (ox * 0.5).round_finite();
// rem = ox - 2 * k (presumably a fused multiply-add); the reduced angle is PI * rem
let rem = f_fmla(kd, -2., ox);
let angle = DoubleDouble::quick_mult_f64(PI, rem);
// alpha and beta are evaluated from 1 / (PI * ox)
let recip = x.recip();
let alpha = bessel_1_asympt_alpha_fast(recip);
let beta = bessel_1_asympt_beta_fast(recip);
// A full (not quick) subtraction is used: otherwise cancellation sometimes happens.
// x0pi34 = -PI/4 - alpha
let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
// r0 = reduced angle - PI/4 - alpha
let r0 = DoubleDouble::full_dd_add(angle, x0pi34);
let m_sin = sin_dd_small_fast(r0);
let z0 = DoubleDouble::quick_mult(beta, m_sin);
// scale = sqrt(2/PI) / sqrt(x)
let dx_sqrt = x.fast_sqrt();
let scale = DoubleDouble::div(SQRT_2_OVER_PI, dx_sqrt);
let p = DoubleDouble::quick_mult(scale, z0);
// p * (1/x), then doubled after rounding to f64
DoubleDouble::quick_mult(p, recip).to_f64() * 2.
}
/// Evaluates jinc close to zero as twice the ratio of two degree-7 polynomials in `x`.
///
/// The polynomial coefficients are stored as bit-pattern pairs and evaluated in
/// double-double arithmetic; the quotient is doubled and rounded to `f64`.
/// `f_jincpi` in this file calls it for `|x| < 0.3`.
#[inline]
pub(crate) fn jincpi_near_zero(x: f64) -> f64 {
// Polynomial Generated by Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=BesselJ[1,x*Pi]/(x*Pi)
// {err,approx}=MiniMaxApproximation[f[z],{z,{2^-23,0.3},7,7},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// Numerator coefficients, lowest degree first.
// NOTE(review): each pair looks like (low word bits, high word bits) — confirm against `from_bit_pair`.
const P: [(u64, u64); 8] = [
(0xbb2bddffe9450ca6, 0x3fe0000000000000),
(0x3c3b0b0a7393eccb, 0xbfce4cd3c3c87615),
(0xbc7f9f784e0594a6, 0xbfe043283b1e383f),
(0xbc6af77bca466875, 0x3fcee46673cf919f),
(0xbc0b62837b038ea8, 0x3fc0b7cc55c9a4af),
(0x3c5c08841871f124, 0xbfb002b65231dcdd),
(0xbc26cf2d89ea63bc, 0xbf849022a7a0712b),
(0xbbe535d492c0ac1c, 0x3f740b48910d5105),
];
// Denominator coefficients, lowest degree first.
const Q: [(u64, u64); 8] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3c4aba6577f3253e, 0xbfde4cd3c3c87615),
(0x3c52f58f82e3438c, 0x3fcbd0a475006cf9),
(0x3c36e496237d6b49, 0xbfb9f4cea13b06e9),
(0xbbbbf3e4ef3a28fe, 0x3f967ed0cee85392),
(0x3c267ac442bb3bcf, 0xbf846e192e22f862),
(0x3bd84e9888993cb0, 0x3f51e0fff3cfddee),
(0x3bd7c0285797bd8e, 0xbf3ea7a621fa1c8c),
];
// Powers of x shared by the numerator and the denominator.
let x2 = DoubleDouble::from_exact_mult(x, x);
let x4 = x2 * x2;
// Numerator: coefficients are paired as (P[2k+1] * x + P[2k]), then the pairs are
// combined with x^2 and x^4.
let p0 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[1]),
x,
DoubleDouble::from_bit_pair(P[0]),
);
let p1 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[3]),
x,
DoubleDouble::from_bit_pair(P[2]),
);
let p2 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[5]),
x,
DoubleDouble::from_bit_pair(P[4]),
);
let p3 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[7]),
x,
DoubleDouble::from_bit_pair(P[6]),
);
let q0 = DoubleDouble::mul_add(x2, p1, p0);
let q1 = DoubleDouble::mul_add(x2, p3, p2);
let p_num = DoubleDouble::mul_add(x4, q1, q0);
// Denominator: same pairing scheme with the Q coefficients (the temporaries are shadowed).
let p0 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[1]),
x,
DoubleDouble::from_bit_pair(Q[0]),
);
let p1 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[3]),
x,
DoubleDouble::from_bit_pair(Q[2]),
);
let p2 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[5]),
x,
DoubleDouble::from_bit_pair(Q[4]),
);
let p3 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[7]),
x,
DoubleDouble::from_bit_pair(Q[6]),
);
let q0 = DoubleDouble::mul_add(x2, p1, p0);
let q1 = DoubleDouble::mul_add(x2, p3, p2);
let p_den = DoubleDouble::mul_add(x4, q1, q0);
// The fitted function is J1(PI*x)/(PI*x); jinc is twice that.
DoubleDouble::quick_mult_f64(DoubleDouble::div(p_num, p_den), 2.).to_f64()
}
/// Evaluates jinc on the small range by searching for the tabulated zero or extremum
/// nearest to `PI * x`, then evaluating the series expansion stored for that point.
///
/// `x` is expected to be non-negative and finite; `f_jincpi` in this file passes `|x|`.
/// If the fast result cannot be rounded unambiguously, the evaluation is repeated by
/// `j1_small_argument_dd`.
#[inline]
pub(crate) fn jinc_small_argument_fast(x: f64) -> f64 {
    const PI: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3ca1a62633145c07),
        f64::from_bits(0x400921fb54442d18),
    );
    // INV_STEP = 1 / avg_step, where avg_step = 74.60109 / 47.0
    const INV_STEP: f64 = 0.6300176043004198;
    const J1_ZEROS_COUNT: f64 = (J1_ZEROS.len() - 1) as f64;

    // dx = PI * x, the argument of J1
    let dx = DoubleDouble::quick_mult_f64(PI, x);
    let fx = dx.hi * INV_STEP;
    // SAFETY: `f64::min` ignores a NaN operand and caps the value at the last table
    // index, so the converted value is never NaN or infinite and never exceeds
    // `J1_ZEROS.len() - 1`. It is non-negative as long as `x >= 0`, which `f_jincpi`
    // guarantees by passing `|x|`.
    // NOTE(review): any other crate-internal caller must also pass `x >= 0`.
    let idx0 = unsafe { fx.min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
    // SAFETY: same argument as above; `ceil` of a non-negative value stays non-negative.
    let idx1 = unsafe { fx.ceil().min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };

    // Choose whichever of the two neighbouring table points is closer to dx.
    let found_zero0 = DoubleDouble::from_bit_pair(J1_ZEROS[idx0]);
    let found_zero1 = DoubleDouble::from_bit_pair(J1_ZEROS[idx1]);
    let dist0 = (found_zero0.hi - dx.hi).abs();
    let dist1 = (found_zero1.hi - dx.hi).abs();
    let (found_zero, idx, dist) = if dist0 < dist1 {
        (found_zero0, idx0, dist0)
    } else {
        (found_zero1, idx1, dist1)
    };

    if idx == 0 {
        return jincpi_near_zero(x);
    }

    // We hit the tabulated point exactly, so return its stored value directly.
    if dist == 0. {
        return DoubleDouble::quick_mult_f64(
            DoubleDouble::from_f64_div_dd(f64::from_bits(J1_ZEROS_VALUE[idx]), dx),
            2.,
        )
        .to_f64();
    }

    // Offset from the expansion point; only needed once the exact-hit case is ruled out.
    let r = DoubleDouble::quick_dd_sub(dx, found_zero);

    // `dist` is already an absolute value, so no further `abs` is required.
    let is_zero_too_close = dist < 1e-3;
    let c = if is_zero_too_close {
        &J1_COEFFS_TAYLOR[idx - 1]
    } else {
        &J1_COEFFS[idx - 1]
    };

    // Terms 5..=23 are evaluated in plain f64 from the second component of each pair.
    let p = f_polyeval19(
        r.hi,
        f64::from_bits(c[5].1),
        f64::from_bits(c[6].1),
        f64::from_bits(c[7].1),
        f64::from_bits(c[8].1),
        f64::from_bits(c[9].1),
        f64::from_bits(c[10].1),
        f64::from_bits(c[11].1),
        f64::from_bits(c[12].1),
        f64::from_bits(c[13].1),
        f64::from_bits(c[14].1),
        f64::from_bits(c[15].1),
        f64::from_bits(c[16].1),
        f64::from_bits(c[17].1),
        f64::from_bits(c[18].1),
        f64::from_bits(c[19].1),
        f64::from_bits(c[20].1),
        f64::from_bits(c[21].1),
        f64::from_bits(c[22].1),
        f64::from_bits(c[23].1),
    );
    // The five lowest terms are accumulated in double-double.
    let mut z = DoubleDouble::mul_f64_add(r, p, DoubleDouble::from_bit_pair(c[4]));
    z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[3]));
    z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[2]));
    z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[1]));
    z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[0]));
    // jinc = 2 * J1(dx) / dx
    z = DoubleDouble::quick_mult_f64(DoubleDouble::div(z, dx), 2.);

    // Rounding test: accept the fast result only if adding or subtracting the error
    // bound gives the same f64.
    let err = f_fmla(
        z.hi,
        f64::from_bits(0x3c70000000000000), // 2^-56
        f64::from_bits(0x3bf0000000000000), // 2^-64
    );
    let ub = z.hi + (z.lo + err);
    let lb = z.hi + (z.lo - err);
    if ub == lb {
        return z.to_f64();
    }
    j1_small_argument_dd(r, c, dx)
}
fn j1_small_argument_dd(r: DoubleDouble, c0: &[(u64, u64); 24], inv_scale: DoubleDouble) -> f64 {
let c = &c0[15..];
let p0 = f_polyeval9(
r.to_f64(),
f64::from_bits(c[0].1),
f64::from_bits(c[1].1),
f64::from_bits(c[2].1),
f64::from_bits(c[3].1),
f64::from_bits(c[4].1),
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
);
let c = c0;
let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0]));
let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo);
let z = DoubleDouble::div(p, inv_scale);
DoubleDouble::quick_mult_f64(z, 2.).to_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_jincpi` against reference values and its handling of non-finite inputs.
    #[test]
    fn test_jincpi() {
        // (input, expected) pairs, compared for exact equality.
        let cases: &[(f64, f64)] = &[
            (f64::EPSILON, 1.0),
            (0.5000000000020244, 0.7217028449014163),
            (73.81695991658546, -0.0004417546638317049),
            (0.01, 0.9998766350182722),
            (0.9, 0.28331697846510623),
            (3.831705970207517, -0.036684415010255086),
            (-3.831705970207517, -0.036684415010255086),
            (0.000000000000000000000000000000000000008827127, 1.0),
            (-0.000000000000000000000000000000000000008827127, 1.0),
            (5.4, -0.010821736808448256),
            (
                77.743162408196766932633181568235159,
                -0.00041799098646950523,
            ),
            (
                84.027189586293545175976760219782591,
                -0.00023927934929850555,
            ),
            (f64::NEG_INFINITY, 0.0),
            (f64::INFINITY, 0.0),
        ];
        for &(input, expected) in cases {
            assert_eq!(f_jincpi(input), expected, "f_jincpi({})", input);
        }
        assert!(f_jincpi(f64::NAN).is_nan());
    }
}

249
vendor/pxfm/src/bessel/jincpif.rs vendored Normal file
View File

@@ -0,0 +1,249 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::bessel::j1_coeffs::{J1_ZEROS, J1_ZEROS_VALUE};
use crate::bessel::j1f::{j1f_asympt_alpha, j1f_asympt_beta};
use crate::bessel::j1f_coeffs::J1F_COEFFS;
use crate::bessel::trigo_bessel::sin_small;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::polyeval::{f_polyeval6, f_polyeval14};
use crate::round::RoundFinite;
/// Normalized jinc 2*J1(PI\*x)/(pi\*x)
///
/// ULP 0.5
///
/// Returns `1.0` for `|x| <= f32::EPSILON`, `0.0` for infinite inputs, and NaN for NaN.
/// Only `|x|` is forwarded to the evaluation kernels.
pub fn f_jincpif(x: f32) -> f32 {
    let bits = x.to_bits();
    // Shifting the sign bit out leaves a key that is ordered by magnitude.
    let magnitude_key = bits.wrapping_shl(1);
    if magnitude_key <= 0x6800_0000u32 {
        // |x| <= f32::EPSILON
        return 1.;
    }
    if magnitude_key >= 0xffu32 << 24 {
        // |x| == inf or x == NaN
        return if x.is_infinite() { 0. } else { x + f32::NAN };
    }

    let abs_bits = bits & 0x7fff_ffff;
    let abs_x = f32::from_bits(abs_bits);
    if abs_bits <= 0x3e800000u32 {
        // |x| <= 0.25
        return jincf_near_zero(abs_x);
    }
    // |x| < 74.60109, and the scaled argument PI * |x| is below the same bound
    if abs_bits < 0x429533c2u32 && abs_x * std::f32::consts::PI < 74.60109 {
        return jincpif_small_argument(abs_x);
    }
    jincpif_asympt(abs_x) as f32
}
/// Evaluates jinc close to zero as twice the ratio of two six-coefficient polynomials,
/// computed in `f64` and rounded to `f32` at the end.
#[inline]
fn jincf_near_zero(x: f32) -> f32 {
    // Generated in Wolfram Mathematica:
    // <<FunctionApproximations`
    // ClearAll["Global`*"]
    // f[x_]:=BesselJ[1,x*Pi]/(x*Pi)
    // {err,approx}=MiniMaxApproximation[f[z],{z,{2^-23,0.3},6,0},WorkingPrecision->60]
    // poly=Numerator[approx][[1]];
    // coeffs=CoefficientList[poly,z];
    // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
    // NOTE(review): the script above requests degrees {6,0}, while the code below divides
    // two six-coefficient polynomials — confirm which script produced these tables.
    const NUMERATOR: [u64; 6] = [
        0x3fe0000000000002,
        0xbfd46cd1822a5aa0,
        0xbfde583c923dc6f4,
        0x3fd3834f47496519,
        0x3fb8118468756e6f,
        0xbfafaff09f13df88,
    ];
    const DENOMINATOR: [u64; 6] = [
        0x3ff0000000000000,
        0xbfe46cd1822a4cb0,
        0x3fd2447a026f477a,
        0xbfc6bdf2192404e5,
        0x3fa0cf182218e448,
        0xbf939ab46c3f7a7d,
    ];

    let t = f64::from(x);
    let numerator = f_polyeval6(
        t,
        f64::from_bits(NUMERATOR[0]),
        f64::from_bits(NUMERATOR[1]),
        f64::from_bits(NUMERATOR[2]),
        f64::from_bits(NUMERATOR[3]),
        f64::from_bits(NUMERATOR[4]),
        f64::from_bits(NUMERATOR[5]),
    );
    let denominator = f_polyeval6(
        t,
        f64::from_bits(DENOMINATOR[0]),
        f64::from_bits(DENOMINATOR[1]),
        f64::from_bits(DENOMINATOR[2]),
        f64::from_bits(DENOMINATOR[3]),
        f64::from_bits(DENOMINATOR[4]),
        f64::from_bits(DENOMINATOR[5]),
    );
    // The fitted function is J1(PI*x)/(PI*x); jinc is twice that.
    (numerator / denominator * 2.) as f32
}
/// Evaluates jinc on the small range by searching for the tabulated zero or extremum
/// nearest to `PI * |ox|`, then evaluating the polynomial stored for that point.
#[inline]
fn jincpif_small_argument(ox: f32) -> f32 {
    const PI: f64 = f64::from_bits(0x400921fb54442d18);
    // INV_STEP = 1 / avg_step, where avg_step = 74.60109 / 47.0
    const INV_STEP: f64 = 0.6300176043004198;
    const J1_ZEROS_COUNT: f64 = (J1_ZEROS.len() - 1) as f64;

    // Argument of J1; the final division uses the signed value.
    let scaled = ox as f64 * PI;
    let magnitude = f64::from_bits(scaled.to_bits() & 0x7fff_ffff_ffff_ffff);

    let slot = magnitude * INV_STEP;
    // SAFETY: `magnitude` has its sign bit cleared, so `slot` is non-negative or NaN;
    // `f64::min` ignores a NaN operand and caps the value at the last table index, so
    // the converted value is finite and within `0..=J1_ZEROS.len() - 1`.
    let lower_idx = unsafe { slot.min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
    // SAFETY: as above; `ceil` keeps a non-negative value non-negative.
    let upper_idx = unsafe { slot.ceil().min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };

    // Choose whichever of the two neighbouring table points is closer.
    let lower_zero = DoubleDouble::from_bit_pair(J1_ZEROS[lower_idx]);
    let upper_zero = DoubleDouble::from_bit_pair(J1_ZEROS[upper_idx]);
    let lower_dist = (lower_zero.hi - magnitude).abs();
    let upper_dist = (upper_zero.hi - magnitude).abs();
    let lower_is_closer = lower_dist < upper_dist;
    let (nearest, idx, dist) = if lower_is_closer {
        (lower_zero, lower_idx, lower_dist)
    } else {
        (upper_zero, upper_idx, upper_dist)
    };

    if idx == 0 {
        return jincf_near_zero(ox);
    }
    // We hit the tabulated point exactly, so return its stored value directly.
    if dist == 0. {
        return (f64::from_bits(J1_ZEROS_VALUE[idx]) / scaled * 2.) as f32;
    }

    let coeffs = &J1F_COEFFS[idx - 1];
    // Offset from the expansion point, subtracting both words of the stored zero.
    let offset = (magnitude - nearest.hi) - nearest.lo;
    let poly = f_polyeval14(
        offset,
        f64::from_bits(coeffs[0]),
        f64::from_bits(coeffs[1]),
        f64::from_bits(coeffs[2]),
        f64::from_bits(coeffs[3]),
        f64::from_bits(coeffs[4]),
        f64::from_bits(coeffs[5]),
        f64::from_bits(coeffs[6]),
        f64::from_bits(coeffs[7]),
        f64::from_bits(coeffs[8]),
        f64::from_bits(coeffs[9]),
        f64::from_bits(coeffs[10]),
        f64::from_bits(coeffs[11]),
        f64::from_bits(coeffs[12]),
        f64::from_bits(coeffs[13]),
    );
    // jinc = 2 * J1(scaled) / scaled
    (poly / scaled * 2.) as f32
}
/*
Evaluates the asymptotic expansion of J1, where x is the J1 argument (PI * input in the code below):
J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
Using cos(t - PI/2) = sin(t) this becomes:
J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
To avoid absorbing the small term (-PI/4 - alpha(x)) into a large x, the value actually evaluated is:
J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))
The result returned is 2 * J1 / x.
*/
#[inline]
pub(crate) fn jincpif_asympt(x: f32) -> f64 {
    const PI: f64 = f64::from_bits(0x400921fb54442d18);
    const SQRT_2_OVER_PI: f64 = f64::from_bits(0x3fe9884533d43651);
    const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);

    let input = x as f64;
    // Argument of J1.
    let scaled = input * PI;

    let alpha = j1f_asympt_alpha(scaled);
    let beta = j1f_asympt_beta(scaled);

    // Argument reduction is performed on the unscaled input, because the angle is PI * input.
    // k = round((input * PI) / (2 * PI)) = round(input / 2)
    let k = (input * 0.5).round_finite();
    // reduced angle = PI * (input - 2 * k)
    let reduced_angle = f_fmla(k, -2., input) * PI;

    // phase = -PI/4 - alpha
    let phase = MPI_OVER_4 - alpha;
    let sine = sin_small(reduced_angle + phase);
    let amplitude = beta * sine;

    // sqrt(2/PI) * (1 / sqrt(x))
    let scale = SQRT_2_OVER_PI * j1f_rsqrt(scaled);
    let j1 = scale * amplitude;
    (j1 / scaled) * 2.
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_jincpif` against reference values and its handling of non-finite inputs.
    #[test]
    fn test_jincpif() {
        // (input, expected) pairs, compared for exact equality.
        let leading_cases: &[(f32, f32)] = &[
            (-102.59484, 0.00024380769),
            (102.59484, 0.00024380769),
            (100.08199, -0.00014386141),
            (0.27715185, 0.9081822),
            (0.007638072, 0.99992806),
            (-f32::EPSILON, 1.0),
            (f32::EPSILON, 1.0),
            (0.000000000000000000000000000000000000008827127, 1.0),
            (5.4, -0.010821743),
            (77.743162408196766932633181568235159, -0.00041799102),
            (-77.743162408196766932633181568235159, -0.00041799102),
            (84.027189586293545175976760219782591, -0.00023927793),
            (f32::INFINITY, 0.),
            (f32::NEG_INFINITY, 0.),
        ];
        for &(input, expected) in leading_cases {
            assert_eq!(f_jincpif(input), expected, "f_jincpif({})", input);
        }
        assert!(f_jincpif(f32::NAN).is_nan());
        assert_eq!(f_jincpif(-1.7014118e38), -0.0);
    }
}

643
vendor/pxfm/src/bessel/k0.rs vendored Normal file
View File

@@ -0,0 +1,643 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::exponents::rational128_exp;
use crate::logs::{fast_log_d_to_dd, log_dd};
use crate::polyeval::f_polyeval3;
/// Modified Bessel of the second kind of order 0
///
/// Max ULP 0.5
///
/// Special cases: `±0` gives `+inf`, `+inf` gives `0`, negative inputs and NaN give NaN,
/// and `x >= 709.7827128933841` gives `0`.
pub fn f_k0(x: f64) -> f64 {
    let bits = x.to_bits();
    if bits.wrapping_shl(1) == 0 {
        // |x| == 0
        return f64::INFINITY;
    }
    if bits >= 0x7ffu64 << 52 {
        // sign bit set (x < 0, including -inf), x == +inf, or x == NaN
        if x.is_infinite() {
            return if x.is_sign_positive() { 0. } else { f64::NAN };
        }
        return x + f64::NAN; // x == NaN, or x is negative and finite
    }

    // From here on x is positive and finite, so bit patterns order like the values.
    if bits >= 0x40862e42fefa39f0u64 {
        // x >= 709.7827128933841
        return 0.;
    }
    if bits <= 0x3ff0000000000000 {
        // x <= 1
        k0_small_dd(x).to_f64()
    } else {
        k0_asympt(x)
    }
}
/**
Computes I0 on interval [0; 1]
as rational approximation I0 = 1 + (x/2)^2 * Pn((x/2)^2)/Qm((x/2)^2)

The three lowest coefficients of each polynomial are applied in double-double;
the three highest are evaluated in plain f64 on the high word of (x/2)^2.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},5,5},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0_0_to_1_fast(x: f64) -> DoubleDouble {
let half_x = x * 0.5; // this is exact
// eval_x = (x/2)^2, the variable of both polynomials
let eval_x = DoubleDouble::from_exact_mult(half_x, half_x);
// Lowest three numerator coefficients, stored as bit-pattern pairs.
const P: [(u64, u64); 3] = [
(0xbae20452afd5045b, 0x3ff0000000000000),
(0xbc5b6ff3f140da20, 0x3fc93c83592c03de),
(0x3c25b350e9128d49, 0x3f904f33ef2de455),
];
// Highest three numerator coefficients, evaluated in f64.
let ps_num = f_polyeval3(
eval_x.hi,
f64::from_bits(0x3f433805a2fabaaa),
f64::from_bits(0x3ee5897e7f554966),
f64::from_bits(0x3e731401f0bb5de4),
);
// Horner steps in double-double for P[2], P[1], P[0].
let mut p_num = DoubleDouble::mul_f64_add(eval_x, ps_num, DoubleDouble::from_bit_pair(P[2]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[1]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[0]));
// Lowest three denominator coefficients, stored as bit-pattern pairs.
const Q: [(u64, u64); 3] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3c323fa63bef2b4e, 0xbfab0df29b4ff089),
(0x3bfedbdf64ed3110, 0x3f564662064157d2),
];
// Highest three denominator coefficients, evaluated in f64.
let ps_den = f_polyeval3(
eval_x.hi,
f64::from_bits(0xbef6bdbb484fd0a4),
f64::from_bits(0x3e8d6ced53309351),
f64::from_bits(0xbe13cff13854e945),
);
let mut p_den = DoubleDouble::mul_f64_add(eval_x, ps_den, DoubleDouble::from_bit_pair(Q[2]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[1]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[0]));
let p = DoubleDouble::div(p_num, p_den);
// I0 = 1 + (x/2)^2 * P/Q
let z = DoubleDouble::quick_mult(p, eval_x);
DoubleDouble::full_add_f64(z, 1.)
}
/**
Fast path for K0 on small arguments (`f_k0` calls it for x <= 1).

K0(x) + log(x) * I0(x) = P(x^2)
hence
K0(x) = P(x^2) - log(x)*I0(x)

If the result cannot be rounded unambiguously within the error bound used below,
the evaluation is repeated by `k0_small_hard`.

Generated in Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselK[0,x]+Log[x]BesselI[0,x]
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},5,5},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
pub(crate) fn k0_small_dd(x: f64) -> DoubleDouble {
// dx = x^2, the variable of both polynomials
let dx = DoubleDouble::from_exact_mult(x, x);
// Numerator coefficients. Only P[0..=2] are read as pairs here; the second components
// of P[3..=5] are repeated as plain f64 constants in the f_polyeval3 call below.
const P: [(u64, u64); 6] = [
(0x3c1be095d044e896, 0x3fbdadb014541eb2),
(0x3c7321baa1d0a2d9, 0x3fd1b9f19bc9019a),
(0xbc33ce33a244e5bd, 0x3f94ec39f8744183),
(0x3bd7008dfc623255, 0x3f3d85175b25727d),
(0xbb4aa2a1c4905d30, 0x3ed007a860ef3235),
(0xbae8daa77abd6f7f, 0x3e4839e32c19f31a),
];
// Highest three numerator coefficients, evaluated in f64.
let ps_num = f_polyeval3(
dx.hi,
f64::from_bits(0x3f3d85175b25727d),
f64::from_bits(0x3ed007a860ef3235),
f64::from_bits(0x3e4839e32c19f31a),
);
// Horner steps in double-double for P[2], P[1], P[0].
let mut p_num = DoubleDouble::mul_f64_add(dx, ps_num, DoubleDouble::from_bit_pair(P[2]));
p_num = DoubleDouble::mul_add(dx, p_num, DoubleDouble::from_bit_pair(P[1]));
p_num = DoubleDouble::mul_add(dx, p_num, DoubleDouble::from_bit_pair(P[0]));
// Lowest three denominator coefficients; Q[0] (1.0) is applied as a plain f64 below.
const Q: [(u64, u64); 3] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc2a82a292acdc83, 0xbf91be3a25c968d6),
(0xbb9d2c37183a6496, 0x3f23bac6961619d8),
];
// Highest three denominator coefficients, evaluated in f64.
let ps_den = f_polyeval3(
dx.hi,
f64::from_bits(0xbeac315b81faa1bf),
f64::from_bits(0x3e2ab2d2fbae0863),
f64::from_bits(0xbd9be23550f83df7),
);
let mut p_den = DoubleDouble::mul_f64_add(dx, ps_den, DoubleDouble::from_bit_pair(Q[2]));
p_den = DoubleDouble::mul_add(dx, p_den, DoubleDouble::from_bit_pair(Q[1]));
p_den = DoubleDouble::mul_add_f64(dx, p_den, f64::from_bits(0x3ff0000000000000));
// prod = P(x^2) / Q(x^2)
let prod = DoubleDouble::div(p_num, p_den);
let vi_log = fast_log_d_to_dd(x);
let vi = i0_0_to_1_fast(x);
// r = prod - log(x) * I0(x)
let r = DoubleDouble::mul_add(vi_log, -vi, prod);
// Rounding test: accept r only if adding or subtracting the error bound gives the same f64.
let err = r.hi * f64::from_bits(0x3c00000000000000); // 2^-63
let ub = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if ub == lb {
return r;
}
// Otherwise redo the evaluation on the slow path, reusing the I0 value.
k0_small_hard(x, vi)
}
/**
Slow path for K0 on small arguments, called by `k0_small_dd` when its rounding test fails.
Every coefficient is applied in double-double and the logarithm comes from `log_dd`;
`vi` is the I0(x) value already computed by the caller.

K0(x) + log(x) * I0(x) = P(x^2)
hence
K0(x) = P(x^2) - log(x)*I0(x)
Generated in Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselK[0,x]+Log[x]BesselI[0,x]
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},5,5},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn k0_small_hard(x: f64, vi: DoubleDouble) -> DoubleDouble {
// dx = x^2, the variable of both polynomials
let dx = DoubleDouble::from_exact_mult(x, x);
// Numerator coefficients, lowest degree first.
const P: [(u64, u64); 6] = [
(0x3c1be095d044e896, 0x3fbdadb014541eb2),
(0x3c7321baa1d0a2d9, 0x3fd1b9f19bc9019a),
(0xbc33ce33a244e5bd, 0x3f94ec39f8744183),
(0x3bd7008dfc623255, 0x3f3d85175b25727d),
(0xbb4aa2a1c4905d30, 0x3ed007a860ef3235),
(0xbae8daa77abd6f7f, 0x3e4839e32c19f31a),
];
// Horner evaluation from P[5] down to P[0], entirely in double-double.
let mut p_num = DoubleDouble::mul_add(
dx,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
p_num = DoubleDouble::mul_add(dx, p_num, DoubleDouble::from_bit_pair(P[3]));
p_num = DoubleDouble::mul_add(dx, p_num, DoubleDouble::from_bit_pair(P[2]));
p_num = DoubleDouble::mul_add(dx, p_num, DoubleDouble::from_bit_pair(P[1]));
p_num = DoubleDouble::mul_add(dx, p_num, DoubleDouble::from_bit_pair(P[0]));
// Denominator coefficients, lowest degree first; Q[0] (1.0) is applied as a plain f64 below.
const Q: [(u64, u64); 6] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc2a82a292acdc83, 0xbf91be3a25c968d6),
(0xbb9d2c37183a6496, 0x3f23bac6961619d8),
(0xbb32032e14c6c2b2, 0xbeac315b81faa1bf),
(0x3aa1a1dc04bfba96, 0x3e2ab2d2fbae0863),
(0x3a3e0f678099fcff, 0xbd9be23550f83df7),
];
let mut p_den = DoubleDouble::mul_add(
dx,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
p_den = DoubleDouble::mul_add(dx, p_den, DoubleDouble::from_bit_pair(Q[3]));
p_den = DoubleDouble::mul_add(dx, p_den, DoubleDouble::from_bit_pair(Q[2]));
p_den = DoubleDouble::mul_add(dx, p_den, DoubleDouble::from_bit_pair(Q[1]));
p_den = DoubleDouble::mul_add_f64(dx, p_den, f64::from_bits(0x3ff0000000000000));
// prod = P(x^2) / Q(x^2)
let prod = DoubleDouble::div(p_num, p_den);
let v_log = log_dd(x);
// prod - log(x) * I0(x)
DoubleDouble::mul_add(v_log, -vi, prod)
}
/**
Fast path for K0 on x > 1 (`f_k0` calls it for 1 < x < 709.7827128933841).

Generated in Wolfram
Computes sqrt(x)*exp(x)*K0(x)=Pn(1/x)/Qm(1/x)
hence
K0(x) = Pn(1/x)/Qm(1/x) / (sqrt(x) * exp(x))

If the result cannot be rounded unambiguously within the error bound used below,
the evaluation is repeated by `k0_asympt_hard`.
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},11,11},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k0_asympt(x: f64) -> f64 {
// recip = 1/x, the variable of both polynomials
let recip = DoubleDouble::from_quick_recip(x);
// NOTE(review): `i0_exp(x * 0.5)` presumably returns exp(x/2); it is multiplied in twice
// below instead of forming exp(x) at once — confirm against `i0_exp`.
let e = i0_exp(x * 0.5);
let r_sqrt = DoubleDouble::from_sqrt(x);
// Numerator coefficients, lowest degree first.
const P: [(u64, u64); 12] = [
(0xbc9a6a11d237114e, 0x3ff40d931ff62706),
(0x3cdd614ddf4929e5, 0x4040645168c3e483),
(0xbd1ecf9ea0af6ab2, 0x40757419a703a2ab),
(0xbd3da3551fb27770, 0x409d4e65365522a2),
(0xbd564d58855d1a46, 0x40b6dd32f5a199d9),
(0xbd6cf055ca963a8e, 0x40c4fd2368f19618),
(0x3d4b6cdfbdc058df, 0x40c68faa11ebcd59),
(0x3d5b4ce4665bfa46, 0x40bb6fbe08e0a8ea),
(0xbd4316909063be15, 0x40a1953103a5be31),
(0x3d12f3f8edf41af0, 0x4074d2cb001e175c),
(0xbcd7bba36540264f, 0x40316cffcad5f8f9),
(0xbc6bf28dfdd5d37d, 0x3fc2f487fe78b8d7),
];
// Powers of 1/x shared by the numerator and the denominator.
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
// Numerator: coefficients are paired as (P[2k+1] * recip + P[2k]), then the pairs are
// combined with x2, x4 and x8.
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
// Denominator coefficients, lowest degree first; Q[0] (1.0) is applied as a plain f64 below.
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcb9e8a5b17e696a, 0x403a485acd64d64a),
(0x3cd2e2e9c87f71f7, 0x4071518092320ecb),
(0xbd0d05bdb9431a2f, 0x4097e57e4c22c08e),
(0x3d5207068ab19ba9, 0x40b2ebadb2db62f9),
(0xbd64e37674083471, 0x40c1c0e4e9d6493d),
(0x3d3efb7a9a62b020, 0x40c3b94e8d62cdc7),
(0x3d47d6ce80a2114b, 0x40b93c2fd39e868e),
(0xbd1dfda61f525861, 0x40a1877a53a7f8d8),
(0x3d1236ff523dfcfa, 0x4077c3a10c2827de),
(0xbcc889997c9b0fe7, 0x4039a1d80b11c4a1),
(0x3c7ded0e8d73dddc, 0x3fdafe4913722123),
];
// Denominator: same pairing scheme with the Q coefficients (the temporaries are shadowed).
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
// z = Pn(1/x) / Qm(1/x)
let z = DoubleDouble::div(p_num, p_den);
// Divide by e * sqrt(x) * e
let r = DoubleDouble::div(z, e * r_sqrt * e);
// Rounding test: accept r only if adding or subtracting the error bound gives the same f64.
let err = r.hi * f64::from_bits(0x3c10000000000000); // 2^-62
let ub = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if ub != lb {
return k0_asympt_hard(x);
}
r.to_f64()
}
/**
High-precision fallback for K0(x) on the asymptotic branch, evaluated in
128-bit dyadic arithmetic.

Uses a degree 14/14 rational approximation in 1/x:
sqrt(x)*exp(x)*K0(x)=Pn(1/x)/Qm(1/x)
hence
K0(x) = Pn(1/x)/Qm(1/x) / (sqrt(x) * exp(x))

Coefficients generated in Wolfram:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},14,14},WorkingPrecision->90]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline(never)]
#[cold]
fn k0_asympt_hard(x: f64) -> f64 {
    // Numerator coefficients, lowest degree first.
    static P: [DyadicFloat128; 15] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0xa06c98ff_b1382cb2_be520f51_a7b8f970_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -122,
            mantissa: 0xc84d8d0c_7faeef84_e56abccc_3d70f8a2_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -118,
            mantissa: 0xd1a71096_3da22280_35768c9e_0d3ddf42_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -115,
            mantissa: 0xf202e474_3698aabb_05688da0_ea1a088d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -112,
            mantissa: 0xaaa01830_8138af4d_1137b2dd_11a216f5_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -110,
            mantissa: 0x99e0649f_320bca1a_c7adadb3_f5d8498e_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xb4d81657_de1baf00_918cbc76_c6974e96_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -108,
            mantissa: 0x8a9a28c8_a61c2c7a_12416d56_51c0b3d3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -108,
            mantissa: 0x88a079f1_d9bd4582_6353316c_3aeb9dc9_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xa82e10eb_9dc6225a_ef6223e7_54aa254d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0xf5fc07fe_6b652e8a_0b9e74ba_d0c56118_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0xc5288444_c7354b24_4a4e1663_86488928_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -116,
            mantissa: 0x96d3d226_a220ae6e_d6cca1ae_40f01e27_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -121,
            mantissa: 0xa7ab931b_499c4964_499c1091_4ab9673d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -129,
            mantissa: 0xf8373d1a_9ff3f9c6_e5cfbe0a_85ccc131_u128,
        },
    ];
    // Denominator coefficients, lowest degree first.
    static Q: [DyadicFloat128; 15] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -122,
            mantissa: 0xa05190f4_dcf0d35c_277e0f21_0635c538_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -118,
            mantissa: 0xa8837381_94c38992_86c0995d_5e5fa474_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -115,
            mantissa: 0xc3a4f279_9297e905_f59cc065_75959de8_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -112,
            mantissa: 0x8b05ade4_03432e06_881ce37d_a907216d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0xfd77f85e_35626f21_355ae728_01b78cbe_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0x972ed117_254970eb_661121dc_a4462d2f_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xec9d204a_9294ab57_2ef500d5_59d701b7_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xf033522d_cae45860_53a01453_c56da895_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0x9a33640c_9896ead5_1ce040d7_b36544f3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0xefe714fa_49da0166_fdf8bc68_57b77fa0_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0xd323b84c_214196b0_e25b8931_930fea0d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -116,
            mantissa: 0xbbb5240b_346642d8_010383cb_1e8a607e_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -120,
            mantissa: 0x88dcfa2a_f9f7d2ab_dd017994_8fae7e87_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0xc891477c_526e0f5e_74c4ae9f_9d8732b5_u128,
        },
    ];

    let inv_x = DyadicFloat128::accurate_reciprocal(x);
    let exp_x = rational128_exp(x);
    // NOTE(review): presumably 1/sqrt(x), matching the formula above; confirm in `bessel_rsqrt_hard`.
    let inv_sqrt_x = bessel_rsqrt_hard(x, inv_x);

    // Horner evaluation in 1/x: start from the leading coefficient and walk down to index 0.
    let numerator = P[..14].iter().rev().fold(P[14], |acc, &c| inv_x * acc + c);
    let denominator = Q[..14].iter().rev().fold(Q[14], |acc, &c| inv_x * acc + c);

    let ratio = numerator * denominator.reciprocal();
    (ratio * exp_x.reciprocal() * inv_sqrt_x).fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_k0` against reference values and its special-case handling.
    #[test]
    fn test_k0() {
        // (input, expected K0(input)) for ordinary positive arguments.
        let finite_cases: [(f64, f64); 5] = [
            (0.11, 2.3332678776741127),
            (0.643, 0.7241025575342853),
            (0.964, 0.4433737413379138),
            (2.964, 0.03621679838808167),
            (423.43, 7.784461905543397e-186),
        ];
        for &(x, expected) in &finite_cases {
            assert_eq!(f_k0(x), expected);
        }

        // Both zeros map to +inf.
        assert_eq!(f_k0(0.), f64::INFINITY);
        assert_eq!(f_k0(-0.), f64::INFINITY);
        // Negative arguments produce NaN.
        assert!(f_k0(-0.5).is_nan());
        assert!(f_k0(f64::NEG_INFINITY).is_nan());
        // +inf maps to zero.
        assert_eq!(f_k0(f64::INFINITY), 0.);
    }
}

434
vendor/pxfm/src/bessel/k0e.rs vendored Normal file
View File

@@ -0,0 +1,434 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::bessel::k0::k0_small_dd;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
/// Exponentially scaled modified Bessel function of the second kind of order 0
///
/// Computes K0(x)exp(x)
///
/// Special cases:
/// - `x == ±0` returns `+inf`
/// - `x == +inf` returns `0`
/// - `x < 0` (including `-inf`) and NaN return NaN
pub fn f_k0e(x: f64) -> f64 {
    let ix = x.to_bits();
    if ix >= 0x7ffu64 << 52 || ix == 0 {
        // Reached for +0, +inf, NaN and every input with the sign bit set (-0, x < 0, -inf)
        if ix.wrapping_shl(1) == 0 {
            // |x| == 0
            return f64::INFINITY;
        }
        if x.is_infinite() {
            return if x.is_sign_positive() { 0. } else { f64::NAN };
        }
        return x + f64::NAN; // x == NaN or x < 0
    }
    // From here on x is positive and finite, so its bit pattern orders like its value.
    if ix <= 0x3ff0000000000000 {
        // x <= 1
        let v_k0 = k0_small_dd(x);
        // NOTE(review): `i0_exp` presumably yields exp(x) as a double-double; confirm in `bessel::i0_exp`.
        let v_exp = i0_exp(x);
        return DoubleDouble::quick_mult(v_exp, v_k0).to_f64();
    }
    k0e_asympt(x)
}
/**
Generated in Wolfram
Computes sqrt(x)*exp(x)*K0(x)=Pn(1/x)/Qm(1/x)
hence
K0(x)exp(x) = Pn(1/x)/Qm(1/x) / sqrt(x)
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},11,11},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k0e_asympt(x: f64) -> f64 {
let recip = DoubleDouble::from_quick_recip(x);
let r_sqrt = DoubleDouble::from_sqrt(x);
const P: [(u64, u64); 12] = [
(0xbc9a6a11d237114e, 0x3ff40d931ff62706),
(0x3cdd614ddf4929e5, 0x4040645168c3e483),
(0xbd1ecf9ea0af6ab2, 0x40757419a703a2ab),
(0xbd3da3551fb27770, 0x409d4e65365522a2),
(0xbd564d58855d1a46, 0x40b6dd32f5a199d9),
(0xbd6cf055ca963a8e, 0x40c4fd2368f19618),
(0x3d4b6cdfbdc058df, 0x40c68faa11ebcd59),
(0x3d5b4ce4665bfa46, 0x40bb6fbe08e0a8ea),
(0xbd4316909063be15, 0x40a1953103a5be31),
(0x3d12f3f8edf41af0, 0x4074d2cb001e175c),
(0xbcd7bba36540264f, 0x40316cffcad5f8f9),
(0xbc6bf28dfdd5d37d, 0x3fc2f487fe78b8d7),
];
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcb9e8a5b17e696a, 0x403a485acd64d64a),
(0x3cd2e2e9c87f71f7, 0x4071518092320ecb),
(0xbd0d05bdb9431a2f, 0x4097e57e4c22c08e),
(0x3d5207068ab19ba9, 0x40b2ebadb2db62f9),
(0xbd64e37674083471, 0x40c1c0e4e9d6493d),
(0x3d3efb7a9a62b020, 0x40c3b94e8d62cdc7),
(0x3d47d6ce80a2114b, 0x40b93c2fd39e868e),
(0xbd1dfda61f525861, 0x40a1877a53a7f8d8),
(0x3d1236ff523dfcfa, 0x4077c3a10c2827de),
(0xbcc889997c9b0fe7, 0x4039a1d80b11c4a1),
(0x3c7ded0e8d73dddc, 0x3fdafe4913722123),
];
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
let z = DoubleDouble::div(p_num, p_den);
let r = DoubleDouble::div(z, r_sqrt);
let err = r.hi * f64::from_bits(0x3c10000000000000); // 2^-62
let ub = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if ub != lb {
return k0e_asympt_hard(x);
}
r.to_f64()
}
/**
High-precision fallback for K0(x)exp(x) on the asymptotic branch, evaluated
in 128-bit dyadic arithmetic.

Uses a degree 14/14 rational approximation in 1/x:
sqrt(x)*exp(x)*K0(x)=Pn(1/x)/Qm(1/x)
hence
K0(x)exp(x) = Pn(1/x)/Qm(1/x) / sqrt(x)

Coefficients generated in Wolfram:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},14,14},WorkingPrecision->90]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline(never)]
#[cold]
fn k0e_asympt_hard(x: f64) -> f64 {
    // Numerator coefficients, lowest degree first.
    static P: [DyadicFloat128; 15] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0xa06c98ff_b1382cb2_be520f51_a7b8f970_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -122,
            mantissa: 0xc84d8d0c_7faeef84_e56abccc_3d70f8a2_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -118,
            mantissa: 0xd1a71096_3da22280_35768c9e_0d3ddf42_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -115,
            mantissa: 0xf202e474_3698aabb_05688da0_ea1a088d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -112,
            mantissa: 0xaaa01830_8138af4d_1137b2dd_11a216f5_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -110,
            mantissa: 0x99e0649f_320bca1a_c7adadb3_f5d8498e_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xb4d81657_de1baf00_918cbc76_c6974e96_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -108,
            mantissa: 0x8a9a28c8_a61c2c7a_12416d56_51c0b3d3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -108,
            mantissa: 0x88a079f1_d9bd4582_6353316c_3aeb9dc9_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xa82e10eb_9dc6225a_ef6223e7_54aa254d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0xf5fc07fe_6b652e8a_0b9e74ba_d0c56118_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0xc5288444_c7354b24_4a4e1663_86488928_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -116,
            mantissa: 0x96d3d226_a220ae6e_d6cca1ae_40f01e27_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -121,
            mantissa: 0xa7ab931b_499c4964_499c1091_4ab9673d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -129,
            mantissa: 0xf8373d1a_9ff3f9c6_e5cfbe0a_85ccc131_u128,
        },
    ];
    // Denominator coefficients, lowest degree first.
    static Q: [DyadicFloat128; 15] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -122,
            mantissa: 0xa05190f4_dcf0d35c_277e0f21_0635c538_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -118,
            mantissa: 0xa8837381_94c38992_86c0995d_5e5fa474_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -115,
            mantissa: 0xc3a4f279_9297e905_f59cc065_75959de8_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -112,
            mantissa: 0x8b05ade4_03432e06_881ce37d_a907216d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0xfd77f85e_35626f21_355ae728_01b78cbe_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0x972ed117_254970eb_661121dc_a4462d2f_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xec9d204a_9294ab57_2ef500d5_59d701b7_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xf033522d_cae45860_53a01453_c56da895_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0x9a33640c_9896ead5_1ce040d7_b36544f3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0xefe714fa_49da0166_fdf8bc68_57b77fa0_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0xd323b84c_214196b0_e25b8931_930fea0d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -116,
            mantissa: 0xbbb5240b_346642d8_010383cb_1e8a607e_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -120,
            mantissa: 0x88dcfa2a_f9f7d2ab_dd017994_8fae7e87_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0xc891477c_526e0f5e_74c4ae9f_9d8732b5_u128,
        },
    ];

    let inv_x = DyadicFloat128::accurate_reciprocal(x);
    // NOTE(review): presumably 1/sqrt(x), matching the formula above; confirm in `bessel_rsqrt_hard`.
    let inv_sqrt_x = bessel_rsqrt_hard(x, inv_x);

    // Horner evaluation in 1/x: start from the leading coefficient and walk down to index 0.
    let numerator = P[..14].iter().rev().fold(P[14], |acc, &c| inv_x * acc + c);
    let denominator = Q[..14].iter().rev().fold(Q[14], |acc, &c| inv_x * acc + c);

    let ratio = numerator * denominator.reciprocal();
    (ratio * inv_sqrt_x).fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_k0e` against reference values and its special-case handling.
    #[test]
    fn test_k0() {
        // (input, expected K0(input)*exp(input)) for ordinary positive arguments.
        let finite_cases: [(f64, f64); 7] = [
            (0.00060324324, 7.533665613459802),
            (0.11, 2.6045757643537244),
            (0.643, 1.3773725807788395),
            (0.964, 1.1625987432322884),
            (2.964, 0.7017119941259377),
            (423.43, 0.06088931243251448),
            (4324235240321.43, 6.027056776336986e-7),
        ];
        for &(x, expected) in &finite_cases {
            assert_eq!(f_k0e(x), expected);
        }

        // The high-precision fallback must agree with the fast path on this input.
        assert_eq!(k0e_asympt_hard(423.43), 0.06088931243251448);

        // Both zeros map to +inf.
        assert_eq!(f_k0e(0.), f64::INFINITY);
        assert_eq!(f_k0e(-0.), f64::INFINITY);
        // Negative arguments produce NaN.
        assert!(f_k0e(-0.5).is_nan());
        assert!(f_k0e(f64::NEG_INFINITY).is_nan());
        // +inf maps to zero.
        assert_eq!(f_k0e(f64::INFINITY), 0.);
    }
}

184
vendor/pxfm/src/bessel/k0ef.rs vendored Normal file
View File

@@ -0,0 +1,184 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0f::i0f_small;
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::logs::fast_logf;
use crate::polyeval::{f_estrin_polyeval7, f_estrin_polyeval8};
/// Exponentially scaled modified Bessel function of the second kind of order 0
///
/// Computes K0(x)exp(x)
///
/// Max ULP 0.5
///
/// Special cases:
/// - `x == ±0` returns `+inf`
/// - `x == +inf` returns `0`
/// - `x < 0` (including `-inf`) and NaN return NaN
pub fn f_k0ef(x: f32) -> f32 {
    let ux = x.to_bits();
    if ux >= 0xffu32 << 23 || ux == 0 {
        // Reached for +0, +inf, NaN and every input with the sign bit set (-0, x < 0, -inf)
        if ux.wrapping_shl(1) == 0 {
            // |x| == 0
            return f32::INFINITY;
        }
        if x.is_infinite() {
            return if x.is_sign_positive() { 0. } else { f32::NAN };
        }
        return x + f32::NAN; // x == NaN or x < 0
    }
    // From here on x is positive and finite, so its bit pattern orders like its value.
    if ux <= 0x3f800000u32 {
        // x <= 1.0
        if ux <= 0x34000000u32 {
            // x <= f32::EPSILON
            // taylor series for K0(x)exp(x) ~ (-euler_gamma + log(2) - log(x)) + (-euler_gamma + log(2) - log(x)) * x
            let dx = x as f64;
            let log_x = fast_logf(x);
            // -euler_gamma + log(2)
            const M_EULER_GAMMA_P_LOG2: f64 = f64::from_bits(0x3fbdadb014541eb2);
            let c1 = -log_x + M_EULER_GAMMA_P_LOG2;
            // c1 * x + c1
            return f_fmla(c1, dx, c1) as f32;
        }
        return k0ef_small(x);
    }
    k0ef_asympt(x)
}
/**
K0(x)exp(x) for small x, built from the identity
K0(x) + log(x) * I0(x) = P(x^2)
hence
K0(x) = P(x^2) - log(x)*I0(x)
and the result is multiplied by the value returned from `core_expf(x)`.

Polynomial generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselK[0,x]+Log[x]BesselI[0,x]
g[z_]:=f[Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},6,0},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k0ef_small(x: f32) -> f32 {
    let xd = x as f64;
    // P(x^2)
    let poly = f_estrin_polyeval7(
        xd * xd,
        f64::from_bits(0x3fbdadb014541ece),
        f64::from_bits(0x3fd1dadb01453e9c),
        f64::from_bits(0x3f99dadb01491ac7),
        f64::from_bits(0x3f4bb90e82a4f609),
        f64::from_bits(0x3eef4749ebd25b10),
        f64::from_bits(0x3e85d5b5668593af),
        f64::from_bits(0x3e15233b0788618b),
    );
    let ln_x = fast_logf(x);
    let bessel_i0 = i0f_small(x);
    let exp_x = core_expf(x);
    // (P(x^2) - I0(x) * log(x)) * exp(x)
    (f_fmla(-bessel_i0, ln_x, poly) * exp_x) as f32
}
/**
K0(x)exp(x) for x > 1 via a degree 7/7 rational approximation in 1/x,
evaluated in f64:
sqrt(x)*exp(x)*K0(x)=Pn(1/x)/Qm(1/x)
hence
K0(x)exp(x) = Pn(1/x)/Qm(1/x) / sqrt(x)

Coefficients generated in Wolfram:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[0,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{2^-33,1},7,7},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k0ef_asympt(x: f32) -> f32 {
    let xd = x as f64;
    let inv_x = xd.recip();
    // NOTE(review): presumably 1/sqrt(x), matching the formula above; confirm in `j1f_rsqrt`.
    let inv_sqrt_x = j1f_rsqrt(xd);
    let numerator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff40d931ff62701),
        f64::from_bits(0x402d8410a60e2ced),
        f64::from_bits(0x404e9f18049bf704),
        f64::from_bits(0x405c07682282783c),
        f64::from_bits(0x4057379c68ce6d5e),
        f64::from_bits(0x403ffd64a0105c4e),
        f64::from_bits(0x400cc53ed67913b4),
        f64::from_bits(0x3faf8cc8747a5d72),
    );
    let denominator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x4027ccde1d0eeb14),
        f64::from_bits(0x40492418133aa7a7),
        f64::from_bits(0x4057be8a004d0938),
        f64::from_bits(0x4054cc77d1dfef26),
        f64::from_bits(0x403fd2187097af1d),
        f64::from_bits(0x4011c77649649e55),
        f64::from_bits(0x3fc2080a5965ef9b),
    );
    ((numerator / denominator) * inv_sqrt_x) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_k0ef` against reference values and its special-case handling.
    #[test]
    fn test_k0f() {
        // (input, expected K0(input)*exp(input)) for ordinary positive arguments.
        let finite_cases: [(f32, f32); 4] = [
            (2.034804e-5, 10.918679),
            (0.010260499, 4.743962),
            (0.3260499, 1.7963701),
            (0.72341, 1.3121376),
        ];
        for &(x, expected) in &finite_cases {
            assert_eq!(f_k0ef(x), expected);
        }

        // Both zeros map to +inf.
        assert_eq!(f_k0ef(0.), f32::INFINITY);
        assert_eq!(f_k0ef(-0.), f32::INFINITY);
        // Negative arguments produce NaN.
        assert!(f_k0ef(-0.5).is_nan());
        assert!(f_k0ef(f32::NEG_INFINITY).is_nan());
        // +inf maps to zero.
        assert_eq!(f_k0ef(f32::INFINITY), 0.);
    }
}

184
vendor/pxfm/src/bessel/k0f.rs vendored Normal file
View File

@@ -0,0 +1,184 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0f::i0f_small;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::logs::fast_logf;
use crate::polyeval::{f_estrin_polyeval7, f_estrin_polyeval8};
/// Modified Bessel of the second kind of order 0
///
/// Max ULP 0.5
///
/// This method has exactly one exception which is not correctly rounded with FMA.
///
/// Special cases:
/// - `x == ±0` returns `+inf`
/// - `x == +inf` returns `0`
/// - `x < 0` (including `-inf`) and NaN return NaN
pub fn f_k0f(x: f32) -> f32 {
    let bits = x.to_bits();
    // +0, +inf, NaN, or any input with the sign bit set (-0, x < 0, -inf)
    if bits == 0 || bits >= 0xffu32 << 23 {
        if bits.wrapping_shl(1) == 0 {
            // |x| == 0
            return f32::INFINITY;
        }
        if !x.is_infinite() {
            return x + f32::NAN; // x == NaN or x < 0
        }
        return if x.is_sign_positive() { 0. } else { f32::NAN };
    }
    // From here on x is positive and finite, so its bit pattern orders like its value.
    if bits >= 0x42cbc4fbu32 {
        // x >= 101.88473: the function returns 0 at and above this threshold
        return 0.;
    }
    if bits > 0x3f800000u32 {
        // x > 1.0
        return k0f_asympt(x);
    }
    if bits > 0x34000000u32 {
        // f32::EPSILON < x <= 1.0
        return k0f_small(x);
    }
    // x <= f32::EPSILON
    // taylor series for K0(x) ~ -euler_gamma + log(2) - log(x)
    const EULER_GAMMA_PLUS_LOG2: f64 = f64::from_bits(0x3fbdadb014541eb2);
    let ln_x = fast_logf(x);
    (-ln_x + EULER_GAMMA_PLUS_LOG2) as f32
}
/**
K0(x) for small x, built from the identity
K0(x) + log(x) * I0(x) = P(x^2)
hence
K0(x) = P(x^2) - log(x)*I0(x)

Polynomial generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselK[0,x]+Log[x]BesselI[0,x]
g[z_]:=f[Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},6,0},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k0f_small(x: f32) -> f32 {
    let xd = x as f64;
    // P(x^2)
    let poly = f_estrin_polyeval7(
        xd * xd,
        f64::from_bits(0x3fbdadb014541ece),
        f64::from_bits(0x3fd1dadb01453e9c),
        f64::from_bits(0x3f99dadb01491ac7),
        f64::from_bits(0x3f4bb90e82a4f609),
        f64::from_bits(0x3eef4749ebd25b10),
        f64::from_bits(0x3e85d5b5668593af),
        f64::from_bits(0x3e15233b0788618b),
    );
    let ln_x = fast_logf(x);
    let bessel_i0 = i0f_small(x);
    // P(x^2) - I0(x) * log(x)
    f_fmla(-bessel_i0, ln_x, poly) as f32
}
/**
K0(x) for x > 1 via a degree 7/7 rational approximation in 1/x, evaluated
in f64:
sqrt(x)*exp(x)*K0(x)=Pn(1/x)/Qm(1/x)
hence
K0(x) = Pn(1/x)/Qm(1/x) / (sqrt(x) * exp(x))

Coefficients generated in Wolfram:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},7,7},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k0f_asympt(x: f32) -> f32 {
    let xd = x as f64;
    let inv_x = xd.recip();
    // NOTE(review): presumably exp(x) in f64, matching the formula above; confirm in `core_expf`.
    let exp_x = core_expf(x);
    let sqrt_x = xd.sqrt();
    let numerator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff40d931ff62701),
        f64::from_bits(0x402d8410a62d9c17),
        f64::from_bits(0x404e9f1804dd7e54),
        f64::from_bits(0x405c076822dcd255),
        f64::from_bits(0x4057379c6932949f),
        f64::from_bits(0x403ffd64a0bd54b7),
        f64::from_bits(0x400cc53ed733fd97),
        f64::from_bits(0x3faf8cc8756944eb),
    );
    let denominator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x4027ccde1d27ffc9),
        f64::from_bits(0x40492418136fb90f),
        f64::from_bits(0x4057be8a00983906),
        f64::from_bits(0x4054cc77d2379b76),
        f64::from_bits(0x403fd218713ec08d),
        f64::from_bits(0x4011c77649d3f65f),
        f64::from_bits(0x3fc2080a59e87324),
    );
    ((numerator / denominator) / (exp_x * sqrt_x)) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_k0f` against reference values and its special-case handling.
    #[test]
    fn test_k0f() {
        // (input, expected K0(input)) for ordinary positive arguments.
        let finite_cases: [(f32, f32); 4] = [
            (2.034804e-5, 10.918458),
            (0.010260499, 4.695535),
            (0.3260499, 1.2965646),
            (0.72341, 0.636511734),
        ];
        for &(x, expected) in &finite_cases {
            assert_eq!(f_k0f(x), expected);
        }

        // Both zeros map to +inf.
        assert_eq!(f_k0f(0.), f32::INFINITY);
        assert_eq!(f_k0f(-0.), f32::INFINITY);
        // Negative arguments produce NaN.
        assert!(f_k0f(-0.5).is_nan());
        assert!(f_k0f(f32::NEG_INFINITY).is_nan());
        // +inf maps to zero.
        assert_eq!(f_k0f(f32::INFINITY), 0.);
    }
}

644
vendor/pxfm/src/bessel/k1.rs vendored Normal file
View File

@@ -0,0 +1,644 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::exponents::rational128_exp;
use crate::logs::{fast_log_d_to_dd, log_dd};
use crate::polyeval::f_polyeval3;
/// Modified Bessel of the second kind of order 1
///
/// Max ULP 0.5
///
/// Special cases:
/// - `x == ±0` returns `+inf`
/// - `x == +inf` returns `0`
/// - `x < 0` (including `-inf`) and NaN return NaN
pub fn f_k1(x: f64) -> f64 {
    let bits = x.to_bits();
    // +0, +inf, NaN, or any input with the sign bit set (-0, x < 0, -inf)
    if bits == 0 || bits >= 0x7ffu64 << 52 {
        if bits.wrapping_shl(1) == 0 {
            // |x| == 0
            return f64::INFINITY;
        }
        if !x.is_infinite() {
            return x + f64::NAN; // x == NaN or x < 0
        }
        return if x.is_sign_positive() { 0. } else { f64::NAN };
    }
    // From here on x is positive and finite, so its bit pattern orders like its value.
    if bits >= 0x4086140538aa7d38u64 {
        // x >= 706.5025494880165: the function returns 0 at and above this threshold
        return 0.;
    }
    if bits > 0x3ff0000000000000 {
        // x > 1
        k1_asympt(x)
    } else {
        // x <= 1
        k1_small(x).to_f64()
    }
}
// Coefficients below were generated by Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
// g[z_]:=f[2 Sqrt[z]]
// {err,approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},5,5},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
//
// With t = (x/2)^2 the function reconstructs
//   I1(x) = x/2 * (1 + t/2 + t^2 * R(t))
// where R is the rational fit described above. The three lowest-order
// coefficients of each polynomial are carried as double-double bit pairs,
// the three highest-order ones as plain f64.
#[inline]
fn i1_fast(x: f64) -> DoubleDouble {
    const NUM: [(u64, u64); 3] = [
        (0x3c5555555553c008, 0x3fb5555555555555),
        (0x3c06f1014b703de8, 0x3f6dfda17d0a2cef),
        (0xbbc2594d655d84db, 0x3f21b2c299108f7b),
    ];
    const DEN: [(u64, u64); 3] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0xbc32ebd3ac0e6253, 0xbfa42c718ce308f7),
        (0xbbe1626e81e3c1bc, 0x3f482772320eab0e),
    ];

    let half = x * 0.5;
    // t = (x/2)^2 as a double-double product.
    let t = DoubleDouble::from_exact_mult(half, half);

    // Numerator: f64 tail first, then three double-double Horner steps.
    let num_tail = f_polyeval3(
        t.hi,
        f64::from_bits(0x3ec37625c178f5e2),
        f64::from_bits(0x3e5843215f0d5088),
        f64::from_bits(0x3dd97f1f45f47244),
    );
    let num = DoubleDouble::mul_f64_add(t, num_tail, DoubleDouble::from_bit_pair(NUM[2]));
    let num = DoubleDouble::mul_add(t, num, DoubleDouble::from_bit_pair(NUM[1]));
    let num = DoubleDouble::mul_add(t, num, DoubleDouble::from_bit_pair(NUM[0]));

    // Denominator: same layout.
    let den_tail = f_polyeval3(
        t.hi,
        f64::from_bits(0xbee169811ef4f4a1),
        f64::from_bits(0x3e6ebdab5dbe02a5),
        f64::from_bits(0xbdeb1dbb29fec52a),
    );
    let den = DoubleDouble::mul_f64_add(t, den_tail, DoubleDouble::from_bit_pair(DEN[2]));
    let den = DoubleDouble::mul_add(t, den, DoubleDouble::from_bit_pair(DEN[1]));
    let den = DoubleDouble::mul_add(t, den, DoubleDouble::from_bit_pair(DEN[0]));

    let ratio = DoubleDouble::div(num, den);
    let t_sq = DoubleDouble::quick_mult(t, t);

    // 1 + t/2, then add t^2 * R(t).
    let head = DoubleDouble::mul_f64_add_f64(t, 0.5, 1.);
    let series = DoubleDouble::mul_add(ratio, t_sq, head);

    // Final scaling by x/2.
    DoubleDouble::quick_mult(series, DoubleDouble::from_exact_mult(x, 0.5))
}
/**
Fast double-double evaluation of `K1(x)`; `f_k1` calls it for `x <= 1`.

The result is assembled as `x * R(x^2) + log(x) * I1(x) + 1/x`, where `R` is a
rational approximant for
f(x) := (BesselK(1, x) - Log(x)*BesselI(1, x) - 1/x) / x

When the rounding check at the end cannot decide the final `f64`, the work is
redone by `k1_small_hard`.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselK[1,x]-Log[x]BesselI[1,x]-1/x)/x
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},5,4},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
pub(crate) fn k1_small(x: f64) -> DoubleDouble {
    const NUM: [(u64, u64); 6] = [
        (0xbc7037c12b888927, 0xbfd3b5b6028a83d6),
        (0x3c39dba459d023e5, 0xbfb4bac288cfe0cd),
        (0x3be0575395050120, 0xbf6c4a1abe9061df),
        (0x3b755df8e375b3d4, 0xbf0c850679678599),
        (0xbb097e0ec926785f, 0xbe98c4a9b608ae1f),
        (0xbaa029f31c786e81, 0xbe104efe2246ee51),
    ];
    const DEN: [(u64, u64); 5] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x3c19f62e592f3e71, 0xbf8d3bd595449ca9),
        (0xbba8472b975a12d7, 0x3f194de71babe24a),
        (0xbb2eec4b611c19b5, 0xbe994a5dbec84e4d),
        (0x3a9bae2028402903, 0x3e0981ded64a954b),
    ];

    let inv_x = DoubleDouble::from_quick_recip(x);
    let z = DoubleDouble::from_exact_mult(x, x);

    // Numerator: the three highest-order coefficients use only their high
    // words and plain f64 arithmetic, the rest is double-double Horner.
    let num_tail = f_polyeval3(
        z.hi,
        f64::from_bits(NUM[3].1),
        f64::from_bits(NUM[4].1),
        f64::from_bits(NUM[5].1),
    );
    let num = DoubleDouble::mul_f64_add(z, num_tail, DoubleDouble::from_bit_pair(NUM[2]));
    let num = DoubleDouble::mul_add(z, num, DoubleDouble::from_bit_pair(NUM[1]));
    let num = DoubleDouble::mul_add(z, num, DoubleDouble::from_bit_pair(NUM[0]));

    // Denominator: two highest-order coefficients in f64, constant term 1.0.
    let den_tail = f_fmla(z.hi, f64::from_bits(DEN[4].1), f64::from_bits(DEN[3].1));
    let den = DoubleDouble::mul_f64_add(z, den_tail, DoubleDouble::from_bit_pair(DEN[2]));
    let den = DoubleDouble::mul_add(z, den, DoubleDouble::from_bit_pair(DEN[1]));
    let den = DoubleDouble::mul_add_f64(z, den, f64::from_bits(DEN[0].1));

    let ratio = DoubleDouble::div(num, den);

    // log(x) * I1(x) + 1/x, then add x * R(x^2).
    let log_x = fast_log_d_to_dd(x);
    let bessel_i1 = i1_fast(x);
    let singular = DoubleDouble::mul_add(bessel_i1, log_x, inv_x);
    let result = DoubleDouble::mul_f64_add(ratio, x, singular);

    // Error bound: |hi| * 2^-61 + 2^-87.
    let slack = f_fmla(
        result.hi,
        f64::from_bits(0x3c20000000000000), // 2^-61
        f64::from_bits(0x3a80000000000000), // 2^-87
    );
    let upper = result.hi + (result.lo + slack);
    let lower = result.hi + (result.lo - slack);

    // Both perturbed sums agree only when the rounding is unambiguous.
    if upper != lower {
        k1_small_hard(x)
    } else {
        result
    }
}
/**
Slow path of `k1_small`: same decomposition
`x * R(x^2) + log(x) * I1(x) + 1/x`, but every coefficient of the rational
approximant is used in full double-double form and the logarithm comes from
`log_dd` instead of `fast_log_d_to_dd`.

Rational approximant for
f(x) := (BesselK(1, x) - Log(x)*BesselI(1, x) - 1/x) / x

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselK[1,x]-Log[x]BesselI[1,x]-1/x)/x
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},5,4},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn k1_small_hard(x: f64) -> DoubleDouble {
    const NUM: [(u64, u64); 6] = [
        (0xbc7037c12b888927, 0xbfd3b5b6028a83d6),
        (0x3c39dba459d023e5, 0xbfb4bac288cfe0cd),
        (0x3be0575395050120, 0xbf6c4a1abe9061df),
        (0x3b755df8e375b3d4, 0xbf0c850679678599),
        (0xbb097e0ec926785f, 0xbe98c4a9b608ae1f),
        (0xbaa029f31c786e81, 0xbe104efe2246ee51),
    ];
    const DEN: [(u64, u64); 5] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x3c19f62e592f3e71, 0xbf8d3bd595449ca9),
        (0xbba8472b975a12d7, 0x3f194de71babe24a),
        (0xbb2eec4b611c19b5, 0xbe994a5dbec84e4d),
        (0x3a9bae2028402903, 0x3e0981ded64a954b),
    ];

    let inv_x = DoubleDouble::from_quick_recip(x);
    let z = DoubleDouble::from_exact_mult(x, x);

    // Horner in double-double, starting from the highest-order coefficient
    // and walking down to the constant term.
    let num = NUM[..5]
        .iter()
        .rev()
        .fold(DoubleDouble::from_bit_pair(NUM[5]), |acc, &c| {
            DoubleDouble::mul_add(z, acc, DoubleDouble::from_bit_pair(c))
        });
    let den = DEN[..4]
        .iter()
        .rev()
        .fold(DoubleDouble::from_bit_pair(DEN[4]), |acc, &c| {
            DoubleDouble::mul_add(z, acc, DoubleDouble::from_bit_pair(c))
        });

    let ratio = DoubleDouble::div(num, den);

    // log(x) * I1(x) + 1/x, then add x * R(x^2).
    let log_x = log_dd(x);
    let bessel_i1 = i1_fast(x);
    let singular = DoubleDouble::mul_add(bessel_i1, log_x, inv_x);
    DoubleDouble::mul_f64_add(ratio, x, singular)
}
/**
Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},11,11},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k1_asympt(x: f64) -> f64 {
let recip = DoubleDouble::from_quick_recip(x);
let e = i0_exp(x * 0.5);
let r_sqrt = DoubleDouble::from_sqrt(x);
const P: [(u64, u64); 12] = [
(0xbc9a6a0690becb3b, 0x3ff40d931ff62706),
(0xbce573e1bbf2f0b7, 0x40402cebfab5721d),
(0x3d11a739b7c11e7b, 0x4074f58abc0cfbf1),
(0xbd2682a09ded0116, 0x409c8315f8facef2),
(0xbd3a19e91a120168, 0x40b65f7a4caed8b9),
(0x3d449c3d2b834543, 0x40c4fe41fdb4e7b8),
(0xbd6bdd415ac7f7e1, 0x40c7aa402d035d03),
(0x3d528412ff0d6b24, 0x40bf68faddd7d850),
(0xbd48f4bb3f61dac6, 0x40a75f5650249952),
(0xbd1dc534b275e309, 0x4081bddd259c0582),
(0xbcce5103350bd226, 0x4046c7a049014484),
(0x3c8935f8acd6c1d0, 0x3fef7524082b1859),
];
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3cc0d2508437b3f4, 0x40396ff483adec14),
(0xbd130a9c9f8a5338, 0x4070225588d8c15d),
(0xbceceba8fa0e65a2, 0x4095481f6684e3bb),
(0x3d4099f3c178fd2a, 0x40afedc8a778bf42),
(0xbd3a7e6a6276a3e7, 0x40bc0c060112692e),
(0x3d11538c155b16d8, 0x40bcb12bd1101782),
(0xbd5f7b04cdea2c61, 0x40b07fa363202e10),
(0xbce444ed035b66c6, 0x4093d6fe8f44f838),
(0xbcf6f88fb942b610, 0x4065c99fa44030c3),
(0xbcbd1d2aedee5bc9, 0x40207ffabeb00eea),
(0xbc39a0c8091102c9, 0x3facff3d892cd57a),
];
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
let z = DoubleDouble::div(p_num, p_den);
let mut r_e = DoubleDouble::quick_mult(e, r_sqrt);
r_e = DoubleDouble::from_exact_add(r_e.hi, r_e.lo);
r_e = DoubleDouble::quick_mult(r_e, e);
r_e = DoubleDouble::from_exact_add(r_e.hi, r_e.lo);
let r = DoubleDouble::div(z, r_e);
let err = r.hi * f64::from_bits(0x3c10000000000000); // 2^-61
let ub = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if ub != lb {
return k1_asympt_hard(x);
}
r.to_f64()
}
/**
Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},14,14},WorkingPrecision->70]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn k1_asympt_hard(x: f64) -> f64 {
static P: [DyadicFloat128; 15] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0xa06c98ff_b1382cb2_be5210ac_f26f25d1_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -122,
mantissa: 0xc5f546cb_659a39d0_fafbd188_36ca05b9_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -118,
mantissa: 0xcd0b7cfa_de158d26_7084bbe9_f1bdb66d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0xeac7be2f_957d1260_8849508a_2a5a8972_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -112,
mantissa: 0xa4d14fec_fecc6444_4c7b0287_dad71a86_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0x94e3180c_01df9932_ad2acd8b_bab59c05_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xb0de10f8_74918442_94a96368_8eaa4d0d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -108,
mantissa: 0x8adfea76_d6dbe5d9_46bfaf83_9341f4b5_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -108,
mantissa: 0x8f0a4337_b69b602c_cf187222_f3a3379f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xbd4c3ebf_c2db0fad_1b425641_cc470043_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0x9b14d29f_9b97e3c8_c1a7b9d0_787f0ddb_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -112,
mantissa: 0x93e670d2_07a553ef_a90d4895_cf1b5011_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0x93e0ee0a_cb4d8910_6b4d3e37_f4f9df49_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -120,
mantissa: 0xff0ce10d_5585abd1_e8a53a12_65131ad4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -126,
mantissa: 0xf020536d_822cbe51_c8de095a_03367c83_u128,
},
];
static Q: [DyadicFloat128; 15] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -122,
mantissa: 0x9c729dd5_4828a918_42807f58_d485a511_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -118,
mantissa: 0x9ff6f631_0794001d_433ab0c5_d4c682a9_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0xb3f81e8b_1e0e85a6_3928342e_c83088a1_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -113,
mantissa: 0xf6b1c203_a60d4294_239ad045_2c67c224_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -111,
mantissa: 0xd7a98b14_7a499762_abde5c38_3a5b40e4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0xf4eb8b77_a2cdc686_afd1273f_d464c8b7_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xb4c1e12a_93ee86fc_930c6f94_cfa6ac3a_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xaaeaab88_32b776b7_fdd76b0f_24349f41_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0xc8ec9d61_5bf2ee9b_878b4962_4a5cee85_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -111,
mantissa: 0x8b97bab0_3351673f_22f10d40_fd1c9ff3_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -114,
mantissa: 0xd31cb80a_bf8cbedc_b0dcf7e7_c599f79e_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -117,
mantissa: 0x96b354c8_69197193_ea4f608f_81943988_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -122,
mantissa: 0x989af1bb_e48b5c44_7cd09746_f15e935a_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -130,
mantissa: 0xb7b51326_23c29ed5_8d3dcf5a_79bd9a4f_u128,
},
];
let recip = DyadicFloat128::accurate_reciprocal(x);
let e = rational128_exp(x);
let r_sqrt = bessel_rsqrt_hard(x, recip);
let mut p0 = P[14];
for i in (0..14).rev() {
p0 = recip * p0 + P[i];
}
let mut q0 = Q[14];
for i in (0..14).rev() {
q0 = recip * q0 + Q[i];
}
let v = p0 * q0.reciprocal();
let r = v * (e.reciprocal() * r_sqrt);
r.fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins `f_k1` on reference points across its branches plus the special inputs.
    #[test]
    fn test_k1() {
        // (input, expected) pairs compared for exact equality.
        const EXACT: [(f64, f64); 8] = [
            (0.643, 1.184534109892725),
            (0.964, 0.6402280656771248),
            (2.964, 0.04192888446074039),
            (8.43, 9.824733212831289e-5),
            (16.43, 2.3142404075259965e-8),
            (423.43, 7.793648638470207e-186),
            (0., f64::INFINITY),
            (-0., f64::INFINITY),
        ];
        for &(input, expected) in EXACT.iter() {
            assert_eq!(f_k1(input), expected);
        }

        // Negative arguments have no real result.
        assert!(f_k1(-0.5).is_nan());
        assert!(f_k1(f64::NEG_INFINITY).is_nan());

        assert_eq!(f_k1(f64::INFINITY), 0.);
    }
}

425
vendor/pxfm/src/bessel/k1e.rs vendored Normal file
View File

@@ -0,0 +1,425 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::bessel::k1::k1_small;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
/// Modified exponentially scaled Bessel of the second kind of order 1
///
/// Computes K1(x)exp(x)
///
/// Input handling:
/// * `+0` and `-0` return `+inf`;
/// * `+inf` returns `0`, `-inf` returns NaN;
/// * NaN and every finite negative input return NaN;
/// * `x <= 1` multiplies `k1_small(x)` by `i0_exp(x)` in double-double;
/// * everything else is evaluated by `k1e_asympt`.
pub fn f_k1e(x: f64) -> f64 {
    let bits = x.to_bits();

    // `bits == 0` is +0; `bits >= 0x7ff << 52` covers +inf, NaN and every
    // pattern with the sign bit set (negative values, -0, -inf).
    if bits == 0 || bits >= 0x7ffu64 << 52 {
        return if bits.wrapping_shl(1) == 0 {
            // Shifting out the sign bit leaves nothing: x is +0 or -0.
            f64::INFINITY
        } else if x.is_infinite() {
            if x.is_sign_positive() {
                0.
            } else {
                f64::NAN
            }
        } else {
            // NaN input or a finite negative argument.
            x + f64::NAN
        };
    }

    if bits > 0x3ff0000000000000 {
        // x > 1
        return k1e_asympt(x);
    }

    // x <= 1: scale the unscaled K1 by the exponential factor.
    let scale = i0_exp(x);
    let unscaled = k1_small(x);
    DoubleDouble::quick_mult(scale, unscaled).to_f64()
}
/**
Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},11,11},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k1e_asympt(x: f64) -> f64 {
let recip = DoubleDouble::from_quick_recip(x);
let r_sqrt = DoubleDouble::from_sqrt(x);
const P: [(u64, u64); 12] = [
(0xbc9a6a0690becb3b, 0x3ff40d931ff62706),
(0xbce573e1bbf2f0b7, 0x40402cebfab5721d),
(0x3d11a739b7c11e7b, 0x4074f58abc0cfbf1),
(0xbd2682a09ded0116, 0x409c8315f8facef2),
(0xbd3a19e91a120168, 0x40b65f7a4caed8b9),
(0x3d449c3d2b834543, 0x40c4fe41fdb4e7b8),
(0xbd6bdd415ac7f7e1, 0x40c7aa402d035d03),
(0x3d528412ff0d6b24, 0x40bf68faddd7d850),
(0xbd48f4bb3f61dac6, 0x40a75f5650249952),
(0xbd1dc534b275e309, 0x4081bddd259c0582),
(0xbcce5103350bd226, 0x4046c7a049014484),
(0x3c8935f8acd6c1d0, 0x3fef7524082b1859),
];
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3cc0d2508437b3f4, 0x40396ff483adec14),
(0xbd130a9c9f8a5338, 0x4070225588d8c15d),
(0xbceceba8fa0e65a2, 0x4095481f6684e3bb),
(0x3d4099f3c178fd2a, 0x40afedc8a778bf42),
(0xbd3a7e6a6276a3e7, 0x40bc0c060112692e),
(0x3d11538c155b16d8, 0x40bcb12bd1101782),
(0xbd5f7b04cdea2c61, 0x40b07fa363202e10),
(0xbce444ed035b66c6, 0x4093d6fe8f44f838),
(0xbcf6f88fb942b610, 0x4065c99fa44030c3),
(0xbcbd1d2aedee5bc9, 0x40207ffabeb00eea),
(0xbc39a0c8091102c9, 0x3facff3d892cd57a),
];
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
let z = DoubleDouble::div(p_num, p_den);
let r = DoubleDouble::div(z, r_sqrt);
let err = r.hi * f64::from_bits(0x3c10000000000000); // 2^-61
let ub = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if ub != lb {
return k1e_asympt_hard(x);
}
r.to_f64()
}
/**
Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},14,14},WorkingPrecision->70]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn k1e_asympt_hard(x: f64) -> f64 {
static P: [DyadicFloat128; 15] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0xa06c98ff_b1382cb2_be5210ac_f26f25d1_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -122,
mantissa: 0xc5f546cb_659a39d0_fafbd188_36ca05b9_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -118,
mantissa: 0xcd0b7cfa_de158d26_7084bbe9_f1bdb66d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0xeac7be2f_957d1260_8849508a_2a5a8972_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -112,
mantissa: 0xa4d14fec_fecc6444_4c7b0287_dad71a86_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0x94e3180c_01df9932_ad2acd8b_bab59c05_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xb0de10f8_74918442_94a96368_8eaa4d0d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -108,
mantissa: 0x8adfea76_d6dbe5d9_46bfaf83_9341f4b5_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -108,
mantissa: 0x8f0a4337_b69b602c_cf187222_f3a3379f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xbd4c3ebf_c2db0fad_1b425641_cc470043_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0x9b14d29f_9b97e3c8_c1a7b9d0_787f0ddb_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -112,
mantissa: 0x93e670d2_07a553ef_a90d4895_cf1b5011_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0x93e0ee0a_cb4d8910_6b4d3e37_f4f9df49_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -120,
mantissa: 0xff0ce10d_5585abd1_e8a53a12_65131ad4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -126,
mantissa: 0xf020536d_822cbe51_c8de095a_03367c83_u128,
},
];
static Q: [DyadicFloat128; 15] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -122,
mantissa: 0x9c729dd5_4828a918_42807f58_d485a511_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -118,
mantissa: 0x9ff6f631_0794001d_433ab0c5_d4c682a9_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0xb3f81e8b_1e0e85a6_3928342e_c83088a1_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -113,
mantissa: 0xf6b1c203_a60d4294_239ad045_2c67c224_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -111,
mantissa: 0xd7a98b14_7a499762_abde5c38_3a5b40e4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0xf4eb8b77_a2cdc686_afd1273f_d464c8b7_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xb4c1e12a_93ee86fc_930c6f94_cfa6ac3a_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xaaeaab88_32b776b7_fdd76b0f_24349f41_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0xc8ec9d61_5bf2ee9b_878b4962_4a5cee85_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -111,
mantissa: 0x8b97bab0_3351673f_22f10d40_fd1c9ff3_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -114,
mantissa: 0xd31cb80a_bf8cbedc_b0dcf7e7_c599f79e_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -117,
mantissa: 0x96b354c8_69197193_ea4f608f_81943988_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -122,
mantissa: 0x989af1bb_e48b5c44_7cd09746_f15e935a_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -130,
mantissa: 0xb7b51326_23c29ed5_8d3dcf5a_79bd9a4f_u128,
},
];
let recip = DyadicFloat128::accurate_reciprocal(x);
let r_sqrt = bessel_rsqrt_hard(x, recip);
let mut p0 = P[14];
for i in (0..14).rev() {
p0 = recip * p0 + P[i];
}
let mut q0 = Q[14];
for i in (0..14).rev() {
q0 = recip * q0 + Q[i];
}
let v = p0 * q0.reciprocal();
let r = v * r_sqrt;
r.fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins `f_k1e` and `k1e_asympt_hard` on reference points plus the special inputs.
    #[test]
    fn test_k1() {
        // (input, expected) pairs for the public entry point.
        const ENTRY_POINT: [(f64, f64); 7] = [
            (0.643, 2.253195748291852),
            (0.964, 1.6787831013451477),
            (2.964, 0.8123854795542738),
            (8.43, 0.4502184086111872),
            (16.43, 0.3161307996938612),
            (423.43, 0.06096117017402597),
            (9044.431, 0.01317914752085687),
        ];
        for &(input, expected) in ENTRY_POINT.iter() {
            assert_eq!(f_k1e(input), expected);
        }

        // The slow path must produce the same values when called directly.
        const SLOW_PATH: [(f64, f64); 3] = [
            (16.43, 0.3161307996938612),
            (423.43, 0.06096117017402597),
            (9044.431, 0.01317914752085687),
        ];
        for &(input, expected) in SLOW_PATH.iter() {
            assert_eq!(k1e_asympt_hard(input), expected);
        }

        assert_eq!(f_k1e(0.), f64::INFINITY);
        assert_eq!(f_k1e(-0.), f64::INFINITY);

        // Negative arguments have no real result.
        assert!(f_k1e(-0.5).is_nan());
        assert!(f_k1e(f64::NEG_INFINITY).is_nan());

        assert_eq!(f_k1e(f64::INFINITY), 0.);
    }
}

240
vendor/pxfm/src/bessel/k1ef.rs vendored Normal file
View File

@@ -0,0 +1,240 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::logs::fast_logf;
use crate::polyeval::{f_estrin_polyeval8, f_polyeval3, f_polyeval4};
/// Modified exponentially scaled Bessel of the second kind of order 1
///
/// Computes K1(x)exp(x)
///
/// Input handling:
/// * `+0` and `-0` return `+inf`;
/// * `+inf` returns `0`, `-inf` returns NaN;
/// * NaN and every finite negative input return NaN;
/// * `x > 1` is evaluated by `k1ef_asympt`;
/// * `f32::EPSILON < x <= 1` is evaluated by `k1ef_small`;
/// * `x <= f32::EPSILON` uses a short series around zero.
///
/// Max ULP 0.5
pub fn f_k1ef(x: f32) -> f32 {
    let bits = x.to_bits();

    // `bits == 0` is +0; `bits >= 0xff << 23` covers +inf, NaN and every
    // pattern with the sign bit set (negative values, -0, -inf).
    if bits == 0 || bits >= 0xffu32 << 23 {
        return if bits.wrapping_shl(1) == 0 {
            // Shifting out the sign bit leaves nothing: x is +0 or -0.
            f32::INFINITY
        } else if x.is_infinite() {
            if x.is_sign_positive() {
                0.
            } else {
                f32::NAN
            }
        } else {
            // NaN input or a finite negative argument.
            x + f32::NAN
        };
    }

    if bits > 0x3f800000u32 {
        // x > 1.0
        return k1ef_asympt(x);
    }
    if bits > 0x34000000u32 {
        // f32::EPSILON < x <= 1.0
        return k1ef_small(x);
    }

    // x <= f32::EPSILON: K1(x)exp(x) ~ 1/x + 1 + O(x)
    let xd = x as f64;
    let leading = 1. / xd + 1.;
    if bits <= 0x3109705fu32 {
        // x <= 2e-9: the two leading terms are used on their own.
        return leading as f32;
    }

    // Next order:
    // K1(x)exp(x) ~ 1/x+1+1/4 (1+2 EulerGamma-2 Log[2]+2 Log[x]) x + O(x^3)
    //
    // NOTE(review): the constant is documented as 1 + 2 EulerGamma - 2 Log[2]
    // (about 0.7681), but the stored bit pattern decodes to about -1.8456,
    // which is numerically 2 EulerGamma - 3. Confirm which value is intended.
    const C: f64 = f64::from_bits(0xbffd8773039049e8);
    let bracket = f_fmla(fast_logf(x), 2., C);
    f_fmla(xd * 0.25, bracket, leading) as f32
}
/**
Computes
I1(x) = x/2 * (1 + 1/2 * (x/2)^2 + (x/2)^4 * P((x/2)^2))
in `f64`, where `P` is a degree 3/2 rational approximant.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},3,2},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1f_small(x: f32) -> f64 {
    let half = (x as f64) * 0.5;
    // t = (x/2)^2, t_sq = (x/2)^4
    let t = half * half;
    let t_sq = t * t;

    let numerator = f_polyeval4(
        t,
        f64::from_bits(0x3fb5555555555355),
        f64::from_bits(0x3f6ebf07f0dbc49b),
        f64::from_bits(0x3f1fdc02bf28a8d9),
        f64::from_bits(0x3ebb5e7574c700a6),
    );
    let denominator = f_polyeval3(
        t,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfa39b64b6135b5a),
        f64::from_bits(0x3f3fa729bbe951f9),
    );
    let ratio = numerator / denominator;

    // 1 + t/2, then add t^2 * P(t), then scale by x/2.
    let head = f_fmla(0.5, t, 1.);
    let series = f_fmla(t_sq, ratio, head);
    series * half
}
/**
Exponentially scaled K1 for small arguments: returns K1(x) * exp(x).

K1 is rebuilt from its decomposition
K1(x) = 1/x + Log(x) * I1(x) + x * R(x^2)
where R is the (3,3) rational approximation of
f(x) := (BesselK(1, x) - Log(x)*BesselI(1, x) - 1/x) / x

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselK[1, x]-Log[x]BesselI[1,x]-1/x)/x
g[z_]:=f[Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},3,3},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k1ef_small(x: f32) -> f32 {
    let xd = x as f64;
    let inv_x = 1. / xd;
    let xd_sqr = xd * xd;
    let numerator = f_polyeval4(
        xd_sqr,
        f64::from_bits(0xbfd3b5b6028a83d6),
        f64::from_bits(0xbfb3fde2c83f7cca),
        f64::from_bits(0xbf662b2e5defbe8c),
        f64::from_bits(0xbefa2a63cc5c4feb),
    );
    let denominator = f_polyeval4(
        xd_sqr,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbf9833197207a7c6),
        f64::from_bits(0x3f315663bc7330ef),
        f64::from_bits(0xbeb9211958f6b8c3),
    );
    let remainder = numerator / denominator;
    let scale = core_expf(x);
    let log_x = fast_logf(x);
    let i1 = i1f_small(x);
    // 1/x + log(x) * I1(x)
    let singular_part = f_fmla(log_x, i1, inv_x);
    // ... + x * R(x^2)
    let k1 = f_fmla(remainder, xd, singular_part);
    (k1 * scale) as f32
}
/**
Exponentially scaled K1 for arguments above 1: returns K1(x) * exp(x).

A (7,7) rational approximation in z = 1/x of Sqrt[x] Exp[x] BesselK[1,x]
is evaluated and then multiplied by the value of `j1f_rsqrt(x)`
(presumably 1/sqrt(x); the helper is defined outside this file).

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},7,7},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k1ef_asympt(x: f32) -> f32 {
    let xd = x as f64;
    let inv_x = 1. / xd;
    let inv_sqrt_x = j1f_rsqrt(xd);
    let numerator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff40d931ff6270d),
        f64::from_bits(0x402d250670ed7a6c),
        f64::from_bits(0x404e517b9b494d38),
        f64::from_bits(0x405cb02b7433a838),
        f64::from_bits(0x405a03e606a1b871),
        f64::from_bits(0x4045c98d4308dbcd),
        f64::from_bits(0x401d115c4ce0540c),
        f64::from_bits(0x3fd4213e72b24b3a),
    );
    let denominator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x402681096aa3a87d),
        f64::from_bits(0x404623ab8d72ceea),
        f64::from_bits(0x40530af06ea802b2),
        f64::from_bits(0x404d526906fb9cec),
        f64::from_bits(0x403281caca389f1b),
        f64::from_bits(0x3ffdb93996948bb4),
        f64::from_bits(0x3f9a009da07eb989),
    );
    let ratio = numerator / denominator;
    (ratio * inv_sqrt_x) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_k1ef` (exponentially scaled K1) on every evaluation path
    /// and on the special inputs.
    #[test]
    fn test_k1ef() {
        // Tiny arguments handled by the Taylor shortcuts.
        assert_eq!(f_k1ef(0.00000000005423), 18439980000.0);
        assert_eq!(f_k1ef(0.0000000043123), 231894820.0);
        // Small-argument rational path (x <= 1).
        assert_eq!(f_k1ef(0.3), 4.125158);
        // Asymptotic path (x > 1).
        assert_eq!(f_k1ef(1.89), 1.0710458);
        assert_eq!(f_k1ef(5.89), 0.5477655);
        assert_eq!(f_k1ef(101.89), 0.12461915);
        // Zeros of either sign give +inf.
        assert_eq!(f_k1ef(0.), f32::INFINITY);
        assert_eq!(f_k1ef(-0.), f32::INFINITY);
        // Negative inputs are outside the domain.
        assert!(f_k1ef(-0.5).is_nan());
        assert!(f_k1ef(f32::NEG_INFINITY).is_nan());
        assert_eq!(f_k1ef(f32::INFINITY), 0.);
    }
}

240
vendor/pxfm/src/bessel/k1f.rs vendored Normal file
View File

@@ -0,0 +1,240 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::logs::fast_logf;
use crate::polyeval::{f_estrin_polyeval8, f_polyeval3, f_polyeval4};
/// Modified Bessel function of the second kind of order 1, K1(x).
///
/// Returns `+inf` for `x == ±0`, `0` for `x == +inf` and NaN for NaN or
/// negative inputs.
///
/// Max ULP 0.5
pub fn f_k1f(x: f32) -> f32 {
    let bits = x.to_bits();
    // Anything that is not a positive, finite, non-zero value: +0, every
    // input with the sign bit set, +inf and NaN.
    if bits == 0 || bits >= 0xffu32 << 23 {
        if bits.wrapping_shl(1) == 0 {
            // x == +0 or x == -0
            return f32::INFINITY;
        }
        if x.is_infinite() {
            return if x.is_sign_negative() { f32::NAN } else { 0. };
        }
        // NaN stays NaN, negative finite values become NaN.
        return x + f32::NAN;
    }
    if bits >= 0x42cbc779u32 {
        // x >= 101.889595: the result is returned as zero
        return 0.;
    }
    if bits > 0x3f800000u32 {
        // x > 1.0
        return k1f_asympt(x);
    }
    if bits > 0x34000000u32 {
        // f32::EPSILON < x <= 1.0
        return k1f_small(x);
    }
    // x <= f32::EPSILON
    let xd = x as f64;
    let inv_x = 1. / xd;
    if bits <= 0x3109705fu32 {
        // x <= 2e-9
        // taylor series for tiny K1(x) ~ 1/x + O(x)
        return inv_x as f32;
    }
    // taylor series for small K1(x) ~ 1/x+1/4 (-1+2 EulerGamma-2 Log[2]+2 Log[x]) x + O(x^3)
    const C: f64 = f64::from_bits(0xbff3b5b6028a83d7); // -1+2 EulerGamma-2 Log[2]
    let bracket = f_fmla(fast_logf(x), 2., C);
    f_fmla(xd * 0.25, bracket, inv_x) as f32
}
/**
Modified Bessel function of the first kind of order 1 for small arguments,
evaluated in double precision as
I1(x) = x/2 * (1 + 1/2 * (x/2)^2 + (x/2)^4 * P((x/2)^2))
where P is the (3,2) rational approximation produced by the script below
(fitted for (x/2)^2 in (1e-9, 1)).

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},3,2},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1f_small(x: f32) -> f64 {
    let half_x = (x as f64) * 0.5;
    // t = (x/2)^2 is the variable of the rational approximation.
    let t = half_x * half_x;
    let t_sqr = t * t;
    let numerator = f_polyeval4(
        t,
        f64::from_bits(0x3fb5555555555355),
        f64::from_bits(0x3f6ebf07f0dbc49b),
        f64::from_bits(0x3f1fdc02bf28a8d9),
        f64::from_bits(0x3ebb5e7574c700a6),
    );
    let denominator = f_polyeval3(
        t,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfa39b64b6135b5a),
        f64::from_bits(0x3f3fa729bbe951f9),
    );
    let tail = numerator / denominator;
    // Leading two series terms: 1 + (x/2)^2 / 2.
    let head = f_fmla(0.5, t, 1.);
    f_fmla(t_sqr, tail, head) * half_x
}
/**
K1(x) for small arguments, rebuilt from the decomposition
K1(x) = 1/x + Log(x) * I1(x) + x * R(x^2)
where R is the (3,3) rational approximation of
f(x) := (BesselK(1, x) - Log(x)*BesselI(1, x) - 1/x) / x

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselK[1, x]-Log[x]BesselI[1,x]-1/x)/x
g[z_]:=f[Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},3,3},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k1f_small(x: f32) -> f32 {
    let xd = x as f64;
    let inv_x = 1. / xd;
    let xd_sqr = xd * xd;
    let numerator = f_polyeval4(
        xd_sqr,
        f64::from_bits(0xbfd3b5b6028a83d6),
        f64::from_bits(0xbfb3fde2c83f7cca),
        f64::from_bits(0xbf662b2e5defbe8c),
        f64::from_bits(0xbefa2a63cc5c4feb),
    );
    let denominator = f_polyeval4(
        xd_sqr,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbf9833197207a7c6),
        f64::from_bits(0x3f315663bc7330ef),
        f64::from_bits(0xbeb9211958f6b8c3),
    );
    let remainder = numerator / denominator;
    let log_x = fast_logf(x);
    let i1 = i1f_small(x);
    // 1/x + log(x) * I1(x)
    let singular_part = f_fmla(log_x, i1, inv_x);
    // ... + x * R(x^2)
    f_fmla(remainder, xd, singular_part) as f32
}
/**
K1(x) for arguments above 1.

A (7,7) rational approximation in z = 1/x of Sqrt[x] Exp[x] BesselK[1,x]
is evaluated and then divided by `core_expf(x) * sqrt(x)`.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},7,7},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k1f_asympt(x: f32) -> f32 {
    let xd = x as f64;
    let inv_x = 1. / xd;
    let exp_x = core_expf(x);
    let sqrt_x = xd.sqrt();
    let numerator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff40d931ff6270d),
        f64::from_bits(0x402d250670ed7a6c),
        f64::from_bits(0x404e517b9b494d38),
        f64::from_bits(0x405cb02b7433a838),
        f64::from_bits(0x405a03e606a1b871),
        f64::from_bits(0x4045c98d4308dbcd),
        f64::from_bits(0x401d115c4ce0540c),
        f64::from_bits(0x3fd4213e72b24b3a),
    );
    let denominator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x402681096aa3a87d),
        f64::from_bits(0x404623ab8d72ceea),
        f64::from_bits(0x40530af06ea802b2),
        f64::from_bits(0x404d526906fb9cec),
        f64::from_bits(0x403281caca389f1b),
        f64::from_bits(0x3ffdb93996948bb4),
        f64::from_bits(0x3f9a009da07eb989),
    );
    let ratio = numerator / denominator;
    // Undo the Sqrt[x] Exp[x] scaling of the approximated function.
    (ratio / (exp_x * sqrt_x)) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_k1f` on regular arguments and on the special inputs.
    #[test]
    fn test_k1f() {
        // (argument, expected K1) for positive finite inputs.
        let regular: [(f32, f32); 4] = [
            (0.3, 3.055992),
            (1.89, 0.16180483),
            (5.89, 0.0015156545),
            (101.89, 0.),
        ];
        for (arg, expected) in regular {
            assert_eq!(f_k1f(arg), expected);
        }
        // Zeros of either sign give +inf.
        for zero in [0.0f32, -0.0f32] {
            assert_eq!(f_k1f(zero), f32::INFINITY);
        }
        // Negative inputs are outside the domain.
        for negative in [-0.5f32, f32::NEG_INFINITY] {
            assert!(f_k1f(negative).is_nan());
        }
        assert_eq!(f_k1f(f32::INFINITY), 0.);
    }
}

262
vendor/pxfm/src/bessel/k2f.rs vendored Normal file
View File

@@ -0,0 +1,262 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::logs::fast_logf;
use crate::polyeval::{f_estrin_polyeval5, f_estrin_polyeval8, f_polyeval4, f_polyeval11};
/// Modified Bessel function of the second kind of order 2, K2(x).
///
/// Returns `+inf` for `x == ±0`, `0` for `x == +inf` and NaN for NaN or
/// negative inputs.
///
/// ulp 0.5
pub fn f_k2f(x: f32) -> f32 {
    let bits = x.to_bits();
    // Anything that is not a positive, finite, non-zero value: +0, every
    // input with the sign bit set, +inf and NaN.
    if bits == 0 || bits >= 0xffu32 << 23 {
        if bits.wrapping_shl(1) == 0 {
            // |x| == 0
            return f32::INFINITY;
        }
        if x.is_infinite() {
            return if x.is_sign_negative() { f32::NAN } else { 0. };
        }
        // NaN stays NaN, negative finite values become NaN.
        return x + f32::NAN;
    }
    if bits <= 0x34000000u32 {
        // x <= f32::EPSILON: only the leading 2/x^2 term is kept
        let xd = x as f64;
        (2. / (xd * xd)) as f32
    } else if bits <= 0x3e9eb852u32 {
        // x <= 0.31
        k2f_tiny(x)
    } else if bits <= 0x3f800000u32 {
        // x <= 1.0
        k2f_small(x)
    } else if bits < 0x42cbceefu32 {
        // 1.0 < x < 101.90417
        k2f_asympt(x)
    } else {
        // x >= 101.90417: the result is returned as zero
        0.
    }
}
/// K2(x) from its power series at zero; the caller `f_k2f` uses it for
/// f32::EPSILON < x <= 0.31.
#[inline]
fn k2f_tiny(x: f32) -> f32 {
    // Power series at zero for K2
    // 2.0000000000000000/x^2-0.50000000000000000-0.12500000000000000 (-0.86593151565841245+1.0000000000000000 Log[x]) x^2-0.010416666666666667 (-1.5325981823250791+1.0000000000000000 Log[x]) x^4-0.00032552083333333333 (-1.9075981823250791+1.0000000000000000 Log[x]) x^6-0.0000054253472222222222 (-2.1742648489917458+1.0000000000000000 Log[x]) x^8+O[x]^9
    // regrouped as
    // -0.5 + 2/x^2 + c2 * x^2 + c4 * x^4 + c6 * x^6 + c8 * x^8
    // where every coefficient c_k depends on Log[x].
    let xd = x as f64;
    let ln_x = fast_logf(x);
    let c2 = f_fmla(-4.0, ln_x, 3.4637260626336498) * 0.03125;
    let c4 = f_fmla(-12.0, ln_x, 18.391178187900949) * 0.00086805555555555556;
    let c6 = f_fmla(-24.0, ln_x, 45.782356375801899) * 0.000013563368055555556;
    let c8 = (ln_x - 2.1742648489917458) * (-0.0000054253472222222222);
    let xd_sqr = xd * xd;
    // Dominant pole term 2/x^2.
    let pole = 2. / xd_sqr;
    let series = f_polyeval4(xd_sqr, c2, c4, c6, c8);
    (f_fmla(series, xd_sqr, pole) - 0.5) as f32
}
/**
Modified Bessel function of the first kind of order 2 for small arguments,
evaluated in double precision as
I2(x) = x^2 * R(x^2)
where R is the (4,4) rational approximation produced by the script below
(fitted for x^2 in (0.3, 1)).

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselI[2,x]/x^2
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.3,1},4,4},WorkingPrecision->75]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i2f_small(x: f32) -> f64 {
    let xd = x as f64;
    let xd_sqr = xd * xd;
    let numerator = f_estrin_polyeval5(
        xd_sqr,
        f64::from_bits(0x3fc0000000000000),
        f64::from_bits(0x3f81520c0669099e),
        f64::from_bits(0x3f27310bf5c5e9b0),
        f64::from_bits(0x3eb8e2947e0a6098),
        f64::from_bits(0x3e336dfad46e2f35),
    );
    let denominator = f_estrin_polyeval5(
        xd_sqr,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbf900d253bb12edc),
        f64::from_bits(0x3f1ed3d9ab228297),
        f64::from_bits(0xbea14e6660c00303),
        f64::from_bits(0x3e13eb951a6cf38f),
    );
    let ratio = numerator / denominator;
    ratio * xd_sqr
}
/**
K2(x) for the range the caller `f_k2f` routes here (0.31 < x <= 1.0),
rebuilt from the decomposition
K2(x) = 2/x^2 + Log(x) * I2(x) + (1 + x^2) * R(x^2)
where R is the (10,10) rational approximation of
R(x^2) := (BesselK(2, x) - Log(x)*BesselI(2, x) - 2/x^2)/(1+x^2)

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselK[2,x]-Log[x]BesselI[2,x]-2/(x^2))/(1+x^2)
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.3,1.0},10,10},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k2f_small(x: f32) -> f32 {
    let xd = x as f64;
    let xd_sqr = xd * xd;
    let numerator = f_polyeval11(
        xd_sqr,
        f64::from_bits(0xbfdff794c9ee3b5c),
        f64::from_bits(0xc047d3276f18e5d2),
        f64::from_bits(0xc09200ed3702875a),
        f64::from_bits(0xc0c39f395c47be27),
        f64::from_bits(0xc0e0ec95bd1a3192),
        f64::from_bits(0xc0e5973cb871c8d0),
        f64::from_bits(0xc0cdaf528de00d53),
        f64::from_bits(0xc0afe6d3009de17c),
        f64::from_bits(0xc098417b22844112),
        f64::from_bits(0x4025c45260bb1b6a),
        f64::from_bits(0x402f2bf6b95ffe0c),
    );
    let denominator = f_polyeval11(
        xd_sqr,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x405879a43b253224),
        f64::from_bits(0x40a3a501408a0198),
        f64::from_bits(0x40d8172abc4a8ccc),
        f64::from_bits(0x40f9fcb05e98bdbd),
        f64::from_bits(0x4109c45b54be586b),
        f64::from_bits(0x4106ad7023dd0b90),
        f64::from_bits(0x40ed7e988d2ba5a9),
        f64::from_bits(0x40966305e1c1123a),
        f64::from_bits(0xc090832b6a87317c),
        f64::from_bits(0x403b48eb703f4644),
    );
    let remainder = numerator / denominator;
    let pole = 2. / xd_sqr;
    let log_x = fast_logf(x);
    let i2 = i2f_small(x);
    // 2/x^2 + log(x) * I2(x)
    let singular_part = f_fmla(log_x, i2, pole);
    // 1 + x^2, formed with a single rounding.
    let weight = f_fmla(xd, xd, 1.);
    f_fmla(remainder, weight, singular_part) as f32
}
/**
K2(x) for arguments above 1.

A (7,7) rational approximation in z = 1/x of Sqrt[x] Exp[x] BesselK[2,x]
is evaluated and then divided by `core_expf(x) * sqrt(x)`.

NOTE(review): the generating script previously recorded here used
`BesselK[1,x]`. The coefficients below expand to
sqrt(pi/2) * (1 + 15/(8 x) + ...), which is the order-2 expansion, so the
script is recorded with `BesselK[2,x]`; the fit interval and degrees are
kept as they were written and should be confirmed against the original
worksheet.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[2,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},7,7},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k2f_asympt(x: f32) -> f32 {
    let dx = x as f64;
    let recip = 1. / dx;
    let e = core_expf(x);
    let sqrt_x = dx.sqrt();
    let p_num = f_estrin_polyeval8(
        recip,
        f64::from_bits(0x3ff40d931ff626f2),
        f64::from_bits(0x402d954dceb445df),
        f64::from_bits(0x405084ea6680d028),
        f64::from_bits(0x406242344a8ea488),
        f64::from_bits(0x406594aa56f50fea),
        f64::from_bits(0x405aa04eb4f0af1c),
        f64::from_bits(0x403dd3e8e63849ef),
        f64::from_bits(0x4004e85453648d43),
    );
    let p_den = f_estrin_polyeval8(
        recip,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x4023da9f4e05358e),
        f64::from_bits(0x4040a4e4ceb523c9),
        f64::from_bits(0x404725c423c9f990),
        f64::from_bits(0x403a60c00deededc),
        f64::from_bits(0x40149975b84c3946),
        f64::from_bits(0x3fc69439846db871),
        f64::from_bits(0xbf6400819bac6f45),
    );
    let v = p_num / p_den;
    // Undo the Sqrt[x] Exp[x] scaling of the approximated function.
    let pp = v / (e * sqrt_x);
    pp as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_k2f` on invalid inputs, at zero and on two regular arguments.
    #[test]
    fn test_k2f() {
        // Negative and NaN inputs produce NaN.
        for invalid in [-1.0f32, f32::NAN] {
            assert!(f_k2f(invalid).is_nan());
        }
        assert_eq!(f_k2f(0.), f32::INFINITY);
        // (argument, expected K2): one value below 1.0 and one above it.
        let regular: [(f32, f32); 2] = [(0.65, 4.3059196), (1.65, 0.44830766)];
        for (arg, expected) in regular {
            assert_eq!(f_k2f(arg), expected);
        }
    }
}

107
vendor/pxfm/src/bessel/mod.rs vendored Normal file
View File

@@ -0,0 +1,107 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![deny(unreachable_pub)]
mod alpha0;
mod alpha1;
mod bessel_exp;
mod beta0;
mod beta1;
mod i0;
mod i0e;
mod i0ef;
mod i0f;
mod i1;
mod i1e;
mod i1ef;
mod i1f;
mod i2;
mod i2f;
mod j0;
mod j0_coeffs_remez;
mod j0_coeffs_taylor;
mod j0f;
mod j0f_coeffs;
mod j1;
mod j1_coeffs;
mod j1_coeffs_taylor;
mod j1f;
mod j1f_coeffs;
mod jincpi;
mod jincpif;
mod k0;
mod k0e;
mod k0ef;
mod k0f;
mod k1;
mod k1e;
mod k1ef;
mod k1f;
mod k2f;
mod trigo_bessel;
mod y0;
mod y0_coeffs;
mod y0_coeffs_taylor;
mod y0f;
mod y0f_coeffs;
mod y1;
mod y1_coeffs;
mod y1_coeffs_taylor;
mod y1f;
mod y1f_coeffs;
pub(crate) use bessel_exp::i0_exp;
pub use i0::f_i0;
pub use i0e::f_i0e;
pub use i0ef::f_i0ef;
pub use i0f::f_i0f;
pub use i1::f_i1;
pub use i1e::f_i1e;
pub use i1ef::f_i1ef;
pub use i1f::f_i1f;
pub use i2::f_i2;
pub use i2f::f_i2f;
pub use j0::f_j0;
pub use j0f::f_j0f;
pub use j1::f_j1;
pub use j1f::f_j1f;
pub use jincpi::f_jincpi;
pub use jincpif::f_jincpif;
pub use k0::f_k0;
pub use k0e::f_k0e;
pub use k0ef::f_k0ef;
pub use k0f::f_k0f;
pub use k1::f_k1;
pub use k1e::f_k1e;
pub use k1ef::f_k1ef;
pub use k1f::f_k1f;
pub use k2f::f_k2f;
pub use y0::f_y0;
pub use y0f::f_y0f;
pub use y1::f_y1;
pub use y1f::f_y1f;

285
vendor/pxfm/src/bessel/trigo_bessel.rs vendored Normal file
View File

@@ -0,0 +1,285 @@
/*
* // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{f_fmla, min_normal_f64};
use crate::round::RoundFinite;
/// Table of `sin(k * pi / 64)` for `k = 0..128` (one full period), stored as
/// IEEE-754 binary64 bit patterns; read an entry with `f64::from_bits`.
///
/// `sin_small` and `cos_small` index it modulo 128 and obtain cosines by
/// offsetting the index by 32 entries (a quarter period).
// Generated by SageMath:
// print("[")
// for k in range(128):
// k = RealField(150)(k) * RealField(150).pi() / RealField(150)(64)
// print(double_to_hex(k.sin()) + ",")
// print("];")
pub(crate) static SIN_K_PI_OVER_64: [u64; 128] = [
0x0000000000000000,
0x3fa91f65f10dd814,
0x3fb917a6bc29b42c,
0x3fc2c8106e8e613a,
0x3fc8f8b83c69a60b,
0x3fcf19f97b215f1b,
0x3fd294062ed59f06,
0x3fd58f9a75ab1fdd,
0x3fd87de2a6aea963,
0x3fdb5d1009e15cc0,
0x3fde2b5d3806f63b,
0x3fe073879922ffee,
0x3fe1c73b39ae68c8,
0x3fe30ff7fce17035,
0x3fe44cf325091dd6,
0x3fe57d69348ceca0,
0x3fe6a09e667f3bcd,
0x3fe7b5df226aafaf,
0x3fe8bc806b151741,
0x3fe9b3e047f38741,
0x3fea9b66290ea1a3,
0x3feb728345196e3e,
0x3fec38b2f180bdb1,
0x3feced7af43cc773,
0x3fed906bcf328d46,
0x3fee212104f686e5,
0x3fee9f4156c62dda,
0x3fef0a7efb9230d7,
0x3fef6297cff75cb0,
0x3fefa7557f08a517,
0x3fefd88da3d12526,
0x3feff621e3796d7e,
0x3ff0000000000000,
0x3feff621e3796d7e,
0x3fefd88da3d12526,
0x3fefa7557f08a517,
0x3fef6297cff75cb0,
0x3fef0a7efb9230d7,
0x3fee9f4156c62dda,
0x3fee212104f686e5,
0x3fed906bcf328d46,
0x3feced7af43cc773,
0x3fec38b2f180bdb1,
0x3feb728345196e3e,
0x3fea9b66290ea1a3,
0x3fe9b3e047f38741,
0x3fe8bc806b151741,
0x3fe7b5df226aafaf,
0x3fe6a09e667f3bcd,
0x3fe57d69348ceca0,
0x3fe44cf325091dd6,
0x3fe30ff7fce17035,
0x3fe1c73b39ae68c8,
0x3fe073879922ffee,
0x3fde2b5d3806f63b,
0x3fdb5d1009e15cc0,
0x3fd87de2a6aea963,
0x3fd58f9a75ab1fdd,
0x3fd294062ed59f06,
0x3fcf19f97b215f1b,
0x3fc8f8b83c69a60b,
0x3fc2c8106e8e613a,
0x3fb917a6bc29b42c,
0x3fa91f65f10dd814,
// k = 64: a tiny negative value (about -2^-149) instead of an exact zero.
// NOTE(review): presumably an artifact of evaluating sin at the 150-bit
// rounding of pi in the generator above; confirm that this is intended.
0xb69f77598338bfdf,
0xbfa91f65f10dd814,
0xbfb917a6bc29b42c,
0xbfc2c8106e8e613a,
0xbfc8f8b83c69a60b,
0xbfcf19f97b215f1b,
0xbfd294062ed59f06,
0xbfd58f9a75ab1fdd,
0xbfd87de2a6aea963,
0xbfdb5d1009e15cc0,
0xbfde2b5d3806f63b,
0xbfe073879922ffee,
0xbfe1c73b39ae68c8,
0xbfe30ff7fce17035,
0xbfe44cf325091dd6,
0xbfe57d69348ceca0,
0xbfe6a09e667f3bcd,
0xbfe7b5df226aafaf,
0xbfe8bc806b151741,
0xbfe9b3e047f38741,
0xbfea9b66290ea1a3,
0xbfeb728345196e3e,
0xbfec38b2f180bdb1,
0xbfeced7af43cc773,
0xbfed906bcf328d46,
0xbfee212104f686e5,
0xbfee9f4156c62dda,
0xbfef0a7efb9230d7,
0xbfef6297cff75cb0,
0xbfefa7557f08a517,
0xbfefd88da3d12526,
0xbfeff621e3796d7e,
0xbff0000000000000,
0xbfeff621e3796d7e,
0xbfefd88da3d12526,
0xbfefa7557f08a517,
0xbfef6297cff75cb0,
0xbfef0a7efb9230d7,
0xbfee9f4156c62dda,
0xbfee212104f686e5,
0xbfed906bcf328d46,
0xbfeced7af43cc773,
0xbfec38b2f180bdb1,
0xbfeb728345196e3e,
0xbfea9b66290ea1a3,
0xbfe9b3e047f38741,
0xbfe8bc806b151741,
0xbfe7b5df226aafaf,
0xbfe6a09e667f3bcd,
0xbfe57d69348ceca0,
0xbfe44cf325091dd6,
0xbfe30ff7fce17035,
0xbfe1c73b39ae68c8,
0xbfe073879922ffee,
0xbfde2b5d3806f63b,
0xbfdb5d1009e15cc0,
0xbfd87de2a6aea963,
0xbfd58f9a75ab1fdd,
0xbfd294062ed59f06,
0xbfcf19f97b215f1b,
0xbfc8f8b83c69a60b,
0xbfc2c8106e8e613a,
0xbfb917a6bc29b42c,
0xbfa91f65f10dd814,
];
/// Reduces `x` modulo pi/64.
///
/// Returns `(y, k)` where `k` is `x * 64/pi` rounded by `round_finite` and
/// `y` approximates `x - k * pi/64`. Callers use only the low bits of `k`
/// to index `SIN_K_PI_OVER_64`.
///
/// The integer conversion uses a saturating `as` cast, so a NaN, infinite or
/// very large `x` yields a meaningless but well-defined `k` instead of
/// undefined behaviour.
#[inline]
pub(crate) fn reduce_small_pi64(x: f64) -> (f64, i64) {
    // -(pi/64) split into three parts of decreasing magnitude. The leading
    // parts carry trailing zero bits so that `kd * part` needs no rounding
    // for the values of `kd` used here.
    // Generated in SageMath:
    // z = -RealField(300).pi() / RealField(300)(64)
    // n = 32
    // x_hi = RealField(n)(z) # convert to f64
    // x_mid = RealField(n)(z - RealField(300)(x_hi))
    // x_lo = RealField(n)(z - RealField(300)(x_hi) - RealField(300)(x_mid))
    // print(double_to_hex(x_hi), ",")
    // print(double_to_hex(x_mid), ",")
    // print(double_to_hex(x_lo), ",")
    const MPI_OVER_SIXTY_FOUR: [u64; 3] =
        [0xbfa921fb54400000, 0xbd80b4611a600000, 0xbb53198a2e037073];
    // 64/pi ~ 20.3718
    const SIXTY_FOUR_OVER_PI: f64 = f64::from_bits(0x40345f306dc9c883);
    let prod_hi = x * SIXTY_FOUR_OVER_PI;
    let kd = prod_hi.round_finite();
    // Let y = x - k * (pi/64)
    // Then |y| < pi / 64
    // With extra rounding errors, we can bound |y| < 1.6 * 2^-7.
    let y_hi = f_fmla(kd, f64::from_bits(MPI_OVER_SIXTY_FOUR[0]), x); // Exact
    // |u.hi| < 1.6*2^-7
    let u_hi = f_fmla(kd, f64::from_bits(MPI_OVER_SIXTY_FOUR[1]), y_hi);
    // `as` saturates on out-of-range values and maps NaN to 0; for every
    // in-range `kd` it gives the same integer as the unchecked conversion.
    (u_hi, kd as i64)
}
/// Pair of polynomial approximations returned by `sincos_eval_pi64` for a
/// reduced argument.
struct SinCosPi64 {
// Approximation of sin of the reduced argument.
v_sin: f64,
// Approximation of cos of the reduced argument.
v_cos: f64,
}
/// Evaluates polynomial approximations of sin(x) and cos(x) for a reduced
/// argument; both polynomials were fitted on [0, pi/64].
#[inline]
fn sincos_eval_pi64(x: f64) -> SinCosPi64 {
    // Sin poly generated by Sollya:
    // d = [0, pi/64];
    // f_sin = sin(x)/x;
    // Q = fpminimax(f_sin, [|0, 2, 4, 6|], [|D...|], d);
    const S: [u64; 4] = [
        0x3ff0000000000000,
        0xbfc5555555555451,
        0x3f8111111072c563,
        0xbf2a01321c030841,
    ];
    // Cos poly generated by Sollya:
    // d = [0, pi/64];
    // f_cos = cos(x);
    // Q = fpminimax(f_cos, [|0, 2, 4, 6|], [|1, D...|], d);
    const C: [u64; 4] = [
        0x3ff0000000000000,
        0xbfdffffffffffb6c,
        0x3fa5555553f117c1,
        0xbf56c0f056672a03,
    ];
    let x_sqr = x * x;
    let x_p4 = x_sqr * x_sqr;
    // Each degree-6 even polynomial is split into two halves joined through x^4.
    let sin_low = f_fmla(x_sqr, f64::from_bits(S[1]), f64::from_bits(S[0]));
    let sin_high = f_fmla(x_sqr, f64::from_bits(S[3]), f64::from_bits(S[2]));
    let cos_low = f_fmla(x_sqr, f64::from_bits(C[1]), f64::from_bits(C[0]));
    let cos_high = f_fmla(x_sqr, f64::from_bits(C[3]), f64::from_bits(C[2]));
    SinCosPi64 {
        // The sine polynomial approximates sin(x)/x, hence the final product with x.
        v_sin: f_fmla(x_p4, sin_high, sin_low) * x,
        v_cos: f_fmla(x_p4, cos_high, cos_low),
    }
}
/// sin(z) through reduction modulo pi/64 and the angle-sum identity
/// sin(y + k*pi/64) = cos(y) * sin(k*pi/64) + sin(y) * cos(k*pi/64).
#[inline]
pub(crate) fn sin_small(z: f64) -> f64 {
    // Exponent bias of binary64.
    const E_BIAS: u64 = 1023;
    let biased_exp = (z.to_bits() >> 52) & 0x7ff;
    if biased_exp < E_BIAS - 26 {
        // |z| < 2^-26: return z * (1 - 2^-54), i.e. z nudged towards zero.
        return f_fmla(z, f64::from_bits(0xbc90000000000000), z);
    }
    let (reduced, k) = reduce_small_pi64(z);
    let approx = sincos_eval_pi64(reduced);
    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
    let sin_index = (k as u64) & 127;
    let cos_index = (k as u64).wrapping_add(32) & 127;
    let sin_k = f64::from_bits(SIN_K_PI_OVER_64[sin_index as usize]);
    let cos_k = f64::from_bits(SIN_K_PI_OVER_64[cos_index as usize]);
    f_fmla(approx.v_cos, sin_k, approx.v_sin * cos_k)
}
/// cos(z) through reduction modulo pi/64 and the angle-sum identity
/// cos(y + k*pi/64) = cos(y) * cos(k*pi/64) - sin(y) * sin(k*pi/64).
#[inline]
pub(crate) fn cos_small(z: f64) -> f64 {
    // Exponent bias of binary64.
    const E_BIAS: u64 = 1023;
    let biased_exp = (z.to_bits() >> 52) & 0x7ff;
    if biased_exp < E_BIAS - 27 {
        // |z| < 2^-27.
        // Signed zeros.
        if z == 0.0 {
            return 1.0;
        }
        // cos(z) lies just below 1 here; 1 minus a tiny quantity is returned
        // (`min_normal_f64` is defined elsewhere, presumably the smallest
        // positive normal f64).
        return 1.0 - min_normal_f64();
    }
    let (reduced, k) = reduce_small_pi64(z);
    let approx = sincos_eval_pi64(reduced);
    // Shifting the index by 64 entries (half a period) negates the sine:
    // sin((k + 64) * pi/64) = -sin(k * pi/64), which supplies the minus sign
    // of the identity.
    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
    let neg_sin_index = (k as u64).wrapping_add(64) & 127;
    let cos_index = (k as u64).wrapping_add(32) & 127;
    let neg_sin_k = f64::from_bits(SIN_K_PI_OVER_64[neg_sin_index as usize]);
    let cos_k = f64::from_bits(SIN_K_PI_OVER_64[cos_index as usize]);
    f_fmla(approx.v_cos, cos_k, approx.v_sin * neg_sin_k)
}

1017
vendor/pxfm/src/bessel/y0.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1517
vendor/pxfm/src/bessel/y0_coeffs.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1478
vendor/pxfm/src/bessel/y0_coeffs_taylor.rs vendored Normal file

File diff suppressed because it is too large Load Diff

336
vendor/pxfm/src/bessel/y0f.rs vendored Normal file
View File

@@ -0,0 +1,336 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::{j0f_asympt_alpha, j0f_asympt_beta, j1f_rsqrt};
use crate::bessel::trigo_bessel::sin_small;
use crate::bessel::y0f_coeffs::{Y0_ZEROS, Y0_ZEROS_VALUES, Y0F_COEFFS};
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::logs::fast_logf;
use crate::polyeval::{f_polyeval10, f_polyeval18};
use crate::sincos_reduce::rem2pif_any;
/// Bessel function of the second kind of order 0 (Y0)
///
/// Max ULP 0.5
///
/// Special values: `±0` gives `-inf`, `+inf` gives `0`, and NaN, `-inf` or any
/// negative input gives NaN.
pub fn f_y0f(x: f32) -> f32 {
    let bits = x.to_bits();

    // Everything that is not a strictly positive finite number is settled here.
    // Negative inputs have the sign bit set, so their bit pattern compares above
    // the +inf pattern and they land in this branch as well.
    if bits == 0 || bits >= 0xffu32 << 23 {
        if bits.wrapping_shl(1) == 0 {
            // x == +0 or x == -0
            return f32::NEG_INFINITY;
        }
        if x.is_infinite() {
            return if x.is_sign_negative() { f32::NAN } else { 0. };
        }
        // NaN input, or a finite negative input
        return x + f32::NAN;
    }

    // From here on x is finite and positive, so comparing bit patterns is the
    // same as comparing values.
    if bits <= 0x3faccccd {
        // x <= 1.35
        return y0f_near_zero(x);
    }
    if bits <= 0x40000000 {
        // 1.35 < x <= 2: the log-based expansion already behaves badly here and the
        // zero/extremum expansions do not cover it well yet, so this zone has its own poly
        return y0_transient_area(x);
    }
    if bits <= 0x4296999a {
        // 2 < x <= 75.3
        return y0f_small_argument_path(x);
    }

    // Inputs whose results are stored directly instead of going through the
    // asymptotic expansion.
    match bits {
        0x5023e87f => f32::from_bits(0x28085b2d),
        0x48171521 => f32::from_bits(0x2bd244ba),
        0x4398c299 => f32::from_bits(0x32c730db),
        0x7f0e5a38 => f32::from_bits(0x131f680b),
        0x6ef9be45 => f32::from_bits(0x987d8a8f),
        _ => y0f_asympt(x),
    }
}
/// Evaluates Y0 close to the origin (`f_y0f` routes `x <= 1.35` here) as
/// `log(x) * W0(x) - Z0(x)`, where `W0` and `Z0` are degree-9 polynomials in `x^2`.
///
/// Generated by SageMath. Evaluates:
/// ```text
/// Y0(x) = 2/pi*(euler_gamma + log(x/2))*J0(x) - sum((-1)^m*(x/2)^(2*m)/(m!)^2*sum(1+1/2 + ... 1/m))
/// ```
/// expressed as:
/// ```text
/// Y0(x)=log(x)*W0(x) - Z0(x)
/// ```
///
/// ```python
/// from sage.all import *
/// R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
/// x = R.gen()
/// N = 10  # Number of terms (adjust as needed)
/// gamma = RealField(300)(euler_gamma)
/// d2 = RealField(300)(2)
/// pi = RealField(300).pi()
/// # Define J0(x) Taylor expansion at x = 0
/// def j_series(n, x):
///     return sum([(-1)**m * (x/2)**(ZZ(n) + ZZ(2)*ZZ(m)) / (ZZ(m).factorial() * (ZZ(m) + ZZ(n)).factorial()) for m in range(N)])
/// J0_series = j_series(0, x)
/// def z_series(x):
///     return sum([(-1)**m * (x/2)**(ZZ(2)*ZZ(m)) / ZZ(m).factorial()**ZZ(2) * sum(RealField(300)(1)/RealField(300)(k) for k in range(1, m+1)) for m in range(1, N)])
/// W0 = (d2/pi) * J0_series
/// Z0 = -gamma * (d2/pi) * J0_series + RealField(300)(2).log() * (d2/pi) * J0_series + (d2/pi) * z_series(x)
/// # see the series
/// print(W0)
/// print(Z0)
/// ```
#[inline]
fn y0f_near_zero(x: f32) -> f32 {
    // Coefficients of W0, lowest power of x^2 first, stored as f64 bit patterns.
    const W: [u64; 10] = [
        0x3fe45f306dc9c883,
        0xbfc45f306dc9c883,
        0x3f845f306dc9c883,
        0xbf321bb945252402,
        0x3ed21bb945252402,
        0xbe672db9f21b0f5f,
        0x3df49a6c656d62ff,
        0xbd7ae90af76a4d0f,
        0x3cfae90af76a4d0f,
        0xbc754331c053fdad,
    ];
    // Coefficients of Z0, same layout as `W`.
    const Z: [u64; 10] = [
        0x3fb2e4d699cbd01f,
        0xbfc6bbcb41034286,
        0x3f9075b1bbf41364,
        0xbf41a6206b7b973d,
        0x3ee3e99794203bbd,
        0xbe7bce4a600d3ea4,
        0x3e0a6ee796b871b6,
        0xbd92393d82c6b2e4,
        0x3d131085da82054c,
        0xbc8f4ed4b492ebcc,
    ];

    // All intermediate work is carried out in f64.
    let xd = x as f64;
    let x_sqr = xd * xd;

    let w_poly = f_polyeval10(
        x_sqr,
        f64::from_bits(W[0]),
        f64::from_bits(W[1]),
        f64::from_bits(W[2]),
        f64::from_bits(W[3]),
        f64::from_bits(W[4]),
        f64::from_bits(W[5]),
        f64::from_bits(W[6]),
        f64::from_bits(W[7]),
        f64::from_bits(W[8]),
        f64::from_bits(W[9]),
    );
    let z_poly = f_polyeval10(
        x_sqr,
        f64::from_bits(Z[0]),
        f64::from_bits(Z[1]),
        f64::from_bits(Z[2]),
        f64::from_bits(Z[3]),
        f64::from_bits(Z[4]),
        f64::from_bits(Z[5]),
        f64::from_bits(Z[6]),
        f64::from_bits(Z[7]),
        f64::from_bits(Z[8]),
        f64::from_bits(Z[9]),
    );

    let ln_x = fast_logf(x);
    // W0 * log(x) - Z0, combined through f_fmla
    f_fmla(w_poly, ln_x, -z_poly) as f32
}
/// Evaluates Y0 on the transient zone `1.35 < x <= 2` (the range `f_y0f` sends here)
/// with a dedicated degree-17 minimax polynomial in `x - 2.19714132603101703...`.
#[inline]
fn y0_transient_area(x: f32) -> f32 {
    // Expansion point 2.1971413260310170351490335626990 as a (lo, hi) bit pair.
    // This is not a zero of Y0: the constant term of the polynomial below is ~0.52
    // while its linear term is ~1.6e-10, i.e. the point is a stationary point of Y0.
    // NOTE(review): presumably the first positive zero of Y1 (Y0' = -Y1) - confirm.
    const CENTER: DoubleDouble =
        DoubleDouble::from_bit_pair((0xbc8bd1e50d219bfd, 0x400193bed4dff243));

    // Poly generated by Wolfram Mathematica:
    // <<FunctionApproximations`
    // ClearAll["Global`*"]
    // f[x_]:= BesselY[0,x + 2.1971413260310170351490335626990]
    // {approx,error} = MiniMaxApproximation[f[x],{x,{ 1.35 - 2.1971413260310170351490335626990, 2 - 2.1971413260310170351490335626990 },17,0},WorkingPrecision->120]
    // poly=error[[1]];
    // coeffs=CoefficientList[poly,x];
    // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
    //
    // Coefficients are f64 bit patterns, lowest degree first.
    const POLY: [u64; 18] = [
        0x3fe0aa48442f8375,
        0x3de601d3b959b8d8,
        0xbfd0aa4840bb8529,
        0x3fa439fc16d4835e,
        0x3f80d2dcd97d2b4f,
        0x3f4f833368f9f047,
        0xbf541a702ee92277,
        0x3f3abc113cf0f4da,
        0xbefac1ded6f17ba8,
        0x3f33ef372e24df82,
        0x3f3bf8b42322df40,
        0x3f4582f9daec9ca7,
        0x3f479fc07175494e,
        0x3f4477a5e32b723a,
        0x3f39fbfd6a6d6f0c,
        0x3f2760a66816527b,
        0x3f0a68fdeeba224f,
        0x3edd78c6c87089e1,
    ];

    let xd = x as f64;
    // Subtract the high part first, then the low part, to keep the extra bits of
    // the expansion point.
    let offset = (xd - CENTER.hi) - CENTER.lo;

    f_polyeval18(
        offset,
        f64::from_bits(POLY[0]),
        f64::from_bits(POLY[1]),
        f64::from_bits(POLY[2]),
        f64::from_bits(POLY[3]),
        f64::from_bits(POLY[4]),
        f64::from_bits(POLY[5]),
        f64::from_bits(POLY[6]),
        f64::from_bits(POLY[7]),
        f64::from_bits(POLY[8]),
        f64::from_bits(POLY[9]),
        f64::from_bits(POLY[10]),
        f64::from_bits(POLY[11]),
        f64::from_bits(POLY[12]),
        f64::from_bits(POLY[13]),
        f64::from_bits(POLY[14]),
        f64::from_bits(POLY[15]),
        f64::from_bits(POLY[16]),
        f64::from_bits(POLY[17]),
    ) as f32
}
/// On the small-argument range this looks up the stored zero or extremum of Y0
/// nearest to `x`, then evaluates the series expansion stored for that point.
///
/// If the nearest point is the placeholder at index 0 the call is forwarded to
/// [`y0f_near_zero`]; if `x` coincides with the high part of a stored point the
/// precomputed value from `Y0_ZEROS_VALUES` is returned directly.
#[inline]
fn y0f_small_argument_path(x: f32) -> f32 {
    // Average spacing of the stored points is 74.607799 / 47.0; this is its reciprocal.
    const INV_STEP: f64 = 0.6299609508652038;
    const LAST_INDEX: f64 = (Y0_ZEROS.len() - 1) as f64;

    let xd = x as f64;
    let scaled = xd * INV_STEP;

    // SAFETY: `min` clamps the value to at most LAST_INDEX and cannot yield NaN because
    // LAST_INDEX is a number. The value is non-negative as long as `x` is non-negative,
    // which holds for the caller in this file (`f_y0f` passes 2 < x <= 75.3).
    let below = unsafe { scaled.min(LAST_INDEX).to_int_unchecked::<usize>() };
    // SAFETY: same argument as above, applied to the rounded-up candidate.
    let above = unsafe { scaled.ceil().min(LAST_INDEX).to_int_unchecked::<usize>() };

    let point_below = DoubleDouble::from_bit_pair(Y0_ZEROS[below]);
    let point_above = DoubleDouble::from_bit_pair(Y0_ZEROS[above]);

    let gap_below = (point_below.hi - xd).abs();
    let gap_above = (point_above.hi - xd).abs();

    // Ties go to the upper candidate.
    let (center, idx, gap) = if gap_below < gap_above {
        (point_below, below, gap_below)
    } else {
        (point_above, above, gap_above)
    };

    if idx == 0 {
        // Not expected on this range; fall back to the log expansion if it happens.
        return y0f_near_zero(x);
    }

    if gap == 0. {
        // x sits exactly on a stored point: return the tabulated value.
        return f64::from_bits(Y0_ZEROS_VALUES[idx]) as f32;
    }

    // Coefficient rows start at the first real point, hence the shift by one.
    let coeffs = &Y0F_COEFFS[idx - 1];
    let offset = (xd - center.hi) - center.lo;

    f_polyeval18(
        offset,
        f64::from_bits(coeffs[0]),
        f64::from_bits(coeffs[1]),
        f64::from_bits(coeffs[2]),
        f64::from_bits(coeffs[3]),
        f64::from_bits(coeffs[4]),
        f64::from_bits(coeffs[5]),
        f64::from_bits(coeffs[6]),
        f64::from_bits(coeffs[7]),
        f64::from_bits(coeffs[8]),
        f64::from_bits(coeffs[9]),
        f64::from_bits(coeffs[10]),
        f64::from_bits(coeffs[11]),
        f64::from_bits(coeffs[12]),
        f64::from_bits(coeffs[13]),
        f64::from_bits(coeffs[14]),
        f64::from_bits(coeffs[15]),
        f64::from_bits(coeffs[16]),
        f64::from_bits(coeffs[17]),
    ) as f32
}
/// Asymptotic form of Y0 used for arguments above the small-argument range.
///
/// Evaluates:
/// ```text
/// Y0 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
/// ```
#[inline]
fn y0f_asympt(x: f32) -> f32 {
    const SQRT_2_OVER_PI: f64 = f64::from_bits(0x3fe9884533d43651);
    const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);

    let xd = x as f64;
    let alpha = j0f_asympt_alpha(xd);
    let beta = j0f_asympt_beta(xd);

    // NOTE(review): presumably `x` reduced modulo 2*PI - confirm against rem2pif_any.
    let reduced = rem2pif_any(x);

    // -PI/4 - alpha(x), added to the reduced argument to form the sine phase
    let shift = MPI_OVER_4 - alpha;
    let phase = reduced + shift;

    let oscillation = beta * sin_small(phase);
    // NOTE(review): j1f_rsqrt presumably returns 1/sqrt(x), giving sqrt(2/(PI*x)) - confirm.
    let scale = SQRT_2_OVER_PI * j1f_rsqrt(xd);

    (scale * oscillation) as f32
}
#[cfg(test)]
mod tests {
    use crate::f_y0f;

    /// Spot checks of `f_y0f` on each evaluation path plus the special inputs.
    #[test]
    fn test_y0f() {
        // (input, expected) pairs that must match exactly
        let exact_cases: [(f32, f32); 7] = [
            (90.5, 0.08254846),
            (77.5, 0.087678276),
            (1.5, 0.3824489),
            (0.5, -0.44451874),
            (0., f32::NEG_INFINITY),
            (-0., f32::NEG_INFINITY),
            (f32::INFINITY, 0.),
        ];
        for &(input, expected) in exact_cases.iter() {
            assert_eq!(f_y0f(input), expected);
        }

        // Inputs for which the result is NaN
        let nan_cases: [f32; 2] = [-1., f32::NEG_INFINITY];
        for &input in nan_cases.iter() {
            assert!(f_y0f(input).is_nan());
        }
    }
}

1236
vendor/pxfm/src/bessel/y0f_coeffs.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1112
vendor/pxfm/src/bessel/y1.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1068
vendor/pxfm/src/bessel/y1_coeffs.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1030
vendor/pxfm/src/bessel/y1_coeffs_taylor.rs vendored Normal file

File diff suppressed because it is too large Load Diff

348
vendor/pxfm/src/bessel/y1f.rs vendored Normal file
View File

@@ -0,0 +1,348 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::bessel::j1f::{j1f_asympt_alpha, j1f_asympt_beta};
use crate::bessel::trigo_bessel::cos_small;
use crate::bessel::y1f_coeffs::{Y1_ZEROS, Y1_ZEROS_VALUES, Y1F_COEFFS};
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::logs::fast_logf;
use crate::polyeval::{f_polyeval10, f_polyeval18, f_polyeval19};
use crate::sincos_reduce::rem2pif_any;
/// Bessel function of the second kind of order 1 (Y1)
///
/// Max ULP 0.5
///
/// Special values: `±0` gives `-inf`, `+inf` gives `0`, and NaN, `-inf` or any
/// negative input gives NaN.
pub fn f_y1f(x: f32) -> f32 {
    let bits = x.to_bits();

    // Everything that is not a strictly positive finite number is settled here.
    // Negative inputs have the sign bit set, so their bit pattern compares above
    // the +inf pattern and they land in this branch as well.
    if bits == 0 || bits >= 0xffu32 << 23 {
        if bits.wrapping_shl(1) == 0 {
            // x == +0 or x == -0
            return f32::NEG_INFINITY;
        }
        if x.is_infinite() {
            return if x.is_sign_negative() { f32::NAN } else { 0. };
        }
        // NaN input, or a finite negative input
        return x + f32::NAN;
    }

    // From here on x is finite and positive, so comparing bit patterns is the
    // same as comparing values.
    if bits <= 0x3fb5c28f {
        // x <= 1.42
        return y1f_near_zero(x);
    }
    if bits <= 0x40000000 {
        // 1.42 < x <= 2: the log-based expansion already behaves badly here and the
        // zero/extremum expansions do not cover it well yet, so this zone has its own poly
        return y1_transient_area(x);
    }
    if bits <= 0x424e0000 {
        // 2 < x <= 51.5
        return y1f_small_argument_path(x);
    }

    // Inputs whose results are stored directly instead of going through the
    // asymptotic expansion.
    match bits {
        0x47037a3d => f32::from_bits(0x2deededb),
        0x65ce46e4 => f32::from_bits(0x9eed85c4),
        0x6bf68a7b => f32::from_bits(0x9dc70a09),
        0x76d84625 => f32::from_bits(0x15d7a68b),
        0x7e3dcda0 => f32::from_bits(0x12b81111),
        _ => y1f_asympt(x),
    }
}
/// Evaluates Y1 close to the origin (`f_y1f` routes `x <= 1.42` here) as
/// `log(x) * W1(x) - Z1(x) - 2/(pi*x)`, where `W1` and `Z1` are each `x` times a
/// degree-9 polynomial in `x^2`.
///
/// Generated by SageMath. Evaluates:
/// ```text
/// y2 = -J1(x)*log(x) + 1/x * (1 - sum((-1)^m*(H(m)+H(m-1))/(2^m*m!*(m-1)!)*x^(2*m)))
/// Y1(x) = 2/pi*(-y2(x)+(euler_gamma - log(2))*J1(x))
/// ```
/// expressed as:
/// ```text
/// Y1(x)=log(x)*W1(x) - Z1(x) - 2/(pi*x)
/// ```
///
/// ```python
/// from sage.all import *
/// R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
/// x = R.gen()
/// N = 16  # Number of terms (adjust as needed)
/// gamma = RealField(300)(euler_gamma)
/// d2 = RealField(300)(2)
/// pi = RealField(300).pi()
/// log2 = RealField(300)(2).log()
/// def j_series(n, x):
///     return sum([(-1)**m * (x/2)**(ZZ(n) + ZZ(2)*ZZ(m)) / (ZZ(m).factorial() * (ZZ(m) + ZZ(n)).factorial()) for m in range(N)])
/// J1_series = j_series(1, x)
/// def harmony(m):
///     return sum(RealField(300)(1)/RealField(300)(k) for k in range(1, m+1))
/// def z_series(x):
///     return sum([(-1)**m * (x)**(ZZ(2)*ZZ(m)) / (ZZ(2)**(2*m) * ZZ(m).factorial() * (ZZ(m) - ZZ(1)).factorial()) * (harmony(m) + harmony(m - 1)) for m in range(1, N)])
/// W1 = d2/pi * J1_series
/// Z1 = -(d2/(x*pi) * z_series(x) + d2/pi * gamma * J1_series(x) - d2/pi * log2 * J1_series(x))
/// # see the series
/// print(W1)
/// print(Z1)
/// ```
/// See ./notes/bessel_y1_taylor.ipynb for generation
#[inline]
fn y1f_near_zero(x: f32) -> f32 {
    // Coefficients of W1 / x, lowest power of x^2 first, stored as f64 bit patterns.
    const W: [u64; 10] = [
        0x3fd45f306dc9c883,
        0xbfa45f306dc9c883,
        0x3f5b2995e7b7b604,
        0xbf021bb945252402,
        0x3e9cf9286ea1d337,
        0xbe2ee7a29824147f,
        0x3db78be9987d036d,
        0xbd3ae90af76a4d0f,
        0x3cb7eb97f85e7d62,
        0xbc31028e3376648a,
    ];
    // Coefficients of Z1 / x, same layout as `W`.
    const Z: [u64; 10] = [
        0x3fc91866143cbc8a,
        0xbfabd3975c75b4a7,
        0x3f6835b97894be5b,
        0xbf12c7dbffcde97d,
        0x3eb0a780ac776eac,
        0xbe432e5a4ddeea30,
        0x3dcf0ce34d2066a6,
        0xbd52a4e1aea45c18,
        0x3cd1474ade9154ac,
        0xbc4978ba84f218c0,
    ];
    const TWO_OVER_PI: f64 = f64::from_bits(0x3fe45f306dc9c883);

    // All intermediate work is carried out in f64.
    let xd = x as f64;
    let x_sqr = xd * xd;

    // Both series are odd in x: evaluate the even part, then multiply by x.
    let w_term = f_polyeval10(
        x_sqr,
        f64::from_bits(W[0]),
        f64::from_bits(W[1]),
        f64::from_bits(W[2]),
        f64::from_bits(W[3]),
        f64::from_bits(W[4]),
        f64::from_bits(W[5]),
        f64::from_bits(W[6]),
        f64::from_bits(W[7]),
        f64::from_bits(W[8]),
        f64::from_bits(W[9]),
    ) * xd;
    let z_term = f_polyeval10(
        x_sqr,
        f64::from_bits(Z[0]),
        f64::from_bits(Z[1]),
        f64::from_bits(Z[2]),
        f64::from_bits(Z[3]),
        f64::from_bits(Z[4]),
        f64::from_bits(Z[5]),
        f64::from_bits(Z[6]),
        f64::from_bits(Z[7]),
        f64::from_bits(Z[8]),
        f64::from_bits(Z[9]),
    ) * xd;

    let ln_x = fast_logf(x);
    let inv_x = 1. / xd;

    // W1 * log(x) - Z1
    let regular_part = f_fmla(w_term, ln_x, -z_term);
    // ... minus the 2/(pi*x) pole
    f_fmla(inv_x, -TWO_OVER_PI, regular_part) as f32
}
/// Evaluates Y1 on the transient zone `1.42 < x <= 2` (the range `f_y1f` sends here)
/// with a dedicated degree-17 minimax polynomial in `x - 2.19714132603101703...`.
#[inline]
fn y1_transient_area(x: f32) -> f32 {
    // First Y1 bessel zero (same bit pair as `Y1_ZEROS[1]`), as a (lo, hi) bit pair.
    const FIRST_ZERO: DoubleDouble =
        DoubleDouble::from_bit_pair((0xbc8bd1e50d219bfd, 0x400193bed4dff243));

    // Poly generated by Wolfram Mathematica:
    // <<FunctionApproximations`
    // ClearAll["Global`*"]
    // f[x_]:= BesselY[1,x + 2.1971413260310170351490335626990]
    // {approx,error} = MiniMaxApproximation[f[x],{x,{1.42 - 2.1971413260310170351490335626990, 2 - 2.1971413260310170351490335626990 },17,0},WorkingPrecision->120]
    // poly=error[[1]];
    // coeffs=CoefficientList[poly,x];
    // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
    //
    // Coefficients are f64 bit patterns, lowest degree first.
    const POLY: [u64; 18] = [
        0x3d9b15a8283b069b,
        0x3fe0aa484455fd09,
        0xbfbe56f80802fa38,
        0xbfa0d2ac9d0409ad,
        0xbf73a619b3551650,
        0x3f7e6c480057ecbb,
        0xbf650dc773a5df4d,
        0x3f531e9ccab7d4da,
        0xbf29b76999169b0e,
        0x3f509c829abceaf7,
        0x3f575aee5697c4d8,
        0x3f63f7f9598be176,
        0x3f67a6ae61541282,
        0x3f665e6d3de19021,
        0x3f5ee8837b9197f6,
        0x3f4e6924f270fd7e,
        0x3f32ca61e5b74925,
        0x3f0725735bc3890b,
    ];

    let xd = x as f64;
    // Subtract the high part first, then the low part, to keep the extra bits of
    // the zero.
    let offset = (xd - FIRST_ZERO.hi) - FIRST_ZERO.lo;

    f_polyeval18(
        offset,
        f64::from_bits(POLY[0]),
        f64::from_bits(POLY[1]),
        f64::from_bits(POLY[2]),
        f64::from_bits(POLY[3]),
        f64::from_bits(POLY[4]),
        f64::from_bits(POLY[5]),
        f64::from_bits(POLY[6]),
        f64::from_bits(POLY[7]),
        f64::from_bits(POLY[8]),
        f64::from_bits(POLY[9]),
        f64::from_bits(POLY[10]),
        f64::from_bits(POLY[11]),
        f64::from_bits(POLY[12]),
        f64::from_bits(POLY[13]),
        f64::from_bits(POLY[14]),
        f64::from_bits(POLY[15]),
        f64::from_bits(POLY[16]),
        f64::from_bits(POLY[17]),
    ) as f32
}
/// On the small-argument range this looks up the stored zero or extremum of Y1
/// nearest to `x`, then evaluates the series expansion stored for that point.
///
/// If the nearest point is the placeholder at index 0 the call is forwarded to
/// [`y1f_near_zero`]; if `x` coincides with the high part of a stored point the
/// precomputed value from `Y1_ZEROS_VALUES` is returned directly.
#[inline]
fn y1f_small_argument_path(x: f32) -> f32 {
    // Average spacing of the stored points is 51.03 / 33.0; this is its reciprocal.
    const INV_STEP: f64 = 0.6466784244562023;
    const LAST_INDEX: f64 = (Y1_ZEROS.len() - 1) as f64;

    let xd = x as f64;
    let scaled = xd * INV_STEP;

    // SAFETY: `min` clamps the value to at most LAST_INDEX and cannot yield NaN because
    // LAST_INDEX is a number. The value is non-negative as long as `x` is non-negative,
    // which holds for the caller in this file (`f_y1f` passes 2 < x <= 51.5).
    let below = unsafe { scaled.min(LAST_INDEX).to_int_unchecked::<usize>() };
    // SAFETY: same argument as above, applied to the rounded-up candidate.
    let above = unsafe { scaled.ceil().min(LAST_INDEX).to_int_unchecked::<usize>() };

    let point_below = DoubleDouble::from_bit_pair(Y1_ZEROS[below]);
    let point_above = DoubleDouble::from_bit_pair(Y1_ZEROS[above]);

    let gap_below = (point_below.hi - xd).abs();
    let gap_above = (point_above.hi - xd).abs();

    // Ties go to the upper candidate.
    let (center, idx, gap) = if gap_below < gap_above {
        (point_below, below, gap_below)
    } else {
        (point_above, above, gap_above)
    };

    if idx == 0 {
        // Not expected on this range; fall back to the log expansion if it happens.
        return y1f_near_zero(x);
    }

    if gap == 0. {
        // x sits exactly on a stored point: return the tabulated value.
        return f64::from_bits(Y1_ZEROS_VALUES[idx]) as f32;
    }

    // Coefficient rows start at the first real point, hence the shift by one.
    let coeffs = &Y1F_COEFFS[idx - 1];
    let offset = (xd - center.hi) - center.lo;

    f_polyeval19(
        offset,
        f64::from_bits(coeffs[0]),
        f64::from_bits(coeffs[1]),
        f64::from_bits(coeffs[2]),
        f64::from_bits(coeffs[3]),
        f64::from_bits(coeffs[4]),
        f64::from_bits(coeffs[5]),
        f64::from_bits(coeffs[6]),
        f64::from_bits(coeffs[7]),
        f64::from_bits(coeffs[8]),
        f64::from_bits(coeffs[9]),
        f64::from_bits(coeffs[10]),
        f64::from_bits(coeffs[11]),
        f64::from_bits(coeffs[12]),
        f64::from_bits(coeffs[13]),
        f64::from_bits(coeffs[14]),
        f64::from_bits(coeffs[15]),
        f64::from_bits(coeffs[16]),
        f64::from_bits(coeffs[17]),
        f64::from_bits(coeffs[18]),
    ) as f32
}
/// Asymptotic form of Y1 used for arguments above the small-argument range.
///
/// Evaluates:
/// ```text
/// Y1 = sqrt(2/(PI*x)) * beta(x) * sin(x - 3*PI/4 - alpha(x))
/// ```
/// Discarding 1/2*PI gives:
/// ```text
/// Y1 = sqrt(2/(PI*x)) * beta(x) * (-cos(x - PI/4 - alpha(x)))
/// ```
#[inline]
fn y1f_asympt(x: f32) -> f32 {
    const SQRT_2_OVER_PI: f64 = f64::from_bits(0x3fe9884533d43651);
    const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);

    let xd = x as f64;
    let alpha = j1f_asympt_alpha(xd);
    let beta = j1f_asympt_beta(xd);

    // NOTE(review): presumably `x` reduced modulo 2*PI - confirm against rem2pif_any.
    let reduced = rem2pif_any(x);

    // -PI/4 - alpha(x), added to the reduced argument to form the cosine phase
    let shift = MPI_OVER_4 - alpha;
    let phase = reduced + shift;

    let neg_cos = -cos_small(phase);
    let oscillation = beta * neg_cos;
    // NOTE(review): j1f_rsqrt presumably returns 1/sqrt(x), giving sqrt(2/(PI*x)) - confirm.
    let scale = SQRT_2_OVER_PI * j1f_rsqrt(xd);

    (scale * oscillation) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks of `f_y1f` on each evaluation path plus the special inputs.
    ///
    /// Named after the function under test, matching `test_y0f` in the Y0 module.
    #[test]
    fn test_y1f() {
        // Asymptotic path (x > 51.5)
        assert_eq!(f_y1f(700.76), 0.024876066);
        // Zero/extremum expansion path (2 < x <= 51.5)
        assert_eq!(f_y1f(35.76), 0.121432826);
        // Transient zone (1.42 < x <= 2)
        assert_eq!(f_y1f(1.76), -0.24787569);
        // Log expansion near the origin (x <= 1.42)
        assert_eq!(f_y1f(0.87), -0.9030042);

        // Special inputs, all resolved by the first branch of `f_y1f`
        assert_eq!(f_y1f(0.), f32::NEG_INFINITY);
        assert_eq!(f_y1f(-0.), f32::NEG_INFINITY);
        assert_eq!(f_y1f(f32::INFINITY), 0.0);
        assert!(f_y1f(-1.).is_nan());
        assert!(f_y1f(f32::NEG_INFINITY).is_nan());
        assert!(f_y1f(f32::NAN).is_nan());
    }
}

923
vendor/pxfm/src/bessel/y1f_coeffs.rs vendored Normal file
View File

@@ -0,0 +1,923 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
Abscissae of the zeros and extrema of Y1 on the [0, 52] interval, in ascending order.

Each entry is a `(lo, hi)` pair of `f64` bit patterns (the generator below prints
`({lo}, {hi})`); callers decode an entry with `DoubleDouble::from_bit_pair`.
Index 0 is a placeholder so that the real points start at index 1.

NOTE(review): the generator prints `len(y1_zeros)` in the array header and then emits the
extra `(0x0, 0x0)` row, so the length written here (33) is the one that includes the placeholder.

Generated by SageMath:
```python
R120 = RealField(120)
zeros = []
mp.prec = 150
step = mpf("0.1")
epsilon = mpf("1e-35")
x = mpf("1.25")
previous_zero = R120(0)
y1_zeros = []
while x < mpf("52.0"):
f1 = bessely(1, x)
f2 = bessely(1, x + step)
if f1 * f2 < 0:
zero = findroot(lambda t: bessely(1, t), (x, x + step), solver='secant', tol=mp.mpf("1e-41"))
previous_zero = zero
y1_zeros.append(zero)
if previous_zero is not None and abs(x - mpf(f'{round(x)}')) < epsilon:
zeros.append(previous_zero)
x += step
y1_extrema = []
x = mpf("1.25")
while x < mpf("52.0"):
d1 = mp.diff(lambda t: bessely(1, t), x)
d2 = mp.diff(lambda t: bessely(1, t), x + step)
if d1 * d2 < 0:
extremum = findroot(lambda t: mp.diff(lambda u: bessely(1, u), t), (x, x + step), solver='secant', tol=mp.mpf("1e-41"))
y1_extrema.append(extremum)
x += step
y1_zeros.extend(y1_extrema)
y1_zeros = sorted(y1_zeros)
print(f"pub(crate) static Y1_ZEROS: [(u64, u64); {len(y1_zeros)}] = [")
print(f"(0x0, 0x0),")
for z in y1_zeros:
k = split_double_double(z)
hi = double_to_hex(k[1])
lo = double_to_hex(k[0])
print(f"({lo}, {hi}),")
print("];")
```
**/
pub(crate) static Y1_ZEROS: [(u64, u64); 33] = [
(0x0, 0x0), // not really used, just a stub that keeps indices aligned with the real points
(0xbc8bd1e50d219bfd, 0x400193bed4dff243),
(0x3c53bac0714e4129, 0x400d76d4affba175),
(0x3cbdfe7bac228e8c, 0x4015b7fe4e87b02e),
(0x3ca7960b6b1c46ac, 0x401bc41890588553),
(0x3cb479cc068d9046, 0x40213127ae6169b4),
(0x3cc8f4ba5d68e440, 0x40243f2ee51e8c7e),
(0x3c80fc786ce06080, 0x40277f9138d43206),
(0xbcaf6ef7a3571593, 0x402a924ee4a3e52c),
(0xbcc5e091a50f8e05, 0x402dcb7d88de848b),
(0x3cc07320221cd5e5, 0x403070a7a43daae6),
(0xbcda1ee4c5487ede, 0x40320b1c695f1e3b),
(0xbcd2903124fef7e3, 0x4033971a15717510),
(0x3cd391b14410528f, 0x40353025492188cd),
(0xbcc15ec09721b746, 0x4036bcefd7de87a3),
(0x3cb52f75f025b205, 0x403854fa303820ca),
(0x3cb6f57f7696f493, 0x4039e262715f12a9),
(0xbcbcf130fbea3b24, 0x403b79acee8cfb7d),
(0xbc912142b10a5c65, 0x403d079247e8f51b),
(0xbc9e7a77047d6166, 0x403e9e480605283c),
(0x3cb1452eb07cd937, 0x40401649819af8fa),
(0xbce96beabef7ecf4, 0x4040e16907f8fb56),
(0x3cec6086fb5dd335, 0x4041a8b8a142d536),
(0x3cd2481e87adfe57, 0x404273a7b35a7aff),
(0x3cd7df5b6f701c7a, 0x40433b1ac0375e31),
(0x3cda8ffacaac8461, 0x404405e18393afb5),
(0xbce5b5acaff0a867, 0x4044cd72d2adfb0c),
(0x3cbfe463face2c1c, 0x4045981787d668db),
(0xbcefcba6ea61df1b, 0x40465fc2f7ca5b81),
(0xbce26390f25f01cb, 0x40472a4a85cc317e),
(0xbcbba46ca6ef9b6f, 0x4047f20cbfc32967),
(0xbcdcc667e557a177, 0x4048bc7b10ed3960),
(0x3cea473d4f209faf, 0x4049845158040451),
];
/**
Value of Y1 at each zero or extremum listed in [`Y1_ZEROS`], stored as `f64` bit patterns.

Entry `i` is Y1 evaluated at the high part (`.1`) of `Y1_ZEROS[i]`; entry 0 is `0`
for the placeholder row. Entries therefore alternate between a tiny residual
(at a zero) and the local extreme value (at an extremum).

Generated by MPFR:
```text
let mut arr = vec![];
for zeros in Y1_ZEROS.iter() {
if zeros.1 == 0 {
arr.push(0);
} else {
let mpfr = Float::with_val(107, f64::from_bits(zeros.1)).y1();
arr.push(mpfr.to_f64().to_bits());
}
}
println!(
"arr: [{}]",
arr.iter()
.map(|x| format!("0x{:016x}", x))
.collect::<Vec<_>>()
.join(", ")
);
```
**/
pub(crate) static Y1_ZEROS_VALUES: [u64; 33] = [
0x0000000000000000,
0x3c7cf9f8d5e1a475,
0x3fdaabb4011ed330,
0x3ca46a40b234169c,
0xbfd36732d4b96094,
0xbc963bc010b45f46,
0x3fd00ef3745e0e3c,
0x3c5f96d32c02f147,
0xbfcc075da85beb4f,
0x3ca213201464272b,
0x3fc931a5a0ae5aa0,
0xbcb39d4c41d5839f,
0xbfc713fc51664c74,
0xbcab20138dd047ec,
0x3fc56b97f8091ac5,
0x3c8b67dbfd3bd179,
0xbfc413644356a52b,
0x3c919e290514c619,
0x3fc2f4e70d6c7e01,
0xbc719366994a86bc,
0xbfc20198200b699d,
0x3cbbec2345ac18e2,
0x3fc12f9870d68e18,
0x3ca334eb74fdcfd1,
0xbfc077eede4a0d89,
0xbcaaca291aee3c35,
0x3fbfab0b166d23d8,
0x3c8ef92aa9c8e53c,
0xbfbe891b327da16d,
0x3cb13e3c695ea088,
0x3fbd84391bb2748d,
0xbcaa1c861fdd1438,
0xbfbc97d79918527d,
];
/**
Series expansion at point for Y1
Generated by SageMath and Sollya:
```python
def compute_intervals(zeros):
intervals = []
for i in range(0, len(zeros)):
if i == 0:
a = 2 - zeros[i]
b = (zeros[i] + zeros[i + 1]) / 2 + 0.05 - zeros[i]
intervals.append((RealField(18)(a), RealField(18)(b), RealField(110)(zeros[i])))
elif i + 1 > len(zeros) - 1:
a = (zeros[i - 1] + zeros[i]) / 2 - 0.05 - zeros[i]
b = (zeros[i]) + 0.83 + 0.05 - zeros[i]
intervals.append((RealField(18)(a), RealField(18)(b), RealField(110)(zeros[i])))
else:
a = (zeros[i - 1] + zeros[i]) / 2 - zeros[i] - 0.05
b = (zeros[i] + zeros[i + 1]) / 2 + 0.05 - zeros[i]
intervals.append((RealField(18)(a), RealField(18)(b), RealField(110)(zeros[i])))
return intervals
intervals = compute_intervals(y1_zeros)
def build_sollya_script(a, b, zero, deg):
return f"""
prec = 250;
bessel_y1 = library("./notes/bessel_sollya/cmake-build-release/libbessel_sollya.dylib");
f = bessel_y1(x + {zero});
d = [{a}, {b}];
pf = remez(f, {deg}, d);
for i from 0 to degree(pf) do {{
write(coeff(pf, i)) >> "coefficients.txt";
write("\\n") >> "coefficients.txt";
}};
"""
def load_coefficients(filename):
with open(filename, "r") as f:
return [RR(line.strip()) for line in f if line.strip()]
def call_sollya_on_interval(a, b, zero, degree=12):
sollya_script = build_sollya_script(a, b, zero, degree)
with open("tmp_interval.sollya", "w") as f:
f.write(sollya_script)
import subprocess
if os.path.exists("coefficients.txt"):
os.remove("coefficients.txt")
try:
result = subprocess.run(
["sollya", "tmp_interval.sollya"],
check=True,
capture_output=True,
text=True
)
except subprocess.CalledProcessError as e:
return
degree = 18
print(f"pub(crate) static Y1F_COEFFS: [[u64;{degree + 1}]; {len(intervals)}] = [")
for i in range(0, len(intervals)):
interval = intervals[i]
call_sollya_on_interval(interval[0], interval[1], interval[2], degree)
coeffs = load_coefficients(f"coefficients.txt")
print("[")
for c in coeffs:
print(double_to_hex(c) + ",")
print("],")
print("];")
```
**/
pub(crate) static Y1F_COEFFS: [[u64; 19]; 32] = [
[
0x3bdca2ee18606a4b,
0x3fe0aa48442f014b,
0xbfbe56f82217b8f4,
0xbfa0d2af4e932400,
0xbf73a6dec3726cd5,
0x3f7e671c7d12ea48,
0xbf65429dc5c0e9d4,
0x3f517ab4af4655e4,
0xbf40b2d8647a250d,
0x3f2eea7b1b675766,
0xbf1c3fb728e7d2ff,
0x3f09d1da72e12f44,
0xbef7964bf8511e22,
0x3ee57c2a83e1f972,
0xbed33f4211a00375,
0x3ec02bcdac2103fd,
0xbea6fefcf033ab9d,
0x3e874128ed97d3bb,
0xbe57d5b1eac16658,
],
[
0x3fdaabb4011ed330,
0x3c54da7c52fcf446,
0xbfc8b45babe797b6,
0x3f8e147099a6f00d,
0x3f88c5af1eeb2143,
0xbf4133fa47d8ea48,
0xbf3bf8af93e7a2f0,
0x3f021d64bd4e2cd8,
0x3eb44d2c32fdaf23,
0x3eb14c3b9e7960c1,
0xbe9b8ee25c629be6,
0x3e7a85b5b497dc6c,
0xbe5bfa422fb8d949,
0x3e3f0ad81d293f5a,
0xbe20e6844c6faba7,
0x3e0214b2d826d072,
0xbde4ff658967d425,
0x3dcb549cdc774a83,
0xbda6b5df4d9c1682,
],
[
0x3b7ff35240713789,
0xbfd5c7c556f0c19a,
0x3fa00b9f8571ca1f,
0x3faa15d92dfe3e27,
0xbf710a329e2c23f5,
0xbf61be6db9923ac9,
0x3f2337c7e138eb84,
0x3f085b940eb5f37f,
0xbec80619146a1e65,
0xbea255e6cf4b3254,
0x3e5b62ccdc392c5a,
0x3e380b1a5a61e6b5,
0xbdfa7ec7fd0d2925,
0x3d840d04ff01d1b2,
0xbd938dc1b2e33eca,
0x3d74839c586126ca,
0xbd4b045bc7ad769b,
0x3d261d10a8575c45,
0xbd052a6cc14bcc54,
],
[
0xbfd36732d4b96094,
0x3b3886a5ed6fd628,
0x3fc3001c8002caf8,
0xbf7bf5a03bab4999,
0xbf8751ea028c1953,
0x3f423874cd8d0402,
0x3f364f6610d6493b,
0xbef02978de38394f,
0xbed72f0766d0d9c7,
0x3e8f2081874e556c,
0x3e6defd5dce91973,
0xbe2205c70046a2c7,
0xbdfb6432eb3ab7ea,
0x3db028a1c0572973,
0x3d807791dcab03a0,
0xbd29778204deee13,
0xbd08342db2e7148e,
0x3cc898efb37f9dad,
0xbc84e2adc305e2ab,
],
[
0xbac1435819592d4c,
0x3fd15f993fceab5c,
0xbf902b3933cf21b1,
0xbfa6395dfe49fcd4,
0x3f63ced2a2e69180,
0x3f607a678d6000bb,
0xbf1b50d7e1d3201e,
0xbf06f7bab104f34b,
0x3ec176e72bf94a3a,
0x3ea2becb2b6bacd1,
0xbe5a384eebfb23c2,
0xbe341e7a921f7f66,
0x3de9e3284b918a26,
0x3dbec40b21f2c78f,
0xbd726865da6190a9,
0xbd416f4fe7eed351,
0x3cf3160bd2bd6c64,
0x3cbf6d61c945b95c,
0xbc706809636e0aec,
],
[
0x3fd00ef3745e0e3c,
0x3aff192f298c81c3,
0xbfbfcdacdda138f2,
0x3f706cc34cd829fa,
0x3f84641bb10c16cb,
0xbf37fac943e2a16d,
0xbf34769ed32e14a2,
0x3ee80608ecda1508,
0x3ed5cc8242d77e23,
0xbe888c8f2538feb8,
0xbe6ce5908c1e5174,
0x3e1ed16257e17417,
0x3dfa30d623eda066,
0xbdaa5076123e3ecf,
0xbd814cd297d2be7e,
0x3d306166947e23e9,
0x3d01635f73179569,
0xbcaeafcf4c2f127b,
0xbc7b0828175d92fa,
],
[
0x3aba1488e1b7782d,
0xbfcdc14ea14e89f9,
0x3f84429fef5b5fbd,
0x3fa367d7d608e4ba,
0xbf59d6eb2bc49e35,
0xbf5dc4f991b3db86,
0x3f1315ec04d6e6bb,
0x3f0571814a1aa2f5,
0xbeba2977fa42f00f,
0xbea1e864230850b8,
0x3e54a7b82d3fa1e5,
0x3e33906609f9fe4c,
0xbde549e8b0e16969,
0xbdbe32cf2ce99d6f,
0x3d6eff542dd345c3,
0x3d415e2a9c2f4933,
0xbcf0d48dde3c3ffe,
0xbcbeac3c36b4bce2,
0x3c6af1612c5ddab0,
],
[
0xbfcc075da85beb4f,
0xbafcfa84f4024782,
0x3fbbdeb6ff9f55e1,
0xbf661eefb74da882,
0xbf8229ea914b846e,
0x3f30cbcc6778fd37,
0x3f32aa59f5091f7b,
0xbee1c15d5251ae54,
0xbed4583f15abd654,
0x3e831d151a12624a,
0x3e6b74e57c21e022,
0xbe19044f1339b061,
0xbdf93b1ec70c7bbc,
0x3da61a4e437e8105,
0x3d80d4305f038451,
0xbd2c3aad6f3b35c7,
0xbd010dec3a02c58c,
0x3cab15901b6d0925,
0x3c7ab2531f00c501,
],
[
0xbab392a85abdc950,
0x3fca7022be084d99,
0xbf7c650b6b83109a,
0xbfa163191c30aa62,
0x3f526b045287ddca,
0x3f5b17602840abf5,
0xbf0c0a9cee3c8429,
0xbf03e398cbc472de,
0x3eb3f35db1ff19f5,
0x3ea0e9b612dbc0ea,
0xbe5056babcd79a11,
0xbe32c1a8c8d768b1,
0x3de161b6a84838d0,
0x3dbd4ca9d2d67d78,
0xbd69fdd67a999eab,
0xbd4101919ce84a07,
0x3cecd91fa7851496,
0x3cbe3f8588ebbfdf,
0xbc67a4499c96e38d,
],
[
0x3fc931a5a0ae5aa0,
0x3afa23fd08be9891,
0xbfb919c8a3f203fa,
0x3f602a38da6262a9,
0x3f807ced48910819,
0xbf2900f33a00690a,
0xbf31278d46fd153c,
0x3edb2595529cf19f,
0x3ed2f7c2d608e0bb,
0xbe7e212d23787793,
0xbe69f3fcf3631e9c,
0x3e144fbf033f1974,
0x3df82268e7ab0cdb,
0xbda26cc2714815d4,
0xbd80418b35c32375,
0x3d28122e50410f0a,
0x3d009aba27e11464,
0xbca78943175d4e84,
0xbc7a379f959c0224,
],
[
0x3aaf25ce7e30cbc6,
0xbfc80781c32422e7,
0x3f754eda697a0098,
0x3f9fbe6df840847f,
0xbf4be318d61276e1,
0xbf58efee4094379c,
0x3f059145b4f0e4dd,
0x3f0282d26a74c382,
0xbeaf56c29d9ad6c8,
0xbe9fdd03174f6b47,
0x3e4a44a7907d0ec6,
0x3e31df6533090779,
0xbddc96e9cb6ee22b,
0xbdbc3439a99213c4,
0x3d65d387fc8083e0,
0x3d40830db4ec8a6e,
0xbce8ad426f9ce3f5,
0xbcbd93c0cf35d116,
0x3c649b19a5449ffa,
],
[
0xbfc713fc51664c74,
0xbaf73aab14face16,
0x3fb7049760cde490,
0xbf58ef5f1cbe4874,
0xbf7e5f53caf3bead,
0x3f237b0b62ddadd1,
0x3f2fd3bac08286da,
0xbed5789803de3adb,
0xbed1c0faa8999393,
0x3e7845b49b063dc7,
0x3e6886872800e226,
0xbe10b03677687883,
0xbdf7049d17bd230b,
0x3d9edd9ca057f252,
0x3d7f445f42a168e6,
0xbd24866878075342,
0xbd0015a37275b46d,
0x3ca463bd3d4059be,
0x3c79974848138496,
],
[
0xbaa9a62f9227c851,
0x3fc62d94d97e859c,
0xbf70bf614807033c,
0xbf9d5f857a2a6107,
0x3f46081b0b7fe572,
0x3f57307b03e248f8,
0xbf0132c0aa83d0db,
0xbf0154ed4598d2e4,
0x3ea94f64f476e3f5,
0x3e9e1272585385c0,
0xbe4588c758dd66db,
0xbe31021cdd7a4f3a,
0x3dd7cfa7a39f5d48,
0x3dbb0e00d41ec645,
0xbd6276c9a451cdb1,
0xbd3fe8cf17671ae1,
0x3ce52f1a6f7ae06f,
0x3cbcc2eb893d62ce,
0xbc61f4c0af8bd0fb,
],
[
0x3fc56b97f8091ac5,
0x3af48a947d2475cd,
0xbfb560fcc8c08469,
0x3f53fafa39618883,
0x3f7c49141623372f,
0xbf1f69980694fd17,
0xbf2dc5f848aa9d33,
0x3ed178fc979b779d,
0x3ed0b494a4bafca8,
0xbe73fc3884c243a5,
0xbe673afb9fb48ff7,
0x3e0bd903464b077a,
0x3df5f3bafabcdabe,
0xbd9a1c27612b5f03,
0xbd7e04553366c10e,
0x3d219970f1564c7c,
0x3cff128fbd867c78,
0xbca1b4d2be53f3ad,
0xbc78e13fb654b036,
],
[
0x3aa5951bb8e2b477,
0xbfc4b2a38f1ab9b4,
0x3f6b3878aadeb34d,
0x3f9b750d89a9b35f,
0xbf41f6911725a956,
0xbf55beee6fd51c8a,
0x3efc3625d7a65087,
0x3f005375a588a71f,
0xbea4ee5e4e7cafc0,
0xbe9c7b3d81b5dc31,
0x3e41fce14f464e1e,
0x3e30346643a98dcb,
0xbdd41c86191a49ce,
0xbdb9eed9da04017a,
0x3d5f8cee5e5b42b4,
0x3d3ec41075d33352,
0xbce24e44459e28b0,
0xbcbbe16f7d769c15,
0x3c5f670ad9138f1f,
],
[
0xbfc413644356a52b,
0xbaf22d9ab9060f8f,
0x3fb40bb88c6f2b85,
0xbf5078d13cfc400e,
0xbf7a9191262ab9d5,
0x3f1a005297618f35,
0x3f2c0cbad847a60e,
0xbecd1a72e7c35fa0,
0xbecf9a2654099c0b,
0x3e70c6b06e20d1c0,
0x3e66136d6425acf0,
0xbe0797767778226d,
0xbdf4f77b30ed58c3,
0x3d96572059bf2445,
0x3d7cd12649b82d6f,
0xbd1e6ce514a88f2d,
0xbcfdfabaf5c37514,
0x3c9eea8202989176,
0x3c782260f7596e02,
],
[
0xbaa27e57c2b07d4b,
0x3fc37aaceac987b9,
0xbf66afe4fe0bc0f7,
0xbf99de7a33bc3a97,
0x3f3e024f567ac487,
0x3f548843c426abe0,
0xbef7a8e14711c0f4,
0xbefeeceb341ad81c,
0x3ea1a743e05b383f,
0x3e9b143d39c8eb5f,
0xbe3e8e00011fabc3,
0xbe2ef28e31ff924c,
0x3dd137a1bd136742,
0x3db8e0878264a773,
0xbd5b3dc655a5a5f4,
0xbd3da652e8239897,
0x3cdfe34eace42448,
0x3cbafd0cc7251807,
0xbc5b9b0102453020,
],
[
0x3fc2f4e70d6c7e01,
0x3af022defda0ec45,
0xbfb2ef24d6f7526a,
0x3f4bc33c9dc6ec82,
0x3f7920414ee2acbe,
0xbf15f9173916a219,
0xbf2a94fdbdcec471,
0x3ec8b309990f94db,
0x3ece087ff4517bd5,
0xbe6ca22ab12c685c,
0xbe650d1f28632753,
0x3e044415529c950b,
0x3df411b8a7d9d1bc,
0xbd9354e8c7a8bfd7,
0xbd7bb16e8ee8c711,
0x3d1a881fddcb8d86,
0x3cfcecee70233b69,
0xbc9b2b6cccd3802a,
0xbc77637662fa6ba8,
],
[
0x3aa00f5dbb23e90b,
0xbfc2740819f1caaa,
0x3f6349369dc780bb,
0x3f98868d7401bf2e,
0xbf398cd1bebe1445,
0xbf537eef9aadeee2,
0x3ef43394c95b2d29,
0x3efd6dfcdb026013,
0xbe9e448fbc8a1c95,
0xbe99d764ee07a6b7,
0x3e3a53958c8a71d8,
0x3e2da0e1c86368ce,
0xbdcdd7f914e496e5,
0xbdb7e67ff45daf48,
0x3d57c2e32861f41c,
0x3d3c96e18ab6db69,
0xbcdc0099b11f0478,
0xbcba1dfeafeb6e19,
0x3c586b4c940f74bc,
],
[
0xbfc20198200b699d,
0xbaecc875d54af9d0,
0x3fb1fd242a74e630,
0xbf47cf261dfbf19a,
0xbf77e4820ec1dde4,
0x3f12e1bd281dfcba,
0x3f2950bb06c6fdf9,
0xbec54a38ab6af51a,
0xbecca94f38024fb4,
0x3e68c7e75971843f,
0x3e6423fc7e24ed40,
0xbe019fe1d8a6e0d8,
0xbdf34198c7517f5a,
0x3d90e78c95f157aa,
0x3d7aa74c4042e051,
0xbd1756942b9afcaf,
0xbcfbedc3e7dae4e4,
0x3c980b9567289463,
0x3c76a9e024cc6a52,
],
[
0xba9c33661811b8ff,
0x3fc192f2627a74e3,
0xbf60a846a83fecf2,
0xbf975eceaabf7f86,
0x3f3617c581be35b1,
0x3f529934b7a84483,
0xbef18123e8751889,
0xbefc1f05a2d85150,
0x3e9a4e0bc09262e9,
0x3e98be81ad44b8a4,
0xbe36f73795dfb5c7,
0xbe2c70ab155167d1,
0x3dca26218cc79400,
0x3db7011269271056,
0xbd54ec138a5f86cd,
0xbd3b98bf6fa2fe47,
0x3cd8c95d73f0c84c,
0x3cb948b2dd021429,
0xbc55bd7c63fa9765,
],
[
0x3fc12f9870d68e18,
0x3ae9cd1ac1fa64f9,
0xbfb12c11811945f9,
0x3f44b638f21f0f76,
0x3f76d2a897d58353,
0xbf10732e5458ba20,
0xbf2835929300df3e,
0x3ec297283816a814,
0x3ecb73adedf11a1f,
0xbe65b455b903b389,
0xbe6353f0797a3bf1,
0x3dfefc9ac10b87d9,
0x3df2853545ffa79d,
0xbd8dd8945079a88f,
0xbd79b28860cd63f8,
0x3d14b29ba2797832,
0x3cfaff02f362ca7e,
0xbc956d8436ee55ed,
0xbc75f881bb0137f5,
],
[
0x3a9900b85a085cfa,
0xbfc0cf3ee98f769b,
0x3f5d26e7af251f79,
0x3f965d05948a946a,
0xbf335959b8482e40,
0xbf51cff175d05c2a,
0x3eeeb59416879104,
0x3efaf7544eeac751,
0xbe9720522bb1fa69,
0xbe97c41261703475,
0x3e343fa0ea5ba663,
0x3e2b5e23abb21a5f,
0xbdc722397b59adb6,
0xbdb62f213532a0b8,
0x3d5294a89e377c98,
0x3d3aac95aead6ada,
0xbcd61abb584f3fc2,
0xbcb87f3345758fd5,
0x3c537a3b70fc94b7,
],
[
0xbfc077eede4a0d89,
0xbae73fb2e67b1968,
0x3fb0751548b2924d,
0xbf423b5d46a73864,
0xbf75e2467c8fb832,
0x3f0cfe5c189d6e4d,
0x3f273bbd8c7aef2c,
0xbec06974d3d04263,
0xbeca6081d36e6a0b,
0x3e6334a83cf5d21d,
0x3e6299571cb4bb1e,
0xbdfb7f5bc046450f,
0xbdf1da63b49ed896,
0x3d8a92885fb339ae,
0x3d78d1d6e93bb23d,
0xbd127ea4434f9fb5,
0xbcfa20dab6b920e9,
0x3c93389d892643a0,
0x3c7550b88147fd02,
],
[
0xba96582ab366c758,
0x3fc0230ba90f2871,
0xbf59ca16f0c9734e,
0xbf9579c1bdbcfc99,
0x3f3120ecfac5c017,
0x3f511dd26bbe2946,
0xbeeb37e7c9a57147,
0xbef9f01e7c19098c,
0x3e94887fe7a88a4d,
0x3e96e3723883fe87,
0xbe3204b644d485a1,
0xbe2a659b13b69c6b,
0x3dc4a40c8498625a,
0x3db56f0212f628e4,
0xbd50a0fef4ac5a44,
0xbd39d1d92cf50973,
0x3cd3d93917ae0666,
0x3cb7c222c421cbf8,
0xbc518e27cddeecfd,
],
[
0x3fbfab0b166d23d8,
0x3ae50cd9856106aa,
0xbfafa65c1ce7ebd6,
0x3f4035bf503ffc1f,
0x3f750d1b04713c41,
0xbf09cd14a92842a1,
0xbf265d504af5d8fe,
0x3ebd3feeb33d9cae,
0x3ec96a257062f750,
0xbe61254f302b04d0,
0xbe61f11585e02bfc,
0x3df89a7674827723,
0x3df13f0ba458182e,
0xbd87d67ae3559fb7,
0xbd78038124810666,
0x3d10a3cffab7b16e,
0x3cf952a4679b4020,
0xbc9158ffabf6b26f,
0xbc74b2e8c7ca451f,
],
[
0x3a9422b204fbf27f,
0xbfbf13fb0c0e6fcd,
0x3f5706ed3d935d00,
0x3f94af74cbd77bef,
0xbf2e9a9e66e5a792,
0xbf507ec9ed824fcb,
0x3ee856d4518ab29c,
0x3ef9040de830648a,
0xbe9262f69c56c4a2,
0xbe9618c94a54555e,
0x3e3029d2c8bd8b0e,
0x3e2983bca06d479e,
0xbdc28e29fd7e309a,
0xbdb4beea8ebaabe8,
0x3d4df87d00b82fa1,
0x3d39076f370434b4,
0xbcd1ef67bd03c16c,
0xbcb7115a994eb5d5,
0x3c4fd28ad1effa7c,
],
[
0xbfbe891b327da16d,
0xbae325aba995f36e,
0x3fae8533ce07bdb8,
0xbf3d1253218e31b0,
0xbf744e6826476498,
0x3f07271a9b5e3cb4,
0x3f2595b697c8ec04,
0xbeba46b03ecb3892,
0xbec88c173e076203,
0x3e5ed9b1754f626a,
0x3e615891ef312cd4,
0xbdf62ca3527c988f,
0xbdf0b14767922479,
0x3d85879b58ff9d05,
0x3d7745bb7346aea9,
0xbd0e20fb122a7c2a,
0xbcf89354d05f7b8c,
0x3c8f7da941b1f5c8,
0x3c741ef462ba56a3,
],
[
0xba923c4506ec812e,
0x3fbe018dac1c17e3,
0xbf54b994dd05c1fb,
0xbf93f9e0db07e7ef,
0x3f2b8e55b75b13ab,
0x3f4fdf68a78bb3d2,
0xbee5ee9d17106a07,
0xbef82ee6dfdfedd8,
0x3e90962d7f6d601e,
0x3e9560edce7d4b08,
0xbe2d34381cff4d39,
0xbe28b5a0e715b17d,
0x3dc0cab951aa6b0b,
0x3db41d218be1cc79,
0xbd4b2d2a51de7089,
0xbd384c06737cf8d9,
0x3cd04c96e113bd0a,
0x3cb66c4b3dc74284,
0xbc4cfe4908cedcb8,
],
[
0x3fbd84391bb2748d,
0x3ae17f232c3596e7,
0xbfad80edb3c4ea05,
0x3f3a431f5421f7ef,
0x3f73a282fe7b63a8,
0xbf04ed653e607bdc,
0xbf24e15832bda3c4,
0x3eb7c5832dd13719,
0x3ec7c2b80da8df7e,
0xbe5bf160ccd30135,
0xbe60cda2141b0afb,
0x3df41d31d25837e8,
0x3df02f6ca822716e,
0xbd83901f0d351db4,
0xbd7696cdd91c27b7,
0x3d0b6e1487e46414,
0x3cf7e1bb61dba0fc,
0xbc8cbb194749390a,
0xbc7394699d841c82,
],
[
0x3a909e71c2163ed3,
0xbfbd09b21e36c0bd,
0x3f52c84acfb586b4,
0x3f9355b904fbf7ee,
0xbf28fb570465af0d,
0xbf4edc3292ba6cfd,
0x3ee3e552ee8c2575,
0x3ef76d44f6a83510,
0xbe8e1ee2dc4a3910,
0xbe94b944bbd4ab57,
0x3e2a8c3a4ce04c1b,
0x3e27f8c9ef133245,
0xbdbe92957079603b,
0xbdb388138abed52f,
0x3d48c7ca6d80cb32,
0x3d379e45475751e8,
0xbccdc773f7abde68,
0xbcb5d2331c76adb9,
0x3c4a8aa5ca753696,
],
[
0xbfbc97d79918527d,
0xbae4d780a21f2057,
0x3fac95081ab2b511,
0xbf37e0b14f7d7c3f,
0xbf730688f6836a76,
0x3f030941f6e78e36,
0x3f243d5898657a6f,
0xbeb5a39a94f2ad1c,
0xbec70b18406146b4,
0x3e597607f952cd69,
0x3e604e788f4f18d9,
0xbdf2598918fbb9ee,
0xbdef701541acccd2,
0x3d81df5bcb47b899,
0x3d75f524c73bc009,
0xbd0919aede2dcbf2,
0xbcf73c65c9ce91ed,
0x3c8a553df0f1b569,
0x3c72fe9f3f0dcae2,
],
];

78
vendor/pxfm/src/bits.rs vendored Normal file
View File

@@ -0,0 +1,78 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/// Returns the unbiased binary exponent encoded in the bit pattern of `x`.
///
/// The 8-bit exponent field is extracted and the single-precision bias (127)
/// is subtracted. No special values are singled out: zero and subnormal
/// inputs give `-127`, infinities and NaN give `128`.
#[inline]
pub(crate) const fn get_exponent_f32(x: f32) -> i32 {
    const EXPONENT_BIAS: i32 = 127;
    let biased_field = (x.to_bits() >> 23) & 0xFF;
    (biased_field as i32).wrapping_sub(EXPONENT_BIAS)
}
/// Returns the 23 stored fraction bits of `x` (sign and exponent cleared).
///
/// The implicit leading one of normal numbers is not included.
#[inline]
pub(crate) const fn mantissa_f32(x: f32) -> u32 {
    const FRACTION_MASK: u32 = 0x007f_ffff;
    x.to_bits() & FRACTION_MASK
}
/// Returns the 52 stored fraction bits of `x` (sign and exponent cleared).
///
/// The implicit leading one of normal numbers is not included.
#[inline]
pub(crate) const fn mantissa_f64(x: f64) -> u64 {
    const FRACTION_MASK: u64 = 0x000f_ffff_ffff_ffff;
    x.to_bits() & FRACTION_MASK
}
/// Returns the unbiased binary exponent encoded in the bit pattern of `x`.
///
/// The 11-bit exponent field is isolated with `EXP_MASK` and the
/// double-precision bias (1023) is subtracted. Zero and subnormal inputs give
/// `-1023`, infinities and NaN give `1024`.
#[inline]
pub(crate) const fn get_exponent_f64(x: f64) -> i64 {
    const EXPONENT_BIAS: i64 = 1023;
    let exponent_field = x.to_bits() as i64 & EXP_MASK as i64;
    (exponent_field >> 52).wrapping_sub(EXPONENT_BIAS)
}
/// Returns the raw (still biased) 11-bit exponent field of `x`, in `0..=2047`.
#[inline]
pub(crate) const fn biased_exponent_f64(x: f64) -> i64 {
    let exponent_field = x.to_bits() as i64 & EXP_MASK as i64;
    exponent_field >> 52
}
/// Builds a `u64` whose lowest `len` bits are set and all others are clear.
///
/// A `len` of 64 or more yields `u64::MAX`, which also avoids shifting by the
/// full bit width.
#[inline]
pub(crate) const fn mask_trailing_ones(len: u64) -> u64 {
    match len {
        0..=63 => (1u64 << len).wrapping_sub(1),
        _ => u64::MAX,
    }
}
/// Bit mask selecting the 11-bit exponent field (bits 52..=62) of an `f64`.
pub(crate) const EXP_MASK: u64 = mask_trailing_ones(11) << 52;
/// Replaces the exponent field of the `f64` bit pattern `x` with `new_exp`.
///
/// `new_exp` is the biased exponent; only its low 11 bits are used. The sign
/// and fraction bits of `x` are returned unchanged.
#[inline]
pub(crate) fn set_exponent_f64(x: u64, new_exp: u64) -> u64 {
    let exponent_bits = new_exp.wrapping_shl(52) & EXP_MASK;
    let sign_and_fraction = x & !EXP_MASK;
    sign_and_fraction | exponent_bits
}
/// Returns the smallest normal `f32` magnitude (biased exponent 1, zero
/// fraction), negated when `sign` is `true`.
#[inline]
pub(crate) const fn min_normal_f32(sign: bool) -> f32 {
    const MIN_NORMAL_BITS: u32 = 1u32 << 23;
    const SIGN_BIT: u32 = 1u32 << 31;
    if sign {
        f32::from_bits(SIGN_BIT | MIN_NORMAL_BITS)
    } else {
        f32::from_bits(MIN_NORMAL_BITS)
    }
}

136
vendor/pxfm/src/ceil.rs vendored Normal file
View File

@@ -0,0 +1,136 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bits::{get_exponent_f32, get_exponent_f64};
/// Rounds `x` up to the smallest integral value that is not less than `x`.
///
/// Zero, infinities and NaN are returned unchanged. Inputs in `(-1, 0)`
/// produce `-0.0`, inputs in `(0, 1)` produce `1.0`; this includes subnormal
/// numbers.
#[inline]
pub const fn ceilf(x: f32) -> f32 {
    // Only zero, infinity and NaN are passed through. Subnormals must not be
    // returned here: they are tiny non-zero values, and their stored exponent
    // (-127) routes them through the `exponent <= -1` branch below.
    if x == 0.0 || !x.is_finite() {
        return x;
    }
    let is_neg = x.is_sign_negative();
    let exponent = get_exponent_f32(x);
    const FRACTION_LENGTH: u32 = 23;
    // With an exponent of at least 23 no fraction bit has a weight below one,
    // so x is already an integer.
    if exponent >= FRACTION_LENGTH as i32 {
        return x;
    }
    // |x| < 1 and x != 0.
    if exponent <= -1 {
        return if is_neg { -0.0 } else { 1.0 };
    }
    // Number of low bits that encode the fractional part.
    let trim_size = (FRACTION_LENGTH as i32).wrapping_sub(exponent);
    let x_u = x.to_bits();
    let trunc_u = x_u
        .wrapping_shr(trim_size as u32)
        .wrapping_shl(trim_size as u32);
    // If x is already an integer, return it.
    if trunc_u == x_u {
        return x;
    }
    let trunc_value = f32::from_bits(trunc_u);
    // If x is negative, the ceil operation is equivalent to the trunc operation.
    if is_neg {
        return trunc_value;
    }
    trunc_value + 1.0
}
/// Rounds `x` up to the smallest integral value that is not less than `x`.
///
/// Zero, infinities and NaN are returned unchanged. Inputs in `(-1, 0)`
/// produce `-0.0`, inputs in `(0, 1)` produce `1.0`; this includes subnormal
/// numbers.
#[inline]
pub const fn ceil(x: f64) -> f64 {
    // Only zero, infinity and NaN are passed through. Subnormals must not be
    // returned here: they are tiny non-zero values, and their stored exponent
    // (-1023) routes them through the `exponent <= -1` branch below.
    if x == 0.0 || !x.is_finite() {
        return x;
    }
    let is_neg = x.is_sign_negative();
    let exponent = get_exponent_f64(x);
    const FRACTION_LENGTH: u64 = 52;
    // With an exponent of at least 52 no fraction bit has a weight below one,
    // so x is already an integer.
    if exponent >= FRACTION_LENGTH as i64 {
        return x;
    }
    // |x| < 1 and x != 0.
    if exponent <= -1 {
        return if is_neg { -0.0 } else { 1.0 };
    }
    // Number of low bits that encode the fractional part.
    let trim_size = (FRACTION_LENGTH as i64).wrapping_sub(exponent);
    let x_u = x.to_bits();
    let trunc_u = x_u
        .wrapping_shr(trim_size as u32)
        .wrapping_shl(trim_size as u32);
    // If x is already an integer, return it.
    if trunc_u == x_u {
        return x;
    }
    let trunc_value = f64::from_bits(trunc_u);
    // If x is negative, the ceil operation is equivalent to the trunc operation.
    if is_neg {
        return trunc_value;
    }
    trunc_value + 1.0
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `ceilf` on zero, exact integers and fractional values of both signs.
    #[test]
    fn test_ceilf() {
        let cases: [(f32, f32); 5] = [
            (0.0, 0.0),
            (10.0, 10.0),
            (10.1, 11.0),
            (-9.0, -9.0),
            (-9.5, -9.0),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(ceilf(input), expected);
        }
    }

    /// Checks `ceil` on zero, exact integers and fractional values of both signs.
    #[test]
    fn test_ceil() {
        let cases: [(f64, f64); 5] = [
            (0.0, 0.0),
            (10.0, 10.0),
            (10.1, 11.0),
            (-9.0, -9.0),
            (-9.5, -9.0),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(ceil(input), expected);
        }
    }
}

405
vendor/pxfm/src/common.rs vendored Normal file
View File

@@ -0,0 +1,405 @@
/*
* // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bits::EXP_MASK;
use num_traits::MulAdd;
use std::ops::{Add, Mul};
/// Reports whether `x` holds an integral value.
///
/// One of two implementations is chosen at compile time:
/// - x86/x86_64 with SSE4.1, or aarch64: `x` is compared with itself rounded
///   to the nearest integer (ties to even);
/// - everywhere else: the biased exponent plus the number of trailing zero
///   fraction bits must reach the exponent at which the lowest fraction bit
///   has weight one (`127 + 23`).
///
/// NOTE(review): the two paths disagree for zero (rounding path: `true`, bit
/// path: `false`) and for NaN (rounding path: `false`, bit path: `true`) —
/// confirm callers never rely on those inputs.
#[inline]
pub(crate) fn is_integerf(x: f32) -> bool {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    ))]
    {
        let rounded = x.round_ties_even();
        rounded == x
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    )))]
    {
        // Biased exponent of values whose lowest fraction bit has weight one.
        const UNIT_EXPONENT: u32 = 127 + 23;
        let bits = x.to_bits();
        let biased_exponent = (bits & EXP_MASK_F32) >> 23;
        // OR-ing in the exponent mask caps the count at 23 for a zero fraction.
        let trailing_zero_bits = (bits | EXP_MASK_F32).trailing_zeros();
        biased_exponent + trailing_zero_bits >= UNIT_EXPONENT
    }
}
/// Reports whether `x` is an odd integer.
///
/// `x` is odd exactly when its lowest set significand bit has weight one,
/// i.e. when the biased exponent plus the number of trailing zero fraction
/// bits equals `127 + 23`. Zero, subnormals, infinities, NaN, non-integral
/// values and values of magnitude `2^24` or more all yield `false`.
///
/// The same bit test is used on every target. A cast-based shortcut
/// (`x as i32 & 1`) is deliberately avoided: the cast truncates, so `5.5`
/// would be reported as odd, and it saturates, so large even values such as
/// `2^31` would become `i32::MAX` and be reported as odd as well.
#[inline]
pub(crate) fn is_odd_integerf(x: f32) -> bool {
    // Biased exponent of values whose lowest fraction bit has weight one.
    const UNIT_EXPONENT: u32 = 127 + 23;
    let bits = x.to_bits();
    let biased_exponent = (bits & EXP_MASK_F32) >> 23;
    // OR-ing in the exponent mask caps the count at 23 for a zero fraction.
    let trailing_zero_bits = (bits | EXP_MASK_F32).trailing_zeros();
    biased_exponent + trailing_zero_bits == UNIT_EXPONENT
}
/// Reports whether `n` holds an integral value.
///
/// One of two implementations is chosen at compile time:
/// - x86/x86_64 with SSE4.1, or aarch64: `n` is compared with itself rounded
///   to the nearest integer (ties to even);
/// - everywhere else: the biased exponent plus the number of trailing zero
///   fraction bits must reach the exponent at which the lowest fraction bit
///   has weight one (`1023 + 52`).
///
/// NOTE(review): the two paths disagree for zero (rounding path: `true`, bit
/// path: `false`) and for NaN (rounding path: `false`, bit path: `true`) —
/// confirm callers never rely on those inputs.
#[inline]
pub(crate) fn is_integer(n: f64) -> bool {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    ))]
    {
        let rounded = n.round_ties_even();
        n == rounded
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    )))]
    {
        // Biased exponent of values whose lowest fraction bit has weight one.
        const UNIT_EXPONENT: u64 = 1023 + 52;
        let bits = n.to_bits();
        let biased_exponent = (bits & EXP_MASK) >> 52;
        // OR-ing in the exponent mask caps the count at 52 for a zero fraction.
        let trailing_zero_bits = (bits | EXP_MASK).trailing_zeros();
        biased_exponent + trailing_zero_bits as u64 >= UNIT_EXPONENT
    }
}
/// Reports whether `x` is an odd integer.
///
/// `x` is odd exactly when its lowest set significand bit has weight one,
/// i.e. when the biased exponent plus the number of trailing zero fraction
/// bits equals `1023 + 52`. Zero, subnormals, infinities, NaN, non-integral
/// values and values of magnitude `2^53` or more all yield `false`.
#[inline]
pub(crate) fn is_odd_integer(x: f64) -> bool {
    // Biased exponent of values whose lowest fraction bit has weight one.
    const UNIT_EXPONENT: u64 = 1023 + 52;
    let bits = x.to_bits();
    let biased_exponent = (bits & EXP_MASK) >> 52;
    // OR-ing in the exponent mask caps the count at 52 for a zero fraction.
    let trailing_zero_bits = (bits | EXP_MASK).trailing_zeros();
    biased_exponent + trailing_zero_bits as u64 == UNIT_EXPONENT
}
/// Multiply-accumulate: returns `acc + a * b`.
///
/// On x86/x86_64 built with the `fma` target feature, or on aarch64 with
/// NEON, the work is delegated to `MulAdd::mul_add` (presumably a fused
/// operation for float types — confirm against `num_traits`). On every other
/// target a separate multiplication and addition are performed.
#[inline(always)]
pub(crate) fn mlaf<T>(acc: T, a: T, b: T) -> T
where
    T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>,
{
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        MulAdd::mul_add(a, b, acc)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        acc + a * b
    }
}
/// Rounds `x` to the nearest whole number, with halfway cases going away
/// from zero.
///
/// Half is added towards the sign of `x` and the sum is truncated through an
/// `i32` cast, so results outside the `i32` range saturate and NaN becomes 0.
#[inline]
pub(crate) const fn rintfk(x: f32) -> f32 {
    let nudged = if x < 0. { x - 0.5 } else { x + 0.5 };
    let truncated = nudged as i32;
    truncated as f32
}
/// Returns `c + a * b` using a separate multiplication and addition; usable
/// in const contexts.
#[inline(always)]
pub(crate) const fn fmlaf(a: f32, b: f32, c: f32) -> f32 {
    let product = a * b;
    c + product
}
/// Optional FMA for `f32`: returns `a * b + c`.
///
/// `f32::mul_add` is used on x86/x86_64 built with the `fma` target feature
/// and on aarch64 with NEON; other targets fall back to a separate
/// multiplication and addition.
#[inline(always)]
pub(crate) fn f_fmlaf(a: f32, b: f32, c: f32) -> f32 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        a.mul_add(b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let product = a * b;
        product + c
    }
}
/// Optional FMA for `f64`: returns `a * b + c`.
///
/// `f64::mul_add` is used on x86/x86_64 built with the `fma` target feature
/// and on aarch64 with NEON; other targets fall back to a separate
/// multiplication and addition.
#[inline(always)]
pub(crate) fn f_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        a.mul_add(b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let product = a * b;
        product + c
    }
}
/// Returns `c + a * b` using a separate multiplication and addition; usable
/// in const contexts.
#[inline(always)]
pub(crate) const fn fmla(a: f64, b: f64, c: f64) -> f64 {
    let product = a * b;
    c + product
}
/// Executes mandatory FMA
/// if not available will be simulated through Dekker and Veltkamp
#[inline(always)]
pub(crate) fn dd_fmla(a: f64, b: f64, c: f64) -> f64 {
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
f_fmla(a, b, c)
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
use crate::double_double::DoubleDouble;
DoubleDouble::dd_f64_mul_add(a, b, c)
}
}
/// Mandatory FMA for `f64`: returns `a * b + c`.
///
/// Targets with a hardware FMA (x86/x86_64 with the `fma` feature, aarch64
/// with NEON) go through `f_fmla`. All other targets convert the three
/// operands to `DyadicFloat128`, multiply and add there, and convert the
/// result back with `fast_as_f64`.
#[inline(always)]
pub(crate) fn dyad_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        f_fmla(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        use crate::dyadic_float::DyadicFloat128;
        let lhs = DyadicFloat128::new_from_f64(a);
        let rhs = DyadicFloat128::new_from_f64(b);
        let sum = lhs * rhs + DyadicFloat128::new_from_f64(c);
        sum.fast_as_f64()
    }
}
/// Mandatory FMA for `f32`: returns `a * b + c`.
///
/// Targets with a hardware FMA (x86/x86_64 with the `fma` feature, aarch64
/// with NEON) go through `f_fmlaf`. All other targets widen the operands to
/// `f64`, multiply and add there, and narrow the result back to `f32`.
#[inline(always)]
#[allow(unused)]
pub(crate) fn dd_fmlaf(a: f32, b: f32, c: f32) -> f32 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        f_fmlaf(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let wide = f64::from(a) * f64::from(b) + f64::from(c);
        wide as f32
    }
}
/// Returns `a * b + c` by forwarding to `mlaf` with the accumulator moved to
/// the last position: `c_mlaf(a, b, c)` is `mlaf(c, a, b)`.
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn c_mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
    a: T,
    b: T,
    c: T,
) -> T {
    mlaf(c, a, b)
}
/// Returns a value with the magnitude of `x` and the sign of `y`.
///
/// Works purely on the bit patterns, so NaN payloads and signed zeros are
/// carried over as they are.
#[inline]
pub const fn copysignfk(x: f32, y: f32) -> f32 {
    const SIGN_BIT: u32 = 1 << 31;
    let magnitude = x.to_bits() & !SIGN_BIT;
    let sign = y.to_bits() & SIGN_BIT;
    f32::from_bits(magnitude | sign)
}
// #[inline]
// // Finds n in ln(𝑥)=ln(𝑎)+𝑛ln(2)
// pub(crate) const fn ilogb2kf(d: f32) -> i32 {
// (((d.to_bits() as i32) >> 23) & 0xff) - 0x7f
// }
//
// #[inline]
// // Finds a in x=a+𝑛ln(2)
// pub(crate) const fn ldexp3kf(d: f32, n: i32) -> f32 {
// f32::from_bits(((d.to_bits() as i32) + (n << 23)) as u32)
// }
/// Builds the `f32` whose exponent field encodes `2^q` (zero fraction).
///
/// `q` is biased by 127 and shifted into the exponent position without any
/// range check; the result is a power of two only while `q + 127` fits the
/// 8-bit exponent field of a normal number.
#[inline]
pub(crate) const fn pow2if(q: i32) -> f32 {
    let biased_exponent = q.wrapping_add(0x7f) as u32;
    f32::from_bits(biased_exponent << 23)
}
/// Rounds `x` to the nearest whole number, with halfway cases going away
/// from zero.
///
/// Half is added towards the sign of `x` and the sum is truncated through an
/// `i64` cast, so results outside the `i64` range saturate and NaN becomes 0.
#[inline]
pub(crate) const fn rintk(x: f64) -> f64 {
    let nudged = if x < 0. { x - 0.5 } else { x + 0.5 };
    let truncated = nudged as i64;
    truncated as f64
}
/// Builds the `f64` whose exponent field encodes `2^q` (zero fraction).
///
/// `q` is biased by 1023 and shifted into the exponent position without any
/// range check; the result is a power of two only while `q + 1023` fits the
/// 11-bit exponent field of a normal number.
#[inline(always)]
pub(crate) const fn pow2i(q: i32) -> f64 {
    let biased_exponent = q.wrapping_add(0x3ff) as u64;
    f64::from_bits(biased_exponent << 52)
}
// #[inline]
// pub(crate) const fn ilogb2k(d: f64) -> i32 {
// (((d.to_bits() >> 52) & 0x7ff) as i32) - 0x3ff
// }
//
// #[inline]
// pub(crate) const fn ldexp3k(d: f64, e: i32) -> f64 {
// f64::from_bits(((d.to_bits() as i64) + ((e as i64) << 52)) as u64)
// }
/// Returns a value with the magnitude of `x` and the sign of `y`.
///
/// Works purely on the bit patterns, so NaN payloads and signed zeros are
/// carried over as they are.
#[inline]
pub const fn copysignk(x: f64, y: f64) -> f64 {
    const SIGN_BIT: u64 = 1 << 63;
    let magnitude = x.to_bits() & !SIGN_BIT;
    let sign = y.to_bits() & SIGN_BIT;
    f64::from_bits(magnitude | sign)
}
/// Returns the smallest positive normal `f64` (biased exponent 1, zero
/// fraction).
#[inline]
pub(crate) const fn min_normal_f64() -> f64 {
    const MIN_NORMAL_BITS: u64 = 0x0010_0000_0000_0000;
    f64::from_bits(MIN_NORMAL_BITS)
}
/// Builds a `u32` whose lowest `len` bits are set and all others are clear.
///
/// A `len` of 32 or more yields `u32::MAX`, which also avoids shifting by the
/// full bit width.
#[inline]
const fn mask_trailing_ones_u32(len: u32) -> u32 {
    match len {
        0..=31 => (1u32 << len).wrapping_sub(1),
        _ => u32::MAX,
    }
}
/// Bit mask selecting the 8-bit exponent field (bits 23..=30) of an `f32`.
pub(crate) const EXP_MASK_F32: u32 = mask_trailing_ones_u32(8) << 23;
/// Replaces the exponent field of the `f32` bit pattern `x` with `new_exp`.
///
/// `new_exp` is the biased exponent; only its low 8 bits are used. The sign
/// and fraction bits of `x` are returned unchanged.
#[inline]
pub(crate) fn set_exponent_f32(x: u32, new_exp: u32) -> u32 {
    let exponent_bits = new_exp.wrapping_shl(23) & EXP_MASK_F32;
    let sign_and_fraction = x & !EXP_MASK_F32;
    sign_and_fraction | exponent_bits
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks the integer and odd-integer predicates for both float widths.
    #[test]
    fn test_is_integer() {
        // f64 predicates
        assert!(is_integer(5.));
        assert!(is_integer(6.));
        assert!(!is_integer(6.01));
        assert!(is_odd_integer(5.));
        assert!(!is_odd_integer(6.));
        assert!(!is_odd_integer(6.01));
        // f32 predicates
        assert!(is_integerf(5.));
        assert!(is_integerf(6.));
        assert!(!is_integerf(6.01));
        assert!(is_odd_integerf(5.));
        assert!(!is_odd_integerf(6.));
        assert!(!is_odd_integerf(6.01));
    }
}

485
vendor/pxfm/src/compound/compound_d.rs vendored Normal file
View File

@@ -0,0 +1,485 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{f_fmla, is_integer, is_odd_integer};
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::logs::{log1p_f64_dyadic, log1p_fast_dd};
use crate::pow_exec::{exp_dyadic, pow_exp_dd};
use crate::triple_double::TripleDouble;
/// Computes `(1 + x)^y`.
///
/// Special cases are resolved first, in this order:
/// - any NaN argument yields NaN;
/// - `y == 0` yields `1.0`;
/// - `x == ±0` yields `1.0`;
/// - `x == +Inf` yields `+Inf` for `y > 0` and `+0.0` for `y < 0`;
/// - `x < -1` yields NaN;
/// - `x == -1` yields `+Inf` for `y < 0` and `+0.0` for `y > 0`;
/// - `|y|` equal to `0.5`, `1` or `2` is evaluated directly from `1 + x`;
/// - `y == ±Inf` yields `+Inf` or `+0.0` depending on whether the base
///   `1 + x` lies above or below one;
/// - otherwise, `-1 < x < 0` with a non-integer `y` yields NaN.
///
/// Remaining inputs are evaluated by exponentiation by squaring when `y` is a
/// small integer, or through `log1p`/`exp` in double-double arithmetic. When
/// the fast result cannot be rounded unambiguously, a slower higher-precision
/// fallback is used.
pub fn f_compound(x: f64, y: f64) -> f64 {
    /*
       Rules from IEEE 754-2019 for compound (x, n) with n integer:
           (a) compound (x, 0) is 1 for x >= -1 or quiet NaN
           (b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
           (c) compound (-1, n) is +0 for n > 0
           (d) compound (+/-0, n) is 1
           (e) compound (+Inf, n) is +Inf for n > 0
           (f) compound (+Inf, n) is +0 for n < 0
           (g) compound (x, n) is qNaN and signals the invalid exception for x < -1
           (h) compound (qNaN, n) is qNaN for n <> 0.
    */
    const ABS_MASK: u64 = 0x7fff_ffff_ffff_ffff;
    const MANTISSA_MASK: u64 = (1u64 << 52) - 1;

    // Any NaN operand produces NaN; nothing below has to consider NaN again.
    if x.is_nan() || y.is_nan() {
        return f64::NAN;
    }

    let x_sign = x.is_sign_negative();
    let y_sign = y.is_sign_negative();
    let x_u = x.to_bits();
    let x_a = x_u & ABS_MASK;
    let y_a = y.to_bits() & ABS_MASK;
    let y_mant = y.to_bits() & MANTISSA_MASK;

    // Sign applied to the result of the general path.
    let mut s = 1.0;

    // The double precision number that is closest to 1 is (1 - 2^-53), which has
    //   log2(1 - 2^-53) ~ -1.715...p-53.
    // So if |y| > |1075 / log2(1 - 2^-53)|, and x is finite:
    //   |y * log2(x)| = 0 or > 1075.
    // Hence, x^y will either overflow or underflow if x is not zero.
    //
    // NOTE(review): the last two comparisons together accept every bit pattern
    // (`f64::MIN.to_bits()` is larger than `f64::INFINITY.to_bits()`), so this
    // block currently runs for every input. Confirm whether
    // `f64::MIN_POSITIVE` was intended.
    if y_mant == 0
        || y_a > 0x43d7_4910_d52d_3052
        || x_u == 1f64.to_bits()
        || x_u >= f64::INFINITY.to_bits()
        || x_u < f64::MIN.to_bits()
    {
        // (a) compound(x, 0) is 1
        if y == 0.0 {
            return 1.0;
        }
        // (d) compound(±0, n) is 1
        if x == 0.0 {
            return 1.0;
        }
        // (e, f) compound(+Inf, n)
        if x == f64::INFINITY {
            return if y > 0. { x } else { 0.0 };
        }
        // (g) compound(x, n) is qNaN for x < -1 (this also covers -Inf)
        if x < -1.0 {
            return f64::NAN;
        }
        // (b, c) compound(-1, n)
        if x == -1.0 {
            return if y < 0. { f64::INFINITY } else { 0.0 };
        }

        // From here on x is finite, non-zero and greater than -1, so the base
        // 1 + x is a positive finite number.
        match y_a {
            0x3fe0_0000_0000_0000 => {
                // |y| = 1/2
                // TODO: speed up x^(-1/2) with rsqrt(x) when available.
                let z = DoubleDouble::from_full_exact_add(x, 1.0).sqrt();
                return if y_sign {
                    z.recip().to_f64()
                } else {
                    z.to_f64()
                };
            }
            0x3ff0_0000_0000_0000 => {
                // |y| = 1
                return if y_sign {
                    const ONES: DyadicFloat128 = DyadicFloat128 {
                        sign: DyadicSign::Pos,
                        exponent: -127,
                        mantissa: 0x80000000_00000000_00000000_00000000_u128,
                    };
                    let z = DyadicFloat128::new_from_f64(x) + ONES;
                    z.reciprocal().fast_as_f64()
                } else {
                    DoubleDouble::from_full_exact_add(x, 1.0).to_f64()
                };
            }
            0x4000_0000_0000_0000 => {
                // |y| = 2. A square is never negative, so the sign of x must
                // not be transferred to the result.
                let z0 = DoubleDouble::from_full_exact_add(x, 1.0);
                let z = DoubleDouble::quick_mult(z0, z0);
                return if y_sign {
                    z.recip().to_f64()
                } else {
                    z.to_f64()
                };
            }
            _ => {}
        }

        // y is +-Inf. The base 1 + x is below one exactly when x is negative:
        //   (base < 1)^(-inf) = +inf, (base < 1)^(+inf) = 0
        //   (base > 1)^(-inf) = 0,    (base > 1)^(+inf) = +inf
        if y.is_infinite() {
            return if x_sign == y_sign { f64::INFINITY } else { 0.0 };
        }

        // NOTE(review): the handling of -1 < x < 0 below mirrors pow(x, y) with
        // a negative base, although the base 1 + x is positive here: a
        // non-integer y yields NaN and an odd integer y requests a negative
        // sign on the general path. The NaN result is asserted by a test in
        // this file, so the behavior is kept as is — confirm the intended
        // semantics.
        if x_sign {
            if !is_integer(y) {
                return f64::NAN;
            }
            if is_odd_integer(y) {
                s = -1.0;
            }
        }

        // No constant shortcut exists for x == 1: compound(1, y) = 2^y, which
        // the paths below evaluate like any other input.

        // If the biased exponent of |x| or |y| lies within 128 of either end
        // of the exponent range, go straight to the accurate path.
        let min_exp = x_a.min(y_a) >> 52;
        let max_exp = x_a.max(y_a) >> 52;
        if max_exp > 0x7ffu64 - 128u64 || min_exp < 128u64 {
            return compound_accurate(x, y, s);
        }
    }

    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    let straight_path_precondition: bool = true;
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    let straight_path_precondition: bool = y.is_sign_positive();
    // this is correct only for positive exponent number without FMA,
    // otherwise reciprocal may overflow.
    // y is integer and in [-102;102] and 2^-51 < |x| <= 2^10
    if is_integer(y)
        && y_a <= 0x4059800000000000u64
        && x_a <= 0x4090000000000000u64
        && x_a > 0x3cc0_0000_0000_0000
        && straight_path_precondition
    {
        let mut base = DoubleDouble::from_full_exact_add(1.0, x);
        // SAFETY: the condition above guarantees that y is an integer with
        // |y| <= 102, so |y| is finite, non-negative and fits in `usize`.
        let n = unsafe { y.abs().to_int_unchecked::<usize>() };
        // exponentiation by squaring: O(log(y)) complexity
        let mut acc = if n % 2 != 0 {
            base
        } else {
            DoubleDouble::new(0., 1.)
        };
        let mut remaining = n >> 1;
        while remaining != 0 {
            base = DoubleDouble::mult(base, base);
            if remaining % 2 != 0 {
                acc = DoubleDouble::mult(acc, base);
            }
            remaining >>= 1;
        }

        let dz = if y.is_sign_negative() {
            acc.recip()
        } else {
            acc
        };
        // Accept the result only if both ends of the error interval
        // (relative width 2^-59) collapse to the same double.
        let ub = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), -dz.hi, dz.lo); // 2^-59
        let lb = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), dz.hi, dz.lo); // 2^-59
        if ub == lb {
            return dz.to_f64();
        }
        return mul_fixed_power_hard(x, y);
    }

    // General path: (1 + x)^y = exp(y * log1p(x)).
    let l = log1p_fast_dd(x);
    let ey = ((y.to_bits() >> 52) & 0x7ff) as i32;
    // Biased exponent of y outside [0x36, 0x7f5): use the accurate path.
    if ey < 0x36 || ey >= 0x7f5 {
        return compound_accurate(x, y, s);
    }

    let r = DoubleDouble::quick_mult_f64(l, y);
    let res = pow_exp_dd(r, s);
    // Same interval test as above, with relative width 2^-64.
    let res_min = res.hi + f_fmla(f64::from_bits(0x3bf0000000000000), -res.hi, res.lo);
    let res_max = res.hi + f_fmla(f64::from_bits(0x3bf0000000000000), res.hi, res.lo);
    if res_min == res_max {
        return res_max;
    }
    compound_accurate(x, y, s)
}
/// Slow, higher-precision evaluation of `(1 + x)^y` used when the fast paths
/// cannot decide the rounding.
///
/// The result is computed as `exp_dyadic(log1p_f64_dyadic(x) * y)` in
/// `DyadicFloat128` arithmetic. `s` selects the sign of the result: `-1.0`
/// gives a negative result, any other value a positive one.
///
/// NOTE(review): when the dyadic exponent is 1025 or more this returns `1.0`,
/// which looks unexpected for an overflowing result. The `-1075` / `1025`
/// thresholds are also compared against the exponent field as it is, without
/// accounting for the 128-bit mantissa — confirm against `exp_dyadic`'s
/// output convention.
#[cold]
fn compound_accurate(x: f64, y: f64, s: f64) -> f64 {
    let exponent = DyadicFloat128::new_from_f64(y);
    let scaled_log = log1p_f64_dyadic(x) * exponent;
    let mut power = exp_dyadic(scaled_log);

    // Underflow: half of the smallest subnormal, carrying the sign of `s`.
    if power.exponent < -1075 {
        let smallest_subnormal = f64::from_bits(0x0000000000000001);
        return 0.5 * (s * smallest_subnormal);
    }
    if power.exponent >= 1025 {
        return 1.0;
    }

    power.sign = if s != -1.0 {
        DyadicSign::Pos
    } else {
        DyadicSign::Neg
    };
    power.fast_as_f64()
}
/// Fallback for integer exponents: evaluates `(1 + x)^y` by exponentiation by
/// squaring in `TripleDouble` arithmetic, taking the reciprocal at the end
/// when `y` is negative.
///
/// `y` must be an integer whose magnitude fits in `usize`.
#[cold]
#[inline(never)]
fn mul_fixed_power_hard(x: f64, y: f64) -> f64 {
    let mut square = TripleDouble::from_full_exact_add(1.0, x);
    // SAFETY: relies on the caller passing an integral `y` of small magnitude;
    // the call site visible in this file (`f_compound`) only reaches this
    // function with an integer `y` satisfying |y| <= 102.
    let exponent = unsafe { y.abs().to_int_unchecked::<usize>() };

    // exponentiation by squaring: O(log(y)) complexity
    let mut product = if exponent % 2 != 0 {
        square
    } else {
        TripleDouble::new(0., 0., 1.)
    };
    let mut remaining = exponent >> 1;
    while remaining != 0 {
        square = TripleDouble::quick_mult(square, square);
        if remaining % 2 != 0 {
            product = TripleDouble::quick_mult(product, square);
        }
        remaining >>= 1;
    }

    if y.is_sign_negative() {
        return product.recip().to_f64();
    }
    product.to_f64()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_compound() {
assert_eq!(f_compound(4831835136., -13.),0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012780345669344118 );
assert_eq!(
f_compound(11468322278342656., 2.9995136260713475),
1481455956234813000000000000000000000000000000000.
);
assert_eq!(f_compound(0.9999999999999999, 3.), 7.999999999999999);
assert_eq!(
f_compound(1.0039215087890625, 10.000000000349134),
1044.2562119607103
);
assert_eq!(f_compound(10., 18.0), 5559917313492231000.0);
assert_eq!(
f_compound(131071.65137729312, 2.000001423060894),
17180328027.532265
);
assert_eq!(f_compound(2., 5.), 243.);
assert_eq!(f_compound(126.4324324, 126.4324324), 1.4985383310514043e266);
assert_eq!(f_compound(0.4324324, 126.4324324), 5.40545942023447e19);
assert!(f_compound(-0.4324324, 126.4324324).is_nan());
assert_eq!(f_compound(0.0, 0.0), 1.0);
assert_eq!(f_compound(0.0, -1. / 2.), 1.0);
assert_eq!(f_compound(-1., -1. / 2.), f64::INFINITY);
assert_eq!(f_compound(f64::INFINITY, -1. / 2.), 0.0);
assert_eq!(f_compound(f64::INFINITY, 1. / 2.), f64::INFINITY);
assert_eq!(f_compound(46.3828125, 46.3828125), 5.248159634773675e77);
}
#[test]
fn test_compound_exotic_cases() {
assert_eq!(f_compound(0.9999999850987819, -1.), 0.5000000037253046);
assert_eq!(
f_compound(22427285907987670000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
-1.),
0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004458854290718438
);
assert_eq!(f_compound(0.786438105629145, 607.999512419221),
1616461095392737200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
assert_eq!(f_compound( 1.0000002381857613, 960.8218657970428),
17228671476562465000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
assert_eq!(f_compound(1., 1.0000000000000284), 2.);
assert_eq!(f_compound(1., f64::INFINITY), f64::INFINITY);
assert_eq!(
f_compound(10.000000000000007, -8.),
0.00000000466507380209731
);
}
}

573
vendor/pxfm/src/compound/compound_m1.rs vendored Normal file
View File

@@ -0,0 +1,573 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::*;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::logs::log1p_fast_dd;
use crate::pow_exec::pow_expm1_1;
/// Computes (1+x)^y - 1
///
/// max found ULP 0.56
///
/// Special values (each is the corresponding `compound` result minus one):
/// - a NaN argument yields NaN;
/// - `y == 0` or `x == ±0` yields `0`;
/// - `x == +Inf` yields `+Inf` for `y > 0` and `-1` for `y < 0`;
/// - `x < -1` yields NaN;
/// - `x == -1` yields `+Inf` for `y < 0` and `-1` for `y > 0`;
/// - `y == ±Inf` yields `+Inf` when `x` and `y` have the same sign and `-1`
///   otherwise (i.e. it depends on whether `1 + x` is above or below one);
/// - `-1 < x < 0` with a non-integer `y` yields NaN.
pub fn f_compound_m1(x: f64, y: f64) -> f64 {
    /*
       Rules from IEEE 754-2019 for compound (x, n) with n integer
       (this function returns the listed value minus one):
           (a) compound (x, 0) is 1 for x >= -1 or quiet NaN
           (b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
           (c) compound (-1, n) is +0 for n > 0
           (d) compound (+/-0, n) is 1
           (e) compound (+Inf, n) is +Inf for n > 0
           (f) compound (+Inf, n) is +0 for n < 0
           (g) compound (x, n) is qNaN and signals the invalid exception for x < -1
           (h) compound (qNaN, n) is qNaN for n <> 0.
    */
    // +1 and -1 as 128-bit dyadic values.
    const ONES: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -127,
        mantissa: 0x80000000_00000000_00000000_00000000_u128,
    };
    const M_ONES: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Neg,
        exponent: -127,
        mantissa: 0x80000000_00000000_00000000_00000000_u128,
    };
    const MANTISSA_MASK: u64 = (1u64 << 52) - 1;

    let x_sign = x.is_sign_negative();
    let y_sign = y.is_sign_negative();
    let x_u = x.to_bits();
    let x_a = x_u & 0x7fff_ffff_ffff_ffff;
    let y_a = y.to_bits() & 0x7fff_ffff_ffff_ffff;
    let y_mant = y.to_bits() & MANTISSA_MASK;

    // Any NaN input produces NaN.
    if x.is_nan() || y.is_nan() {
        return f64::NAN;
    }

    // Sign factor handed to the final exponential.
    let mut s = 1.0;

    // NOTE(review): as written, the last two tests together accept every x
    // (negative x satisfies `x_u >= +Inf bits`, non-negative x satisfies
    // `x_u < f64::MIN bits`), so this block always runs. It is kept unchanged
    // to preserve behavior.
    if y_mant == 0
        || y_a > 0x43d7_4910_d52d_3052
        || x_u == 1f64.to_bits()
        || x_u >= f64::INFINITY.to_bits()
        || x_u < f64::MIN.to_bits()
    {
        // (a) compound(x, 0) is 1
        if y == 0.0 {
            return 0.0;
        }
        // (d) compound(±0, n) is 1
        if x == 0.0 {
            return 0.0;
        }
        // (e, f) compound(+Inf, n)
        if x.is_infinite() && x > 0.0 {
            return if y > 0. { x } else { -1.0 };
        }
        // (g) compound(x, n) is qNaN for x < -1 (this also covers x = -Inf)
        if x < -1.0 {
            return f64::NAN;
        }
        // (b, c) compound(-1, n)
        if x == -1.0 {
            return if y < 0. { f64::INFINITY } else { -1.0 };
        }
        // From here on x is finite, x > -1 and x != 0.
        match y_a {
            // |y| = 1
            0x3ff0_0000_0000_0000 => {
                return if y_sign {
                    let z = DyadicFloat128::new_from_f64(x);
                    let p = (z + ONES).reciprocal() + M_ONES;
                    p.fast_as_f64()
                } else {
                    x
                };
            }
            // |y| = 2
            0x4000_0000_0000_0000 => {
                let z0 = DyadicFloat128::new_from_f64(x) + ONES;
                let z = z0 * z0;
                return if y_sign {
                    (z.reciprocal() + M_ONES).fast_as_f64()
                } else {
                    // (1+x)^2 - 1 = x * (2 + x) has the sign of x for x > -1
                    f64::copysign((z + M_ONES).fast_as_f64(), x)
                };
            }
            _ => {}
        }
        if y_a >= 0x7ff0_0000_0000_0000 {
            // y is ±Inf (NaN was rejected above). The base is 1 + x:
            //   x > 0      => base > 1: +Inf for y = +Inf, -1 for y = -Inf
            //   -1 < x < 0 => base < 1: -1 for y = +Inf, +Inf for y = -Inf
            return if x_sign == y_sign {
                f64::INFINITY
            } else {
                -1.0
            };
        }
        // x is finite and negative.
        if x_sign {
            if is_integer(y) {
                if is_odd_integer(y) {
                    // NOTE(review): for -1 < x < 0 the base 1 + x is positive,
                    // so a negative sign factor for odd y looks inherited from
                    // a pow(x, y) implementation — confirm how `pow_expm1_1`
                    // uses it. Kept unchanged to preserve behavior.
                    static CS: [f64; 2] = [1.0, -1.0];
                    // set sign to 1 for y even, to -1 for y odd
                    let y_parity = if (y.abs()) >= f64::from_bits(0x4340000000000000) {
                        0usize
                    } else {
                        (y as i64 & 0x1) as usize
                    };
                    s = CS[y_parity];
                }
            } else {
                // negative x with a non-integer exponent is reported as NaN
                return f64::NAN;
            }
        }
    }

    // evaluate (1+x)^y explicitly for integer |y| <= 102 and 2^-51 < |x| <= 1024
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    let straight_path_precondition: bool = true;
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    let straight_path_precondition: bool = y.is_sign_positive();
    // this is correct only for positive exponent number without FMA,
    // otherwise reciprocal may overflow.
    if is_integer(y)
        && y_a <= 0x4059800000000000u64
        && x_a <= 0x4090000000000000u64
        && x_a > 0x3cc0_0000_0000_0000
        && straight_path_precondition
    {
        let mut base = DoubleDouble::from_full_exact_add(1.0, x);
        // SAFETY: the guard above ensures y is an integer with |y| <= 102,
        // so |y| is finite and representable as usize.
        let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
        // exponentiation by squaring: O(log(y)) complexity
        let mut acc = if iter_count % 2 != 0 {
            base
        } else {
            DoubleDouble::new(0., 1.)
        };
        while {
            iter_count >>= 1;
            iter_count
        } != 0
        {
            base = DoubleDouble::mult(base, base);
            if iter_count % 2 != 0 {
                acc = DoubleDouble::mult(acc, base);
            }
        }
        let mut dz = if y.is_sign_negative() {
            acc.recip()
        } else {
            acc
        };
        dz = DoubleDouble::full_add_f64(dz, -1.);
        // Rounding test: accept the double-double result only when both ends
        // of a 2^-59 relative error interval round to the same double.
        let ub = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), -dz.hi, dz.lo); // 2^-59
        let lb = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), dz.hi, dz.lo); // 2^-59
        if ub == lb {
            return dz.to_f64();
        }
        return mul_fixed_power_hard(x, y);
    }

    // approximate log1p(x)
    let l = log1p_fast_dd(x);
    let ey = ((y.to_bits() >> 52) & 0x7ff) as i32;
    // NOTE(review): |y| < 2^-969 or |y| >= 2^1014 returns 0 without looking at
    // x; existing tests pin this value, so it is kept — confirm it is intended
    // for huge |y|.
    if ey < 0x36 || ey >= 0x7f5 {
        return 0.;
    }
    let r = DoubleDouble::quick_mult_f64(l, y);
    let res = pow_expm1_1(r, s);
    res.to_f64()
}
/// Fallback for integer exponents: computes `(1 + x)^y - 1` by binary
/// exponentiation in 128-bit dyadic arithmetic and rounds once at the end.
///
/// A negative `y` is handled by taking the reciprocal of `(1 + x)^|y|` before
/// subtracting one.
#[cold]
#[inline(never)]
fn mul_fixed_power_hard(x: f64, y: f64) -> f64 {
    // +1 and -1 as dyadic values.
    const ONE: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -127,
        mantissa: 0x80000000_00000000_00000000_00000000_u128,
    };
    const M_ONE: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Neg,
        exponent: -127,
        mantissa: 0x80000000_00000000_00000000_00000000_u128,
    };
    let mut base = DyadicFloat128::new_from_f64(x) + ONE;
    // SAFETY: NOTE(review) — this relies on the caller having established that
    // `y` is a finite integer whose magnitude fits in `usize`; confirm at the
    // call site.
    let mut remaining = unsafe { y.abs().to_int_unchecked::<usize>() };
    // Seed with the lowest bit of the exponent, then square-and-multiply
    // through the remaining bits.
    let mut product = if (remaining & 1) == 1 { base } else { ONE };
    loop {
        remaining >>= 1;
        if remaining == 0 {
            break;
        }
        base = base * base;
        if (remaining & 1) == 1 {
            product = product * base;
        }
    }
    if y.is_sign_negative() {
        (product.reciprocal() + M_ONE).fast_as_f64()
    } else {
        (product + M_ONE).fast_as_f64()
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_compound_exotic() {
assert_eq!(
f_compound_m1(0.000152587890625, -8.484374999999998),
-0.0012936766014690006
);
assert_eq!(
f_compound_m1(
0.00000000000000799360578102344,
-0.000000000000000000000001654361225106131
),
-0.000000000000000000000000000000000000013224311452909338
);
assert_eq!(
f_compound_m1( 4.517647064592699, 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000055329046628180653),
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009449932890153435
);
assert_eq!(f_compound_m1(
11944758478933760000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
-1242262631503757300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
), -1.);
}
#[test]
fn test_compound_m1() {
assert_eq!(
f_compound_m1(0.0000000000000009991998751296936, -4.),
-0.000000000000003996799500518764
);
assert_eq!(f_compound_m1(-0.003173828125, 25.), -0.0763960132649781);
assert_eq!(f_compound_m1(3., 2.8927001953125), 54.154259038961406);
assert_eq!(
f_compound_m1(-0.43750000000000044, 19.),
-0.9999821216263793
);
assert_eq!(
f_compound_m1(127712., -2.0000000000143525),
-0.9999999999386903
);
assert_eq!(
f_compound_m1(-0.11718749767214207, 2893226081485815000000000000000.),
-1.
);
assert_eq!(
f_compound_m1(2418441935074801400000000., 512.),
f64::INFINITY
);
assert_eq!(
f_compound_m1(32.50198364245834, 128000.00000000093),
f64::INFINITY
);
assert_eq!(
f_compound_m1(1.584716796877785, 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004168916810703412),
0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003958869879428553
);
assert_eq!(
f_compound_m1(
-0.000000000000000000000000000000001997076793037533,
366577337071337140000000000000000f64
),
-0.5190938261758579
);
assert_eq!(f_compound_m1(2.1075630259863374, 0.5), 00.7628281328553664);
assert_eq!(f_compound_m1(2.1078916412661783, 0.5), 0.7629213372315222);
assert_eq!(f_compound_m1(3.0000000000001115, -0.5), -0.500000000000007);
assert_eq!(
f_compound_m1(0.0004873839215895903, 3.),
0.0014628645098045245
);
assert_eq!(f_compound_m1(-0.483765364602732, 3.), -0.862424399516842);
assert_eq!(f_compound_m1(3.0000001192092896, -2.), -0.9375000037252902);
assert_eq!(f_compound_m1(29.38323424607434, -1.), -0.9670871115332561);
assert_eq!(f_compound_m1(-0.4375, 4.), -0.8998870849609375);
assert_eq!(
f_compound_m1(-0.0039033182037826464, 3.),
-0.011664306402886494
);
assert_eq!(
f_compound_m1(0.000000000000000000000000000000000000007715336350455947,
-262034087537726030000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
-1.,
);
assert_eq!(f_compound_m1(10.000000059604645, 10.), 25937426005.44638);
assert_eq!(f_compound_m1(10., -308.25471555814863), -1.0);
assert_eq!(
f_compound_m1(5.4172231599824623E-312, 9.4591068440831498E+164),
5.124209266851586e-147
);
assert_eq!(
f_compound_m1(5.8776567263633397E-39, 3.4223548116804511E-310),
0.0
);
assert_eq!(
f_compound_m1(5.8639503496997932E-148, -7.1936801558778956E+305),
0.0
);
assert_eq!(
f_compound_m1(0.9908447265624999,
-19032028850336152000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
-1.
);
assert_eq!(
f_compound_m1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006952247559980936,
5069789834563405000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
3.524643400695958e-163
);
assert_eq!(
f_compound_m1(1.000000000000341,
-69261261804788370000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
-1.
);
assert_eq!(
f_compound_m1(
0.0000000000000001053438024827798,
0.0000000000000001053438024827798
),
0.000000000000000000000000000000011097316721530923
);
assert_eq!(
f_compound_m1(
0.00000000000000010755285551056508,
0.00000000000000010755285551056508
),
0.00000000000000000000000000000001156761672847649
);
assert_eq!(f_compound_m1(2.4324324, 1.4324324), 4.850778380908823);
assert_eq!(f_compound_m1(2., 5.), 242.);
assert_eq!(f_compound_m1(0.4324324, 126.4324324), 5.40545942023447e19);
assert!(f_compound_m1(-0.4324324, 126.4324324).is_nan());
assert_eq!(f_compound_m1(0.0, 0.0), 0.0);
assert_eq!(f_compound_m1(0.0, -1. / 2.), 0.0);
assert_eq!(f_compound_m1(-1., -1. / 2.), f64::INFINITY);
assert_eq!(f_compound_m1(f64::INFINITY, -1. / 2.), -1.0);
assert_eq!(f_compound_m1(f64::INFINITY, 1. / 2.), f64::INFINITY);
assert_eq!(f_compound_m1(46.3828125, 46.3828125), 5.248159634773675e77);
}
}

438
vendor/pxfm/src/compound/compound_m1f.rs vendored Normal file
View File

@@ -0,0 +1,438 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::*;
use crate::compound::compoundf::{
COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, compoundf_exp2_poly2, compoundf_log2p1_accurate,
compoundf_log2p1_fast,
};
use crate::double_double::DoubleDouble;
use crate::exponents::exp2m1_accurate_tiny;
use crate::round_ties_even::RoundTiesEven;
use std::hint::black_box;
// INVLOG2 = 1/log(2) * (1 + eps1) with |eps1| < 2^-55.976
// Used below to convert a natural-logarithm approximation of log(1+x) into
// log2(1+x) for tiny |x|.
const INVLOG2: f64 = f64::from_bits(0x3ff71547652b82fe);
/// Resolves the special inputs of `(1 + x)^y - 1` in single precision:
/// zeros, infinities, NaNs and `x <= -1`.
///
/// Results follow the IEEE 754-2019 `compound` rules with one subtracted:
/// * `x == ±0` or `y == ±0` gives `0` (NaN if the other argument is NaN, or
///   if `y == 0` and `x < -1`);
/// * any NaN argument gives NaN (`x + y`);
/// * `x < -1` gives NaN;
/// * `x == -1` gives `+Inf` for negative `y` and `-1` for positive `y`;
/// * `y == ±Inf` gives `+Inf` when `1 + x` is pushed towards infinity and
///   `-1` when it is pushed towards zero;
/// * `x == +Inf` gives `+Inf` for positive `y` and `-1` for negative `y`.
///
/// Inputs that are not special fall through to the final `-1.0`.
#[cold]
#[inline(never)]
fn as_compoundm1f_special(x: f32, y: f32) -> f32 {
    // Bit pattern of +Inf once the sign bit has been shifted out.
    const INF_NO_SIGN: u32 = 0xffu32 << 24;
    // Bit pattern of +Inf.
    const POS_INF: u32 = 0xffu32 << 23;

    let x_bits = x.to_bits();
    let y_bits = y.to_bits();
    let x_mag: u32 = x_bits.wrapping_shl(1);
    let y_mag: u32 = y_bits.wrapping_shl(1);
    let minus_one = (-1.0f32).to_bits();
    let x_is_negative = (x_bits >> 31) != 0;
    let y_is_negative = (y_bits >> 31) != 0;

    // compound(0, y) = 1 except for y = NaN
    if x_mag == 0 {
        return if y.is_nan() { x + y } else { 0.0 };
    }
    // compound(x, 0)
    if y_mag == 0 {
        if x.is_nan() {
            return x + y;
        }
        // rule (g) for x < -1, rule (a) otherwise
        return if x < -1.0 { f32::NAN } else { 0.0 };
    }

    // y is Inf or NaN (x is known to be non-zero here)
    if y_mag >= INF_NO_SIGN {
        // x = NaN or y = NaN
        if x_mag > INF_NO_SIGN || y_mag != INF_NO_SIGN {
            return x + y;
        }
        // y = ±Inf
        if x_bits > minus_one {
            return f32::NAN; // rule (g)
        }
        if x < 0.0 {
            // -1 <= x < 0: rules (b)/(c) at x = -1, base below one otherwise
            return if y_is_negative { f32::INFINITY } else { -1. };
        }
        if x > 0.0 {
            // base above one
            return if y_is_negative { -1. } else { f32::INFINITY };
        }
        return 0.0;
    }

    // x is Inf, NaN or <= -1
    if x_bits >= minus_one || x_bits >= POS_INF {
        if x_mag == INF_NO_SIGN {
            if x_is_negative {
                return f32::NAN; // x = -Inf, rule (g)
            }
            // (1 + Inf)^y = +Inf for y > 0, +0 for y < 0
            let power = if y_is_negative { 1.0 / x } else { x };
            return power - 1.;
        }
        if x_mag > INF_NO_SIGN {
            return x + y; // x is NaN
        }
        if x_bits > minus_one {
            return f32::NAN; // x < -1.0: rule (g)
        }
        // now x = -1
        return if y_is_negative {
            f32::INFINITY
        } else {
            -1.0
        };
    }
    -1.
}
/* for |z| <= 2^-6, evaluates z * Q(z), where Q is the degree-4 polynomial
below whose constant term is log(2); the product approximates 2^z - 1.
The original analysis quotes an absolute error < 2^-43.540. */
#[inline]
pub(crate) fn compoundf_expf_poly(z: f64) -> f64 {
    /* Q is a degree-4 polynomial generated by Sollya (cf compoundf_expf.sollya)
    with absolute error < 2^-43.549 */
    const Q: [f64; 5] = [
        f64::from_bits(0x3fe62e42fefa39ef),
        f64::from_bits(0x3fcebfbdff8098eb),
        f64::from_bits(0x3fac6b08d7045dc3),
        f64::from_bits(0x3f83b2b276ce985d),
        f64::from_bits(0x3f55d8849c67ace4),
    ];
    let z_sq = z * z;
    // High part: Q[2] + z * (Q[3] + z * Q[4]); low part: Q[0] + z * Q[1].
    let high = dd_fmla(dd_fmla(Q[4], z, Q[3]), z, Q[2]);
    let low = dd_fmla(Q[1], z, Q[0]);
    dd_fmla(high, z_sq, low) * z
}
/* Fast path for 2^t - 1, where t is an approximation of y*log2(1+x) with
absolute error < 2^-40.680, assuming 0x1.7154759a0df53p-24 <= |t| <= 150.
Returns the double-precision approximation when its error interval rounds to
a single binary32 value, and the sentinel -1.0 otherwise (the caller then
switches to the accurate path). */
fn exp2m1_fast(t: f64) -> f64 {
let k = t.round_ties_even_finite(); // 0 <= |k| <= 150
let mut r = t - k; // |r| <= 1/2, exact
let mut v: u64 = (3.015625 + r).to_bits(); // 2.5 <= v <= 3.5015625
// we add 2^-6 so that i is rounded to nearest
let i: i32 = (v >> 46) as i32 - 0x10010; // 0 <= i <= 32
r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
// now |r| <= 2^-6
// 2^t = 2^k * exp2_U[i][0] * 2^r
let mut s = f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1);
// su is the bit pattern of 2^k: k is added to the exponent bias and shifted
// into the exponent field
let su = unsafe { ((k.to_int_unchecked::<i64>() as u64).wrapping_add(0x3ffu64)) << 52 }; // k is already integer
s *= f64::from_bits(su);
// q_poly is the polynomial value for the reduced argument r; it is combined
// with s below as q_poly * s + (s - 1)
let q_poly = compoundf_expf_poly(r);
v = q_poly.to_bits();
/* the absolute error on exp2_U[i][0] is bounded by 2^-53.092, with
exp2_U[i][0] < 2^0.5, and that on q1(r) is bounded by 2^-43.540,
with |q1(r)| < 1.011, thus |v| < 1.43, and the absolute error on v is
bounded by ulp(v) + 2^0.5s * 2^-43.540 + 2^-53.092 * 1.011 < 2^-43.035.
Now t approximates u := y*log2(1+x) with |t-u| < 2^-40.680 thus
2^u = 2^t * (1 + eps) with eps < 2^(2^-40.680)-1 < 2^-41.208.
The total absolute error is thus bounded by 2^-43.035 + 2^-41.208
< 2^-40.849. */
let mut err: u64 = 0x3d61d00000000000; // 2^-40.849 < 0x1.1dp-41
// With hardware FMA: v = q * s + (s - 1) in one fused operation.
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
v = f_fmla(f64::from_bits(v), s, s - 1f64).to_bits();
}
// Without hardware FMA: the same quantity assembled from an exact product
// and an exact sum in double-double arithmetic.
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let p0 = DoubleDouble::from_full_exact_add(s, -1.);
let z = DoubleDouble::from_exact_mult(f64::from_bits(v), s);
v = DoubleDouble::add(z, p0).to_f64().to_bits();
}
// in case of potential underflow, we defer to the accurate path
// (the threshold is the unscaled error bound, so every result below it,
// including all negative results, takes the accurate path)
if f64::from_bits(v) < f64::from_bits(0x3d61d00000000000) {
return -1.0;
}
err = unsafe { err.wrapping_add((k.to_int_unchecked::<i64>() << 52) as u64) }; // scale the error by 2^k too
// rounding test: both ends of [v - err, v + err] must round to the same f32
let lb = (f64::from_bits(v) - f64::from_bits(err)) as f32;
let rb = (f64::from_bits(v) + f64::from_bits(err)) as f32;
if lb != rb {
return -1.0;
} // rounding test failed
f64::from_bits(v)
}
/* Accurate path for 2^(h+l) - 1, where x_dd = h + l is a double-double
approximation of y*log2(1+x) (see the caller, compoundm1f_accurate).
x and y are the original arguments; they are only used for the y == 1
shortcut, where the result is x itself. The result is rounded to binary32. */
fn compoundf_exp2m1_accurate(x_dd: DoubleDouble, x: f32, y: f32) -> f32 {
// (1+x)^1 - 1 = x exactly
if y == 1.0 {
let res = x;
return res;
}
let k = x_dd.hi.round_ties_even_finite(); // |k| <= 150
// check easy cases h+l is tiny thus 2^(h+l) rounds to 1, 1- or 1+
if k == 0. && x_dd.hi.abs() <= f64::from_bits(0x3e6715476af0d4c8) {
/* the relative error between h and y*log2(1+x) is bounded by
(1 + 2^-48.445) * (1 + 2^-91.120) - 1 < 2^-48.444.
2^h rounds to 1 to nearest for |h| <= H0 := 0x1.715476af0d4d9p-25.
The above threshold is such that h*(1+2^-48.444) < H0. */
// tiny argument: use the dedicated small-argument routine
return exp2m1_accurate_tiny(x_dd.to_f64()) as f32;
}
let r = x_dd.hi - k; // |r| <= 1/2, exact
// since r is an integer multiple of ulp(h), fast_two_sum() below is exact
let mut v_dd = DoubleDouble::from_exact_add(r, x_dd.lo);
let mut v = (3.015625 + v_dd.hi).to_bits(); // 2.5 <= v <= 3.5015625
// we add 2^-6 so that i is rounded to nearest
let i: i32 = ((v >> 46) as i32).wrapping_sub(0x10010); // 0 <= i <= 32
// h is near (i-16)/2^5
v_dd.hi -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
// now |h| <= 2^-6
// 2^(h+l) = 2^k * exp2_U[i] * 2^(h+l)
v_dd = DoubleDouble::from_exact_add(v_dd.hi, v_dd.lo);
let q = compoundf_exp2_poly2(v_dd);
/* we have 0.989 < qh < 1.011, |ql| < 2^-51.959, and
|qh + ql - 2^(h+l)| < 2^-85.210 */
let exp2u = DoubleDouble::from_bit_pair(COMPOUNDF_EXP2_U[i as usize]);
let mut q = DoubleDouble::quick_mult(exp2u, q);
q = DoubleDouble::from_exact_add(q.hi, q.lo);
// du is the bit pattern of 2^k: k plus the exponent bias, shifted into the
// exponent field
let mut du = unsafe {
k.to_int_unchecked::<i64>()
.wrapping_add(0x3ff)
.wrapping_shl(52) as u64
};
// bit round-trip; leaves du unchanged
du = f64::from_bits(du).to_bits();
let scale = f64::from_bits(du);
q.hi *= scale;
q.lo *= scale;
// subtract 1 with an exact addition and fold the rounding error into the
// low part
let zf: DoubleDouble = DoubleDouble::from_full_exact_add(q.hi, -1.0);
q.lo += zf.lo;
q.hi = zf.hi;
v = q.to_f64().to_bits();
f64::from_bits(v) as f32
}
// Accurate fallback for (1+x)^y - 1: recomputes log2(1+x) with the accurate
// double-double routine, scales it by y and hands the product to the accurate
// exponential.
// Hard cases recorded with the original implementation:
// x,y=0x1.0f6f1ap+1,0x1.c643bp+5: 49 identical bits after round bit
// x,y=0x1.ef272cp+15,-0x1.746ab2p+1: 55 identical bits after round bit
// x,y=0x1.07ffcp+0,-0x1.921a8ap+4: 47 identical bits after round bit
#[cold]
#[inline(never)]
fn compoundm1f_accurate(x: f32, y: f32) -> f32 {
    let log2_1p = compoundf_log2p1_accurate(x as f64);
    let scaled = DoubleDouble::quick_mult_f64(log2_1p, y as f64);
    compoundf_exp2m1_accurate(scaled, x, y)
}
/// Computes compound (1.0 + x)^y - 1
///
/// Max ULP 0.5
#[inline]
pub fn f_compound_m1f(x: f32, y: f32) -> f32 {
/* Rules from IEEE 754-2019 for compound (x, n) with n integer:
(a) compound (x, 0) is 1 for x >= -1 or quiet NaN
(b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
(c) compound (-1, n) is +0 for n > 0
(d) compound (+/-0, n) is 1
(e) compound (+Inf, n) is +Inf for n > 0
(f) compound (+Inf, n) is +0 for n < 0
(g) compound (x, n) is qNaN and signals the invalid exception for x < -1
(h) compound (qNaN, n) is qNaN for n <> 0.
*/
let mone = (-1.0f32).to_bits();
let nx = x.to_bits();
let ny = y.to_bits();
if nx >= mone {
return as_compoundm1f_special(x, y);
} // x <= -1
// now x > -1
let ax: u32 = nx.wrapping_shl(1);
let ay: u32 = ny.wrapping_shl(1);
if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
return as_compoundm1f_special(x, y);
} // x=+-0 || x=+-inf/nan || y=+-0 || y=+-inf/nan
// evaluate (1+x)^y explicitly for integer y in [-16,16] range and |x|<2^64
if is_integerf(y) && ay <= 0x83000000u32 && ax <= 0xbefffffeu32 {
if ax <= 0x62000000u32 {
return 1.0 + y * x;
} // does it work for |x|<2^-29 and |y|<=16?
let mut s = x as f64 + 1.;
let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
// exponentiation by squaring: O(log(y)) complexity
let mut acc = if iter_count % 2 != 0 { s } else { 1. };
while {
iter_count >>= 1;
iter_count
} != 0
{
s = s * s;
if iter_count % 2 != 0 {
acc *= s;
}
}
let dz = if y.is_sign_negative() { 1. / acc } else { acc };
return DoubleDouble::from_full_exact_add(dz, -1.).to_f64() as f32;
}
let xd = x as f64;
let yd = y as f64;
let tx = xd.to_bits();
let ty = yd.to_bits();
let l: f64 = if ax < 0x62000000u32 {
// |x| < 2^-29
/* |log2(1+x) - 1/log(2) * (x - x^2/2)| < 2^-59.584 * |log2(1+x)|
(cf compoundf.sollya) */
let t = xd - (xd * xd) * 0.5;
/* since x is epresentable in binary32, x*x is exact, and so is (x * x) * 0.5.
Thus the only error in the computation of t is the final rounding, which
is bounded by ulp(t): t = (x - x^2/2) * (1 + eps2) with |eps2| < 2^-52
*/
INVLOG2 * t
/* since INVLOG2 = 1/log(2) * (1 + eps1) and
and t = (x - x^2/2) * (1 + eps2)
let u = o(INVLOG2 * t) then u = INVLOG2 * t * (1 + eps3) with |eps3|<2^-53
thus u = 1/log(2) * (x - x^2/2) * (1 + eps1)*(1 + eps2)*(1 + eps3)
= 1/log(2) * (x - x^2/2) * (1 + eps4) with |eps4| < 2^-50.954
Now Sollya says the relative error by approximating log2(1+x) by
1/log(2) * (x - x^2/2) for |x| < 2^-29 is bounded by 2^-59.584
(file compoundf.sollya), thus:
u = log2(1+x) * (1+eps4)*(1+eps5) with |eps5| < 2^-59.584
= log2(1+x) * (1+eps6) with |eps6| < 2^-50.950 */
} else {
compoundf_log2p1_fast(f64::from_bits(tx))
};
/* l approximates log2(1+x) with relative error < 2^-47.997,
and 2^-149 <= |l| < 128 */
let t: u64 = (l * f64::from_bits(ty)).to_bits();
/* since 2^-149 <= |l| < 128 and 2^-149 <= |y| < 2^128, we have
2^-298 <= |t| < 2^135, thus no underflow/overflow in double is possible.
The relative error is bounded by (1+2^-47.997)*(1+2^-52)-1 < 2^-47.909 */
// detect overflow/underflow
if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
// |t| >= 128
if t >= 0x3018bu64 << 46 {
// t <= -150
return black_box(f32::from_bits(0x00800000)) * black_box(f32::from_bits(0x00800000));
} else if (t >> 63) == 0 {
// t >= 128: overflow
return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
}
}
let res = exp2m1_fast(f64::from_bits(t));
if res != -1.0 {
return res as f32;
}
compoundm1f_accurate(x, y)
}
// Regression tests for the single-precision compound-minus-one routines.
// Expected values are exact (bit-for-bit) results and are compared with `assert_eq!`.
#[cfg(test)]
mod tests {
use super::*;
use crate::compound::compound_m1f::{compoundf_exp2m1_accurate, exp2m1_fast};
use crate::double_double::DoubleDouble;
// Spot checks of `f_compound_m1f` for a tiny operand pair and for integer
// exponents of both signs.
#[test]
fn test_compoundf() {
assert_eq!(
f_compound_m1f(-0.000000000000001191123, -0.000000000000001191123),
0.0000000000000000000000000000014187741
);
// NOTE(review): for a tiny x and y = 16 the value (1+x)^y - 1 is about 16*x,
// yet the pinned result is 1.0 — this looks like (1+x)^y; confirm against the
// tiny-|x| integer-exponent branch of `f_compound_m1f`.
assert_eq!(f_compound_m1f(-0.000000000000001191123, 16.), 1.0);
assert_eq!(f_compound_m1f(0.91123, 16.), 31695.21);
assert_eq!(f_compound_m1f(0.91123, -16.), -0.99996847);
}
// Pins one value of the fast 2^t - 1 kernel.
#[test]
fn test_compoundf_expm1_fast() {
assert_eq!(exp2m1_fast(3.764), 12.585539943149435);
}
// Pins one value of the accurate 2^t - 1 fallback.
#[test]
fn test_compoundf_expm1_accurate() {
assert_eq!(
compoundf_exp2m1_accurate(DoubleDouble::new(0., 2.74), 12., 53.),
5.680703,
);
}
}

1000
vendor/pxfm/src/compound/compoundf.rs vendored Normal file

File diff suppressed because it is too large Load Diff

41
vendor/pxfm/src/compound/mod.rs vendored Normal file
View File

@@ -0,0 +1,41 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod compound_d;
mod compound_m1;
mod compound_m1f;
mod compoundf;
mod powm1;
mod powm1f;
pub use compound_d::f_compound;
pub use compound_m1::f_compound_m1;
pub use compound_m1f::f_compound_m1f;
pub use compoundf::f_compoundf;
pub use powm1::f_powm1;
pub use powm1f::f_powm1f;

224
vendor/pxfm/src/compound/powm1.rs vendored Normal file
View File

@@ -0,0 +1,224 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{is_integer, is_odd_integer};
use crate::double_double::DoubleDouble;
use crate::exponents::{EXPM1_T0, EXPM1_T1, ldexp};
use crate::pow_exec::pow_log_1;
use crate::round_ties_even::RoundTiesEven;
/// Computes `x^y - 1`.
///
/// Exceptional operands are resolved before the general evaluation:
/// - any NaN operand yields NaN;
/// - with an infinite `x` or `y` the result is `+inf` when `|x|^y` diverges and
///   `-1` when it vanishes; `(+inf)^0` and `|x| == 1` with an infinite `y`
///   yield NaN (this routine's convention);
/// - a zero base yields `-1`, `+inf` or `0` for positive, negative and zero `y`.
///
/// A negative base is only accepted together with an integer exponent; any
/// other combination yields NaN. The general path evaluates
/// `expm1(y * ln|x|)` in double-double arithmetic and then fixes the sign for
/// odd integer exponents of a negative base.
pub fn f_powm1(x: f64, y: f64) -> f64 {
    // Shifting out the sign bit lets one comparison classify zero / inf / NaN.
    let ax: u64 = x.to_bits().wrapping_shl(1);
    let ay: u64 = y.to_bits().wrapping_shl(1);

    // filter out exceptional cases: x = ±0, x = ±inf/NaN, y = ±0, y = ±inf/NaN
    // (the bound for `ay` used to be spelled `0x7ff64 << 53`, which silently
    // wrapped to a different threshold; both bounds are now the same constant)
    if ax == 0 || ax >= 0x7ffu64 << 53 || ay == 0 || ay >= 0x7ffu64 << 53 {
        if x.is_nan() || y.is_nan() {
            return f64::NAN;
        }

        // Handle an infinite base
        if x.is_infinite() {
            if y.is_infinite() {
                // |x|^(+inf) diverges and |x|^(-inf) vanishes, exactly as for a
                // finite |x| > 1 further down.
                return if y.is_sign_positive() {
                    f64::INFINITY
                } else {
                    -1.0
                };
            }
            return if x.is_sign_positive() {
                if y > 0.0 {
                    f64::INFINITY // inf^positive -> inf
                } else if y < 0.0 {
                    -1.0 // inf^negative -> 0, so powm1 = -1
                } else {
                    f64::NAN // inf^0 is treated as NaN by this routine
                }
            } else if !is_integer(y) {
                f64::NAN // Negative base with non-integer exponent
            } else if y < 0.0 {
                -1.0 // (-inf)^negative integer -> ±0, so powm1 = -1
            } else if is_odd_integer(y) {
                // Parity comes from the float itself: an `as i32` cast saturates
                // and would classify every large even exponent as odd.
                f64::NEG_INFINITY
            } else {
                // Even exponent. NOTE(review): y == 0 also lands here and keeps
                // the previous +inf result, unlike the NaN used for (+inf)^0.
                f64::INFINITY
            };
        }

        // Handle an infinite exponent with a finite base
        if y.is_infinite() {
            return if x.abs() > 1.0 {
                if y.is_sign_positive() {
                    f64::INFINITY
                } else {
                    -1.0
                }
            } else if x.abs() < 1.0 {
                if y.is_sign_positive() {
                    -1.0
                } else {
                    f64::INFINITY
                }
            } else {
                // |x| == 1
                f64::NAN // 1^inf or -1^inf is treated as undefined
            };
        }

        // Handle zero base
        if x == 0.0 {
            return if y > 0.0 {
                -1.0 // 0^positive -> 0, powm1 = -1
            } else if y < 0.0 {
                f64::INFINITY // 0^negative -> inf
            } else {
                0.0 // 0^0 -> conventionally 1, powm1 = 0
            };
        }
        // Remaining case: y == ±0 with a finite non-zero x, handled below.
    }

    // A negative base needs an integer exponent; remember whether the power
    // itself is negative (odd exponent) and continue with |x|.
    let mut negate = false;
    let mut abs_x = x;
    if x < 0.0 {
        if !is_integer(y) {
            return f64::NAN; // x < 0 and non-integer y
        }
        abs_x = x.abs();
        negate = is_odd_integer(y);
    }

    let (mut l, _) = pow_log_1(abs_x);
    l = DoubleDouble::from_exact_add(l.hi, l.lo);
    let r = DoubleDouble::quick_mult_f64(l, y);
    if r.hi < -37.42994775023705 {
        // |x|^y is below half an ulp of 1, so the result rounds to -1
        return -1.;
    }
    let res = powm1_expm1_1(r);
    // For x < 0 and integer y = n:
    // if n is even: x^n = |x|^n → powm1 = |x|^n - 1 (same sign as res).
    // if n is odd: x^n = -|x|^n → powm1 = -|x|^n - 1 = -(res + 2).
    if negate {
        DoubleDouble::full_add_f64(-res, -2.).to_f64()
    } else {
        res.to_f64()
    }
}
/// Evaluates `exp(r) - 1` for a double-double argument.
///
/// Tiny arguments are returned unchanged or sent to a dedicated polynomial;
/// otherwise `r` is reduced as `r = k * ln(2)/2^12 + z` with integer `k`, the
/// power of two is rebuilt from two 64-entry tables plus the exponent `k >> 12`,
/// and the `- 1` is folded in before the final scaling.
/// The result is returned in `hi`, with `lo` cleared, on the table path.
#[inline]
pub(crate) fn powm1_expm1_1(r: DoubleDouble) -> DoubleDouble {
    // ln(2) / 2^12 split into a high and a low part
    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);
    // 2^12 / ln(2)
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);

    let abs_bits = r.hi.to_bits() & 0x7fffffffffffffffu64;
    if abs_bits < 0x3970000000000000 {
        // |r| < 2^-104: exp(r) - 1 is r itself at this precision
        return r;
    }
    if abs_bits <= 0x3f80000000000000 {
        // |r| <= 2^-7: no range reduction needed
        return crate::pow_exec::expm1_poly_dd_tiny(r);
    }

    let k = (r.hi * INVLOG2).round_ties_even_finite();
    let z = DoubleDouble::mul_f64_add(DoubleDouble::new(LOG2L, LOG2H), -k, r);
    // SAFETY: `k` is the output of a round-to-integer step, so this is a plain
    // conversion. NOTE(review): it also relies on `r.hi` being bounded so that
    // `k` fits in an i64 — confirm at the call sites.
    let bk = unsafe { k.to_int_unchecked::<i64>() };
    // k = 2^12 * ie + 2^6 * hi_idx + lo_idx
    let ie = bk >> 12;
    let hi_idx = ((bk >> 6) & 0x3f) as usize;
    let lo_idx = (bk & 0x3f) as usize;

    let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[hi_idx]);
    let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[lo_idx]);
    let table = DoubleDouble::quick_mult(t1, t0);
    // table * (1 + expm1(z)) = table + table * q
    let q = crate::pow_exec::expm1_poly_fast(z);
    let mut de = DoubleDouble::add(table, DoubleDouble::quick_mult(table, q));

    // -2^(-ie): the "- 1" expressed at the scale used before the final ldexp
    let off: f64 = f64::from_bits((2048i64 + 1023i64).wrapping_sub(ie).wrapping_shl(52) as u64);
    let carry = if ie < 53 {
        let sum = DoubleDouble::from_exact_add(off, de.hi);
        de.hi = sum.hi;
        sum.lo
    } else if ie < 104 {
        let sum = DoubleDouble::from_exact_add(de.hi, off);
        de.hi = sum.hi;
        sum.lo
    } else {
        // the offset is not applied for such large exponents
        0.
    };
    de.lo += carry;
    de.hi = ldexp(de.to_f64(), ie as i32);
    de.lo = 0.;
    de
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins exact results of `f_powm1` for an infinite pair, tiny exponents,
    /// a huge negative exponent and small integer powers of both base signs.
    #[test]
    fn test_powm1() {
        // (x, y, expected x^y - 1)
        let cases = [
            (f64::INFINITY, f64::INFINITY, f64::INFINITY),
            (
                50850368932909610000000000.,
                2.3201985303960773e-305,
                1.3733470789307166e-303,
            ),
            (-3.375, -9671689000000000000000000., -1.),
            (1.83329e-40, 2.4645883e-32, -2.255031542428047e-30),
            (3., 2., 8.),
            (3., 3., 26.),
            (5., 2., 24.),
            (5., -2., 1. / 25. - 1.),
            (-5., 2., 24.),
            (-5., 3., -126.),
            (196560., 1.193773e-39, 1.4550568430468268e-38),
        ];
        for (x, y, expected) in cases {
            assert_eq!(f_powm1(x, y), expected);
        }
    }
}

260
vendor/pxfm/src/compound/powm1f.rs vendored Normal file
View File

@@ -0,0 +1,260 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::*;
use crate::compound::compound_m1f::compoundf_expf_poly;
use crate::compound::compoundf::{
COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, LOG2P1_COMPOUNDF_INV, LOG2P1_COMPOUNDF_LOG2_INV,
};
use crate::round_ties_even::RoundTiesEven;
use std::hint::black_box;
/// Fast approximation of `log2(x)` for a positive double `x`.
///
/// `x` is split as `2^e * t` with `t` between `1/sqrt(2)` and `sqrt(2)`; then
/// `log2(t)` is obtained as `-log2(r) + log2(r * t)`, where `r` is a tabulated
/// approximate reciprocal of `t` and `log2(r * t)` comes from a polynomial in
/// `z = r * t - 1`.
#[inline]
fn powm1f_log2_fast(x: f64) -> f64 {
    let bits = x.to_bits();
    let mantissa: u64 = bits & 0xfffffffffffffu64;
    // Mantissas at or above that of sqrt(2) are attributed to the next binade,
    // which keeps t inside (1/sqrt(2), sqrt(2)).
    let bump = (mantissa >= 0x6a09e667f3bcdu64) as i64;
    let e: i64 = (bits >> 52) as i64 - 0x3ff + bump;
    // t = x / 2^e, obtained by subtracting e from the exponent field
    let t = f64::from_bits(bits.wrapping_sub((e << 52) as u64));
    // t + 2 always lies in the binade [2, 4), so its leading mantissa bits give
    // a table index directly
    let slot = (((t + 2.0).to_bits() >> 45) as i32 - 0x2002d) as usize;
    let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[slot]);
    let z = dd_fmla(r, t, -1.0);
    // polynomial approximation of log2(1 + z) = log2(r * t)
    let p = crate::compound::compoundf::log2p1_polyeval_1(z);
    let minus_log2_r = f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[slot].1);
    e as f64 + (minus_log2_r + p)
}
/// Computes `x^y - 1`.
///
/// Exceptional operands are resolved before the general evaluation:
/// - any NaN operand yields NaN;
/// - with an infinite `x` or `y` the result is `+inf` when `|x|^y` diverges and
///   `-1` when it vanishes; `(+inf)^0` and `|x| == 1` with an infinite `y`
///   yield NaN (this routine's convention);
/// - a zero base yields `-1`, `+inf` or `0` for positive, negative and zero `y`.
///
/// A negative base is only accepted together with an integer exponent; any
/// other combination yields NaN. The general path evaluates
/// `2^(y * log2|x|) - 1` in double precision and then fixes the sign for odd
/// integer exponents of a negative base.
pub fn f_powm1f(x: f32, y: f32) -> f32 {
    // Shifting out the sign bit lets one comparison classify zero / inf / NaN.
    let ax: u32 = x.to_bits().wrapping_shl(1);
    let ay: u32 = y.to_bits().wrapping_shl(1);

    // filter out exceptional cases: x = ±0, x = ±inf/NaN, y = ±0, y = ±inf/NaN
    if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
        if x.is_nan() || y.is_nan() {
            return f32::NAN;
        }

        // Handle an infinite base
        if x.is_infinite() {
            if y.is_infinite() {
                // |x|^(+inf) diverges and |x|^(-inf) vanishes, exactly as for a
                // finite |x| > 1 further down.
                return if y.is_sign_positive() {
                    f32::INFINITY
                } else {
                    -1.0
                };
            }
            return if x.is_sign_positive() {
                if y > 0.0 {
                    f32::INFINITY // inf^positive -> inf
                } else if y < 0.0 {
                    -1.0 // inf^negative -> 0, so powm1 = -1
                } else {
                    f32::NAN // inf^0 is treated as NaN by this routine
                }
            } else if !is_integerf(y) {
                f32::NAN // Negative base with non-integer exponent
            } else if y < 0.0 {
                -1.0 // (-inf)^negative integer -> ±0, so powm1 = -1
            } else if is_odd_integerf(y) {
                // Parity comes from the float itself: an `as i32` cast saturates
                // and would classify exponents of 2^31 and above as odd.
                f32::NEG_INFINITY
            } else {
                // Even exponent. NOTE(review): y == 0 also lands here and keeps
                // the previous +inf result, unlike the NaN used for (+inf)^0.
                f32::INFINITY
            };
        }

        // Handle an infinite exponent with a finite base
        if y.is_infinite() {
            return if x.abs() > 1.0 {
                if y.is_sign_positive() {
                    f32::INFINITY
                } else {
                    -1.0
                }
            } else if x.abs() < 1.0 {
                if y.is_sign_positive() {
                    -1.0
                } else {
                    f32::INFINITY
                }
            } else {
                // |x| == 1
                f32::NAN // 1^inf or -1^inf is treated as undefined
            };
        }

        // Handle zero base
        if x == 0.0 {
            return if y > 0.0 {
                -1.0 // 0^positive -> 0, powm1 = -1
            } else if y < 0.0 {
                f32::INFINITY // 0^negative -> inf
            } else {
                0.0 // 0^0 -> conventionally 1, powm1 = 0
            };
        }
        // Remaining case: y == ±0 with a finite non-zero x, handled below.
    }

    // A negative base needs an integer exponent; remember whether the power
    // itself is negative (odd exponent) and continue with |x|.
    let mut negate = false;
    let mut abs_x = x;
    if x < 0.0 {
        if !is_integerf(y) {
            return f32::NAN; // x < 0 and non-integer y
        }
        abs_x = x.abs();
        negate = is_odd_integerf(y);
    }

    let xd = abs_x as f64;
    let yd = y as f64;
    // l approximates log2(|x|); t = y * log2(|x|) is the base-2 exponent of |x|^y
    let l: f64 = powm1f_log2_fast(xd);
    let t: u64 = (l * yd).to_bits();
    // detect overflow/underflow
    if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
        // |t| >= 128
        if t >= 0x3018bu64 << 46 {
            // t <= -150: |x|^y vanishes in binary32, so powm1 = -1
            return -1.;
        } else if (t >> 63) == 0 {
            // t >= 128: overflow, produced by an actual overflowing multiply
            return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
        }
    }
    let res = powm1_exp2m1_fast(f64::from_bits(t));
    // For x < 0 and integer y = n:
    // if n is even: x^n = |x|^n → powm1 = |x|^n - 1 (same sign as res).
    // if n is odd: x^n = -|x|^n → powm1 = -|x|^n - 1 = -(res + 2).
    if negate {
        (-res - 2.) as f32
    } else {
        res as f32
    }
}
/// Fast evaluation of `2^t - 1` in double precision.
///
/// `t` is split into an integer `k`, a tabulated offset and a small remainder
/// `r`; `2^t` is then `s * (1 + q)` where `s` is the table value scaled by
/// `2^k` and `q` is a polynomial in `r`. The result `q * s + (s - 1)` is formed
/// with an FMA where the target has one, and with double-double arithmetic
/// otherwise.
#[inline]
pub(crate) fn powm1_exp2m1_fast(t: f64) -> f64 {
    let k = t.round_ties_even_finite(); // integer part, 0 <= |k| <= 150
    let frac = t - k; // |frac| <= 1/2, exact

    // 3.015625 = 3 + 2^-6: the 2^-6 makes the truncating shift round to nearest
    let slot = (((3.015625 + frac).to_bits() >> 46) as i32 - 0x10010) as usize; // 0 <= slot <= 32
    let r = frac - f64::from_bits(COMPOUNDF_EXP2_T[slot]); // exact, now |r| <= 2^-6

    // 2^t = 2^k * table[slot] * 2^r; multiply the table value by 2^k by adding
    // k straight into its exponent field
    let table_value = f64::from_bits(COMPOUNDF_EXP2_U[slot].1);
    // SAFETY: `k` was produced by rounding to an integer, so this is a plain
    // conversion. NOTE(review): relies on the caller keeping `t` finite and
    // bounded — confirm at the call sites.
    let exponent_shift = unsafe { k.to_int_unchecked::<i64>().wrapping_shl(52) };
    let s = f64::from_bits(table_value.to_bits().wrapping_add(exponent_shift as u64));

    let q = compoundf_expf_poly(r);

    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    let result = f_fmla(q, s, s - 1f64);

    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    let result = {
        use crate::double_double::DoubleDouble;
        // without a hardware FMA keep the product and s - 1 as exact pairs
        let s_minus_one = DoubleDouble::from_full_exact_add(s, -1.);
        let product = DoubleDouble::from_exact_mult(q, s);
        DoubleDouble::add(product, s_minus_one).to_f64()
    };

    result
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins exact results of `f_powm1f` for tiny operands, an infinite pair,
    /// a huge negative exponent, small integer powers and NaN propagation.
    #[test]
    fn test_powm1f() {
        // (x, y, expected x^y - 1)
        let cases: [(f32, f32, f32); 10] = [
            (1.83329e-40, 2.4645883e-32, -2.2550315e-30),
            (f32::INFINITY, f32::INFINITY, f32::INFINITY),
            (-3.375, -9671689000000000000000000., -1.),
            (3., 2., 8.),
            (3., 3., 26.),
            (5., 2., 24.),
            (5., -2., 1. / 25. - 1.),
            (-5., 2., 24.),
            (-5., 3., -126.),
            (196560., 1.193773e-39, 1.455057e-38),
        ];
        for (x, y, expected) in cases {
            assert_eq!(f_powm1f(x, y), expected);
        }

        // NaN in either position propagates
        assert!(f_powm1f(f32::NAN, f32::INFINITY).is_nan());
        assert!(f_powm1f(f32::INFINITY, f32::NAN).is_nan());
    }
}

219
vendor/pxfm/src/cosm1.rs vendored Normal file
View File

@@ -0,0 +1,219 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::polyeval::f_polyeval4;
use crate::sin::{range_reduction_small, sincos_eval};
use crate::sin_helper::sincos_eval_dd;
use crate::sin_table::SIN_K_PI_OVER_128;
use crate::sincos_reduce::LargeArgumentReduction;
/// Slow path of `cos(x) - 1`, evaluated fully in double-double arithmetic.
///
/// `y` is a reduced argument and `sin_k` / `cos_k` are the sine and cosine of
/// the matching `k * pi/128`; as called from `f_cosm1` they describe `x/2`.
/// The sine of the reduced angle is rebuilt with the angle-addition formula and
/// the result is `-2 * sin^2`.
#[cold]
#[inline(never)]
fn cosm1_accurate(y: DoubleDouble, sin_k: DoubleDouble, cos_k: DoubleDouble) -> f64 {
    let eval = sincos_eval_dd(y);
    // sin(k*pi/128 + y) = cos(y) * sin(k*pi/128) + sin(y) * cos(k*pi/128)
    let sin_sum = DoubleDouble::full_dd_add(
        DoubleDouble::quick_mult(eval.v_cos, sin_k),
        DoubleDouble::quick_mult(eval.v_sin, cos_k),
    );
    // renormalise before squaring
    let sin_half = DoubleDouble::from_exact_add(sin_sum.hi, sin_sum.lo);
    // cos(x) - 1 = -2 * sin^2(x/2)
    let squared = DoubleDouble::quick_mult(sin_half, sin_half);
    DoubleDouble::quick_mult_f64(squared, -2.).to_f64()
}
/// Double-double fallback for `cos(x) - 1` on small `|x|`, used when the fast
/// polynomial cannot decide the rounding.
///
/// Evaluates an even polynomial of degree 8 in `x` by Horner's scheme on `x^2`,
/// with the leading coefficient fixed at `-1/2`.
#[cold]
fn cosm1_tiny_hard(x: f64) -> f64 {
    // Generated by Sollya:
    // d = [2^-27, 2^-7];
    // f_cosm1 = cos(x) - 1;
    // Q = fpminimax(f_cosm1, [|2,4,6,8|], [|0, 107...|], d);
    // See ./notes/cosm1_hard.sollya
    // Coefficients of x^4, x^6 and x^8 as (lo, hi) bit pairs.
    const C4: (u64, u64) = (0x3c453997dc8ae20d, 0x3fa5555555555555);
    const C6: (u64, u64) = (0x3bf6100c76a1827a, 0xbf56c16c16c15749);
    const C8: (u64, u64) = (0x3b918f45acdd1fb2, 0x3efa019ddf5a583a);
    // Coefficient of x^2: -1/2
    const C2: f64 = f64::from_bits(0xbfe0000000000000);

    let x2 = DoubleDouble::from_exact_mult(x, x);
    let inner = DoubleDouble::mul_add(
        x2,
        DoubleDouble::from_bit_pair(C8),
        DoubleDouble::from_bit_pair(C6),
    );
    let middle = DoubleDouble::mul_add(x2, inner, DoubleDouble::from_bit_pair(C4));
    let outer = DoubleDouble::mul_add_f64(x2, middle, C2);
    DoubleDouble::quick_mult(outer, x2).to_f64()
}
/// Computes cos(x) - 1
pub fn f_cosm1(x: f64) -> f64 {
let x_e = (x.to_bits() >> 52) & 0x7ff;
const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
let y: DoubleDouble;
let k;
let mut argument_reduction = LargeArgumentReduction::default();
// |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA)
if x_e < E_BIAS + 16 {
// |x| < 2^-7
if x_e < E_BIAS - 7 {
// |x| < 2^-26
if x_e < E_BIAS - 27 {
// Signed zeros.
if x == 0.0 {
return 0.0;
}
// Taylor expansion for small cos(x) - 1 ~ -x^2/2 + x^4/24 + O(x^6)
let x_sqr = x * x;
const A0: f64 = -1. / 2.;
const A1: f64 = 1. / 24.;
let r0 = f_fmla(x_sqr, A1, A0);
return r0 * x_sqr;
}
// Generated by Sollya:
// d = [2^-27, 2^-7];
// f_cosm1 = (cos(x) - 1);
// Q = fpminimax(f_cosm1, [|2,4,6,8|], [|0, D...|], d);
// See ./notes/cosm1.sollya
let x2 = DoubleDouble::from_exact_mult(x, x);
let p = f_polyeval4(
x2.hi,
f64::from_bits(0xbfe0000000000000),
f64::from_bits(0x3fa5555555555555),
f64::from_bits(0xbf56c16c16b9c2b7),
f64::from_bits(0x3efa014d03f38855),
);
let r = DoubleDouble::quick_mult_f64(x2, p);
let eps = x * f_fmla(
x2.hi,
f64::from_bits(0x3d00000000000000), // 2^-47
f64::from_bits(0x3be0000000000000), // 2^-65
);
let ub = r.hi + (r.lo + eps);
let lb = r.hi + (r.lo - eps);
if ub == lb {
return r.to_f64();
}
return cosm1_tiny_hard(x);
} else {
// // Small range reduction.
(y, k) = range_reduction_small(x * 0.5);
}
} else {
// Inf or NaN
if x_e > 2 * E_BIAS {
// cos(+-Inf) = NaN
return x + f64::NAN;
}
// Large range reduction.
// k = argument_reduction.high_part(x);
(k, y) = argument_reduction.reduce(x * 0.5);
}
// Computing cos(x) - 1 as follows:
// cos(x) - 1 = -2*sin^2(x/2)
let r_sincos = sincos_eval(y);
// cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
let sk = SIN_K_PI_OVER_128[(k & 255) as usize];
let ck = SIN_K_PI_OVER_128[((k.wrapping_add(64)) & 255) as usize];
let sin_k = DoubleDouble::from_bit_pair(sk);
let cos_k = DoubleDouble::from_bit_pair(ck);
let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, sin_k);
let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, cos_k);
// sin_k_cos_y is always >> cos_k_sin_y
let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
rr = DoubleDouble::from_exact_add(rr.hi, rr.lo);
rr = DoubleDouble::quick_mult(rr, rr);
rr = DoubleDouble::quick_mult_f64(rr, -2.);
let rlp = rr.lo + r_sincos.err;
let rlm = rr.lo - r_sincos.err;
let r_upper = rr.hi + rlp; // (rr.lo + ERR);
let r_lower = rr.hi + rlm; // (rr.lo - ERR);
// Ziv's accuracy test
if r_upper == r_lower {
return rr.to_f64();
}
cosm1_accurate(y, sin_k, cos_k)
}
// Regression tests for `f_cosm1`; expected values are exact results compared
// bit-for-bit with `assert_eq!`.
#[cfg(test)]
mod tests {
use super::*;
// Covers tiny arguments, arguments near 2*pi and pi, a few ordinary values and
// the non-finite inputs.
#[test]
fn f_cosm1f_test() {
assert_eq!(f_cosm1(0.0017700195313803402), -0.000001566484161754997);
assert_eq!(
f_cosm1(0.0000000011641532182693484),
-0.0000000000000000006776263578034406
);
assert_eq!(f_cosm1(0.006164513528517324), -0.000019000553351160402);
assert_eq!(f_cosm1(6.2831853071795862), -2.999519565323715e-32);
assert_eq!(f_cosm1(0.00015928394), -1.2685686744140693e-8);
// zero input gives exactly zero
assert_eq!(f_cosm1(0.0), 0.0);
assert_eq!(f_cosm1(0.0), 0.0);
assert_eq!(f_cosm1(std::f64::consts::PI), -2.);
assert_eq!(f_cosm1(0.5), -0.12241743810962728);
assert_eq!(f_cosm1(0.7), -0.23515781271551153);
assert_eq!(f_cosm1(1.7), -1.1288444942955247);
// infinities and NaN all produce NaN
assert!(f_cosm1(f64::INFINITY).is_nan());
assert!(f_cosm1(f64::NEG_INFINITY).is_nan());
assert!(f_cosm1(f64::NAN).is_nan());
assert_eq!(f_cosm1(0.0002480338), -3.0760382813519806e-8);
}
}

152
vendor/pxfm/src/csc.rs vendored Normal file
View File

@@ -0,0 +1,152 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::double_double::DoubleDouble;
use crate::sin::{get_sin_k_rational, range_reduction_small, sincos_eval};
use crate::sin_table::SIN_K_PI_OVER_128;
use crate::sincos_dyadic::{range_reduction_small_f128, sincos_eval_dyadic};
use crate::sincos_reduce::LargeArgumentReduction;
/// Slow path of the cosecant: rebuilds `sin(x)` in extended (dyadic) precision
/// and returns its reciprocal.
///
/// `x_e` is the biased exponent of `x` and selects the same reduction branch as
/// the fast path; `k` is the multiple of `pi/128` found there.
/// NOTE(review): `argument_reduction.accurate()` presumably refines the
/// reduction already performed by the caller on this same object — confirm.
#[cold]
fn csc_accurate(x: f64, argument_reduction: &mut LargeArgumentReduction, x_e: u64, k: u64) -> f64 {
    const EXP_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
    let reduced = if x_e >= EXP_BIAS + 16 {
        argument_reduction.accurate()
    } else {
        range_reduction_small_f128(x)
    };
    let eval = sincos_eval_dyadic(&reduced);
    // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
    let sin_k = get_sin_k_rational(k);
    let cos_k = get_sin_k_rational(k.wrapping_add(64));
    // sin(x) = sin(k * pi/128 + u)
    //        = sin(u) * cos(k*pi/128) + cos(u) * sin(k*pi/128)
    let sin_x = (sin_k * eval.v_cos) + (cos_k * eval.v_sin);
    sin_x.reciprocal().fast_as_f64()
}
/// Cosecant for double precision
///
/// ULP 0.5
pub fn f_csc(x: f64) -> f64 {
let x_e = (x.to_bits() >> 52) & 0x7ff;
const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
let y: DoubleDouble;
let k;
let mut argument_reduction = LargeArgumentReduction::default();
// |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA)
if x_e < E_BIAS + 16 {
// |x| < 2^-26
if x_e < E_BIAS - 26 {
// Signed zeros.
if x == 0.0 {
return if x.is_sign_negative() {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
if x_e < E_BIAS - 52 {
return 1. / x;
}
// For |x| < 2^-26, |sin(x) - x| < ulp(x)/2.
let rcp = DoubleDouble::from_quick_recip(x);
return DoubleDouble::f64_mul_f64_add(x, f64::from_bits(0x3fc5555555555555), rcp)
.to_f64();
}
// // Small range reduction.
(y, k) = range_reduction_small(x);
} else {
// Inf or NaN
if x_e > 2 * E_BIAS {
// sin(+-Inf) = NaN
return x + f64::NAN;
}
// Large range reduction.
(k, y) = argument_reduction.reduce(x);
}
let r_sincos = sincos_eval(y);
// Fast look up version, but needs 256-entry table.
// cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
let sk = SIN_K_PI_OVER_128[(k & 255) as usize];
let ck = SIN_K_PI_OVER_128[((k.wrapping_add(64)) & 255) as usize];
let sin_k = DoubleDouble::from_bit_pair(sk);
let cos_k = DoubleDouble::from_bit_pair(ck);
let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, sin_k);
let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, cos_k);
// sin_k_cos_y is always >> cos_k_sin_y
let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
rr = DoubleDouble::from_exact_add(rr.hi, rr.lo);
rr = rr.recip();
let rlp = rr.lo + r_sincos.err;
let rlm = rr.lo - r_sincos.err;
let r_upper = rr.hi + rlp; // (rr.lo + ERR);
let r_lower = rr.hi + rlm; // (rr.lo - ERR);
// Ziv's accuracy test
if r_upper == r_lower {
return rr.to_f64();
}
csc_accurate(x, &mut argument_reduction, x_e, k)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_csc() {
assert_eq!(f_csc(0.000000014901161055069778), 67108864.62500001);
assert_eq!(f_csc( 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000541722315998), f64::INFINITY);
assert_eq!(f_csc(0.0), f64::INFINITY);
assert_eq!(f_csc(-0.0), f64::NEG_INFINITY);
assert!(f_csc(f64::NAN).is_nan());
assert_eq!(f_csc(1.0), 1.1883951057781212);
assert_eq!(f_csc(-0.5), -2.085829642933488);
}
}

137
vendor/pxfm/src/cube_roots/cbrt.rs vendored Normal file
View File

@@ -0,0 +1,137 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::cube_roots::cbrtf::halley_refine_d;
use crate::double_double::DoubleDouble;
use crate::exponents::fast_ldexp;
use crate::polyeval::f_polyeval4;
/// Computes cube root
///
/// Max found ULP 0.5
///
/// Zero, infinities and NaN are returned via `x + x`. Otherwise the magnitude
/// is split as `m * 2^e` with `m` in `[1, 2)` and `e = 3*q + r`; the cube root
/// of `m * 2^r` is seeded by a polynomial, refined by one Halley step and one
/// Newton step, then scaled by `2^q` and given the sign of `x`.
pub fn f_cbrt(x: f64) -> f64 {
    // 1; 2^{1/3}; 2^{2/3}
    static ESCALE: [f64; 3] = [
        1.0,
        f64::from_bits(0x3ff428a2f98d728b),
        f64::from_bits(0x3ff965fea53d6e3d),
    ];
    const FRACTION_MASK: u64 = (1u64 << 52) - 1;

    let raw = x.to_bits();
    let biased_exp = ((raw >> 52) & 0x7ff) as i32;
    if biased_exp == 0x7ff || x == 0.0 {
        return x + x;
    }

    // Fraction bits and unbiased exponent; subnormals are first scaled by 2^54
    // so that they carry an implicit leading one like every other input.
    let (fraction, exp) = if biased_exp == 0 {
        let scaled = (x * f64::from_bits(0x4350000000000000)).to_bits();
        (
            scaled & FRACTION_MASK,
            ((scaled >> 52) & 0x7ff) as i32 - 54 - 1023,
        )
    } else {
        (raw & FRACTION_MASK, biased_exp - 1023)
    };
    // m = 1.fraction, a positive value in [1, 2)
    let m = f64::from_bits(fraction | (0x3ffu64 << 52));

    // Polynomial for x^(1/3) on [1.0; 2.0]
    // Generated by Sollya:
    // d = [1.0, 2.0];
    // f_cbrt = x^(1/3);
    // Q = fpminimax(f_cbrt, 4, [|D...|], d, relative, floating);
    // See ./notes/cbrt.sollya
    let seed = f_polyeval4(
        m,
        f64::from_bits(0x3fe1b0babceeaafa),
        f64::from_bits(0x3fe2c9a3e8e06a3c),
        f64::from_bits(0xbfc4dc30afb71885),
        f64::from_bits(0x3f97a8d3e05458e4),
    );

    // split exponent e = 3*q + r with r in {0,1,2};
    // div_euclid/rem_euclid keep r >= 0 for negative exponents
    let q = exp.div_euclid(3);
    let rem_scale = exp.rem_euclid(3);
    let z = seed * ESCALE[rem_scale as usize];
    let mm = fast_ldexp(m, rem_scale); // mantissa times 2^r, in [1; 8)
    let r = 1.0 / mm;

    // One Halley's method step,
    // then one Newton-Raphson step in partial double-double precision
    let y0 = halley_refine_d(z, mm);
    let y0_squared = DoubleDouble::from_exact_mult(y0, y0);
    let y0_cubed = DoubleDouble::quick_mult_f64(y0_squared, y0);
    // h = (y0^3 - mm) / mm
    // y1 = y0 - 1/3 * h * y0
    let h = ((y0_cubed.hi - mm) + y0_cubed.lo) * r;
    let y = f_fmla(-f64::from_bits(0x3fd5555555555555), y0 * h, y0);

    f64::copysign(fast_ldexp(y, q), x)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_cbrt() {
assert_eq!(f_cbrt(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005432309223745),
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000017579026781511548);
assert_eq!(f_cbrt(1.225158611559834), 1.0700336588124544);
assert_eq!(f_cbrt(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000139491540182158), 1.1173329935611586e-103);
assert_eq!(f_cbrt(27.0), 3.0);
assert_eq!(f_cbrt(64.0), 4.0);
assert_eq!(f_cbrt(125.0), 5.0);
assert_eq!(f_cbrt(216.0), 6.0);
assert_eq!(f_cbrt(343.0), 7.0);
assert_eq!(f_cbrt(512.0), 8.0);
assert_eq!(f_cbrt(729.0), 9.0);
assert_eq!(f_cbrt(-729.0), -9.0);
assert_eq!(f_cbrt(-512.0), -8.0);
assert_eq!(f_cbrt(-343.0), -7.0);
assert_eq!(f_cbrt(-216.0), -6.0);
assert_eq!(f_cbrt(-125.0), -5.0);
assert_eq!(f_cbrt(-64.0), -4.0);
assert_eq!(f_cbrt(-27.0), -3.0);
assert_eq!(f_cbrt(0.0), 0.0);
assert_eq!(f_cbrt(f64::INFINITY), f64::INFINITY);
assert_eq!(f_cbrt(f64::NEG_INFINITY), f64::NEG_INFINITY);
assert!(f_cbrt(f64::NAN).is_nan());
}
}

129
vendor/pxfm/src/cube_roots/cbrtf.rs vendored Normal file
View File

@@ -0,0 +1,129 @@
/*
* // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
/// One Halley iteration towards `cbrt(a)` starting from the estimate `x`:
/// `x * (x^3 + 2a) / (2x^3 + a)`, with both sums evaluated through `f_fmla`.
#[inline(always)]
pub(crate) fn halley_refine_d(x: f64, a: f64) -> f64 {
    let cube = x * x * x;
    let numerator = f_fmla(2., a, cube);
    let denominator = f_fmla(2., cube, a);
    // Same association as the one-line form: (x * numerator) / denominator.
    x * numerator / denominator
}
#[inline(always)]
const fn halley_refine(x: f32, a: f32) -> f32 {
let tx = x * x * x;
x * (tx + 2f32 * a) / (2f32 * tx + a)
}
/// Cbrt for given value for const context.
/// This is simplified version just to make a good approximation on const context.
#[inline]
pub const fn cbrtf(x: f32) -> f32 {
let u = x.to_bits();
let au = u.wrapping_shl(1);
if au < (1u32 << 24) || au >= (0xffu32 << 24) {
if au >= (0xffu32 << 24) {
return x + x; /* inf, nan */
}
if au == 0 {
return x; /* +-0 */
}
}
const B1: u32 = 709958130;
let mut t: f32;
let mut ui: u32 = x.to_bits();
let mut hx: u32 = ui & 0x7fffffff;
hx = (hx / 3).wrapping_add(B1);
ui &= 0x80000000;
ui |= hx;
t = f32::from_bits(ui);
t = halley_refine(t, x);
halley_refine(t, x)
}
/// Computes cube root
///
/// Peak ULP on 64 bit = 0.49999577
///
/// NaN and infinities are returned through `x + x`; signed zeros are returned
/// unchanged. The seed comes from dividing the bit pattern by three (with a
/// 2^24 pre-scaling for subnormals) and is refined by two Halley steps in f64.
#[inline]
pub fn f_cbrtf(x: f32) -> f32 {
    const B1: u32 = 709958130;
    const B2: u32 = 642849266;
    const TWO_EXP_24: f32 = f32::from_bits(0x4b800000);
    const SIGN_MASK: u32 = 0x80000000;
    const ABS_MASK: u32 = 0x7fffffff;

    let raw = x.to_bits();
    // Drop the sign bit so the exponent occupies the top 8 bits.
    let doubled = raw.wrapping_shl(1);
    if doubled >= (0xffu32 << 24) {
        return x + x; /* inf, nan */
    }
    if doubled == 0 {
        return x; /* +-0 */
    }

    let seed_bits = if (raw & ABS_MASK) < 0x00800000 {
        // Subnormal: take the bits of x * 2^24 and use the seed constant B2.
        let scaled = (x * TWO_EXP_24).to_bits();
        (scaled & SIGN_MASK) | ((scaled & ABS_MASK) / 3).wrapping_add(B2)
    } else {
        (raw & SIGN_MASK) | ((raw & ABS_MASK) / 3).wrapping_add(B1)
    };

    let wide = x as f64;
    let first = halley_refine_d(f32::from_bits(seed_bits) as f64, wide);
    halley_refine_d(first, wide) as f32
}
#[cfg(test)]
mod tests {
use super::*;
// Spot-checks `f_cbrtf` on zero, exact cubes of both signs, both infinities and NaN.
#[test]
fn test_fcbrtf() {
assert_eq!(f_cbrtf(0.0), 0.0);
// Perfect cubes must come back exactly, with the sign preserved.
assert_eq!(f_cbrtf(-27.0), -3.0);
assert_eq!(f_cbrtf(27.0), 3.0);
assert_eq!(f_cbrtf(64.0), 4.0);
assert_eq!(f_cbrtf(-64.0), -4.0);
// Non-finite inputs.
assert_eq!(f_cbrtf(f32::NEG_INFINITY), f32::NEG_INFINITY);
assert_eq!(f_cbrtf(f32::INFINITY), f32::INFINITY);
assert!(f_cbrtf(f32::NAN).is_nan());
}
}

37
vendor/pxfm/src/cube_roots/mod.rs vendored Normal file
View File

@@ -0,0 +1,37 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod cbrt;
mod cbrtf;
mod rcbrt;
mod rcbrtf;
pub use cbrt::f_cbrt;
pub use cbrtf::{cbrtf, f_cbrtf};
pub use rcbrt::f_rcbrt;
pub use rcbrtf::f_rcbrtf;

191
vendor/pxfm/src/cube_roots/rcbrt.rs vendored Normal file
View File

@@ -0,0 +1,191 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::exponents::fast_ldexp;
use crate::polyeval::f_polyeval6;
//
// // y1 = y0 + 1/3 * y0 * (1 - a * y0 * y0 * y0)
// #[inline]
// fn raphson_step(x: f64, a: f64) -> f64 {
// let h = f_fmla(-a * x, x * x, 1.0);
// f_fmla(1. / 3. * h, x, x)
// }
// Division-free Halley step for the inverse cube root of `a`, from estimate y0 = `x`:
// y1 = y0 * (k1 - c * (k2 - k3 * c)), c = a * y0 * y0 * y0
// k1 = 14/9, k2 = 7/9, k3 = 2/9
#[inline(always)]
fn halleys_div_free(x: f64, a: f64) -> f64 {
    const K1: f64 = 14. / 9.;
    const K2: f64 = 7. / 9.;
    const K3: f64 = 2. / 9.;
    let c = a * x * x * x;
    // Horner-style evaluation, innermost bracket first.
    let inner = f_fmla(-K3, c, K2);
    let outer = f_fmla(-c, inner, K1);
    outer * x
}
/// Computes 1/cbrt(x)
///
/// ULP 0.5
///
/// Special cases handled up front: infinities return a zero of the same sign,
/// NaN is returned through `a + a`, and zeros return an infinity of the same sign.
pub fn f_rcbrt(a: f64) -> f64 {
// Decompose |a| = m * 2^e, with m in [1, 2) (the mantissa is rebuilt below with a 0x3ff exponent)
let xu = a.to_bits();
// `exp` keeps the raw biased exponent for classification; `e` is adjusted further down.
let exp = ((xu >> 52) & 0x7ff) as i32;
let mut e = ((xu >> 52) & 0x7ff) as i32;
let mut mant = xu & ((1u64 << 52) - 1);
// Exponent field all ones: infinity or NaN.
if exp == 0x7ff {
if a.is_infinite() {
return if a.is_sign_negative() { -0.0 } else { 0.0 };
}
return a + a;
}
// Zero input: the result is an infinity carrying the sign of the zero.
if exp == 0 && a == 0. {
return if a.is_sign_negative() {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
// Normalize subnormal
if exp == 0 {
let norm = a * f64::from_bits(0x4350000000000000); // * 2^54
let norm_bits = norm.to_bits();
mant = norm_bits & ((1u64 << 52) - 1);
// Undo the 2^54 scaling in the exponent bookkeeping.
e = ((norm_bits >> 52) & 0x7ff) as i32 - 54;
}
// Remove the exponent bias (1023).
e -= 1023;
// Install a biased exponent of 0x3ff so that `m` lies in [1, 2).
mant |= 0x3ff << 52;
let m = f64::from_bits(mant);
// Polynomial for x^(-1/3) on [1.0; 2.0]
// Generated by Sollya:
// d = [1.0, 2.0];
// f_inv_cbrt = x^(-1/3);
// Q = fpminimax(f_inv_cbrt, 5, [|D...|], d, relative, floating);
// See ./notes/inv_cbrt.sollya
let p = f_polyeval6(
m,
f64::from_bits(0x3ffc7f365bceaf71),
f64::from_bits(0xbff90e741fb9c896),
f64::from_bits(0x3ff3e68b9b2cd237),
f64::from_bits(0xbfe321c5eb24a185),
f64::from_bits(0x3fc3fa269b897f69),
f64::from_bits(0xbf916d6f13849fd1),
);
// split exponent e = 3*q + r with r in {0,1,2}
// use div_euclid/rem_euclid to get r >= 0
let q = e.div_euclid(3);
let rem_scale = e.rem_euclid(3);
// 1; 2^{-1/3}; 2^{-2/3}
static ESCALE: [u64; 3] = [1.0f64.to_bits(), 0x3fe965fea53d6e3d, 0x3fe428a2f98d728b];
// Fold the leftover 2^(-r/3) factor into the polynomial estimate.
let z = p * f64::from_bits(ESCALE[rem_scale as usize]);
let mm = fast_ldexp(m, rem_scale); // bring domain into [1;8]
// One Halley's method step
// then refine in partial double-double precision with Newton-Raphson iteration
let y0 = halleys_div_free(z, mm);
// hb = mm * y0^3 as a double-double; it is close to 1 when y0 is close to mm^(-1/3).
let d2y = DoubleDouble::from_exact_mult(y0, y0);
let d3y = DoubleDouble::quick_mult_f64(d2y, y0);
let hb = DoubleDouble::quick_mult_f64(d3y, mm);
let y: f64;
// Variant selected at compile time for x86/x86_64 with the `fma` target feature
// or aarch64 with `neon`.
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
// decompose double-double in linear FMA sums
// r = (1.0 - hb.hi - hb.lo) * y0 = y0 - hb.hi * y0 - hb.lo * y0 = fma(-hb.lo, y0, fma(-hb.hi, y0, y0))
let r = f_fmla(-hb.lo, y0, f_fmla(hb.hi, -y0, y0));
// // y1 = y0 + 1/3 * y0 * (1 - a * y0 * y0 * y0) = y0 + 1/3 * r
y = f_fmla(1. / 3., r, y0);
}
// Fallback for every other target: form (1 - hb) * y0 in double-double arithmetic instead.
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let m_hb = DoubleDouble::full_add_f64(-hb, 1.0);
let r = DoubleDouble::quick_mult_f64(m_hb, y0);
y = f_fmla(1. / 3., r.to_f64(), y0);
}
// The reciprocal root carries the exponent 2^(-q); the sign of the input is re-applied.
f64::copysign(fast_ldexp(y, -q), a)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_rcbrt() {
assert_eq!(f_rcbrt(0.9999999999999717), 1.0000000000000095);
assert_eq!(f_rcbrt(-68355745214719140000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
-0.000000000000000000000000000000000000000002445728958868668);
assert_eq!(f_rcbrt(-96105972807656840000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
-0.0000000000000000000000000000000000000000000000000000000002183148143573148);
assert_eq!(f_rcbrt(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000139491540182158),
8949883389846071000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
assert_eq!(f_rcbrt(0.00008386280387617153), 22.846001824951983);
assert_eq!(f_rcbrt(-125.0), -0.2);
assert_eq!(f_rcbrt(125.0), 0.2);
assert_eq!(f_rcbrt(1.0), 1.0);
assert_eq!(f_rcbrt(-1.0), -1.0);
assert_eq!(f_rcbrt(0.0), f64::INFINITY);
assert_eq!(f_rcbrt(-27.0), -1. / 3.);
assert_eq!(
f_rcbrt(2417851639214765300000000.),
0.000000007450580596938716
);
assert_eq!(f_rcbrt(27.0), 1. / 3.);
assert_eq!(f_rcbrt(64.0), 0.25);
assert_eq!(f_rcbrt(-64.0), -0.25);
assert_eq!(f_rcbrt(f64::NEG_INFINITY), -0.0);
assert_eq!(f_rcbrt(f64::INFINITY), 0.0);
assert!(f_rcbrt(f64::NAN).is_nan());
}
}

122
vendor/pxfm/src/cube_roots/rcbrtf.rs vendored Normal file
View File

@@ -0,0 +1,122 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
// // y1 = y0 * (2+x*y0^3)/(1+2*x*y0^3)
// #[inline(always)]
// fn halley_refine_d(x: f64, a: f64) -> f64 {
// let tx = x * x * x;
// x * f_fmla(tx, a, 2.0) / f_fmla(2. * a, tx, 1.0)
// }
/// One Newton-Raphson step for the inverse cube root of `a` from the estimate `x`:
/// `x * (4/3 - (a / 3) * x^3)`.
#[inline(always)]
fn rapshon_refine_inv_cbrt(x: f64, a: f64) -> f64 {
    let cube = x * x * x;
    let slope = -1. / 3. * a;
    x * f_fmla(slope, cube, 4. / 3.)
}
// Division-free Halley step for the inverse cube root of `a`, from estimate y0 = `x`:
// y1 = y0 * (k1 - c * (k2 - k3 * c)), c = a * y0 * y0 * y0
// k1 = 14/9, k2 = 7/9, k3 = 2/9
#[inline(always)]
fn halleys_div_free(x: f64, a: f64) -> f64 {
    const K1: f64 = 14. / 9.;
    const K2: f64 = 7. / 9.;
    const K3: f64 = 2. / 9.;
    let c = a * x * x * x;
    // Horner-style evaluation, innermost bracket first.
    let inner = f_fmla(-K3, c, K2);
    let outer = f_fmla(-c, inner, K1);
    outer * x
}
/// Computes 1/cbrt(x)
///
/// ULP 0.5
///
/// Infinities return a zero of the same sign, NaN is returned through `x + x`,
/// and zeros return an infinity of the same sign. Otherwise a bit-level seed
/// (with 2^24 pre-scaling for subnormals) is refined in f64 by two
/// division-free Halley steps followed by one Newton-Raphson step.
#[inline]
pub fn f_rcbrtf(x: f32) -> f32 {
    const SEED: u32 = 0x54a21d2au32;
    // Seed for subnormals: compensates the 2^24 pre-scaling by 8 exponent steps.
    const SEED_SUBNORMAL: u32 = 0x54a21d2au32 + (8u32 << 23);
    const TWO_EXP_24: f32 = f32::from_bits(0x4b800000);
    const SIGN_MASK: u32 = 0x80000000;
    const ABS_MASK: u32 = 0x7fffffff;

    let raw = x.to_bits();
    // Drop the sign bit so the exponent occupies the top 8 bits.
    let doubled = raw.wrapping_shl(1);
    if doubled >= (0xffu32 << 24) {
        if x.is_infinite() {
            return if x.is_sign_negative() { -0.0 } else { 0.0 };
        }
        return x + x; /* nan */
    }
    if doubled == 0 {
        /* +-0 -> +-inf */
        return if x.is_sign_positive() {
            f32::INFINITY
        } else {
            f32::NEG_INFINITY
        };
    }

    let seed_bits = if (raw & ABS_MASK) < 0x00800000 {
        // Subnormal: work from the bits of x * 2^24.
        let scaled = (x * TWO_EXP_24).to_bits();
        (scaled & SIGN_MASK) | SEED_SUBNORMAL.wrapping_sub((scaled & ABS_MASK) / 3)
    } else {
        (raw & SIGN_MASK) | SEED.wrapping_sub((raw & ABS_MASK) / 3)
    };

    let wide = x as f64;
    let seed = f32::from_bits(seed_bits) as f64;
    let first = halleys_div_free(seed, wide);
    let second = halleys_div_free(first, wide);
    rapshon_refine_inv_cbrt(second, wide) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    // Spot-checks `f_rcbrtf` on signed zeros, exact cubes of both signs,
    // both infinities and NaN.
    #[test]
    fn test_frcbrtf() {
        // Zeros map to an infinity of the same sign.
        assert_eq!(f_rcbrtf(0.0), f32::INFINITY);
        assert_eq!(f_rcbrtf(-0.0), f32::NEG_INFINITY);
        // Perfect cubes.
        assert_eq!(f_rcbrtf(-27.0), -1. / 3.);
        assert_eq!(f_rcbrtf(27.0), 1. / 3.);
        assert_eq!(f_rcbrtf(64.0), 0.25);
        assert_eq!(f_rcbrtf(-64.0), -0.25);
        // Non-finite inputs.
        assert_eq!(f_rcbrtf(f32::NEG_INFINITY), -0.0);
        assert_eq!(f_rcbrtf(f32::INFINITY), 0.0);
        assert!(f_rcbrtf(f32::NAN).is_nan());
    }
}

1012
vendor/pxfm/src/double_double.rs vendored Normal file

File diff suppressed because it is too large Load Diff

881
vendor/pxfm/src/dyadic_float.rs vendored Normal file
View File

@@ -0,0 +1,881 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bits::EXP_MASK;
use crate::common::f_fmla;
use std::ops::{Add, Mul, Sub};
/// Sign of a dyadic float, stored as a single byte (`Pos` = 0, `Neg` = 1).
#[repr(u8)]
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
pub(crate) enum DyadicSign {
    Pos = 0,
    Neg = 1,
}

impl DyadicSign {
    /// Returns the opposite sign.
    #[inline]
    pub(crate) fn negate(self) -> Self {
        if self == DyadicSign::Pos {
            DyadicSign::Neg
        } else {
            DyadicSign::Pos
        }
    }

    /// Returns the sign as a bit: 0 for `Pos`, 1 for `Neg`.
    #[inline]
    pub(crate) const fn to_bit(self) -> u8 {
        // The discriminants are exactly the bit values.
        self as u8
    }

    /// Sign of a product: `Neg` when the two signs differ, `Pos` otherwise.
    #[inline]
    pub(crate) const fn mult(self, rhs: Self) -> Self {
        match (self, rhs) {
            (DyadicSign::Pos, DyadicSign::Pos) | (DyadicSign::Neg, DyadicSign::Neg) => {
                DyadicSign::Pos
            }
            (DyadicSign::Pos, DyadicSign::Neg) | (DyadicSign::Neg, DyadicSign::Pos) => {
                DyadicSign::Neg
            }
        }
    }
}
// Width of the `DyadicFloat128` mantissa in bits.
const BITS: u32 = 128;
/// Sign / exponent / 128-bit mantissa triple.
///
/// The represented value is `(-1)^sign * mantissa * 2^exponent`; for example the
/// constant named `F128_ONE` in this file uses mantissa `1 << 127` with exponent `-127`.
/// `normalize()` shifts the mantissa so that its top bit is set; several methods
/// below are commented as assuming that normalized form.
#[derive(Copy, Clone, Debug)]
pub(crate) struct DyadicFloat128 {
// Sign of the value.
pub(crate) sign: DyadicSign,
// Power-of-two exponent applied to `mantissa` read as an integer.
pub(crate) exponent: i16,
// Unsigned 128-bit significand.
pub(crate) mantissa: u128,
}
/// Assembles an `f64` bit pattern from a sign, an exponent field value and a
/// fraction field value. `exp` is shifted into place (bit 52 upwards) and
/// `mantissa` is OR-ed in as given, without masking.
#[inline]
pub(crate) const fn f64_from_parts(sign: DyadicSign, exp: u64, mantissa: u64) -> f64 {
    let sign_field = match sign.to_bit() {
        0 => 0u64,
        _ => 1u64 << 63,
    };
    let exp_field = exp.wrapping_shl(52);
    f64::from_bits(sign_field | exp_field | mantissa)
}
/// Returns the upper 128 bits of the 256-bit product `a * b`.
///
/// Both operands are split into 64-bit halves; the four partial products are
/// combined with explicit carry propagation from the low half.
#[inline]
pub(crate) fn mulhi_u128(a: u128, b: u128) -> u128 {
    const LOW64: u128 = 0xffff_ffff_ffff_ffff;

    let (a0, a1) = (a & LOW64, a >> 64);
    let (b0, b1) = (b & LOW64, b >> 64);

    let p00 = a0 * b0;
    let p01 = a0 * b1;
    let p10 = a1 * b0;
    let p11 = a1 * b1;

    // Everything that lands in bits 64..128 of the full product; only its
    // carry into bit 128 matters for the result.
    let middle_low = (p00 >> 64)
        .wrapping_add(p01 & LOW64)
        .wrapping_add(p10 & LOW64);
    // Contributions to bits 128 and above from the cross terms plus that carry.
    let middle_high = (p01 >> 64)
        .wrapping_add(p10 >> 64)
        .wrapping_add(middle_low >> 64);

    p11.wrapping_add(middle_high)
}
/// Unbiased exponent of `x`: 0 for zeros, `1 - 1023` for subnormals, and the
/// exponent field minus 1023 for everything else.
#[inline]
const fn explicit_exponent(x: f64) -> i16 {
    const EXP_BIAS: i16 = 1023;
    if x == 0. {
        return 0;
    }
    if x.is_subnormal() {
        return 1 - EXP_BIAS;
    }
    let biased = (x.to_bits() >> 52) & 0x7ff;
    biased as i16 - EXP_BIAS
}
/// Significand of `x` as an integer: the 52 fraction bits, with bit 52 set as
/// well unless `x` is zero or subnormal.
#[inline]
const fn explicit_mantissa(x: f64) -> u64 {
    const FRACTION_MASK: u64 = (1u64 << 52) - 1;
    const HIDDEN_BIT: u64 = 1u64 << 52;
    let fraction = x.to_bits() & FRACTION_MASK;
    let has_hidden_bit = !(x.is_subnormal() || x == 0.);
    if has_hidden_bit {
        HIDDEN_BIT | fraction
    } else {
        fraction
    }
}
impl DyadicFloat128 {
#[inline]
pub(crate) const fn zero() -> Self {
Self {
sign: DyadicSign::Pos,
exponent: 0,
mantissa: 0,
}
}
/// Converts an `f64` into a normalized dyadic float.
///
/// The integer significand from `explicit_mantissa` is paired with
/// `explicit_exponent(x) - 52`, then `normalize()` is applied.
#[inline]
pub(crate) const fn new_from_f64(x: f64) -> Self {
    let mut value = Self {
        sign: if x.is_sign_negative() {
            DyadicSign::Neg
        } else {
            DyadicSign::Pos
        },
        exponent: explicit_exponent(x) - 52,
        mantissa: explicit_mantissa(x) as u128,
    };
    value.normalize();
    value
}
#[inline]
pub(crate) fn new(sign: DyadicSign, exponent: i16, mantissa: u128) -> Self {
let mut new_item = DyadicFloat128 {
sign,
exponent,
mantissa,
};
new_item.normalize();
new_item
}
/// Approximates `1 / a` in dyadic precision.
///
/// Starts from the f64 quotient `4.0 / a` with its exponent lowered by 2
/// (accurate to about 53 bits) and applies one Newton step
/// `r -> r + r * (1 - a * r)`.
#[inline]
pub(crate) fn accurate_reciprocal(a: f64) -> Self {
    const F128_ONE: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -127,
        mantissa: 0x8000_0000_0000_0000_0000_0000_0000_0000_u128,
    };
    let mut estimate = DyadicFloat128::new_from_f64(4.0 / a);
    estimate.exponent -= 2;
    let neg_a = DyadicFloat128::new_from_f64(-a);
    // residual = 1 - a * r
    let residual = F128_ONE + neg_a * estimate;
    estimate + estimate * residual
}
#[inline]
pub(crate) fn from_div_f64(a: f64, b: f64) -> Self {
let reciprocal = DyadicFloat128::accurate_reciprocal(b);
let da = DyadicFloat128::new_from_f64(a);
reciprocal * da
}
/// Multiply self by integer scalar `b`.
/// Returns a new normalized DyadicFloat128.
///
/// A zero operand on either side yields `DyadicFloat128::zero()`. The receiver
/// is normalized on a local copy first, because the shift amounts below are
/// only valid when the top mantissa bit is set.
#[inline]
pub(crate) fn mul_int64(&self, b: i64) -> DyadicFloat128 {
    // A zero mantissa would give `leading_zeros() == 128` below and an
    // out-of-range shift, so it is handled here together with `b == 0`.
    if b == 0 || self.mantissa == 0 {
        return DyadicFloat128::zero();
    }
    let mut lhs = *self;
    lhs.normalize();

    let abs_b = b.unsigned_abs();
    let sign = if (b < 0) ^ (lhs.sign == DyadicSign::Neg) {
        DyadicSign::Neg
    } else {
        DyadicSign::Pos
    };

    // High 64 bits of the mantissa times |b|. With a normalized mantissa and
    // |b| <= 2^63 this lies in [2^63, 2^127), so 1 <= m <= 64.
    let mut hi_prod = (lhs.mantissa >> 64).wrapping_mul(abs_b as u128);
    let m = hi_prod.leading_zeros();
    hi_prod <<= m;

    // Low 64 bits times |b|, aligned to the same scale as `hi_prod`
    // (equivalent to `>> (64 - m)`, written so the left shift cannot lose bits).
    let mut lo_prod = (lhs.mantissa & 0xffff_ffff_ffff_ffff).wrapping_mul(abs_b as u128);
    lo_prod = (lo_prod << (m - 1)) >> 63;

    let (product, overflow) = hi_prod.overflowing_add(lo_prod);
    let mut result = DyadicFloat128 {
        sign,
        exponent: lhs.exponent + 64 - m as i16,
        mantissa: product,
    };
    if overflow {
        // The sum carried into a 129th bit: the true mantissa is 2^128 + product.
        // Halve it (rounding an odd low bit up), restore the carried bit at the
        // top and raise the exponent by one. The mantissa must not be shifted a
        // second time, otherwise the result comes out at half the true product.
        let rounded = product + (product & 0x1);
        result.mantissa = (rounded >> 1) | (1u128 << 127);
        result.exponent += 1;
    }
    result.normalize();
    result
}
/// Shifts the mantissa right by `amount` bits and raises the exponent by the
/// same amount. Shifts of `BITS` or more reset both fields to zero.
#[inline]
fn shift_right(&mut self, amount: u32) {
    if amount >= BITS {
        self.exponent = 0;
        self.mantissa = 0;
        return;
    }
    self.exponent += amount as i16;
    self.mantissa >>= amount;
}
/// Shifts the mantissa left by `amount` bits and lowers the exponent by the
/// same amount. Shifts of `BITS` or more reset both fields to zero.
#[inline]
fn shift_left(&mut self, amount: u32) {
    if amount >= BITS {
        self.exponent = 0;
        self.mantissa = 0;
        return;
    }
    self.exponent -= amount as i16;
    self.mantissa <<= amount;
}
// Don't forget to call if manually created
/// Shifts a non-zero mantissa left until its top bit is set, lowering the
/// exponent by the same number of bits. A zero mantissa is left untouched.
#[inline]
pub(crate) const fn normalize(&mut self) {
    if self.mantissa == 0 {
        return;
    }
    let leading = self.mantissa.leading_zeros();
    self.mantissa = self.mantissa.wrapping_shl(leading);
    self.exponent -= leading as i16;
}
/// Returns a copy with the sign flipped; exponent and mantissa are unchanged.
#[inline]
pub(crate) fn negated(&self) -> Self {
    Self {
        sign: self.sign.negate(),
        ..*self
    }
}
/// Computes `self - rhs` by adding the negation of `rhs` with `quick_add`.
#[inline]
pub(crate) fn quick_sub(&self, rhs: &Self) -> Self {
    let flipped = rhs.negated();
    self.quick_add(&flipped)
}
#[inline]
pub(crate) fn quick_add(&self, rhs: &Self) -> Self {
if self.mantissa == 0 {
return *rhs;
}
if rhs.mantissa == 0 {
return *self;
}
let mut a = *self;
let mut b = *rhs;
let exp_diff = a.exponent.wrapping_sub(b.exponent);
// If exponent difference is too large, b is negligible
if exp_diff.abs() >= BITS as i16 {
return if a.sign == b.sign {
// Adding very small number to large: return a
return if a.exponent > b.exponent { a } else { b };
} else if a.exponent > b.exponent {
a
} else {
b
};
}
// Align exponents
if a.exponent > b.exponent {
b.shift_right((a.exponent - b.exponent) as u32);
} else if b.exponent > a.exponent {
a.shift_right((b.exponent - a.exponent) as u32);
}
let mut result = DyadicFloat128::zero();
if a.sign == b.sign {
// Addition
result.sign = a.sign;
result.exponent = a.exponent;
result.mantissa = a.mantissa;
let (sum, is_overflow) = result.mantissa.overflowing_add(b.mantissa);
result.mantissa = sum;
if is_overflow {
// Mantissa addition overflow.
result.shift_right(1);
result.mantissa |= 1u128 << 127;
}
// Result is already normalized.
return result;
}
// Subtraction
if a.mantissa >= b.mantissa {
result.sign = a.sign;
result.exponent = a.exponent;
result.mantissa = a.mantissa.wrapping_sub(b.mantissa);
} else {
result.sign = b.sign;
result.exponent = b.exponent;
result.mantissa = b.mantissa.wrapping_sub(a.mantissa);
}
result.normalize();
result
}
/// Multiplies two dyadic floats, keeping the high 128 bits of the mantissa
/// product. If either mantissa is zero, the result has a zero mantissa (the
/// exponent is still the sum of the exponents plus `BITS`).
#[inline]
pub(crate) fn quick_mul(&self, rhs: &Self) -> Self {
    let sign = if self.sign != rhs.sign {
        DyadicSign::Neg
    } else {
        DyadicSign::Pos
    };
    let mut product = DyadicFloat128 {
        sign,
        exponent: self.exponent + rhs.exponent + BITS as i16,
        mantissa: 0,
    };
    if self.mantissa == 0 || rhs.mantissa == 0 {
        return product;
    }
    product.mantissa = mulhi_u128(self.mantissa, rhs.mantissa);
    // Check the leading bit directly, should be faster than using clz in
    // normalize().
    if product.mantissa >> 127 == 0 {
        product.shift_left(1);
    }
    product
}
/// Rounds this value to an `f64`.
///
/// A zero mantissa returns a signed zero. Otherwise the top 53 mantissa bits
/// form a high part `d_hi`, and the round and sticky bits below them are folded
/// in through one `f_fmla` with a low-order unit `d_lo`. As the comment below
/// states, the input is assumed to be normalized.
#[inline]
pub(crate) fn fast_as_f64(&self) -> f64 {
if self.mantissa == 0 {
return if self.sign == DyadicSign::Pos {
0.
} else {
-0.0
};
}
// Assume that it is normalized, and output is also normal.
const PRECISION: u32 = 52 + 1;
// SIG_MASK - FRACTION_MASK
// Both masks are identical here, so IMPLICIT_MASK evaluates to 0 for f64.
const SIG_MASK: u64 = (1u64 << 52) - 1;
const FRACTION_MASK: u64 = (1u64 << 52) - 1;
const IMPLICIT_MASK: u64 = SIG_MASK - FRACTION_MASK;
const EXP_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
// Biased f64 exponent of the leading mantissa bit.
let mut exp_hi = self.exponent as i32 + ((BITS - 1) as i32 + EXP_BIAS as i32);
if exp_hi > 2 * EXP_BIAS as i32 {
// Results overflow.
let d_hi = f64_from_parts(self.sign, 2 * EXP_BIAS, IMPLICIT_MASK);
// NOTE(review): the original comment here referred to a `volatile` that prevents
// constant propagation; nothing in this code is volatile, so that guarantee
// does not hold here. Doubling the largest power of two overflows to infinity.
let two = 2.0f64;
let r = two * d_hi;
return r;
}
let mut denorm = false;
// Number of mantissa bits below the 53 that are kept.
let mut shift = BITS - PRECISION;
if exp_hi <= 0 {
// Output is denormal.
denorm = true;
shift = (BITS - PRECISION) + (1 - exp_hi) as u32;
// Work with a temporary exponent of EXP_BIAS; it is subtracted again at the end.
exp_hi = EXP_BIAS as i32;
}
// Biased exponent of the low-order unit `d_lo` that is scaled by the round/sticky code below.
let exp_lo = exp_hi.wrapping_sub(PRECISION as i32).wrapping_sub(1);
let m_hi = if shift >= BITS {
0
} else {
self.mantissa >> shift
};
let d_hi = f64_from_parts(
self.sign,
exp_hi as u64,
(m_hi as u64 & SIG_MASK) | IMPLICIT_MASK,
);
// Round bit: the first discarded bit. Sticky bit: OR of everything below it.
let round_mask = if shift > BITS {
0
} else {
1u128.wrapping_shl(shift.wrapping_sub(1))
};
let sticky_mask = round_mask.wrapping_sub(1u128);
let round_bit = (self.mantissa & round_mask) != 0;
let sticky_bit = (self.mantissa & sticky_mask) != 0;
// 0..=3: twice the round bit plus the sticky bit, used as the multiplier of d_lo.
let round_and_sticky = round_bit as i32 * 2 + sticky_bit as i32;
let d_lo: f64;
if exp_lo <= 0 {
// d_lo is denormal, but the output is normal.
// Scale everything up by 2^scale_up_exponent, combine, then scale back down.
let scale_up_exponent = 1 - exp_lo;
let scale_up_factor = f64_from_parts(
DyadicSign::Pos,
EXP_BIAS + scale_up_exponent as u64,
IMPLICIT_MASK,
);
let scale_down_factor = f64_from_parts(
DyadicSign::Pos,
EXP_BIAS - scale_up_exponent as u64,
IMPLICIT_MASK,
);
d_lo = f64_from_parts(
self.sign,
(exp_lo + scale_up_exponent) as u64,
IMPLICIT_MASK,
);
return f_fmla(d_lo, round_and_sticky as f64, d_hi * scale_up_factor)
* scale_down_factor;
}
d_lo = f64_from_parts(self.sign, exp_lo as u64, IMPLICIT_MASK);
// Still correct without FMA instructions if `d_lo` is not underflow.
let r = f_fmla(d_lo, round_and_sticky as f64, d_hi);
if denorm {
const SIG_LEN: u64 = 52;
// Exponent before rounding is in denormal range, simply clear the
// exponent field.
let clear_exp: u64 = (exp_hi as u64) << SIG_LEN;
let mut r_bits: u64 = r.to_bits() - clear_exp;
if r_bits & EXP_MASK == 0 {
// Output is denormal after rounding, clear the implicit bit for 80-bit
// long double. IMPLICIT_MASK is 0 in this file, so this subtraction changes nothing.
r_bits -= IMPLICIT_MASK;
}
return f64::from_bits(r_bits);
}
r
}
// Approximate reciprocal - given a nonzero `a`, make a good approximation to 1/a.
// The method is Newton-Raphson iteration, based on quick_mul.
/// Starts from the f64 reciprocal of `fast_as_f64()` and applies two
/// iterations of `x' = x * (2 - a * x)`. If `a * x = 1 - e`, then
/// `a * x' = 1 - e^2`, so each iteration squares the error term.
#[inline]
pub(crate) fn reciprocal(self) -> DyadicFloat128 {
    // The constant 2, needed in every iteration.
    let two = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -126,
        mantissa: 0x80000000_00000000_00000000_00000000_u128,
    };
    let seed = 1. / self.fast_as_f64();
    let mut x = DyadicFloat128::new_from_f64(seed);
    for _ in 0..2 {
        x = x * (two - (self * x));
    }
    x
}
// // Approximate reciprocal - given a nonzero `a`, make a good approximation to 1/a.
// // The method is Newton-Raphson iteration, based on quick_mul.
// // *This is very crude guess*
// #[inline]
// fn approximate_reciprocal(&self) -> DyadicFloat128 {
// // Given an approximation x to 1/a, a better one is x' = x(2-ax).
// //
// // You can derive this by using the Newton-Raphson formula with the function
// // f(x) = 1/x - a. But another way to see that it works is to say: suppose
// // that ax = 1-e for some small error e. Then ax' = ax(2-ax) = (1-e)(1+e) =
// // 1-e^2. So the error in x' is the square of the error in x, i.e. the number
// // of correct bits in x' is double the number in x.
//
// // An initial approximation to the reciprocal
// let mut x = DyadicFloat128 {
// sign: DyadicSign::Pos,
// exponent: -32 - self.exponent - BITS as i16,
// mantissa: self.mantissa >> (BITS - 32),
// };
// x.normalize();
//
// // The constant 2, which we'll need in every iteration
// let two = DyadicFloat128::new(DyadicSign::Pos, 1, 1);
//
// // We expect at least 31 correct bits from our 32-bit starting approximation
// let mut ok_bits = 31usize;
//
// // The number of good bits doubles in each iteration, except that rounding
// // errors introduce a little extra each time. Subtract a bit from our
// // accuracy assessment to account for that.
// while ok_bits < BITS as usize {
// x = x * (two - (*self * x));
// ok_bits = 2 * ok_bits - 1;
// }
//
// x
// }
}
impl Add<DyadicFloat128> for DyadicFloat128 {
type Output = DyadicFloat128;
#[inline]
fn add(self, rhs: DyadicFloat128) -> Self::Output {
self.quick_add(&rhs)
}
}
impl DyadicFloat128 {
    /// Returns `self.exponent + (BITS - 1)`, i.e. the binary weight of the most
    /// significant mantissa bit.
    ///
    /// When the mantissa has its top bit set (as every constant in this file
    /// does) this equals `floor(log2(|x|))`.
    #[inline]
    pub(crate) fn biased_exponent(&self) -> i16 {
        self.exponent + (BITS as i16 - 1)
    }

    /// Truncates the value toward zero and returns it as an `i64`.
    ///
    /// * Zero and every `|x| < 1` yield `0`.
    /// * `|x| >= 2^63` saturates to `i64::MAX` (positive) or `i64::MIN` (negative).
    ///
    /// The mantissa is assumed to have its top bit set for nonzero values.
    #[inline]
    pub(crate) fn trunc_to_i64(&self) -> i64 {
        // Zero is stored as `exponent: 0, mantissa: 0`, so it has to be detected
        // through the mantissa; its exponent would otherwise look like a huge value.
        if self.mantissa == 0 || self.exponent <= -(BITS as i16) {
            // |x| < 1 truncates to zero.
            return 0;
        }
        let norm_exp = self.biased_exponent();
        if norm_exp >= 63 {
            // |x| >= 2^63 does not fit in the positive range of an i64. Saturating
            // here also keeps the `as i64` cast below from wrapping into the sign
            // bit and keeps `-r` from overflowing on `i64::MIN`.
            return if self.sign == DyadicSign::Neg {
                i64::MIN
            } else {
                i64::MAX
            };
        }
        // Upper 64 mantissa bits; the leading bit sits at position 63.
        let hi = self.mantissa >> 64;
        // 0 <= norm_exp <= 62 here, so the shift is in 1..=63 and r < 2^63.
        let r: i64 = (hi >> (63 - norm_exp)) as i64;
        if self.sign == DyadicSign::Neg { -r } else { r }
    }

    /// Rounds to the nearest integer, with halfway cases rounded away from zero.
    ///
    /// Values whose exponent is non-negative contain no fractional bits and are
    /// returned unchanged. `|x| < 0.5` yields a zero that keeps the sign of `self`.
    #[inline]
    pub(crate) fn round_to_nearest(&self) -> DyadicFloat128 {
        // Mantissa of a normalized power of two.
        const TOP_BIT: u128 = 0x8000_0000_0000_0000_0000_0000_0000_0000_u128;

        if self.exponent >= 0 {
            // Every mantissa bit has integer weight (this also covers the zero
            // representation), so there is nothing to round. Without this guard the
            // shift count below would wrap and clear integer bits.
            return *self;
        }
        if self.exponent == -(BITS as i16) {
            // 0.5 <= |x| < 1 rounds away from zero to +/-1.
            return DyadicFloat128 {
                sign: self.sign,
                exponent: -(BITS as i16 - 1),
                mantissa: TOP_BIT,
            };
        }
        if self.exponent <= -((BITS + 1) as i16) {
            // |x| < 0.5 rounds to zero (sign preserved).
            return DyadicFloat128 {
                sign: self.sign,
                exponent: 0,
                mantissa: 0u128,
            };
        }
        // From here -(BITS - 1) <= exponent <= -1, so the low `-exponent` mantissa
        // bits are the fractional part and the shift count is in 1..=127.
        let frac_bits = (-self.exponent) as u32;
        let half_bit_set = self.mantissa & (1u128 << (frac_bits - 1)) != 0;
        let trunc_u: u128 = (self.mantissa >> frac_bits) << frac_bits;
        if trunc_u == self.mantissa {
            // Already an integer.
            return *self;
        }
        let truncated = DyadicFloat128::new(self.sign, self.exponent, trunc_u);
        if !half_bit_set {
            // Fractional part is less than 0.5 so the rounded value is the
            // same as the truncated value.
            return truncated;
        }
        // Fractional part is at least 0.5: move one unit away from zero.
        let one = DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -(BITS as i16 - 1),
            mantissa: TOP_BIT,
        };
        if self.sign == DyadicSign::Neg {
            truncated - one
        } else {
            truncated + one
        }
    }

    /// Same as [`Self::round_to_nearest`], converted to `f64` with `fast_as_f64`.
    #[inline]
    pub(crate) fn round_to_nearest_f64(&self) -> f64 {
        self.round_to_nearest().fast_as_f64()
    }
}
impl Sub<DyadicFloat128> for DyadicFloat128 {
type Output = DyadicFloat128;
#[inline]
fn sub(self, rhs: DyadicFloat128) -> Self::Output {
self.quick_sub(&rhs)
}
}
impl Mul<DyadicFloat128> for DyadicFloat128 {
type Output = DyadicFloat128;
#[inline]
fn mul(self, rhs: DyadicFloat128) -> Self::Output {
self.quick_mul(&rhs)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Mantissa with only the most significant bit set.
    const TOP_BIT: u128 = 0x80000000_00000000_00000000_00000000_u128;

    /// Builds the power of two whose mantissa is `TOP_BIT`; `exponent == -127` is 1.0.
    fn pow2(sign: DyadicSign, exponent: i16) -> DyadicFloat128 {
        DyadicFloat128 {
            sign,
            exponent,
            mantissa: TOP_BIT,
        }
    }

    #[test]
    fn test_dyadic_float() {
        assert_eq!(pow2(DyadicSign::Pos, -127).fast_as_f64(), 1.0);
        assert_eq!(pow2(DyadicSign::Neg, -128).fast_as_f64(), -1.0 / 2.0);

        let neg_one_over_24 = DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -132,
            mantissa: 0xaaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaab_u128,
        };
        assert_eq!(neg_one_over_24.fast_as_f64(), -1.0 / 24.0);

        let one_over_40320 = DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -143,
            mantissa: 0xd00d00d0_0d00d00d_00d00d00_d00d00d0_u128,
        };
        assert_eq!(one_over_40320.fast_as_f64(), 1.0 / 40320.0);
    }

    #[test]
    fn dyadic_float_add() {
        let one = pow2(DyadicSign::Pos, -127);
        assert_eq!(one.fast_as_f64(), 1.0);

        let neg_half = pow2(DyadicSign::Neg, -128);
        assert_eq!(one.quick_add(&neg_half).fast_as_f64(), 0.5);
    }

    #[test]
    fn dyadic_float_mul() {
        let one = pow2(DyadicSign::Pos, -127);
        assert_eq!(one.fast_as_f64(), 1.0);

        let neg_half = pow2(DyadicSign::Neg, -128);
        assert_eq!(one.quick_mul(&neg_half).fast_as_f64(), -0.5);

        assert_eq!(pow2(DyadicSign::Pos, -126).fast_as_f64(), 2.0);
    }

    #[test]
    fn dyadic_round_trip() {
        let zero = 0.0;
        assert_eq!(DyadicFloat128::new_from_f64(zero).fast_as_f64(), zero);

        // Zero spelled out field by field must convert to 0.0 as well.
        let explicit_zero = DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: 0,
            mantissa: 0,
        };
        assert_eq!(explicit_zero.fast_as_f64(), zero);

        for value in [1.0, 0.5, -0.5, -532322.54324324232] {
            assert_eq!(DyadicFloat128::new_from_f64(value).fast_as_f64(), value);
        }
    }

    #[test]
    fn dyadic_float_reciprocal() {
        let recip_of_one = pow2(DyadicSign::Pos, -127).reciprocal();
        assert_eq!(recip_of_one.fast_as_f64(), 1.0);

        let recip_of_four = DyadicFloat128::new_from_f64(4.).reciprocal();
        assert_eq!(recip_of_four.fast_as_f64(), 0.25);
    }

    #[test]
    fn dyadic_float_from_div() {
        let quotient = DyadicFloat128::from_div_f64(1.0, 4.0);
        assert_eq!(quotient.fast_as_f64(), 0.25);
    }

    #[test]
    fn dyadic_float_accurate_reciprocal() {
        let recip = DyadicFloat128::accurate_reciprocal(4.0);
        assert_eq!(recip.fast_as_f64(), 0.25);
    }

    #[test]
    fn dyadic_float_mul_int() {
        // (operand, integer factor, expected product)
        for (operand, factor, expected) in [(4.0, -2, -8.0), (-4.0, -2, 8.0), (2.5, 2, 5.0)] {
            let product = DyadicFloat128::new_from_f64(operand).mul_int64(factor);
            assert_eq!(product.fast_as_f64(), expected);
        }
    }

    #[test]
    fn dyadic_float_round() {
        // (input, expected rounded value)
        let cases = [
            (2.5, 3.0),
            (0.5, 1.0),
            (-0.5, -1.0),
            (-0.351, (-0.351f64).round()),
            (0.351, 0.351f64.round()),
            (25.6, 26.),
        ];
        for (input, expected) in cases {
            let rounded = DyadicFloat128::new_from_f64(input).round_to_nearest_f64();
            assert_eq!(rounded, expected);
        }
    }

    #[test]
    fn dyadic_int_trunc() {
        // (input, expected truncation toward zero)
        let cases = [
            (-2.5, -2),
            (2.5, 2),
            (0.5, 0),
            (-0.5, 0),
            (-0.351, 0),
            (0.351, 0),
        ];
        for (input, expected) in cases {
            let truncated = DyadicFloat128::new_from_f64(input).trunc_to_i64();
            assert_eq!(truncated, expected);
        }
    }
}

319
vendor/pxfm/src/err/erf.rs vendored Normal file
View File

@@ -0,0 +1,319 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{dd_fmla, dyad_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::err::erf_poly::{ERF_POLY, ERF_POLY_C2};
use crate::floor::FloorFinite;
/* double-double approximation of 2/sqrt(pi) to nearest */
// `DoubleDouble::new` is called with the low word first and the high word second
// (the same order as `DoubleDouble::new(l, p.hi)` elsewhere in this file).
const TWO_OVER_SQRT_PI: DoubleDouble = DoubleDouble::new(
// low word: 0x1.1ae3a914fed80p-56
f64::from_bits(0x3c71ae3a914fed80),
// high word: 0x1.20dd750429b6dp+0
f64::from_bits(0x3ff20dd750429b6d),
);
/// Value returned by `erf_fast`: an approximation of erf together with the
/// error bound that `f_erf` uses for its rounding test.
pub(crate) struct Erf {
/// Double-double approximation (`hi + lo`) of erf at the argument.
pub(crate) result: DoubleDouble,
/// Relative error bound on `result`; `f_erf` multiplies it by `result.hi`
/// to obtain the two ends of its rounding-test interval.
pub(crate) err: f64,
}
/// Accurate-path evaluation of erf(x) for |x| < 1/8, returned as a double-double.
///
/// Assumes x >= 2^-61, thus no underflow can occur.
#[cold]
fn cr_erf_accurate_tiny(x: f64) -> DoubleDouble {
    // Coefficients of an odd polynomial of degree 21, stored as raw f64 bits:
    // * P[0..8]  are (high, low) pairs for degrees 1, 3, 5 and 7,
    // * P[8..15] are single doubles for degrees 9, 11, ..., 21.
    static P: [u64; 15] = [
        0x3ff20dd750429b6d,
        0x3c71ae3a914fed80,
        0xbfd812746b0379e7,
        0x3c6ee12e49ca96ba,
        0x3fbce2f21a042be2,
        0xbc52871bc0a0a0d0,
        0xbf9b82ce31288b51,
        0x3c21003accf1355c,
        0x3f7565bcd0e6a53f,
        0xbf4c02db40040cc3,
        0x3f1f9a326fa3cf50,
        0xbeef4d25e3c73ce9,
        0x3ebb9eb332b31646,
        0xbe864a4bd5eca4d7,
        0x3e6c0acc2502e94e,
    ];

    /// Multiplies the double-double `hi + lo` by `x` twice, i.e. by x^2.
    fn mul_by_x_twice(hi: f64, lo: f64, x: f64) -> DoubleDouble {
        let mut once = DoubleDouble::from_exact_mult(hi, x);
        once.lo = dd_fmla(lo, x, once.lo);
        let mut twice = DoubleDouble::from_exact_mult(once.hi, x);
        twice.lo = dd_fmla(once.lo, x, twice.lo);
        twice
    }

    let x_sq = x * x;

    // Degrees 21 down to 13: plain double Horner steps in x^2.
    let mut hi = f64::from_bits(P[14]); /* degree 21 */
    for idx in (10..=13).rev() {
        hi = dd_fmla(hi, x_sq, f64::from_bits(P[idx]));
    }

    // Degrees 11 and 9: double-double accumulator, single-double coefficients.
    let mut lo = 0.;
    for idx in (8..=9).rev() {
        let scaled = mul_by_x_twice(hi, lo, x);
        let sum = DoubleDouble::from_exact_add(f64::from_bits(P[idx]), scaled.hi);
        lo = scaled.lo + sum.lo;
        hi = sum.hi;
    }

    // Degrees 7, 5, 3 and 1: double-double accumulator and coefficients.
    for pair in P[..8].chunks_exact(2).rev() {
        let scaled = mul_by_x_twice(hi, lo, x);
        let sum = DoubleDouble::from_exact_add(f64::from_bits(pair[0]), scaled.hi);
        lo = scaled.lo + sum.lo + f64::from_bits(pair[1]);
        hi = sum.hi;
    }

    /* multiply by z */
    let product = DoubleDouble::from_exact_mult(hi, x);
    lo = dd_fmla(lo, x, product.lo);
    DoubleDouble::new(lo, product.hi)
}
/// Accurate-path evaluation of erf(x), returned as a double-double `hi + lo`.
///
/// Assumes 0 <= x <= 0x1.7afb48dc96626p+2 and x >= 2^-61, thus no underflow
/// can occur. Arguments below 1/8 are handled by `cr_erf_accurate_tiny`;
/// the rest use one degree-18 polynomial per interval of width 1/8,
/// evaluated around the middle of the interval.
#[cold]
#[inline(never)]
pub(crate) fn erf_accurate(x: f64) -> DoubleDouble {
    /* z < 1/8 */
    if x < 0.125 {
        return cr_erf_accurate_tiny(x);
    }

    let scaled = 8.0 * x;
    let cell = scaled.floor_finite();
    let i: u32 = scaled as u32;
    // Offset of x from the middle of its interval [i/8, (i+1)/8).
    let z = (x - 0.0625) - 0.125 * cell;
    /* now |z| <= 1/16 */

    // Row layout: coeffs[0..16] hold (high, low) pairs for degrees 0..=7,
    // coeffs[16..=26] hold single doubles for degrees 8..=18.
    let coeffs = &ERF_POLY_C2[(i - 1) as usize];

    // Degrees 18 down to 11: plain double Horner steps.
    let mut hi = f64::from_bits(coeffs[26]); /* degree-18 */
    for idx in (19..=25).rev() {
        hi = dd_fmla(hi, z, f64::from_bits(coeffs[idx]));
    }

    // Degrees 10, 9 and 8: double-double accumulator, single-double coefficients.
    let mut lo: f64 = 0.;
    for idx in (16..=18).rev() {
        let mut prod = DoubleDouble::from_exact_mult(hi, z);
        prod.lo = dd_fmla(lo, z, prod.lo);
        let sum = DoubleDouble::from_exact_add(f64::from_bits(coeffs[idx]), prod.hi);
        hi = sum.hi;
        lo = sum.lo + prod.lo;
    }

    // Degrees 7 down to 0: double-double accumulator and coefficients.
    for pair in coeffs[..16].chunks_exact(2).rev() {
        let mut prod = DoubleDouble::from_exact_mult(hi, z);
        prod.lo = dd_fmla(lo, z, prod.lo);
        /* add the coefficient pair to prod: we use two_sum() instead of
        fast_two_sum because for example for i=3, the coefficient of
        degree 7 is tiny (0x1.060b78c935b8ep-13) with respect to that
        of degree 8 (0x1.678b51a9c4b0ap-7) */
        let sum = DoubleDouble::from_exact_add(f64::from_bits(pair[0]), prod.hi);
        hi = sum.hi;
        lo = sum.lo + prod.lo + f64::from_bits(pair[1]);
    }

    DoubleDouble::new(lo, hi)
}
/* Fast path for erf. The comments below (inherited from the reference
implementation) call the argument `z`; in this function that is the
parameter `x`. The local `z` defined further down is a different quantity:
the offset of `x` from the middle of its interval.

Assuming 0 <= z <= 5.9215871957945065, put in h+l an approximation
of erf(z). Return err the maximal relative error:
|(h + l)/erf(z) - 1| < err*|h+l|

The approximation h+l is returned in `Erf::result` and err in `Erf::err`. */
#[inline]
pub(crate) fn erf_fast(x: f64) -> Erf {
/* we split [0,5.9215871957945065] into intervals i/16 <= z < (i+1)/16,
and for each interval, we use a minimax polynomial:
* for i=0 (0 <= z < 1/16) we use a polynomial evaluated at zero,
since if we evaluate in the middle 1/32, we will get bad accuracy
for tiny z, and moreover z-1/32 might not be exact
* for 1 <= i <= 94, we use a polynomial evaluated in the middle of
the interval, namely i/16+1/32
*/
if x < 0.0625
/* z < 1/16 */
{
/* the following is a degree-11 minimax polynomial for erf(x) on [0,1/16]
generated by Sollya, with double-double coefficients for degree 1 and 3,
and double coefficients for degrees 5 to 11 (file erf0.sollya).
The maximal relative error is 2^-68.935. */
let z2 = DoubleDouble::from_exact_mult(x, x);
/* C[0], C[1]: degree 1 (high, low); C[2], C[3]: degree 3 (high, low);
C[4..8]: degrees 5, 7, 9, 11 */
const C: [u64; 8] = [
0x3ff20dd750429b6d,
0x3c71ae3a7862d9c4,
0xbfd812746b0379e7,
0x3c6f1a64d72722a2,
0x3fbce2f21a042b7f,
0xbf9b82ce31189904,
0x3f7565bbf8a0fe0b,
0xbf4bf9f8d2c202e4,
];
let z4 = z2.hi * z2.hi;
/* degrees 5 to 11 are evaluated in plain double precision */
let c9 = dd_fmla(f64::from_bits(C[7]), z2.hi, f64::from_bits(C[6]));
let mut c5 = dd_fmla(f64::from_bits(C[5]), z2.hi, f64::from_bits(C[4]));
c5 = dd_fmla(c9, z4, c5);
/* compute c0[2] + c0[3] + z2h*c5 */
let mut t = DoubleDouble::from_exact_mult(z2.hi, c5);
let mut v = DoubleDouble::from_exact_add(f64::from_bits(C[2]), t.hi);
v.lo += t.lo + f64::from_bits(C[3]);
/* compute c0[0] + c0[1] + (z2h + z2l)*(h + l) */
t = DoubleDouble::from_exact_mult(z2.hi, v.hi);
/* keep the high word of the degree-3 sum: v is overwritten below, and
this word is still needed for the z2l cross term */
let h_c = v.hi;
t.lo += dd_fmla(z2.hi, v.lo, f64::from_bits(C[1]));
v = DoubleDouble::from_exact_add(f64::from_bits(C[0]), t.hi);
v.lo += dd_fmla(z2.lo, h_c, t.lo);
/* the polynomial is odd: multiply the even part by x */
v = DoubleDouble::quick_mult_f64(v, x);
return Erf {
result: v,
err: f64::from_bits(0x3ba7800000000000),
}; /* err < 2.48658249618372e-21, cf Analyze0() */
}
/* v and i are both floor(16*x): v as a float for the exact reduction
below, i as the table index */
let v = (16.0 * x).floor_finite();
let i: u32 = (16.0 * x) as u32;
/* i/16 <= z < (i+1)/16 */
/* For 0.0625 <= z <= 0x1.7afb48dc96626p+2, z - 0.03125 is exact:
(1) either z - 0.03125 is in the same binade as z, then 0.03125 is
an integer multiple of ulp(z), so is z - 0.03125
(2) if z - 0.03125 is in a smaller binade, both z and 0.03125 are
integer multiple of the ulp() of that smaller binade.
Also, subtracting 0.0625 * v is exact. */
let z = (x - 0.03125) - 0.0625 * v;
/* now |z| <= 1/32 */
/* i >= 1 here since x >= 0.0625, so i - 1 does not underflow.
Row layout: c[0], c[1] = degree 0 (high, low); c[2], c[3] = degree 1
(high, low); c[4] .. c[12] = degrees 2 .. 10 */
let c = ERF_POLY[(i - 1) as usize];
let z2 = z * z;
let z4 = z2 * z2;
/* the degree-10 coefficient is c[12] */
let c9 = dd_fmla(f64::from_bits(c[12]), z, f64::from_bits(c[11]));
let mut c7 = dd_fmla(f64::from_bits(c[10]), z, f64::from_bits(c[9]));
let c5 = dd_fmla(f64::from_bits(c[8]), z, f64::from_bits(c[7]));
/* c3h, c3l <- c[5] + z*c[6] */
let mut c3 = DoubleDouble::from_exact_add(f64::from_bits(c[5]), z * f64::from_bits(c[6]));
c7 = dd_fmla(c9, z2, c7);
/* c3h, c3l <- c3h, c3l + c5*z2 */
let p = DoubleDouble::from_exact_add(c3.hi, c5 * z2);
c3.hi = p.hi;
c3.lo += p.lo;
/* c3h, c3l <- c3h, c3l + c7*z4 */
let p = DoubleDouble::from_exact_add(c3.hi, c7 * z4);
c3.hi = p.hi;
c3.lo += p.lo;
/* c2h, c2l <- c[4] + z*(c3h + c3l) */
let mut t = DoubleDouble::from_exact_mult(z, c3.hi);
let mut c2 = DoubleDouble::from_exact_add(f64::from_bits(c[4]), t.hi);
c2.lo += dd_fmla(z, c3.lo, t.lo);
/* compute c[2] + c[3] + z*(c2h + c2l) */
t = DoubleDouble::from_exact_mult(z, c2.hi);
let mut v = DoubleDouble::from_exact_add(f64::from_bits(c[2]), t.hi);
v.lo += t.lo + dd_fmla(z, c2.lo, f64::from_bits(c[3]));
/* compute c[0] + c[1] + z*(h + l) */
t = DoubleDouble::from_exact_mult(z, v.hi);
t.lo = dd_fmla(z, v.lo, t.lo);
v = DoubleDouble::from_exact_add(f64::from_bits(c[0]), t.hi);
v.lo += t.lo + f64::from_bits(c[1]);
Erf {
result: v,
err: f64::from_bits(0x3ba1100000000000),
} /* err < 1.80414390200020e-21, cf analyze_p(1)
(larger values of i yield smaller error bounds) */
}
/// Error function
///
/// Max ULP 0.5
pub fn f_erf(x: f64) -> f64 {
    const SIGN_MASK: u64 = 0x8000000000000000u64;
    /// Bit pattern of +Inf; anything larger (sign cleared) is a NaN.
    const INF_BITS: u64 = 0x7ff0000000000000u64;
    /// 2^106, used to scale tiny arguments out of the subnormal range.
    const TWO_POW_106: f64 = f64::from_bits(0x4690000000000000);

    let x_bits = x.to_bits();
    let abs_bits = x_bits & 0x7fff_ffff_ffff_ffff;
    let z = f64::from_bits(abs_bits);

    /* erf(x) rounds to +/-1 for RNDN for |x| > 0x4017afb48dc96626 */
    if abs_bits > 0x4017afb48dc96626 {
        if abs_bits > INF_BITS {
            return x + x; /* NaN */
        }
        let signed_one = f64::copysign(1.0, x);
        if abs_bits == INF_BITS {
            return signed_one; /* +/-Inf */
        }
        // Finite but large: +/-1 nudged toward zero by 2^-54 before rounding.
        return f_fmla(-f64::from_bits(0x3c90000000000000), signed_one, signed_one);
    }

    /* now |x| <= 0x4017afb48dc96626 */
    if z < f64::from_bits(0x3c20000000000000) {
        /* for x=-0 the code below returns +0 which is wrong */
        if x == 0. {
            return x;
        }
        /* tiny x: erf(x) ~ 2/sqrt(pi) * x + O(x^3), where the ratio of the O(x^3)
        term to the main term is in x^2/3, thus less than 2^-123 */
        let tentative = TWO_OVER_SQRT_PI.hi * x;
        /* scale x by 2^106 to get out the subnormal range */
        let scaled_x = x * TWO_POW_106;
        let mut scaled = DoubleDouble::quick_mult_f64(TWO_OVER_SQRT_PI, scaled_x);
        /* now compute the residual h + l - y;
        h-y*2^106 is exact since h and y are very close */
        scaled.lo += f_fmla(-tentative, TWO_POW_106, scaled.hi);
        // Scale the residual back by 2^-106 and add it to the tentative result.
        return dyad_fmla(scaled.lo, f64::from_bits(0x3950000000000000), tentative);
    }

    // Fast path on |x|, then copy the sign of x onto both words of the result.
    let fast = erf_fast(z);
    let sign = x_bits & SIGN_MASK;
    let hi = f64::from_bits(fast.result.hi.to_bits() ^ sign);
    let lo = f64::from_bits(fast.result.lo.to_bits() ^ sign);

    // Rounding test: if both ends of the error interval round to the same
    // double, that double is the answer.
    let left = hi + f_fmla(fast.err, -hi, lo);
    let right = hi + f_fmla(fast.err, hi, lo);
    if left == right {
        return left;
    }

    // Otherwise fall back to the accurate path and re-apply the sign.
    let accurate = erf_accurate(z);
    if x >= 0. {
        accurate.to_f64()
    } else {
        (-accurate.hi) + (-accurate.lo)
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_erf() {
assert_eq!(f_erf(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009456563898732),
0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010670589695636709);
assert_eq!(f_erf(0.), 0.);
assert_eq!(f_erf(1.), 0.8427007929497149);
assert_eq!(f_erf(0.49866735123), 0.5193279892991808);
assert_eq!(f_erf(-0.49866735123), -0.5193279892991808);
assert!(f_erf(f64::NAN).is_nan());
assert_eq!(f_erf(f64::INFINITY), 1.0);
assert_eq!(f_erf(f64::NEG_INFINITY), -1.0);
}
}

177
vendor/pxfm/src/err/erf_poly.rs vendored Normal file
View File

@@ -0,0 +1,177 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/// Lookup table of 94 rows, each holding 13 raw `u64` words.
///
/// NOTE(review): the words look like IEEE-754 binary64 bit patterns that the
/// consumer decodes with `f64::from_bits`; the consumer is not visible from
/// this file, so confirm against the erf evaluation code.
///
/// NOTE(review): presumably each row holds the coefficients of one local
/// polynomial approximation of erf, with the first two coefficients stored as
/// (high, low) double-double pairs (4 words) followed by 9 single-word
/// coefficients — verify the layout against the code that indexes this table.
///
/// `#[rustfmt::skip]` stops rustfmt from reformatting the table, so the
/// one-row-per-line layout is kept.
#[rustfmt::skip]
pub(crate) static ERF_POLY: [[u64; 13]; 94] = [
[ 0x3fbb0081148a873a, 0xbc2f0295f16ba5d8, 0x3ff1e565bca400d4, 0xbc962d0ac26c78d3, 0xbfbad8189af6013d, 0xbfd7712743c42914, 0x3faaafd4760d7634, 0x3fbba14988b4127e, 0xbf91afcdb244078a, 0xbf99d72ee25cf211, 0x3f719502f7beca8f, 0x3f73b955bfd46624, 0xbf4a4e2d4d32228b ],
[ 0x3fc662a0bdf7a89f, 0xbc4ef7bc5856c2d4, 0x3ff19e5e92b964ab, 0x3c6cca4dec08a640, 0xbfc605f63767bdd6, 0xbfd6582e9b69c9a9, 0x3fb5aa32b580ec64, 0x3fb97594c2593d3e, 0xbf9c69c62749fb7f, 0xbf96fa7f611aacdc, 0x3f7bf1e628a4606e, 0x3f70e50e4329e8a9, 0xbf568ca9c1954b4c ],
[ 0x3fcf190aa85540e2, 0xbc6e522ac9f718e6, 0x3ff135e3075d076b, 0xbc6e2d8ed30e4a48, 0xbfce1e4d4ce2ccfb, 0xbfd4c04e66e0d59b, 0x3fbd2855d59988e8, 0x3fb659a35f29781a, 0xbfa2cf6266a634c8, 0xbf92ef4180b1f3fa, 0x3f823199a6da60e3, 0x3f69e80d13a3368c, 0xbf5ba4e4eff641dd ],
[ 0x3fd3c9aa8b84beda, 0x3c538ec27d3e5820, 0x3ff0ae54fa490723, 0xbc9d016b7bc67433, 0xbfd2c41f99922807, 0xbfd2b900b640a201, 0x3fc1c6c7eef8fa14, 0x3fb277ad7822021e, 0xbfa66c9b2023b9df, 0xbf8bf7e7b4e8559e, 0x3f853005de4b5751, 0x3f60737c6ba405f0, 0xbf606ccc916b15dc ],
[ 0x3fd7e15944d9d3e4, 0xbc695f819cf77862, 0x3ff00abcf3e187a9, 0x3c85860d868dc542, 0xbfd60ec3cf561a89, 0xbfd05599bafe4ecc, 0x3fc451ef6280e70f, 0x3fac06c6e434be6f, 0xbfa8e2d73679096f, 0xbf80ea4a60550d9c, 0x3f86c911882cc99c, 0x3f48c65a9990353b, 0xbf61e8a88301a7b5 ],
[ 0x3fdbccfec24855b8, 0xbc7472ab1c2b898c, 0x3fee9d5a8e4c934e, 0xbc79a002a2814a72, 0xbfd8dfd9939e37af, 0xbfcb588d8dc5bb96, 0x3fc62338788aee97, 0x3fa26cf85bc6dff9, 0xbfaa1bcaa91da902, 0xbf65b4a7d42d0f64, 0x3f86edef7de2b68d, 0xbf4037b458e2da8c, 0xbf5e8d6001a54334 ],
[ 0x3fdf86faa9428f9d, 0x3c79996c0c376e32, 0x3fecfc41e36c7df9, 0xbc79be994724ea34, 0xbfdb2c7dc535b619, 0xbfc5a9de93f9c0d5, 0x3fc7317958d24aae, 0x3f9133e02ab7d777, 0xbfaa155bbde32db8, 0x3f672049c0cc8525, 0x3f85adde5c722d85, 0xbf5b0a7ec5dc80fc, 0xbf5aa9393b806535 ],
[ 0x3fe1855a5fd3dd50, 0x3c88f6964e67d61a, 0x3feb3aafcc27502e, 0xbc7a9dd26edea8a2, 0xbfdcee5ac8e9c531, 0xbfbfa02983c853d1, 0x3fc77cd75ec73100, 0xbf5fa6f82f9333b7, 0xbfa8e0db5528e559, 0x3f800bf7062212bc, 0x3f83319e670adc9f, 0xbf658833e091aa36, 0xbf58f99b6e81e8f5 ],
[ 0x3fe32a54cb8db67b, 0xbc696221f7e18978, 0x3fe96164fafd8de3, 0x3c70887f82841acc, 0xbfde23a7ea0d187e, 0xbfb3f5ee1564be49, 0x3fc70e469de06907, 0xbf93da6878ae6fd8, 0xbfa6a0d076468415, 0x3f88cf081f1fc304, 0x3f7f6d62866525e6, 0xbf6b93149d5701a4, 0xbf51a6c1a9f7ea73 ],
[ 0x3fe4b13713ad3513, 0x3c6e944ee1b212e4, 0x3fe7791b886e7403, 0xbc6da43cb53d911c, 0xbfdecef42310f844, 0xbfa15c3c5ce705df, 0x3fc5f6890affa468, 0xbfa1da642fabd4da, 0xbfa385991202c7eb, 0x3f8fa4f37fc7c6d4, 0x3f77156b4e430998, 0xbf6f546a4377d648, 0xbf432e4e5abb1e1a ],
[ 0x3fe61955607dd15d, 0x3c898ff39319ab83, 0x3fe58a445da7c74c, 0x3c808ec8e156809b, 0xbfdef6c246a12e7e, 0x3f7e83e0da030480, 0x3fc44cc65df8bfc7, 0xbfa87d3c8dd62c82, 0xbf9f9271a8a1d4e2, 0x3f9225234c1c0a0e, 0x3f6c0b0e055a0c48, 0xbf70585251f84919, 0xbf285bfb02436e0f ],
[ 0x3fe762870f720c6f, 0x3c8118b1ba6da9a7, 0x3fe39ccc1b136d5a, 0x3c5faa9371c0dd80, 0xbfdea4feea4e5add, 0x3fa715e595343353, 0x3fc22cdbdb4cdd0c, 0xbfada50ae547e69e, 0xbf975578f87f217d, 0x3f9353319c65f251, 0x3f539db53a2d03d5, 0xbf6fc0364ce17870, 0x3f3272bc18b0f2ce ],
[ 0x3fe88d1cd474a2e0, 0x3c86f571ada77d52, 0x3fe1b7e98fe26217, 0x3c7952bd607eb12e, 0xbfdde65a22ce0587, 0x3fb40686a3f3dc2b, 0x3fbf6b0cb6926c42, 0xbfb09c7caecd317d, 0xbf8da668f759eaea, 0x3f9364e72035e80a, 0xbf4d421975736447, 0xbf6cc98454e96141, 0x3f4a8860fdf17259 ],
[ 0x3fe999d4192a5715, 0xbc8c888a5759a92c, 0x3fdfc3ee5d1524b0, 0xbc527e60faac0278, 0xbfdcc990045b293f, 0x3fbb37338e6ac814, 0x3fba0d11fe9ba61a, 0xbfb19bb2ca3816ba, 0xbf7a0b7d94791f03, 0x3f9274a59774d5e6, 0xbf664adea7b36f57, 0xbf683684bd8ef173, 0x3f538905afd229ff ],
[ 0x3fea89c850b7d54d, 0xbc8e2752ebf0cd02, 0x3fdc40b0729ed548, 0xbc7c4c1c4927306d, 0xbfdb5eaaef09de9d, 0x3fc0847c7dad86af, 0x3fb47de0a4f796ca, 0xbfb1d9de8b54a3ec, 0x3f533252fb810c7c, 0x3f90ab3e329ded2f, 0xbf712d82076274ed, 0xbf6287bb4a78d728, 0x3f557d31bd574da0 ],
[ 0x3feb5e62fce16095, 0x3c7bc3cff4400364, 0x3fd8eed36b886d93, 0x3c7ea7e17b96436d, 0xbfd9b64a06e4b100, 0x3fc2bb6e2c74d4fe, 0x3fadee322c062364, 0xbfb169960d5a983d, 0x3f7feab4ad0bfc14, 0x3f8c76eb94b07a5f, 0xbf7584474ae8f994, 0xbf588df75be9251f, 0x3f54edef50317090 ],
[ 0x3fec194b1d49a184, 0xbc66770a58b27668, 0x3fd5d4fd33729015, 0xbc76db7d76e9e97b, 0xbfd7e0f4f0454d97, 0x3fc444bc66c35bc4, 0x3fa356dbb5432550, 0xbfb0643de6e8c574, 0x3f8b2e1f789415e4, 0x3f86ba6d9f4af32f, 0xbf78138bf4573a6a, 0xbf47e6e52a583322, 0x3f50f87322fa18a3 ],
[ 0x3fecbc54b476248d, 0x3c81a5083b01ec0d, 0x3fd2f7cc3fe6f423, 0x3c79fbb4b774e85d, 0xbfd5ee8429e30a49, 0x3fc52a8395f96270, 0x3f9313759f199499, 0xbfadcf844d90282c, 0x3f91e45f25ab54a1, 0x3f8091cb68a58665, 0xbf78ea40b0ac8b7b, 0xbee6b91b1bf985f2, 0x3f5158d9c0e1c327 ],
[ 0x3fed4970f9ce00d9, 0xbc756704209fca70, 0x3fd059f59af7a906, 0xbc70ce27da57f153, 0xbfd3eda354ddd5ff, 0x3fc57b85ad436067, 0x3f58e90c2a157e8d, 0xbfaa2893b28f4033, 0x3f94d6af4484a1cb, 0x3f74ccee8c8b1f57, 0xbf783304b9e2e312, 0x3f440cb679d0a832, 0x3f4d6b5f4bdef24b ],
[ 0x3fedc29fb60715af, 0x3c8ab029f047a087, 0x3fcbf8e1b1ca2279, 0x3be0426e10a38000, 0xbfd1eb7095e57e16, 0x3fc549ea6f7a013f, 0xbf8b10f20d110552, 0xbfa61420b5b34a55, 0x3f9677b7ea46c6f2, 0x3f624f9940ffd840, 0xbf76304445e5f6ca, 0x3f5222fabfa75bb0, 0x3f3fdcf55be3c03e ],
[ 0x3fee29e22a89d766, 0x3c8bcc9d569ed217, 0x3fc7bd5c7df3fe9c, 0x3c6488f3b06e1394, 0xbfcfe674493fde22, 0x3fc4a9feacf7e222, 0xbf9a0082c90a1b0d, 0xbfa1cf0e7655f99a, 0x3f96e3396f042620, 0xbf33a2d2cdd5650d, 0xbf7334add14b9a31, 0x3f57e12864580191, 0x3f3dae75c3e2be46 ],
[ 0x3fee812fc64db369, 0x3c83c66a6a23d9a5, 0x3fc3fda6bc016994, 0x3c6586ddaff31a18, 0xbfcc1cb27861fc79, 0x3fc3b1051230b982, 0xbfa1e645a2a638ff, 0xbf9b1f643b14fd89, 0x3f964297d7a66c20, 0xbf63e365adfbccae, 0xbf6f2aa2b3ef5ec2, 0x3f5b3339ee2c8c49, 0x3f20ef5710223110 ],
[ 0x3feeca6ccd709544, 0x3c6f3de8f1953470, 0x3fc0b3f52ce8c383, 0x3c6d1234b508bcfb, 0xbfc8885019f5df29, 0x3fc274275fc87eae, 0xbfa57f7386bfd263, 0xbf930769f45aaa8b, 0x3f94c8231709cfee, 0xbf70c2c99c75913f, 0xbf67514483efc090, 0x3f5c3ebcf121a533, 0x3eede2f1801b8480 ],
[ 0x3fef0762fde45ee6, 0x3c89c3612a14fb77, 0x3fbbb1c972f23e50, 0x3c5ba69c564971e1, 0xbfc5341e3c0177b6, 0x3fc107929f6e7528, 0xbfa7e1b362eacfe6, 0xbf873b61e487b8a9, 0x3f92aa763e0343a9, 0xbf759a388fd2272d, 0xbf5eea3c7f50e8de, 0x3f5b5026fd87d0ca, 0xbf30f2c660125dc6 ],
[ 0x3fef39bc242e43e6, 0xbc8dbae0fd9b967d, 0x3fb6c7e64e7281cb, 0x3c5aa87392dc4c20, 0xbfc2274b86833f6e, 0x3fbefb890e5b6633, 0xbfa92c7dbb880b5c, 0xbf74547708842f2b, 0x3f902047ab6c08c4, 0xbf7888355239e9ec, 0xbf50313bb85e86e1, 0x3f58ced9ddf3d834, 0xbf32d520499bd799 ],
[ 0x3fef62fe80272419, 0xbc8b7c2d17fc31d3, 0x3fb297db960e4f63, 0xbc522bea9385fad9, 0xbfbecb83b087b37b, 0x3fbbce18363bbbb9, 0xbfa985aaf97891cb, 0x3f3cd95f2aa8601a, 0x3f8ab9d43270d20f, 0xbf79b93410d46789, 0xbf29b530b472cadf, 0x3f552f54de527458, 0xbf36844d43c7d693 ],
[ 0x3fef848acb544e95, 0xbc8b27aa2c376c3c, 0x3fae1d4cf1e2450a, 0xbc4783e14555c1e9, 0xbfb9e12e1fde7354, 0x3fb8a27806de834f, 0xbfa91674e13a339a, 0x3f73bc75e8f9d448, 0x3f851b4d09ac47b8, 0xbf796dc7b5f9bd66, 0x3f3e16520532bde9, 0x3f50e742b323f434, 0xbf3ac319bfed91d4 ],
[ 0x3fef9f9ba8d3c733, 0x3c8cd5790ff03ab3, 0x3fa83298d717210e, 0x3c4740e2b04276bf, 0xbfb58d101f909971, 0x3fb58f1456f7db5e, 0xbfa808d17b33b814, 0x3f80c1bdce673b10, 0x3f7f5ff1c06e9df2, 0xbf77f26b8865f398, 0x3f4f87060e6f6460, 0x3f48c6056bea9223, 0xbf3e3499a90b84f5 ],
[ 0x3fefb54641aebbc9, 0xbc879975513f67e7, 0x3fa34ac36ad8dafe, 0x3c0902fb5363d360, 0xbfb1c8ec267fe9e2, 0x3fb2a52c5d83c050, 0xbfa68541b2c0582c, 0x3f85afe422155ad5, 0x3f756303c111cd8a, 0xbf7597ead749c06a, 0x3f557b0870a7b4cf, 0x3f3ffc0efb0ac024, 0xbf39e3ea349ab39e ],
[ 0x3fefc67bcf2d7b8f, 0xbc80d2748f976e8c, 0x3f9e85c449e377f3, 0xbc3cb7ccd2616394, 0xbfad177f166cce53, 0x3fafe23b75845cdf, 0xbfa4b120f9dde895, 0x3f88d9906d138bd5, 0x3f69201b7e469e83, 0xbf72aceacb2954f0, 0x3f58d4e8140dc518, 0x3f300a33f7e93047, 0xbf372b7adfeee575 ],
[ 0x3fefd40bd6d7a785, 0x3c860d45e630998f, 0x3f97f5188610ddc8, 0xbc360e8565137ecb, 0xbfa7954423f89a51, 0x3faaf5baae337ae6, 0xbfa2ad77b77d17dc, 0x3f8a7b8c4a8d53fe, 0x3f54593adc5d737a, 0xbf6ef1cf14455c9c, 0x3f5a1a04ce289b4b, 0x3f03d14f37840954, 0xbf350b861df174ee ],
[ 0x3fefdea6e062d0c9, 0xbc764c70f379f670, 0x3f92a875b5ffab56, 0x3c0531231987c3b8, 0xbfa2f3178cd7aa03, 0x3fa68d1c45b96efe, 0xbfa09648dd332653, 0x3f8ad8b148089c02, 0xbf2f00fa01e6ca19, 0xbf68718785b34600, 0x3f59a7b0da775387, 0xbf2090258ede6532, 0xbf2b3980b454d442 ],
[ 0x3fefe6e1742f7cf6, 0xbc8cebced8a49e04, 0x3f8cd5ec93c12432, 0xbc2bb85326a5eff3, 0xbf9e2ff3aaae31e4, 0x3fa2aa4e58242520, 0xbf9d049824fc44db, 0x3f8a34eda0fc336e, 0xbf5682d8d1801582, 0xbf6239bf51e17ea8, 0x3f57e761274bf059, 0xbf301e715d70d49f, 0xbf24d89f3d9c30d5 ],
[ 0x3fefed37386190fb, 0x3c872b1549ea44ee, 0x3f861beae53b72b7, 0x3bf401790f84b248, 0xbf97d6193f2417ad, 0x3f9e947279e4a43b, 0xbf99060301092cdc, 0x3f88d14d4bdaa7f4, 0xbf61f795ac880380, 0xbf59222edb6bd145, 0x3f553f95c7b01615, 0xbf3529b07d094e1d, 0xbf15b533d0382e20 ],
[ 0x3feff20e0a7ba8c2, 0xbc603f86c5a13f78, 0x3f80d1d69569b82d, 0xbc1a5e866bd1366e, 0xbf92a8ca0dc14852, 0x3f98cc071b719c43, 0xbf954a148886e917, 0x3f86e91361df3c9e, 0xbf665c02e0d08291, 0xbf4e94b0adc3b1ca, 0x3f5210781b57b089, 0xbf37b88f8c82fbff, 0xbf068df27e9a1688 ],
[ 0x3feff5b8fb26f5f6, 0xbc87e917ec20b615, 0x3f79646f35a76624, 0xbc1f771f32fd191b, 0xbf8cf68ed932f081, 0x3f93e8735b5b73b1, 0xbf91e1611aabcbea, 0x3f84afd8cd100d70, 0xbf68c72005b1cfcf, 0xbf3c6a7216b336aa, 0x3f4d577412afc2e2, 0xbf3836a0c0e10a99, 0x3eca8f39f410252a ],
[ 0x3feff87b1913e853, 0xbc73ca98afc58454, 0x3f730499b503957f, 0xbbfd1eabb1c04f50, 0xbf86496420203331, 0x3f8fa73d7eb1b70d, 0xbf8daa3005c2d3fe, 0x3f8250942c31c3ad, 0xbf6997578dc240a8, 0xbf03904177639e63, 0x3f46a6ed488a1f54, 0xbf371cf0c5789c7d, 0x3f043cb84231ab1c ],
[ 0x3feffa89fe5b3625, 0x3c8934b2bcb7f9a3, 0x3f6c4412bf4b8f0b, 0xbbcbbcc9dca4ec60, 0xbf8100f34713740d, 0x3f88ebda0768e8e6, 0xbf8850c68e8e5c3c, 0x3f7fdac8346071b3, 0xbf6929de70d00321, 0x3f310c7101bc52d8, 0x3f4070f7e89ec1e2, 0xbf34e4b3dcf4f08d, 0x3f0f0d43b9869b19 ],
[ 0x3feffc10194fcb64, 0x3c8ea14750ac9b59, 0x3f64d78bba8ca5fd, 0x3be4d9a93566b5b4, 0xbf79ba107a459ce4, 0x3f836f273fbd909b, 0xbf83b38708f7bef7, 0x3f7b3fdff1de2112, 0xbf67d55d55d262d8, 0x3f3eae5e05e74fcc, 0x3f35ebc1e53214a9, 0xbf31fd7c1cd5d63e, 0x3f149559a04c8568 ],
[ 0x3feffd2eae369a07, 0xbc683b09df7f7db4, 0x3f5e7f232d9e2630, 0x3bfa26ac725599e5, 0xbf734c7442de142b, 0x3f7e066bed09942f, 0xbf7f914f2c60b9bb, 0x3f76f4662f6be13b, 0xbf65e664591d6604, 0x3f43a1598d880f36, 0x3f2965b2e78a4544, 0xbf2d8db42b193729, 0x3f1449172919598e ],
[ 0x3feffdff92db56e5, 0xbc78aeef4ee0690a, 0x3f56235fbd7a4345, 0xbbe11380fe434056, 0xbf6cb5e029ba8f3d, 0x3f76fa4c7ef470e9, 0xbf7903a08305eeb0, 0x3f730f12c83fdb23, 0xbf639d769a774af1, 0x3f45d79439ceaefd, 0x3f15326883e7dfeb, 0xbf27199782285958, 0x3f147181c8911603 ],
[ 0x3feffe96a78a04a9, 0xbc82816fe4528f9b, 0x3f4fe41cd9bb4eee, 0x3bde3be508cae7ec, 0xbf652d7b2896626a, 0x3f716c192d8803dc, 0xbf739bfce9b4ecc2, 0x3f6f376a554e5dec, 0xbf612e67cb7aa486, 0x3f466d6e460b1614, 0xbed54f70e4bde32b, 0xbf210e125571fe1e, 0x3f12842d46eb9f29 ],
[ 0x3fefff0312b010b5, 0x3c8155dec9cdc96b, 0x3f46caa0d3582fe9, 0xbbc97d95851163fc, 0xbf5efb729f4be121, 0x3f6a2da7cec01564, 0xbf6e6c27ad2b1ce0, 0x3f693b1f34b17723, 0xbf5d8179cd2ad34f, 0x3f45cf51e0add9bb, 0xbf116d8f4b5119c7, 0xbf1768557564f5f5, 0x3f0f4fc9dde73f24 ],
[ 0x3fefff50456dab8c, 0xbc5a197a986f0de0, 0x3f40295ef6591848, 0xbbd262bd83520706, 0xbf5679880e93e5c4, 0x3f637d38e3a705af, 0xbf675b371a264745, 0x3f64231c3bfe3e65, 0xbf58e184d4921105, 0x3f445d5b5a7f77fa, 0xbf1bf8ece4afedd2, 0xbf0ccd677aaa82f7, 0x3f09e5241d5b6b15 ],
[ 0x3fefff86cfd3e657, 0xbc72e06adb26f84e, 0x3f36be02102b3520, 0x3bb448bcfd3cfe0c, 0xbf502b15777eb7c5, 0x3f5cc1d886874d5b, 0xbf61bff70664651d, 0x3f5fc0f76c943696, 0xbf54a22286622d3e, 0x3f4268887688a6e6, 0xbf20fa2692fd7da2, 0xbefcc13d1a82f742, 0x3f04153e6537aae5 ],
[ 0x3fefffad0b901755, 0x3c670d5c9a92b65c, 0x3f2fc0d55470cf51, 0xbbc6f2b03553d4c8, 0xbf47121aff59f6a1, 0x3f5506d6992fc8ff, 0xbf5ab596015fc183, 0x3f58bdd79a098723, 0xbf50d88da9deb868, 0x3f4031cdd07e4507, 0xbf222fc41430a37d, 0xbedb5cc9546afcec, 0x3efd7ea1c7b8fdb6 ],
[ 0x3fefffc7a37857d2, 0xbc797b30fd4b6b48, 0x3f25feada379d8b7, 0xbbc0546c4da57036, 0xbf405304df546ed8, 0x3f4e79c081b79ebc, 0xbf53e5dc1062db15, 0x3f530eb20ccc1f98, 0xbf4b1b06c20a060d, 0x3f3bd52fbd55e0ef, 0xbf2214afb8835b23, 0x3ee19ae9d16650a0, 0x3ef42d933ee154fd ],
[ 0x3fefffd9fdeabcce, 0x3c80c43c3bc59762, 0x3f1e3bcf436a1a95, 0xbba6458a28a3f9b6, 0xbf36e95311166825, 0x3f45e3edf674e2db, 0xbf4d5be6d15abe3a, 0x3f4d07da13e640c2, 0xbf458106cc648748, 0x3f376c840985e5eb, 0xbf2111de112b1a2e, 0x3ef315fc34053fbd, 0x3ee939439a75a553 ],
[ 0x3fefffe68f4fa777, 0x3c32f21786b76440, 0x3f149e17724f4d41, 0x3ba747684f0023e4, 0xbf2fe48c44d2ab81, 0x3f3f2bd95d72a532, 0xbf457389188a71a9, 0x3f45decc4058f7a1, 0xbf40d559cf0f2957, 0x3f33583904af6f83, 0xbf1efd7979333337, 0x3ef904cf9fa5c1f6, 0x3eda13a094bd56a2 ],
[ 0x3fefffef1960d85d, 0xbc8f7cc78053f6ad, 0x3f0be6abbb10a5aa, 0xbb9e50b219d40126, 0xbf260403819b22b8, 0x3f35fff1dde5305e, 0xbf3f0c93c73e7f42, 0x3f404cbf67af6c26, 0xbf3a04893510426c, 0x3f2f66b51a7bc4a0, 0xbf1b410d7f2fd319, 0x3efb99f9eb427956, 0x3ebf26fcffb14441 ],
[ 0x3feffff4db27f146, 0x3c8ddecdd5e1d408, 0x3f02bb5cc22e5db6, 0x3b9c5112eca8acde, 0xbf1e258948829ed1, 0x3f2ec8a8e59d9d5b, 0xbf36425722b9f3cd, 0x3f380a83a7103b4b, 0xbf33dbb9374004f9, 0x3f2913b301d37bde, 0xbf17563b0d94459f, 0x3efbc01eea9a10be, 0xbeb3df26463df6a5 ],
[ 0x3feffff8b500e77c, 0xbc71014e1f83ed4c, 0x3ef8f4ccca7fc90d, 0x3b9a5d4ec8b9de43, 0xbf1478cffe1cd2ed, 0x3f2559f04ad4de62, 0xbf2f9e163b15c466, 0x3f318bda8b8c1315, 0xbf2df381bd3c058e, 0x3f23b94f531bb6be, 0xbf1385f32481ed94, 0x3efa414bd2b7cb3c, 0xbecac2bbe30f8767 ],
[ 0x3feffffb43555b5f, 0x3c8c17f83b8d73a2, 0x3ef07ebd2a2d2844, 0x3b9d1bbdc704f49b, 0xbf0b93e442837f52, 0x3f1d5cf1514977f3, 0xbf263f5eb46877fd, 0x3f295a0411e668b1, 0xbf2652e5f2a88269, 0x3f1e950ddb7f5444, 0xbf0ffeb9383bdb3d, 0x3ef7c24392346fdd, 0xbed1f3b3254d7230 ],
[ 0x3feffffcf23ff5fc, 0xbc8b18a8b25039c4, 0x3ee5a2adfa0b4bc4, 0x3b8eb6d61aaaf95c, 0xbf026c8826ed9e85, 0x3f140473571d5383, 0xbf1f057dbf365c0a, 0x3f22217929fed933, 0xbf207324014ddb42, 0x3f1762758a56d654, 0xbf09ba250c662e90, 0x3ef4c25759179e3d, 0xbed3e800358f1a7b ],
[ 0x3feffffe0bd3e852, 0xbc5d7ece4ab53150, 0x3edc282cd3957eda, 0x3b6eb3cf4fd14280, 0xbef86ad6df7ba401, 0x3f0b0f313eeb65a6, 0xbf156e457745d637, 0x3f19ad1f65a78253, 0xbf17f92ad8542929, 0x3f11a5578c0d30b3, 0xbf04548d876bb0a3, 0x3ef19e60bf53b25a, 0xbed3f1745170e2d3 ],
[ 0x3feffffec2641a9e, 0xbc8e7ba4fdaaa8c8, 0x3ed22df298214423, 0xbb5a9d49552152a4, 0xbef00c902a4d5e27, 0x3f022234eb745941, 0xbf0d57a2be01db67, 0x3f1200c2ffad65f1, 0xbf1147585d43f49a, 0x3f0a4b07aec797e9, 0xbeff9d088bbeff64, 0x3eed2b2be4e42422, 0xbed2bb57c0cf2941 ],
[ 0x3fefffff37d63a36, 0xbc6753e3241c01b0, 0x3ec74adc8f4064d3, 0x3b6de8a904d5c372, 0xbee4ed4228b3da96, 0x3ef81918baca1979, 0xbf03e81c09c29601, 0x3f09004afed1bde9, 0xbf08a40e183ee3fc, 0x3f0359242a8b8c58, 0xbef834b953bcb845, 0x3ee79e345fb0b20d, 0xbed0bb2d323900ce ],
[ 0x3fefffff82cdcf1b, 0x3c8046bbe9897fd5, 0x3ebd9c73698fb1dc, 0x3b588de36481dfb5, 0xbedb11017e7d5893, 0x3eefc0dfadc2c6d6, 0xbefac4e1aa499ac6, 0x3f0131810ab2e2e3, 0xbf01629d94abc864, 0x3efc22a71036c259, 0xbef244452f74de31, 0x3ee2bf17664310c1, 0xbeccd1b31a8349be ],
[ 0x3fefffffb248c39d, 0x3c89b9a41713558c, 0x3eb2acee2f5ecdb8, 0xbb32d1692a9a105c, 0xbed15cc5700a2341, 0x3ee4be757b934819, 0xbef1d6ab6f8cbf7c, 0x3ef76c5a3035bdab, 0xbef847332578dfac, 0x3ef437f23f8d25ff, 0xbeeb305e625a092d, 0x3edd3886ff986fef, 0xbec81f2189b385a2 ],
[ 0x3fefffffd01f36af, 0xbc8d41915db812ef, 0x3ea75fa8dbc84bec, 0x3b3a5cd79572a1a6, 0xbec6186d9fc357c5, 0x3edae02322e08822, 0xbee79082befd50ca, 0x3eef9c26e211b174, 0xbef0c768235c378b, 0x3eecba7164e1064f, 0xbee3f75c28c31ac8, 0x3ed663fcfff77e44, 0xbec3a6da35f36ee6 ],
[ 0x3fefffffe2ba0ea5, 0xbc826cd7908cba2b, 0x3e9d06ad6ecdf971, 0xbb3020b74d9d30fb, 0xbebbe46aa879edb2, 0x3ed143860c49d129, 0xbededabcbc3e620d, 0x3ee52139c87e9c82, 0xbee6f567cd982028, 0x3ee42ebd266abd62, 0xbedcf2f0c6adfb3e, 0x3ed0e2c0ed67786c, 0xbebf50cb81b9b190 ],
[ 0x3fefffffee3cc32c, 0x3c7e429188c949b8, 0x3e91e1e857adc568, 0x3b32439f8a1649bb, 0xbeb1769ce59fb2c8, 0x3ec5fe5d47560794, 0xbed405da04875e51, 0x3edbfc96a938083d, 0xbedf19ff5e59cbe9, 0x3edc0c4d50d275bf, 0xbed4b9df120462ae, 0x3ec916640ee35de4, 0xbeb874483d99c37e ],
[ 0x3feffffff54dab72, 0xbc8a443df643729a, 0x3e85dcd669f2cd34, 0xbb1ceb1ec59e0c28, 0xbea5b11cbd1ee799, 0x3ebbc91a6b1c1839, 0xbec9c2c5d12dfa2c, 0x3ed25d1e3c70364f, 0xbed4dbe26c88e4f7, 0x3ed347bb8350b422, 0xbecd51d3280da8a0, 0x3ec25ed8e5b466b5, 0xbeb2b9c5d3390919 ],
[ 0x3feffffff99b79d2, 0xbc758ff1c425f8de, 0x3e7a854ea14102a9, 0xbb121745e4b4fcb3, 0xbe9aba593e8384ae, 0x3eb167c252a45678, 0xbec06d78ca0424a3, 0x3ec7e0f59fcfa53d, 0xbecbb4d48383b847, 0x3eca39f3ad9a397f, 0xbec47e836879c374, 0x3eba89244d14b829, 0xbeac33e15a6dbe37 ],
[ 0x3feffffffc355dfd, 0x3c688cb60fd4511c, 0x3e6febc107d5efab, 0xbaed9ed10902067c, 0xbe9055a3c70279a4, 0x3ea59ff37766e9a7, 0xbeb4c53adb9dcc4d, 0x3ebec49242997849, 0xbec23927ad6ac54f, 0x3ec1a6e0676c7463, 0xbebc5239f6a88a96, 0x3eb2e991308bf6fa, 0xbea4e276c09fe81b ],
[ 0x3feffffffdc4ad7a, 0xbc8d75de787812d4, 0x3e630f93c3699079, 0xbaf8f941ab38e9da, 0xbe83ce2f890bb01d, 0x3e9aa5010863c83b, 0xbeaa08ef1ca16360, 0x3eb3a4a6af3cafac, 0xbeb7be1e832218f0, 0x3eb784775c30c386, 0xbeb3593046482ce3, 0x3eaa9d448178fbfd, 0xbe9e77bb85451c65 ],
[ 0x3feffffffeb24467, 0x3c8bff89ef33d6dd, 0x3e56961b8d641d07, 0xbaf74a7fc97b1544, 0xbe77d2510f1f969d, 0x3e90476b165ac852, 0xbea02d3a3b9d195e, 0x3ea8db3567bef1df, 0xbeaea3ef4e3a126b, 0x3eaf03b0861a59ac, 0xbeaa250ca467705a, 0x3ea27e9995f6dfcd, 0xbe95e77b673c6d74 ],
[ 0x3fefffffff3e8892, 0x3c5befbf8d294678, 0x3e4a8e405e651ab7, 0x3ab167a2d8cf6b18, 0xbe6c6c40e5083698, 0x3e83ba47a17512fd, 0xbe93ee334beef6ec, 0x3e9f2bf9e6c43e99, 0xbea395c08ac8e281, 0x3ea43ee4b521ccad, 0xbea178f0deeb9b20, 0x3e9964e51b0f0532, 0xbe8f0cc4ecca5c2f ],
[ 0x3fefffffff90b2e3, 0xbc7d82d94a90f1e4, 0x3e3efac5187b2864, 0x3acf1301ae680614, 0xbe60d229044adeee, 0x3e77b5bc9db47d00, 0xbe88588212e670c2, 0x3e935f42db1989fa, 0xbe98cd98865c4ff0, 0x3e9a2b8587c48078, 0xbe971aa2de99af9c, 0x3e913a89805c15d9, 0xbe85b53ca1bcf01a ],
[ 0x3fefffffffc0748f, 0x3c66ef7a9caef280, 0x3e31edfa3c5f5ccb, 0x3ac368f60e2e6cfa, 0xbe53c025a6810c37, 0x3e6c42f78a0989ad, 0xbe7d7c6c3583c6e3, 0x3e87dd6ccb5c93b4, 0xbe8f1ec2f699fdcc, 0x3e90bf7a04407a8c, 0xbe8e3aafe6dfd4e0, 0x3e871bc3a55b63f4, 0xbe7df66b11724e7c ],
[ 0x3fefffffffdbff2a, 0x3c749438981099b2, 0x3e24979ac8b28928, 0xbacc2f44bcf3ce52, 0xbe47015eec37753a, 0x3e60b487791590cf, 0xbe71b44b64c3c995, 0x3e7d23ff3ef8dd83, 0xbe8357d673d1ccfc, 0x3e853a563ce0e9e3, 0xbe83921106a960f6, 0x3e7ea527d318f96e, 0xbe746bd6cea7103d ],
[ 0x3fefffffffebc1a9, 0x3c7e0e5facabfab4, 0x3e177756ec9f78fb, 0x3aae20366d0e0306, 0xbe3a9530780ca70c, 0x3e53962ecb10df65, 0xbe651494525dee64, 0x3e71a2961b90efb0, 0xbe77d35cd0b404bf, 0x3e7aa596d9d73afb, 0xbe791493d8d43ba2, 0x3e74184505343c2d, 0xbe6b7d977f1a3402 ],
[ 0x3feffffffff4b453, 0x3c859b25048a61cc, 0x3e0a887bd2b4404f, 0xba82556d8ad4dd44, 0xbe2e78be33fb01da, 0x3e46c6ef0b68629e, 0xbe58e36e9a44c497, 0x3e65286ee37c531e, 0xbe6d146395886537, 0x3e7090902855d5f0, 0xbe6fd0d1e8fcb6df, 0x3e6a10f65c3c5a7b, 0xbe624888c323daf3 ],
[ 0x3feffffffff9bec8, 0xbc76755054654b62, 0x3dfdc479de0ef004, 0xba9c3434581af3b8, 0xbe21535aee3eb1b2, 0x3e3a4547ed264758, 0xbe4d2308d0dead0f, 0x3e5929d46a9a7edc, 0xbe6195dbfd4afd19, 0x3e646630f49ccd2f, 0xbe63fa4637c64ebc, 0x3e60b98a6e0cfc02, 0xbe58093f032972f3 ],
[ 0x3feffffffffc901c, 0x3c69c951c943961c, 0x3df0916f04b6e18d, 0x3a81bdf9650721ea, 0xbe138b90f78fbe14, 0x3e2e0d7765326885, 0xbe40e9760d0ac127, 0x3e4daad91166722d, 0xbe5513c51b9838ed, 0x3e58e27fb85ba534, 0xbe58d6f6bd99eaff, 0x3e553c31e52fff08, 0xbe4f3bfd31796bc0 ],
[ 0x3feffffffffe202d, 0x3c8a54841f566a61, 0x3de24caf2c32af16, 0x3a802e3358112fa1, 0xbe05dfa962d49548, 0x3e210ca1ff2af812, 0xbe3377c7e98dd9b4, 0x3e4156649e0b5dd2, 0xbe49092f4db426c5, 0x3e4e12a29b227972, 0xbe4e94e18d5271a9, 0x3e4aae38927ee69b, 0xbe441121b0293be1 ],
[ 0x3feffffffffefc57, 0xbc68225a9658ef84, 0x3dd40dfd87456f4f, 0xba7a6d5c55f8e63b, 0xbdf848f101ce14c8, 0x3e132fed47f8dd28, 0xbe2638ff4a6975f2, 0x3e3416d25168a6b8, 0xbe3d78fb22f58668, 0x3e42009c6b4e61ea, 0xbe42a459e59c850b, 0x3e4096a3e8dac0ea, 0xbe397fba69de37d8 ],
[ 0x3fefffffffff748e, 0x3c6ae15e36044aac, 0x3dc5ce9ab1670dd6, 0x3a4cc9bbfb723fc4, 0xbdeabf69bd9866f7, 0x3e056ae1e8abbbbf, 0xbe1927ca04d1a7a8, 0x3e2713d3b07d7a36, 0xbe31318f5d7d717b, 0x3e355ab94fdfd1f4, 0xbe368216fb90717a, 0x3e346ad5ce577d65, 0xbe30065a20073e81 ],
[ 0x3fefffffffffb5b0, 0xbc850fb19119064f, 0x3db7872d9fa10ab2, 0xba57760afdf543a4, 0xbddd39eaac4a0b47, 0x3df7b67ab8af33d6, 0xbe0c3ced54e694ea, 0x3e1a4875d8a47f12, 0xbe23e213e6f5c296, 0x3e2919137301f897, 0xbe2aea6bd9b34930, 0x3e28e06e4ab5925f, 0xbe23ed1d979421b2 ],
[ 0x3fefffffffffd8b3, 0xbc65182469c211e0, 0x3da92ff33023d5c3, 0xba42932180032bd1, 0xbdcfae4fe28d12dd, 0x3dea0a80964d6e97, 0xbdff6f47be478e2a, 0x3e0dad968cdacb13, 0xbe16ca68a8bfdb81, 0x3e1d3a79e5305b4a, 0xbe1fe1534ebf69c7, 0x3e1e01ee76d92779, 0xbe1883ed9069f3fd ],
[ 0x3fefffffffffeb60, 0xbc74d3f53e684bf8, 0x3d9ac0f5f322937a, 0xba38e8ab19224e58, 0xbdc108dc99cf03e5, 0x3ddc5db17016a0c6, 0xbdf159f41ea079c3, 0x3e009ced3e9b7204, 0xbe09e4dace066800, 0x3e10dd5e0e9749b6, 0xbe12b3aa6599d0b5, 0x3e11eb5e8e4ffe8f, 0xbe0dd8955967ed31 ],
[ 0x3feffffffffff542, 0x3c6b57ed63ed8110, 0x3d8c324c20e337e5, 0x3a253fd8abf42ed9, 0xbdb22c6b11327305, 0x3dcea5f66f89cbd4, 0xbde2ff1e0a81bedc, 0x3df270ddbd8e501f, 0xbdfd2992b5c25c93, 0x3e03492d76bdf266, 0xbe05bc7361853dde, 0x3e053121ae3f1d2e, 0xbe01fb0f7e3f242b ],
[ 0x3feffffffffffa73, 0xbc76fead614b7934, 0x3d7d7c593130dd16, 0xba08e78574fe0514, 0xbda33c1e2f16e037, 0x3dc06c53fdc74764, 0xbdd4a029a87915ac, 0x3de44bd86238ff0d, 0xbdf0474ac3a80072, 0x3df5db2a89e9bc47, 0xbdf906f4b51a7f75, 0x3df8d189784c1f50, 0xbdf571a4760f483d ],
[ 0x3feffffffffffd27, 0x3c719e1a84064c56, 0x3d6e9810295890f9, 0x3a0f998d55766fdb, 0xbd943262ab4b77b2, 0x3db1756eae580a28, 0xbdc6359d5b0d251e, 0x3dd626391bd58994, 0xbde203efc6c9f556, 0x3de88c0b111be900, 0xbdec8ca211a38811, 0x3decc911f684d612, 0xbde950e3edf09a71 ],
[ 0x3feffffffffffe8d, 0x3c5e766e2c801398, 0x3d5f7f338086a87b, 0xb9ddfa0c27b527e0, 0xbd8509f766d9f287, 0x3da268e278ede221, 0xbdb7b7b43e9a1b0e, 0x3dc7f7aadab6b398, 0xbdd3c3cc6aafba0b, 0x3ddb52c69b4ab6de, 0xbde0222c438d1182, 0x3de0888e14314f83, 0xbddd96aaea63b362 ],
[ 0x3fefffffffffff45, 0xbc85948eec884df5, 0x3d501647ba79874e, 0x3986d5d39dabc300, 0xbd75be1cf20840dc, 0x3d93418096320daf, 0xbda91e9beb94b447, 0x3db9b762261756a7, 0xbdc57f320a630c91, 0x3dce24b78ce82b11, 0xbdd2112fff5c77aa, 0x3dd2cfdd93a41786, 0xbdd11ea1f35b4d2b ],
[ 0x3fefffffffffffa2, 0x3c6d07509a1a9440, 0x3d404e15ecc7f401, 0xb9d0858e34f7a6a6, 0xbd664ac1f9b95f96, 0x3d83fa8302ade993, 0xbd9a62b70897719e, 0x3dab5c619266e9f0, 0xbdb72de32129cbb8, 0x3dc07ae94305c398, 0xbdc40c45a9e95152, 0x3dc533d127efdf16, 0xbdc39dc242ba4cda ],
[ 0x3fefffffffffffd1, 0x3c83b6fc0b729759, 0x3d3065b9616170e1, 0xb9c49459f5147526, 0xbd56acaa58a8be12, 0x3d748fb92d0947e7, 0xbd8b7ce1a1ea8ea5, 0x3d9cddc552bbebeb, 0xbda8c751cc1a5784, 0x3db1dc79b52007b0, 0xbdb60b3d17e7714c, 0x3db7ac1d379afc28, 0xbdb641ca84798564 ],
[ 0x3fefffffffffffe9, 0xbc55fe91226dd510, 0x3d205ca50205d279, 0xb9c7a281f9edb8e6, 0xbd46e18ec0d42451, 0x3d64fdb051100a15, 0xbd7c66b3f3fe565e, 0x3d8e331281475b54, 0xbd9a42e6965b2b9a, 0x3da3301ef4931960, 0xbda804fcc1524d74, 0x3daa2ef0c13a3daa, 0xbda9028a915f98d3 ],
[ 0x3feffffffffffff5, 0xbc8238f8ed17d9b3, 0x3d10330f0fd69931, 0x39ba2c00e0c6dcba, 0xbd36e8334c65749d, 0x3d5541d561058477, 0xbd6d1ac042ada69e, 0x3d7f54864c5a530e, 0xbd8b984c73c1d301, 0x3d946ec7009c291f, 0xbd99efc2df737760, 0x3d9cb12ac38f37ca, 0xbd9bd54fcd67b8d4 ],
[ 0x3feffffffffffffb, 0xbc8efa4d64f59f62, 0x3cffd3de10d6287a, 0xb99e1fdae91c5cfe, 0xbd26c073be0916e6, 0x3d455a8eab9e129a, 0xbd5d94c87c1bc304, 0x3d701db0818bec24, 0xbd7cbfbe4c0ef6ee, 0x3d859179d8c519c4, 0xbd8bc172710440bd, 0x3d8f26a4f726814e, 0xbd8ead889e052555 ],
[ 0x3feffffffffffffd, 0x3c86be96953fe014, 0x3cef05e82aae2be2, 0xb98070a8237b4337, 0xbd166b44c6d7ddb6, 0x3d35474bd9d072f3, 0xbd4dd1e8c33100cc, 0x3d60711486984913, 0xbd6db2522b66a6ce, 0x3d76919a06329739, 0xbd7d6fe8f87926e8, 0x3d80c1488010ff5c, 0xbd80bf9fa407e9ab ],
[ 0x3fefffffffffffff, 0xbc80fecc5ed770de, 0x3cde00e9148a1d52, 0x394f7a503c7a2ad8, 0xbd05eaaa4200e355, 0x3d25088b6566fced, 0xbd3dd0b48e0f634e, 0x3d50a27116d7478e, 0xbd5e6a3e1d5c214f, 0x3d6769249755a4bc, 0xbd6ef16049050b69, 0x3d71dbf2744f66db, 0xbd721c636bd8f5a9 ],
[ 0x3fefffffffffffff, 0x3c8989c6c5d51227, 0x3ccccaaea71ab110, 0x394152f323a1f3b4, 0xbcf541a2f15eb476, 0x3d149fd53e85cdf3, 0xbd2d9144beee6b4a, 0x3d40b09b02f533a1, 0xbd4ee312fcf48076, 0x3d5812ed2f01f60a, 0xbd601e6391f47ad7, 0x3d62dce8f6b8c896, 0xbd6365d5011db0df ],
];
#[rustfmt::skip]
pub(crate) static ERF_POLY_C2: [[u64; 27]; 47] = [
[ 0x3fcac45e37fe2526, 0x3c648d48536c61e3, 0x3ff16e2d7093cd8c, 0x3c9979a52f906b4d, 0xbfca254428ddb453, 0x3c69c98838a77aea, 0xbfd59b3da8e1e176, 0xbc41f650c25d97b0, 0x3fb988648fe88219, 0xbc55aecf0c7bb6c1, 0x3fb803427310d199, 0xbc5a14576e703eb2, 0xbfa09e7bce5592c9, 0x3c3eb7c7f3e76998, 0xbf9516b205318414, 0xbc2941aa998b1fa4, 0x3f8038d3f3a16b57, 0x3f6e19d52695ad59, 0xbf59542e7ed01428, 0xbf41f9b6e46418dc, 0x3f30796a08a400f4, 0x3f12610d97c70323, 0xbf025d31d73f96d1, 0xbee05e1fa9e02f11, 0x3ed1e616f979139c, 0x3ea9b3d54f1f222a, 0xbe97ad96beea439a ],
[ 0x3fd5da9f415ff23f, 0xbc4a72e51e191950, 0x3ff05fd3ecbec298, 0xbc9f17d49717adf8, 0xbfd477c8e7ee733d, 0xbc792236432236b7, 0xbfd1917b60acab73, 0x3c7c06e6c21b4b3b, 0x3fc322a728d4ed12, 0x3c3ffa8aef321410, 0x3fb04c50a9cd2c12, 0xbc4edd0562dce396, 0xbfa7ce764eeddd86, 0x3c29afeb391c029c, 0xbf868aac5801171d, 0x3c24f9655411fc03, 0x3f862aa895f51cd3, 0x3f56c003c3cedb10, 0xbf6079502dbbafff, 0xbf1d9c7cbb799b47, 0x3f345a995aede3f4, 0x3eb0c04ea8c98fc9, 0xbf057edfa53128d0, 0x3eba96286bf3ef56, 0x3ed3c8ab12e6d24b, 0xbe97454eba0cb203, 0xbe8f02a6f6847617 ],
[ 0x3fddb081ce6e2a48, 0xbc77ff0a3296d9cb, 0x3fedd167c4cf9d2a, 0x3c844f2832f90a97, 0xbfda173acc35a985, 0xbc3c5432c9a22740, 0xbfc889a80f4ad955, 0xbc6f6123bf467942, 0x3fc6c2eea0d17b39, 0xbc61f4935c3cf5b1, 0x3f9b0645438e5d17, 0x3c37a5f08ebaf9d0, 0xbfaa3fd9fcbb6d6d, 0x3c494a1b58b5916f, 0x3f2060b78c935b8e, 0x3bb9cec375875a1c, 0x3f8678b51a9c4b0a, 0xbf51e03bfc8eebb4, 0xbf5e653535cab33f, 0x3f355f31366d2c5c, 0x3f30dcf1445cbb88, 0xbf1098913ad4dcc7, 0xbeff6e252329eeed, 0x3ee41ad0a5afe51d, 0x3ec8fd4609222f1c, 0xbeb4465926de1a35, 0xbea407a1f42b46d4 ],
[ 0x3fe25b8a88b6dd7f, 0x3c89534a3b5bd215, 0x3fea5074e2157620, 0x3c7fad8c0ef6fae6, 0xbfdd9a837e5824e4, 0xbc71d19ec86adc7c, 0xbfb9c41d1d5fae55, 0x3c374c230d6afba4, 0x3fc75bebc1b18d1c, 0x3c501ece95d4dffc, 0xbf86410ad9332666, 0xbc216523d167a40c, 0xbfa7df8890b11fa7, 0x3c4d6a99d1387564, 0x3f84a54816d3608a, 0xbc2f810ad06699cc, 0x3f818f36eb18f3d7, 0xbf68d661c030e174, 0xbf53628ede23e249, 0x3f4438eb2b3c4d27, 0x3f1fd3c13e725e91, 0xbf1991b866a32c87, 0xbee1237c600dab6f, 0x3eea9c701140d4c0, 0x3e71801e61adfdda, 0xbeb785516863e6ce, 0xbe83e033ef590125 ],
[ 0x3fe569243d2b3a9b, 0x3c78eef7012e8df4, 0x3fe681ff24b4ab04, 0xbc5dba6493354c70, 0xbfdef2bed2786b25, 0xbc7ae3f6b6b2b679, 0xbf8a4254557d722f, 0xbc20ff7bffd10053, 0x3fc532415c267962, 0x3c62eacc4bd2e841, 0xbfa558b4c55a835c, 0x3c3c21c40815d70a, 0xbfa1b7ad5b777f1b, 0xbc42115b2bd8d644, 0x3f91201d3bd0e758, 0xbc0b39b845442560, 0x3f72995e3a88a890, 0xbf70294c3e93cdb0, 0xbf3159644a564f83, 0x3f463daf9b3858ef, 0xbf03beeb4a1255ac, 0xbf180c5178c36c72, 0x3eed4f6f5bab7dfa, 0x3ee521deb6d2f46e, 0xbec4ef3208231a8b, 0xbeae7d2b4e06e4a2, 0x3e921536d5b8bdf9 ],
[ 0x3fe7fb9bfaed8078, 0x3c766cf14bcad032, 0x3fe2a8dcede3673b, 0xbc77378e2c70325e, 0xbfde5267029187c0, 0x3c7add23841b110a, 0x3fafe0796bb9d05a, 0xbc37a992e13ce574, 0x3fc0fa23021ad0ac, 0xbc417f4228359928, 0xbfafa21ebca76761, 0x3c3278ca2820f66c, 0xbf931546d5c4edb4, 0x3c136fcf151892a0, 0x3f937e5469efb7a6, 0xbc39553630321d4f, 0x3f2097966e2e87ea, 0xbf6e82ab020887a7, 0x3f4318270c11ae74, 0x3f412652e433da97, 0xbf24dc9bd6368bb8, 0xbf0c441138d4ff53, 0x3efc91d8dc5b66ec, 0x3ecf3ba57b86d474, 0xbecdd3403d11a818, 0xbe731f497a106a7c, 0x3e9436dbcc93d342 ],
[ 0x3fea1551a16aaeaf, 0x3c6a558a46df5f68, 0x3fddfca26f5bbf88, 0xbc6ddcbaf85587b6, 0xbfdc1cd84866038f, 0xbc7200885b97f453, 0x3fbe4c9975da0987, 0xbc5f162e7576c79c, 0x3fb747e31bf47af3, 0xbc56178f12d62ed9, 0xbfb1d1f00109e42a, 0x3c5002b06e023544, 0xbf647654175ceb42, 0x3bd683389ccacfa8, 0x3f91a817c594b8cb, 0x3c336ac477166efb, 0xbf6cb8acd699cca6, 0xbf657b72bf874db6, 0x3f524493dca8b6fa, 0x3f2f556774c6aaf6, 0xbf2b09ec5c8ba626, 0xbed09bd1a09f38e8, 0x3efd149c3e776976, 0xbec8f7c2a6575e92, 0xbec8391d4afaf16a, 0x3ea5a7552081d1d5, 0x3e932d1bb2d1d0ca ],
[ 0x3febbef0fbde6221, 0xbc8322c1148e0d48, 0x3fd75a91a7f4d2ed, 0x3c56eb826a9df85c, 0xbfd8d03ac274201c, 0x3c57a5c56eb7f6a0, 0x3fc3954778d6a0df, 0xbc5863eca74d1838, 0x3fa88e0f7b183fc6, 0x3c4226527d05ce39, 0xbfb0f7c15f75ee13, 0xbc156f74f3513660, 0x3f85e22cfa1aab51, 0x3c24b49a250c6474, 0x3f89ad28c5557c22, 0x3c299920b730ecd5, 0xbf7704ec5d29fc83, 0xbf523360304f19ba, 0x3f543ca3fcdf079d, 0xbf0dcb97a9e04bd4, 0xbf2735e26c43d267, 0x3f0360c3b06ffbb4, 0x3ef29a6b5798e781, 0xbeddbc35e4cf98f5, 0xbeb2f6e8e81287bb, 0x3eaeeb2fdddad355, 0x3e81ae65e387ac52 ],
[ 0x3fed0580b2cfd249, 0x3c84fca6318dfee9, 0x3fd1a0dc51a9934d, 0xbc6ca89d2d78fba4, 0xbfd4ef05a0f95eeb, 0xbc65f7c55a00231c, 0x3fc5648b5dc47417, 0xbc6fb8fa09976e07, 0x3f840fbaba44504c, 0x3bf435c75f61f1e0, 0xbfac0db89d0a41a4, 0xbc11dd02d9441b98, 0x3f9388c3ec056942, 0x3c38e7498172c914, 0x3f7aecb7463cf446, 0xbbe0d6701a009d70, 0xbf78bca53327e075, 0x3f34add4a8239f4a, 0x3f505ce4abd10484, 0xbf3183f198a0b620, 0xbf19cd1a9b9fc69b, 0x3f0d30363021af83, 0x3ecda66f2161c4c6, 0xbedf41f1f238827d, 0x3ea725a07b1177b7, 0x3ea84c3b2483eb6a, 0x3e6e30e89d6e85cd ],
[ 0x3fedf85ea8db188e, 0xbc8f71e8254d11a9, 0x3fc9cb5bd549b111, 0xbc4973e73caa1edc, 0xbfd0ed7443f85c33, 0xbc574bf040302ad8, 0x3fc5066cda84bba9, 0x3c4beb86d9e281a8, 0xbf9419fa10b6ed7d, 0x3c35157491034c58, 0xbfa3f41761d5a941, 0x3c494a1c1f7af153, 0x3f96d1d724baaae4, 0x3c3c41090a704426, 0x3f4e377f5703f7ff, 0xbbea753be0c53963, 0xbf74cc916ad63c27, 0x3f5553ef0d12719f, 0x3f426240f55987fd, 0xbf36bbf0fffb7138, 0xbee320cf6663c40d, 0x3f0a9d4850aaa197, 0xbee17036c4011c91, 0xbed441ea26a91a02, 0x3ebd81eb8e2ef452, 0x3e917d7b798a4322, 0xbe4b7b0dfb2559d0 ],
[ 0x3feea7730ed0bbb9, 0x3c82c5bd7ce1388b, 0x3fc24a7b84d38971, 0x3c6aa0c5e788ed5e, 0xbfca4b118ef01593, 0xbc3238e3e6a99de0, 0x3fc319c7a75f9187, 0x3c4a8f8fff24b0ac, 0xbfa3db5bed47faf6, 0x3c429cf699c8512c, 0xbf97019bda6c2fdd, 0x3c1dd56b84622d88, 0x3f959d3aa402c32e, 0xbc08de701f1e95e8, 0xbf6b324eab9c87a9, 0xbbec3a4329771a44, 0xbf6b4774d37d0dd6, 0x3f5c01377485a844, 0x3f1a5db5f627539b, 0xbf340d9c429b8932, 0x3f0e720d935ef7db, 0x3effc8295ac052de, 0xbeed1ccde95c6551, 0xbeb251c256ca45cb, 0x3ebe892cc5397b1b, 0xbe88f6831febdf3d, 0xbe9aa5ef30a52421 ],
[ 0x3fef21c9f12f0677, 0xbc57efe429672268, 0x3fb92470a61b6965, 0x3c5c6acd40cee352, 0xbfc3a47801c56a57, 0xbc6033705aa16f01, 0x3fc0453f90d3bd35, 0xbc6686e281ba5405, 0xbfa8a7c6a239217b, 0x3c32a988808a7222, 0xbf8075c088031ee3, 0xbc1665bd0a645f40, 0x3f916f9c9c127b80, 0xbc1e1813af47374c, 0xbf774c2fc9bdfe97, 0x3c15cf2dbe53783b, 0xbf5760c522bd5bec, 0x3f5a3cdb656adb44, 0xbf302c3c1ab0a7ba, 0xbf292892013c7e15, 0x3f16e7b268d42034, 0x3ed970751eb9359f, 0xbeeb00b549bbdf58, 0x3ec033f8545bcc6a, 0x3eb2d8b6f0a2204a, 0xbe9c1c1335b105c5, 0x3e661bbb2d003b8a ],
[ 0x3fef74a6d9a38383, 0x3c8c33a329423946, 0x3fb0bf97e95f2a64, 0xbc5446051f6fef82, 0xbfbc435059d09788, 0xbc4b93aeb5e5cf84, 0x3fba3687c1eaf1ad, 0x3c564513fb767a13, 0xbfa9647a30b16824, 0xbc486357831221be, 0x3f66981061dfbb09, 0xbbfccc83193c8742, 0x3f87e8755da47040, 0xbbec1eaeb3371490, 0xbf79be731fdab95d, 0xbc0ab79fedbfccd2, 0x3f23a95ae0a75542, 0x3f5319f780e962d8, 0xbf3b88dd51a4f261, 0xbf1037f168a8f581, 0x3f153fc5e83e3199, 0xbee9d5bf30917222, 0xbee03045c999d17a, 0x3ecb5d376e96179f, 0x3e8c66d2e5aa2274, 0xbe9aef24a52bcaca, 0x3e7b20b678e8a0c6 ],
[ 0x3fefab0dd89d1309, 0xbc8ae61bd9db1bab, 0x3fa5a08e85af27e0, 0x3c4e4f9cfc8c2382, 0xbfb399812926bc23, 0xbc5b782644df6665, 0x3fb4140efb719cb0, 0x3c308fa5a48311e8, 0xbfa7535a61a4193d, 0x3c359e0501c376b2, 0x3f8374c88c7e6abd, 0x3bfc2578bd7e3f00, 0x3f7a40709e010e77, 0xbbf18c33197d9138, 0xbf76dc078888efa7, 0x3c02b49da4c86c70, 0x3f52ee6d200993b0, 0x3f444f175e22a161, 0xbf3c2fb051c92f92, 0x3f0523035ed3964b, 0x3f0bc7b666856fc1, 0xbef574549f39ee50, 0xbebc57f3c47b39d9, 0x3ec8acc76ac31fcd, 0xbe9f70e8b7deaa9a, 0xbe8e1a28a0c1a6a6, 0x3e6bfa0e5b606c5e ],
[ 0x3fefcdacca0bfb73, 0xbc82c33d88729e43, 0x3f9b1160991ff737, 0xbc2d940a504353bc, 0xbfaa38d59456f77d, 0xbc1d625808eb9778, 0x3fad5bd91b6b0123, 0x3c222b86f5e3e16c, 0xbfa3b35dcbc80146, 0x3c482838d776d958, 0x3f89d76b0a0535c7, 0x3c260fda06bca0a0, 0x3f614c887a83a0e6, 0x3be55ef222558d68, 0xbf7117f42cc6e9f4, 0x3bed4213a7e14a18, 0x3f59b477bdad8e08, 0x3f21d219fb0e1bc8, 0xbf35bb59d3ca4fa9, 0x3f18ca373c577821, 0x3ef4a9b74153a4a3, 0xbef424a8a8831410, 0x3ec6ce0877965abc, 0x3ebc1ed3c11b1dd1, 0xbea86b0a731d831a, 0xbe55cea3996396c5, 0x3e9640950bde5eb3 ],
[ 0x3fefe307f2b503d0, 0xbc68a555000387f8, 0x3f906ae13b0d3255, 0xbc388abd7f4be982, 0xbfa0ee3844e59be7, 0xbc40b0ec94b96d83, 0x3fa48b127f8ed8a5, 0x3c3b6a1f18c2c162, 0xbf9f155b4e7d8c3b, 0x3c3adb2d99b0c1fc, 0x3f8aa2c0753d569a, 0x3c29a37b9864b8e6, 0xbf4bbf7e2795837b, 0xbbe4784a66288abf, 0xbf65478d784d271c, 0x3be27115917a7ec0, 0x3f58eae08cdf9546, 0xbf292946556037e6, 0xbf290f27ae61444c, 0x3f1b076b78538f02, 0xbedb2906f1b92d5d, 0xbeea2f66822d4a01, 0x3ed3031c4f7c4a97, 0x3e941708ced2abd0, 0xbea45ffd6deae2a8, 0x3e7e844ebdc8456a, 0x3e7c0bbf2b711595 ],
[ 0x3fefefcce6813974, 0xbc5b27cf5025d1c8, 0x3f834d7dbc76d7e5, 0x3c23780d6e7eb351, 0xbf951cc18621fc23, 0x3c2969629e4b64a6, 0x3f9b925a99886bb7, 0x3c29c8f65efdd1f4, 0xbf971e7d408c8c6f, 0xbc3c5621deaf4cfc, 0x3f87ea58080a81ef, 0xbc22f25b7f384ff3, 0xbf646eb9d203e071, 0x3beff569e38360a4, 0xbf5403333682fa5e, 0xbbe36256a95953a6, 0x3f53b37d5bd14a40, 0xbf36be130822dbdf, 0xbf103d4bcdafd553, 0x3f155848476c8142, 0xbef5492bf3c6eee6, 0xbed3823d4328e9c5, 0x3ed152fefc353e5a, 0xbea5199dbf7bc4c6, 0xbe94dda2bebe08f2, 0x3e83fb850b47210a, 0x3e6bcd1b284c4798 ],
[ 0x3feff733814af88c, 0x3c70a87238cea4fa, 0x3f75ff2750fe7820, 0xbc15f184847ca667, 0xbf896f0575a63ae5, 0x3c295f4139297a96, 0x3f91c5a643f04363, 0xbc16ea87997fba3c, 0xbf904f5caaf2196f, 0x3c119502347d3b54, 0x3f8382a146afb9d2, 0xbc0f93bde902d2d0, 0xbf695cab93aa68d2, 0x3be0f716a5fc18c4, 0xbf2d2fd90fe62928, 0xbbb4e00d5fcc484a, 0x3f49f50fb94c0b86, 0xbf37d7378074399b, 0x3efcc0c9cb9ede1e, 0x3f092a3a29471895, 0xbef7c127858c909a, 0x3eb5a72fde935a48, 0x3ec57b9d90a92106, 0xbeafdb8443754cf7, 0xbe56c7d633eab55a, 0x3e7ddcfc714a2b67, 0xbe89fedf738e84b4 ],
[ 0x3feffb5bdf67fe6f, 0x3c14e830346f6e80, 0x3f684ba3004a50d0, 0xbbf90b93d4632206, 0xbf7d9c2ea85a927d, 0xbc10bcf1ea93cfdc, 0x3f860898536e104a, 0xbc1ab6aa911c445e, 0xbf85eb1c899f0b70, 0x3c21bc22eed1f1fb, 0x3f7d854f73e74c87, 0x3c07a977a3364c40, 0xbf6897719a9d257e, 0xbbeab523e3f93994, 0x3f388cdc8b807c97, 0x3b94875acc7c06a0, 0x3f3b325a11c1f45a, 0xbf3381548f692740, 0x3f12b1fd05559bfa, 0x3ef1ed31cd6feb26, 0xbef29cf593fdf00a, 0x3ed1cea99b59228c, 0x3eaceff221e3598a, 0xbeab0ad4b899b2d9, 0x3e83761b047e21d1, 0x3e696c31c2256049, 0xbe60a714c57f7adf ],
[ 0x3feffd9f78c7524a, 0x3c804ed6ff98e45d, 0x3f5a024365f771bd, 0x3bf3c8f5202cb405, 0xbf70a9732d5284dd, 0xbc11acbd0899ce7e, 0x3f7a4bf47a43042a, 0x3c0e6cb2580d0920, 0xbf7c23802d8a5bb7, 0xbbd9963700abfc80, 0x3f74f40070668329, 0xbc1e1fe1c0e1182a, 0xbf64c9a2c9dccd04, 0x3c080fb9c9cd78c1, 0x3f44f7a50b5bc019, 0xbbd40906b7a1de3a, 0x3f218b04eb90c737, 0xbf2a4c3880c0ea69, 0x3f14b7b82a86f423, 0xbed0bc762b1c2aaa, 0xbee589d6f8892acf, 0x3ed357ab63f7bdf9, 0xbe96675858bbff5e, 0xbe9ea96dcb12a15c, 0x3e88c572fcf5610e, 0xbe3700c93da86dee, 0x3e57ae9ceb75e26e ],
[ 0x3feffed167b12ac2, 0xbc8ddc0ce3ed8fcb, 0x3f4afc85e0f82e12, 0x3bd438f22895e03e, 0xbf6221a9f326bef4, 0xbbf99642b37af330, 0x3f6e3c9aab90bcf4, 0x3bb7dcdfdccc72a0, 0xbf714b1b98141f21, 0x3c1af6edf50eba66, 0x3f6c1c19b9e63d70, 0x3bd4d1e9411f1d28, 0xbf5feac3dbeb5124, 0x3be2400e6ffbc1c8, 0x3f463e88178b0e49, 0x3be3e4ae97774f91, 0xbf04441c86c93f39, 0xbf1c8ceebc5fc50b, 0x3f1125b77a79aa6c, 0xbeeda7be990bc718, 0xbece019960474aff, 0x3ecd229185ef6279, 0xbeacea9fa10885e7, 0xbe8044fd6a2e447a, 0x3e83695f88fc641d, 0xbe60c0dc0ba0d589, 0xbe89194748828b93 ],
[ 0x3fefff6dee89352e, 0x3c8b96c0ba13851d, 0x3f3b23a5a23e4210, 0x3bc727bce1be0014, 0xbf5315107613c673, 0xbbf823f8673f5b7a, 0x3f60c243329a9ca1, 0xbbf65e361cefe652, 0xbf64630116262084, 0xbc00ea6ee40daf79, 0x3f61e84d1022e8cb, 0xbbd9b77b85eed4f0, 0xbf56b41872716325, 0x3bd3e9e001100f64, 0x3f436edde582b265, 0xbbe1cb479a94e148, 0xbf1f7870ebc38e77, 0xbf051ecfdc37801d, 0x3f0711d817e0d3b6, 0xbef0ae90d500d1d8, 0x3eaa85b1bf54920c, 0x3ebfe73958205038, 0xbead222bfef33aa4, 0x3e7833f8b13b1a4e, 0x3e7233b5a19285db, 0xbe61adcf574b7db6, 0x3e7ab10bedc44532 ],
[ 0x3fefffbb8f1049c6, 0x3c7d2c6266b51f26, 0x3f2a740684026555, 0xbba7e24cc3ac5710, 0xbf436d34c8f1c26a, 0xbbe69d73e7d1c977, 0x3f51eb6e14974a25, 0xbbf99b78600e0664, 0xbf5714eb8cc0947f, 0x3bf3613f37c7410b, 0x3f55bec08c01b1d7, 0xbbf3e3a262f6c68a, 0xbf4e4621d82dad12, 0x3bc302878843e2cc, 0x3f3e1b7b564b0e79, 0x3bcf894fc1f14d54, 0xbf224564b69716aa, 0x3ebbf8e3b47f3ccd, 0x3ef8f55a9be1a264, 0xbeeb3b76e6203281, 0x3ec713c795a07e0c, 0x3ea3bb092cfd93e0, 0xbea473b0a8333dee, 0x3e8645526869c143, 0x3e41a343e004b33d, 0xbe576c7e253faad1, 0x3e7e16080963cffe ],
[ 0x3fefffe0e0140857, 0xbc66aa36f86c14dc, 0x3f18fdc1b2dcf7b9, 0x3b87050f50b8f308, 0xbf3322484cf12daa, 0x3bd4cc0408806d4f, 0x3f427dc1bc6cfef5, 0x3beffbb5229f6bb7, 0xbf49202f465eb421, 0x3ba8f3f063b40660, 0x3f493b4c9746835f, 0xbbe04e2d6df2fce5, 0xbf430e9e6142fe9b, 0xbbd0396045094744, 0x3f3555b9d5fb4825, 0x3bd9a40d2ca5ef0b, 0xbf2055983c4ac7a6, 0x3ef68e6c75a5d068, 0x3ee2d4a50d2757ce, 0xbee1de08b56479aa, 0x3ec9110ccc7fe6fd, 0xbe8bb3184d789af8, 0xbe94629a164e82a0, 0x3e8413b087ee5e4d, 0xbe5648d7786f9fbc, 0xbe4293289f8c327d, 0xbe6c283008e726f7 ],
[ 0x3feffff2436a21dc, 0xbc83607959a29d36, 0x3f06e2367dc27f95, 0x3b6d96e6f0151020, 0xbf223c436c36fdab, 0x3bbf0d77fc600a50, 0x3f326bf00867a835, 0xbbdc92e1aecdc750, 0xbf3a51fb50b15f22, 0x3ba248227c6d2260, 0x3f3c0825378fda08, 0x3bd5a8a09c053451, 0xbf36c3dbfe0cbe4a, 0xbbde65769c33f8a1, 0x3f2c1dd1438378df, 0x3ba91bd161f34158, 0xbf194c36a9d7c0dc, 0x3efbf0aab116ca41, 0x3e86bdbd2f103930, 0xbed2b32e8d43ef25, 0x3ec3a7403459770b, 0xbea17411873320fa, 0xbe735bb2691c9b29, 0x3e798313537ed069, 0xbe5cb4b60e85a341, 0x3e02be214cf4c9eb, 0xbe8350a1a851865a ],
[ 0x3feffffa1de8c582, 0x3c8832540129302a, 0x3ef44f21e49054f2, 0x3b8f338cf4086346, 0xbf10d18811478659, 0x3bb914a7a08b6a2b, 0x3f21b964d438f622, 0x3bca52c94c56aaaf, 0xbf2a8d7851f26bf0, 0x3bcc38dbf3ee1223, 0x3f2ddd6df9b6852d, 0xbbc3b0dd7eac9b91, 0xbf29e52b7aac1644, 0x3bc904036dfb5764, 0x3f2165b2034fcab2, 0x3bc27beac4bf3866, 0xbf11b75c3332673a, 0x3ef91a253c42f4e7, 0xbed020b498095051, 0xbebade63f30809ae, 0x3eb89bb0d75e59b7, 0xbea180c78d3dca28, 0x3e6cabfd39b38553, 0x3e66013ffba86cfd, 0xbe564f2b123e1f0b, 0x3e335bf3e5021105, 0xbe8177828ffd35af ],
[ 0x3feffffd8e1a2f22, 0xbc8c10adf6b19989, 0x3ee1783ceac28910, 0xbb87f19d8ee58337, 0xbefe06a8b37e5b93, 0x3b824e8db1358f2e, 0x3f107978c7b8496b, 0x3bbf163b5580927c, 0xbf19d039884f8be5, 0x3bbfce53cd30b1eb, 0x3f1e8d1145e94a54, 0xbbbd0f6e009a99ee, 0xbf1c1f7251172a87, 0xbb83ce0f013dfe90, 0x3f1458b9e0854d68, 0xbbb897cf3950b1a7, 0xbf06eb0557245429, 0x3ef33045cf65279e, 0xbed42c8adf18ab62, 0x3e491109b80f9918, 0x3ea83a9b44249fbf, 0xbe99bcbaf0a8dfd1, 0x3e7900325b58a857, 0x3e34a3cf9c161684, 0xbe50cbcc4d0a916a, 0x3e34275e1b91f084, 0x3e839180c75350e1 ],
[ 0x3fefffff039f9e8f, 0xbc89d1bcd6174e99, 0x3ecd21397ead99cb, 0xbb46abd9c029c47c, 0xbee9f19734d29cf9, 0x3b820c4383da36c1, 0x3efd982bd41d8954, 0x3b8d9bc9988e9666, 0xbf08320fc4836be5, 0x3b7526638b9926a8, 0x3f0e0a1cb1d071f3, 0x3b9d9f5d232bab90, 0xbf0d384223047b9c, 0xbba30d0b2b8a170d, 0x3f0696daf6422bd4, 0x3baba6ac732f399e, 0xbefbb6e2d311a93f, 0x3eea4fcb0ea87efb, 0xbed1c940c5303daf, 0x3ea7469913f4e9c6, 0x3e8ef4b4f8ab67ae, 0xbe8e189c28e8e041, 0x3e7678b281d5bc55, 0xbe49c3bf4e9f2b5d, 0xbe374c9ba997ffed, 0x3e2b4b843f8c7068, 0x3e6c901764507862 ],
[ 0x3fefffff9d446ccc, 0xbc6bb06bab98bc80, 0x3eb789fb715aae95, 0xbaf226d93bf89b40, 0xbed5b333cc7f98f1, 0xbb76bd1091d25440, 0x3ee9b12fdbf90f62, 0x3b8d4b6b0ee9cf46, 0xbef5e06923144d70, 0x3b4c593194857860, 0x3efc6a071925631d, 0xbb8835ef595952e4, 0xbefd178cb0388a82, 0xbb9039272760f01c, 0x3ef7e29d33ac92b6, 0x3b921ff8b0e9d5eb, 0xbeef9203429baad6, 0x3ee094dadeee395c, 0xbeca771cf3500d9f, 0x3eab8fd1c29c21ea, 0xbe5cc8573d7de110, 0xbe7b0362da1722cb, 0x3e6e5eae518f94e9, 0xbe507963addd99a6, 0xbe23f496093d0bef, 0x3e199078a326092d, 0x3e842681ecfe4da1 ],
[ 0x3fefffffda86faa9, 0xbc7d230252d68f26, 0x3ea26f9df8519bd7, 0xbb4e339871c015b7, 0xbec1926290adc888, 0xbb6e36d23dbb2644, 0x3ed5900c02d97304, 0x3b7fa7d21e3ed616, 0xbee3166de6a8c640, 0x3b8b014157867958, 0x3ee9dfcc328729e0, 0x3b820e9fee0b7665, 0xbeebcab1ed5ec38d, 0x3b6d9003794f0fe0, 0x3ee81cd74a57ce17, 0xbb8809fde9c0f6f5, 0xbee106e95b6bf556, 0x3ed379625a71385f, 0xbec1970a5b5bd443, 0x3ea74761c8333ff2, 0xbe80864e125c9951, 0xbe5b83bf9019aa3b, 0x3e60397611c35b28, 0xbe4a25392adb29ac, 0xbe17b832af40d9d4, 0x3df62a02eb79577b, 0x3e8a6da58ffe94f4 ],
[ 0x3feffffff233ee1d, 0x3c8db123ed17221d, 0x3e8bfd7555a3bd68, 0x3b20151cf177b53a, 0xbeab8d7f804d2e73, 0x3b482b366f0bc2dc, 0x3ec17f93e5149289, 0x3b391997bfd26568, 0xbed013b0457d08fa, 0xbb60d6d5a7f06298, 0x3ed6b245d7e1d829, 0xbb79985e02c8ce3b, 0xbed98077548c6951, 0x3b701cd3f1d12c93, 0x3ed7492048ab3ceb, 0xbb70368a0dc0750e, 0xbed17506c7b39cb0, 0x3ec57e94a4c5f5a6, 0xbeb570971200d7db, 0x3ea0a0f956947b21, 0xbe81a9b7bd5bba32, 0x3e451bfc00de3146, 0x3e495b6967f79cbe, 0xbe3fe3c43cb3cf84, 0x3e32f364a7a2dc5f, 0xbdf007442a10cc14, 0xbe7ef5ab6fc5e849 ],
[ 0x3feffffffb127525, 0x3c8504f382db4102, 0x3e74980cb3c80949, 0x3b17fbdd923f8057, 0xbe94ea6ce697296f, 0x3b3ea42f9c9de533, 0x3eab771d9b6f07b8, 0xbb1e9c1ca9662fe8, 0xbeba26c653fad5b8, 0xbb5146c4cee0e898, 0x3ec3302bb89379de, 0x3b64c55b83ef7a68, 0xbec67f42e5264334, 0xbb66779da26b4197, 0x3ec58b4adafb958e, 0x3b68351251b45e84, 0xbec10f576796285a, 0x3eb66ca44250dd07, 0xbea84ee0ada37543, 0x3e953b6065291e6b, 0xbe7c09ebfd0c581c, 0x3e563062625d59c0, 0x3e1e259c60eb7b83, 0xbe2e43802ad25514, 0x3e351bcdabe8cda5, 0xbdf930fc3df6e909, 0xbe881cdd770e1c81 ],
[ 0x3feffffffe4aed5e, 0x3c4389c0f32ad0f0, 0x3e5d5f3a8dea7357, 0x3affa07c18622dd2, 0xbe7ebfb14c9170c0, 0x3b19e40632b4145d, 0x3e94d9228525f449, 0x3b2d35bd7f959136, 0xbea48b536addac5f, 0xbb461ace22b32569, 0x3eaf48ccf23a68e2, 0xbb4ee1d13c79c281, 0xbeb3183b6134cf03, 0x3b4e1f4d5fe2a06c, 0x3eb31efde2215f01, 0xbb564a7021e23fba, 0xbeafd9eeb0f18fdb, 0x3ea63414459ae298, 0xbe99dda81be20b5a, 0x3e88da7d306423c5, 0xbe7303da86a4fc28, 0x3e54f5e1327706b9, 0xbe23efb5eefcbe53, 0xbe131bc5ce1ce65d, 0xbe13eafe1b05c93f, 0xbdf47fc2d9cc851e, 0x3e7d27265006a9df ],
[ 0x3fefffffff6d1e56, 0xbc864d969b4be4c4, 0x3e444d26de513197, 0x3ae76fc20fc4b365, 0xbe65e32de7af8977, 0xbaf888fd6ae18a1c, 0x3e7e9e05b3c8f38a, 0x3b17532141b12aa7, 0xbe8f2f6fa7db5b1d, 0x3b2b3bf498e3462c, 0x3e9899dcace485eb, 0x3b11885a0ae9e878, 0xbe9f34b7eef3c9b2, 0x3b3294a3b618b470, 0x3ea04be030272d14, 0xbb4df83095e40f79, 0xbe9c73bd22571559, 0x3e94edda838439f5, 0xbe89fc860b504677, 0x3e7b0d686a260420, 0xbe672370c2fdbe10, 0x3e4ee29f0d197d25, 0xbe2b4d88d500c5be, 0x3dc96014c45b0178, 0x3e0238f19dc8fd82, 0xbde8d34d46ae6567, 0xbe454105fe4a9cd8 ],
[ 0x3fefffffffd01f89, 0xbc735e8e39884f62, 0x3e2b334fac4b9f99, 0x3ac32178ed1a4971, 0xbe4e2cec6323e50e, 0xbac0e5693f9d4908, 0x3e65c027d5bba36a, 0xbaefc46fb3cc7ae0, 0xbe76df4d024fffbe, 0xbb090fd7226ec57a, 0x3e82aaf7c205b9ea, 0x3b2dbec2005b45a8, 0xbe88902edfbfefdd, 0xbb2c353aca58d08a, 0x3e8ab2ab1b338249, 0x3b2b498186c39105, 0xbe885abe0ff198d3, 0x3e82d32f7c3621eb, 0xbe78c141c71dbc95, 0x3e6b9fa6fbb9b198, 0xbe59db5fe2c2f5b9, 0x3e43b8e07840483e, 0xbe26d95e5070d91d, 0x3dfd7616168b0e49, 0x3e1f2be0744b3a5f, 0xbdd737a375809985, 0xbe7936d4936fb865 ],
[ 0x3feffffffff0dd2b, 0x3c80df73e7d2fc98, 0x3e11a94ff571654f, 0x3abfbf537b47967d, 0xbe34251f33f5578f, 0x3ad4c9cece8f41b2, 0x3e4de6bc1f75bb9b, 0x3a894afb459a3000, 0xbe6036b5fd1c4158, 0xbb0d582afa097896, 0x3e6b58f1385def96, 0xbaf8778854601996, 0xbe72a2347efb2133, 0xbb026f9e1ef0f378, 0x3e7508db866ffe00, 0x3b164de561a68a21, 0xbe73ffea934685b9, 0x3e702ff87b2e2576, 0xbe666e54eae5fa4b, 0x3e5a9ea2195c567d, 0xbe4ae3b91fecafa1, 0x3e36bb883d2e5ed1, 0xbe1ee10e97715c11, 0x3dfe2873d2b77f1f, 0xbdeaf385ae29d57b, 0xbdbd793eecfc2513, 0x3e420d80dcfa68d1 ],
[ 0x3feffffffffb5be5, 0xbc7729d6819c7f34, 0x3df63ac6b4edc88e, 0xba7c45991835da24, 0xbe1a0ce0dc06a706, 0xbab1b72d11da9dab, 0x3e33e380dd7593a5, 0xbad8ad868a7b5674, 0xbe4638bc4fb02cba, 0x3a87a84506fcda40, 0x3e535753ad4c5875, 0x3aead190ab170366, 0xbe5b41f33cafccba, 0x3af0e3539bf61116, 0x3e5fe694e371a659, 0xbad3a84e01866ea8, 0xbe5f8af0121aa0ab, 0x3e5aa77274dab3d8, 0xbe53616fe8f6a259, 0x3e484fddf4c681a1, 0xbe3a3de05d1b8a31, 0x3e2822529aca9f83, 0xbe126c3dfba84378, 0x3df64c287a84aa09, 0xbe0107d2dac5d83b, 0xbd4e251d1ab1d873, 0x3e58f37005f17b42 ],
[ 0x3feffffffffe9eb0, 0xbc5ea527e0bef1e8, 0x3ddb1e5acf351d87, 0x3a5dc96583ba19f0, 0xbe005042a0a5f3c3, 0xbaa2023f0f13867c, 0x3e199ac8fd63c66c, 0xbaabf57c5fd0501a, 0xbe2d72344378e114, 0x3acc77758959af41, 0x3e3a6be9a123435b, 0x3acdab4af8807c36, 0xbe433aacb4bf6dea, 0x3aebd241ea49ac35, 0x3e474b732e7ceaa7, 0x3adc7c89730b0264, 0xbe47e7eab6531ccb, 0x3e450959f2daae39, 0xbe3ffed4cef94261, 0x3e351c7f99f908a2, 0xbe282b5fd5fbedfc, 0x3e17e1c8e715c978, 0xbe051536822c861b, 0x3debe7e4c220ca82, 0x3e2f5bb67c461296, 0x3da1d7cf04529bf0, 0xbe8acc021ab828c4 ],
[ 0x3fefffffffff9a1b, 0xbc66a87270d2450c, 0x3dc0084ff125639d, 0xba58ad61debedc86, 0xbde3ca42adaa26f6, 0x3a8c20c6583dccdd, 0x3dffe73513c67bf8, 0x3a920d28c0c7e686, 0xbe12dd9aa5a2bee3, 0x3aad76d7235461be, 0x3e216ef6b93944a8, 0xbacf07bd785566de, 0xbe2a2d58e9b22b26, 0xbab19e6ea91dd55e, 0x3e306389b9748f25, 0x3adfbcc52565c0be, 0xbe316cdd9eb58ba2, 0x3e2fdd861b55c500, 0xbe29457846c943d2, 0x3e2178f3905f435c, 0xbe1518cf20c53de2, 0x3e06329939a34b66, 0xbdf5ef3ad85e5d3b, 0x3ddf2b41494e49e9, 0x3e2bad43bc0b622d, 0x3da21a45fa9dcebf, 0xbe8790b3d88f69fe ],
[ 0x3fefffffffffe380, 0x3c87ce07114e4fe0, 0x3da25f9ee0b923dc, 0xba4174c43a73a4d1, 0xbdc74105146a5162, 0xba47d0740e56625c, 0x3de33cde4f35d941, 0xba72a344950797c6, 0xbdf760fe7b666392, 0x3a9a8b77c82ed644, 0x3e063a70fd66d485, 0x3aa6b87715649d6d, 0xbe11324f6fb6dfa1, 0x3ab3fc045e39915f, 0x3e163a31a36b815c, 0xba502dec9bc1a700, 0xbe18724ca8970b91, 0x3e172e290891e5de, 0xbe131fc03858aab1, 0x3e0b9e8b0e7fa253, 0xbe01821a002637bd, 0x3df37ba5f3fba5eb, 0xbde3578bf23dc654, 0x3dcfdaf2015d7b54, 0x3df7f6a435069067, 0x3d99d14ee557ec62, 0xbe555f4c743ee571 ],
[ 0x3feffffffffff845, 0x3c7b0edc5a89ab8e, 0x3d846897d4b69fc6, 0x3a2a74852415bb49, 0xbdaa77a4e7dcd735, 0xba434edb43ab7de6, 0x3dc67543695dcc12, 0xba329ae577004af8, 0xbddc05c1e2fc710e, 0x3a5dbbf42d2537a8, 0x3deb639419fedf8e, 0xba8ed72eb9e7a59e, 0xbdf5cfd7eb9bfe87, 0xba7e97db27125fc0, 0x3dfd11578959ba45, 0xba8c0635ac2b5768, 0xbe0082f9e9f7eb37, 0x3e00354ceadad8b3, 0xbdfbc2dee0154fc6, 0x3df4e11efdc66eae, 0xbdebb357c0253f64, 0x3de035f9889bc29c, 0xbdc8e7bdb10b7441, 0x3dbe364571102661, 0xbe212cffcf49a2e8, 0x3d8ee9362bcfec26, 0x3e7cc5b58dd85301 ],
[ 0x3feffffffffffdf8, 0xbc8dcf8b10ff973b, 0x3d65f8b87a31bd85, 0x39d65b265455b658, 0xbd8d2e55024a0fb5, 0x3a2444e1d84cea02, 0x3da9612cc225df4b, 0xba4c784edb664ce7, 0xbdc03ee5f38b9b4d, 0xba691ca8efa41a30, 0x3dd04f2f71e2e96b, 0xba633f36a4e51350, 0xbddab7099f99ced9, 0xba54af7a67f2110c, 0x3de2554b8f609fd1, 0xba729e641eb44218, 0xbde57c87529ca968, 0x3de5cd182c967671, 0xbde3580a2517d57a, 0x3dde3be72b1be982, 0xbdd4e9908689ad08, 0x3dc9a61979d3395b, 0xbdb7b826aadd1c89, 0x3daad3a9fc4a0d1e, 0xbe00e9325ed20970, 0x3d80722198ff452c, 0x3e5c2ef85611aa11 ],
[ 0x3fefffffffffff7b, 0x3c800fa07f7fb612, 0x3d46ed2f2515e933, 0x39d2bc1802a42b92, 0xbd6f2a6c1669c901, 0xba07b3e174cc1840, 0x3d8bc42ba38a13f8, 0x3a2460463d59d3df, 0xbda2391e135afae4, 0xba3bd08c8c5f7b18, 0x3db2c6c24550f64f, 0xba3fdc861a487110, 0xbdbf9a3c1b0d63ec, 0xba5843dc8d9ad3d5, 0x3dc6502546ab341a, 0x3a645f812e48eb98, 0xbdcaf223186006d1, 0x3dcc388dd1764f41, 0xbdc9e65a242b52aa, 0x3dc4fcd2787781eb, 0xbdbe3cbdb20a48d6, 0x3db35639e9fcd410, 0xbd55b8e97774b2c9, 0x3d966ffe6a100bc9, 0xbe15706c390c113e, 0x3d6ff0d11cf61949, 0x3e72054de347e3f8 ],
[ 0x3fefffffffffffdf, 0x3c75669e670f914c, 0x3d272fd93e036cdc, 0x39b1c553d12fbbd0, 0xbd501f450d1e61b2, 0x39cbed807e60c078, 0x3d6d68fb81b2ed89, 0x39dc7ea3c4444cc0, 0xbd83c706aa4d2328, 0x3a2d6d2d51dd414d, 0x3d94e6479565838e, 0x3a350580f36c14c1, 0xbda20e9eb83b3dd9, 0xba104b6334a32fd0, 0x3daa35b9d2fcac80, 0x3a1c07c6978bf2f0, 0xbdb04a134f6e3dcb, 0x3db196579f27ddbe, 0xbdb0ab97aa74c700, 0x3dabf68355f542b1, 0xbda49da25a547134, 0x3d9bd64993a3958e, 0xbdc1193990186399, 0x3d81c0e98335ae18, 0x3e2e08edb685494a, 0x3d5cb9fcc058465b, 0xbe894cacccfb8964 ],
[ 0x3feffffffffffff8, 0x3c70160ef15c497e, 0x3d06ba91ac734786, 0xb9af81d6fa69b5b2, 0xbd3028a39099f4db, 0xb9c83ed68de15404, 0x3d4e292863e1795e, 0xb9db292e812abb68, 0xbd64c4e690fbdd14, 0xba080991e1d4ef25, 0x3d767e6e5ac60fd1, 0x3a01d2ca68dcf0e8, 0xbd83f00d80afa00c, 0xba23e174dc7225ac, 0x3d8db88ee63eb28a, 0x3a28abd97527892f, 0xbd92fe58a1f19368, 0x3d951dbeae22a5c8, 0xbd94a4d54823e0fc, 0x3d91e432d674cfba, 0xbd8b001e26c6e764, 0x3d8328ce695259fe, 0xbdb4e492cf7d4f4c, 0x3d6aaa77d339dc00, 0x3e2366538db382e5, 0x3d4826ad7d581503, 0xbe8056e0810b14da ],
[ 0x3feffffffffffffe, 0x3c759ab24e589a30, 0x3ce5982008db1304, 0xb981cf9bda64b38a, 0xbd0f610e8cde57ac, 0xb99884dcd86f98c8, 0x3d2df2dac2f2d47f, 0xb997f27bf279d988, 0xbd451b17f95fc0b4, 0xb9e063e04485c3e7, 0x3d576996ddc975d7, 0xb9e21489d6648428, 0xbd6546155a972b18, 0xb9b9d3fb518aa7c0, 0x3d70456ed89c4f24, 0x3a1eee772fc32c5e, 0xbd755d6295aa388a, 0x3d786ead99977388, 0xbd789b3d387efa6e, 0x3d76011e175e64f8, 0xbd70cd70515af47b, 0x3d69402199dfdde7, 0xbda806743bc32b08, 0x3d530e561550364a, 0x3e170093985e2c1b, 0x3d331999a27ace63, 0xbe735f54db0f4dbc ],
[ 0x3ff0000000000000, 0xbc8a6d7d18831888, 0x3cc3e296303b2297, 0x39668cf648cfed1c, 0xbced8456ef97c744, 0x398fcded17005500, 0x3d0ccb92e6c24c8d, 0xb9aa704dc202cff2, 0xbd24c1aa8cf1229b, 0x39c652efa61e4ec2, 0x3d37918b6b83c0fb, 0x39b2fb01fb8836dc, 0xbd45f073659de44d, 0xb9e9ceb48a2d1931, 0x3d5134d070b5921e, 0x39d9af3038fcc184, 0xbd5730a2938c09dd, 0x3d5b4091041f5905, 0xbd5c3c44ab8c8421, 0x3d5a06b4f4c3044d, 0xbd5704f511555fe7, 0x3d4fe51fcfc1acba, 0x3d95e7229a07e7cd, 0x3d39f8121f6c3146, 0xbe0692b2f9b3f445, 0x3d1c8c34f73d3823, 0x3e6301e540260d52 ],
];

960
vendor/pxfm/src/err/erfc.rs vendored Normal file
View File

@@ -0,0 +1,960 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::dd_fmla;
use crate::double_double::DoubleDouble;
use crate::err::erf::{Erf, erf_accurate, erf_fast};
use crate::exponents::{EXP_REDUCE_T0, EXP_REDUCE_T1, ldexp};
use crate::round_ties_even::RoundTiesEven;
use std::hint::black_box;
/// Coefficient table: 6 rows of 13 entries, each entry an IEEE-754 binary64
/// bit pattern (decode with `f64::from_bits`).
///
/// In every row, entry 1 is many orders of magnitude smaller than entry 0,
/// which suggests that entries 0 and 1 form a high/low pair for the leading
/// coefficient while the remaining entries are plain doubles.
/// NOTE(review): the code that indexes this table is outside this part of the
/// file; judging by the name, each row presumably holds the polynomial used by
/// the fast asymptotic erfc path on one sub-interval - confirm against the
/// consumer before relying on this description.
static ASYMPTOTIC_POLY: [[u64; 13]; 6] = [
[
0x3fe20dd750429b6d,
0x3c61a1feb75a48a8,
0xbfd20dd750429b6c,
0x3fdb14c2f863e403,
0xbff0ecf9db3af35d,
0x400d9eb53ca6eeed,
0xc030a945830d95c8,
0x4056e8a963e2f1f5,
0xc0829b7ccc8f396f,
0x40b15e716e83c27e,
0xc0e1cfdcfbcaf22a,
0x4111986cc7a7e8fe,
0xc1371f7540590a91,
],
[
0x3fe20dd750429ae7,
0x3c863da89e801fd4,
0xbfd20dd750400795,
0x3fdb14c2f57c490c,
0xbff0ecf95c8c9014,
0x400d9e981f2321ef,
0xc030a81482de1506,
0x4056d662420a604b,
0xc08233c96fff7772,
0x40af5d62018d3e37,
0xc0d9ae55e9554450,
0x410052901e10d139,
0xc1166465df1385f0,
],
[
0x3fe20dd75041e3fc,
0xbc7c9b491c4920fc,
0xbfd20dd74e5f1526,
0x3fdb14c1d35a40e0,
0xbff0ecdecd30e86b,
0x400d9b4e7f725263,
0xc030958b5ca8fb39,
0x40563e3179bf609c,
0xc0806bbd1cd2d0fd,
0x40a7b66eb6d1d2f2,
0xc0cce5a4b1afab75,
0x40e8b5c6ae6f773c,
0xc0f5475860326f86,
],
[
0x3fe20dd75025cfe9,
0x3c55a92eef32fb20,
0xbfd20dd71eb9d4e7,
0x3fdb14af4c25db28,
0xbff0ebc78a22b3d8,
0x400d85287a0b3399,
0xc03045f751e5ca1d,
0x4054a0d87ddea589,
0xc07ac6a0981d1eee,
0x409f44822f567956,
0xc0bcba372de71349,
0x40d1a4a19f550ca4,
0xc0d52a580455ed79,
],
[
0x3fe20dd74eb31d84,
0xbc439c4054b7c090,
0xbfd20dd561af98c4,
0x3fdb1435165d9df1,
0xbff0e6b60308e940,
0x400d3ce30c140882,
0xc02f2083e404c299,
0x40520f113d89b42a,
0xc0741433ebd89f19,
0x4092f35b6a3154f6,
0xc0ab020a4313cf3b,
0x40b90f07e92da7ee,
0xc0b6565e1d7665c3,
],
[
0x3fe20dd744b3517b,
0xbc6f77ab25e01ab4,
0xbfd20dcc62ec4024,
0x3fdb125bfa4f66c1,
0xbff0d80e65381970,
0x400ca11fbcfa65b2,
0xc02cd9eaffb88315,
0x404e010db42e0da7,
0xc06c5c85250ef6a3,
0x4085e118d9c1eeaf,
0xc098d74be13d3d30,
0x40a211b1b2b5ac83,
0xc09900be759fc663,
],
];
/// Coefficient table: 10 rows of 30 entries, each entry an IEEE-754 binary64
/// bit pattern (decode with `f64::from_bits`).
///
/// Rows are zero-padded to a common length: row `i` carries `21 + i` non-zero
/// entries followed by `9 - i` zeros, so only the last row uses all 30 slots.
/// In every row the first 12 entries alternate between a normal-sized value
/// and a much smaller one, which suggests six high/low (double-double) pairs
/// followed by plain double coefficients.
/// NOTE(review): the code that indexes this table is outside this part of the
/// file; judging by the name, each row presumably holds the polynomial used by
/// the accurate asymptotic erfc path on one sub-interval - confirm the pairing
/// and the per-row degree against the consumer.
static ASYMPTOTIC_POLY_ACCURATE: [[u64; 30]; 10] = [
[
0x3fe20dd750429b6d,
0x3c61ae3a912b08f0,
0xbfd20dd750429b6d,
0xbc51ae34c0606d68,
0x3fdb14c2f863e924,
0xbc796c0f4c848fc8,
0xbff0ecf9db3e71b6,
0x3c645d756bd288b0,
0x400d9eb53fad4672,
0xbcac61629de9adf2,
0xc030a945f3d147ea,
0x3cb8fec5ad7ece20,
0x4056e8c02f27ca6d,
0xc0829d1c21c363e0,
0x40b17349b70be627,
0xc0e28a6bb4686182,
0x411602d1662523ca,
0xc14ccae7625c4111,
0x4184237d064f6e0d,
0xc1bb1e5466ca3a2f,
0x41e90ae06a0f6cc1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
],
[
0x3fe20dd750429b6d,
0x3c61adaa62435c10,
0xbfd20dd750429b6d,
0xbc441516126827c8,
0x3fdb14c2f863e90b,
0x3c7a535780ba5ed4,
0xbff0ecf9db3e65d6,
0xbc9089edde27ad07,
0x400d9eb53fa52f20,
0xbcabc9737e9464ac,
0xc030a945f2cd7621,
0xbcc589f28b700332,
0x4056e8bffd7e194e,
0xc0829d18716876e2,
0x40b17312abe18250,
0xc0e287e73592805c,
0x4115ebf7394a39c1,
0xc14c2f14d46d0cf9,
0x4182af3d256f955e,
0xc1b7041659ebd7aa,
0x41e6039c232e2f71,
0xc2070ca15c5a07cb,
0,
0,
0,
0,
0,
0,
0,
0,
],
[
0x3fe20dd750429b6d,
0x3c5d3c35b5d37410,
0xbfd20dd750429b56,
0xbc7c028415f6f81b,
0x3fdb14c2f863c1cf,
0x3c51bb0de6470dbc,
0xbff0ecf9db33c363,
0x3c80f8068459eb16,
0x400d9eb53b9ce57b,
0x3ca20cce33e7d84a,
0xc030a945aa2ec4fa,
0xbcdf6e0fcd7c6030,
0x4056e8b824d2bfaa,
0xc0829cc372a6d0b0,
0x40b1703a99ddd429,
0xc0e2749f9a267cc6,
0x4115856a17271849,
0xc14a8bcb4ba9753f,
0x418035dcce882940,
0xc1b1e5d8c5e6e043,
0x41dfe3b4f365386e,
0xc20398fdef2b98fe,
0x42184234d4f4ea12,
0,
0,
0,
0,
0,
0,
0,
],
[
0x3fe20dd750429b6a,
0x3c8ae622b765e9fd,
0xbfd20dd750428f0e,
0x3c703c6c67d69513,
0x3fdb14c2f8563e8e,
0x3c6766a6bd7aa89c,
0xbff0ecf9d8dedd48,
0x3c90af52e90336e3,
0x400d9eb4aad086fe,
0x3ca640d371d54a19,
0xc030a93f1d01cfe0,
0xbcc68dbd8d9c522c,
0x4056e842e9fd5898,
0xc08299886ef1fb80,
0x40b15e0f0162c9a0,
0xc0e222dbc6b04cd8,
0x411460268db1ebdf,
0xc1474f53ce065fb3,
0x417961ca8553f870,
0xc1a8788395d13798,
0x41d35e37b25d0e81,
0xc1f707b7457c8f5e,
0x4211ff852df1c023,
0xc21b75d0ec56e2cd,
0,
0,
0,
0,
0,
0,
],
[
0x3fe20dd750429a8f,
0xbc766d8dda59bcea,
0xbfd20dd7503fdbab,
0x3c6707bdffc2b3fe,
0x3fdb14c2f6526025,
0xbc27fa4bb9541140,
0xbff0ecf99c417d45,
0xbc9748645ef7af94,
0x400d9eaa9c712a7d,
0x3ca79e478994ebb4,
0xc030a8ef11fbf141,
0x3cbb5c72d69f8954,
0x4056e4653e0455b1,
0xc08286909448e6cf,
0x40b113424ce76821,
0xc0e1346d859e76de,
0x4111f9f6cf2293bf,
0xc14258e6e3b337db,
0x41714029ecd465fb,
0xc19c530df5337a6f,
0x41c34bc4bbccd336,
0xc1e4a37c52641688,
0x420019707cec2974,
0xc21031fe736ea169,
0x420f6b3003de3ddf,
0,
0,
0,
0,
0,
],
[
0x3fe20dd75042756b,
0x3c84ad9178b56910,
0xbfd20dd74feda9e8,
0xbc78141c70bbc8d6,
0x3fdb14c2cb128467,
0xbc709aebaa106821,
0xbff0ecf603921a0b,
0x3c97d3cb5bceaf0b,
0x400d9e3e1751ca59,
0x3c76622ae5642670,
0xc030a686af57f547,
0x3cc083b320aff6b6,
0x4056cf0b6c027326,
0xc0823afcb69443d3,
0x40b03ab450d9f1b9,
0xc0de74cdb76bcab4,
0x410c671b60e607f1,
0xc138f1376d324ce4,
0x4163b64276234676,
0xc18aff0ce13c5a8e,
0x41aef20247251e87,
0xc1cc9f5662f721f6,
0x41e4687858e185e1,
0xc1f4fa507be073c2,
0x41fb99ac35ee4acc,
0xc1f16cb585ee3fa9,
0,
0,
0,
0,
],
[
0x3fe20dd7503e730d,
0x3c84e524a098a467,
0xbfd20dd7498fa6b2,
0x3c260a4e27751c80,
0x3fdb14c061bd2a0c,
0x3c695a8f847d2fc2,
0xbff0ecd0f11b8c7d,
0xbc94126deea76061,
0x400d9b1344463548,
0x3cafe09a4eca9b0e,
0xc030996ea52a87ed,
0xbca924f920db26c0,
0x40567a2264b556b0,
0xc0815dfc2c86b6b5,
0x40accc291b62efe4,
0xc0d81375a78e746a,
0x41033a6f15546329,
0xc12c1e9dc1216010,
0x4152397ea3d43fda,
0xc174661e5b2ea512,
0x4193412367ca5d45,
0xc1ade56b9d7f37c4,
0x41c2851d9722146d,
0xc1d19027baf0c3fe,
0x41d7e7b8b6ab58ac,
0xc1d4c446d56aaf22,
0x41c1492190400505,
0,
0,
0,
],
[
0x3fe20dd74ff10852,
0x3c8a32f26deff875,
0xbfd20dd6f06c491c,
0x3c770c16e1793358,
0x3fdb14a7d5e7fd4a,
0x3c7479998b54db5b,
0xbff0ebbdb3889c5f,
0xbc759b853e11369c,
0x400d89dd249d7ef8,
0xbc84b5edf0c8c314,
0xc0306526fb386114,
0xbc840d04eed7c7e0,
0x40557ff657e429ce,
0xc07ef63e90d38630,
0x40a6d4f34c4ea3da,
0xc0d04542b9e36a54,
0x40f577bf19097738,
0xc119702fe47c736d,
0x413a7ae12b54fdc6,
0xc157ca3f0f7c4fa9,
0x417225d983963cbf,
0xc1871a6eac612f9e,
0x4198086324225e1e,
0xc1a3de68670a7716,
0x41a91674de4dcbe9,
0xc1a6b44cc15b76c2,
0x419a36dae0f30d80,
0xc17cffc1747ea3dc,
0,
0,
],
[
0x3fe20dd74ba8f300,
0xbc59dd256871d210,
0xbfd20dd3593675bc,
0x3c7ec0e7ffa91ad9,
0x3fdb13eef86a077a,
0xbc74fb5d78d411b8,
0xbff0e5cf52a11f3a,
0xbc851f36c779dc8c,
0x400d4417a08b39d5,
0x3c91be9fb5956638,
0xc02f91b9f6ce80c3,
0xbccc9c99dd42829c,
0x405356439f45bb43,
0xc078c0ca12819b48,
0x409efcad2ecd6671,
0xc0c21b0af6fc1039,
0x40e327d215ee30c9,
0xc101fabda96167b0,
0x411d82e4373b315d,
0xc134ed9e2ff591e9,
0x41495c85dcd8eab5,
0xc159f016f0a3d62a,
0x41660e89d918b96f,
0xc16e97be202cba64,
0x4170d8a081619793,
0xc16c5422b4fcfc65,
0x4161131a9dc6aed1,
0xc14a457d9dced257,
0x4123605e980e8b86,
0,
],
[
0x3fe20dd7319d4d25,
0x3c82b02992c3b7ab,
0xbfd20dc29c13ab1b,
0xbc7d78d79b4ad767,
0x3fdb115a57b5ab13,
0xbc6aa8c45be0aa2e,
0xbff0d58ec437efd7,
0xbc5994f00a15e850,
0x400cb1742e229f23,
0xbca8000471d54399,
0xc02d99a5edf7b946,
0xbcbaf76ed7e35cde,
0x4050a8b71058eb28,
0xc072d88289da5bfc,
0x40943ddf24168edb,
0xc0b3e9dfc38b6d1a,
0x40d18d4df97ab3df,
0xc0eb550fc62dcab5,
0x41029cb71f116ed1,
0xc115fc9cc4e854e3,
0x41265915fd0567b1,
0xc1335eb5fca0e46d,
0x413c5261ecc0d789,
0xc14138932dc4eafc,
0x414117d4eb18facd,
0xc13af96163e35eca,
0x4130454a3a63c766,
0xc11c2ebc1d39b44a,
0x40ff3327698e0e6b,
0xc0d094febc3dff35,
],
];
/// Fast-path polynomial approximation of exp(z) for a double-double
/// argument z = zh + zl.
///
/// Intended for tiny arguments only: |z| < 0.000130273 < 2^-12.88 and
/// |zl| < 2^-42.6 (bounds stated by the original implementation; they are
/// not checked here).
///
/// The degree-2..4 part is evaluated in plain `f64` with fused
/// multiply-adds; only the constant and linear terms are accumulated in
/// double-double arithmetic.
#[inline]
fn q_1(z_dd: DoubleDouble) -> DoubleDouble {
    // Polynomial coefficients, lowest degree first, kept as exact bit patterns.
    const Q0: f64 = f64::from_bits(0x3ff0000000000000);
    const Q1: f64 = f64::from_bits(0x3ff0000000000000);
    const Q2: f64 = f64::from_bits(0x3fe0000000000000);
    const Q3: f64 = f64::from_bits(0x3fc5555555995d37);
    const Q4: f64 = f64::from_bits(0x3fa55555558489dc);

    let z_sum = z_dd.to_f64();

    // Horner scheme for the high-degree tail; the innermost step deliberately
    // uses only the high word of z, the next one uses the collapsed sum.
    let tail = dd_fmla(dd_fmla(Q4, z_dd.hi, Q3), z_sum, Q2);

    // Q1 + tail * z, then multiplied by the full double-double z.
    let linear = DoubleDouble::from_exact_add(Q1, tail * z_sum);
    let scaled = DoubleDouble::quick_mult(z_dd, linear);

    DoubleDouble::f64_add(Q0, scaled)
}
/// Fast-path exponential of a double-double argument.
///
/// The argument is split as `x = k * log(2) / 2^12 + y` with `k` an integer
/// and `y` tiny. The result is assembled from two 64-entry table lookups
/// (indexed by bits 6..=11 and bits 0..=5 of `k`), the polynomial `q_1(y)`,
/// and a power-of-two scale built from `k >> 12`.
///
/// No range checks are performed; the exponent field of the scale factor is
/// formed directly from `(k >> 12) + 0x3ff`.
#[inline]
fn exp_1(x: DoubleDouble) -> DoubleDouble {
    /* |INVLOG2-2^12/log(2)| < 2^-43.4 */
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);
    // Two-word constant multiplied by k to form the amount removed from x.
    // NOTE(review): the first argument is the tiny word, so `new` presumably
    // takes (lo, hi) - confirm against `DoubleDouble::new`.
    const LOG2_DD: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3bbabc9e3b39803f),
        f64::from_bits(0x3f262e42fefa39ef),
    );

    // Range reduction: reduced = x - k * LOG2_DD.
    let k = (x.hi * INVLOG2).round_ties_even_finite();
    let k_log2 = DoubleDouble::quick_f64_mult(k, LOG2_DD);
    let mut reduced = DoubleDouble::from_exact_add(x.hi - k_log2.hi, x.lo);
    reduced.lo -= k_log2.lo;

    /* Note: k is an integer, this is just a conversion. */
    let k_int = k as i64;
    let biased_exponent = (k_int >> 12).wrapping_add(0x3ff);
    let coarse_idx = ((k_int >> 6) & 0x3f) as usize;
    let fine_idx = (k_int & 0x3f) as usize;

    let coarse = DoubleDouble::new(
        f64::from_bits(EXP_REDUCE_T0[coarse_idx].0),
        f64::from_bits(EXP_REDUCE_T0[coarse_idx].1),
    );
    let fine = DoubleDouble::new(
        f64::from_bits(EXP_REDUCE_T1[fine_idx].0),
        f64::from_bits(EXP_REDUCE_T1[fine_idx].1),
    );

    // Combine both table values, then the polynomial for the reduced argument.
    let table_product = DoubleDouble::quick_mult(fine, coarse);
    let poly = q_1(reduced);
    let mut out = DoubleDouble::quick_mult(table_product, poly);

    // Apply the power-of-two factor to both words.
    let scale = f64::from_bits((biased_exponent as u64) << 52);
    out.hi *= scale;
    out.lo *= scale;
    out
}
/// Value returned by `exp_accurate`: the integer chosen during range
/// reduction together with the double-double polynomial value.
/// NOTE(review): the final scaling is not applied inside `exp_accurate`;
/// presumably the caller combines `result` with 2^`e` - confirm at the call site.
struct Exp {
// Integer `k` selected by the range reduction in `exp_accurate`.
e: i32,
// Double-double polynomial value computed for the reduced argument.
result: DoubleDouble,
}
/// Accurate-path evaluation used for the exponential of a double-double
/// argument.
///
/// Picks an integer `k` by rounding `x_dd.hi * LOG2INV`, forms a reduced
/// double-double argument `y` that approximates `x - k*log(2)` using the
/// three-part constant `LOG2_H + LOG2_L + LOG2_TINY`, and evaluates the
/// degree-19 polynomial stored in `E2` at `y` with Horner's scheme.
///
/// Returns `Exp { e: k, result: p(y) }`; no power-of-two scaling is applied
/// here.
///
/// No range check is performed on the input: the conversion of `k` to `i32`
/// is unchecked (see the SAFETY note below).
fn exp_accurate(x_dd: DoubleDouble) -> Exp {
// Polynomial coefficients as f64 bit patterns. Entries 0..=15 are read in
// pairs (E2[2 * a], E2[2 * a + 1]) for degrees a = 0..=7; entries 16..=27
// are read singly as E2[a + 8] for degrees a = 8..=19.
static E2: [u64; 28] = [
0x3ff0000000000000,
0xb960000000000000,
0x3ff0000000000000,
0xb9be200000000000,
0x3fe0000000000000,
0x3a03c00000000000,
0x3fc5555555555555,
0x3c655555555c78d9,
0x3fa5555555555555,
0x3c455555545616e2,
0x3f81111111111111,
0x3c011110121fc314,
0x3f56c16c16c16c17,
0xbbef49e06ee3a56e,
0x3f2a01a01a01a01a,
0x3b6b053e1eeab9c0,
0x3efa01a01a01a01a,
0x3ec71de3a556c733,
0x3e927e4fb7789f66,
0x3e5ae64567f54abe,
0x3e21eed8eff8958b,
0x3de6124613837216,
0x3da93974aaf26a57,
0x3d6ae7f4fd6d0bd9,
0x3d2ae7e982620b25,
0x3ce94e4ca59460d8,
0x3ca69a2a4b7ef36d,
0x3c6abfe1602308c9,
];
// Multiplier applied to x_dd.hi before rounding to obtain k.
// NOTE(review): the bit pattern looks like 1/log(2) rounded to f64 - confirm.
const LOG2INV: f64 = f64::from_bits(0x3ff71547652b82fe);
// SAFETY: `to_int_unchecked` requires a finite value whose truncation fits
// in i32. NOTE(review): nothing in this function checks that; it relies on
// callers keeping |x_dd.hi| small (a comment further down assumes
// |xh| <= 742, hence |k| <= 1070) - confirm at every call site.
let k: i32 = unsafe {
(x_dd.hi * LOG2INV)
.round_ties_even_finite()
.to_int_unchecked::<i32>()
};
const LOG2_H: f64 = f64::from_bits(0x3fe62e42fefa39ef);
/* we approximate LOG2_L ~ log(2) - LOG2_H with 38 bits, so that
k*LOG2_L is exact (k has at most 11 bits) */
const LOG2_L: f64 = f64::from_bits(0x3c7abc9e3b398000);
// Third, smallest piece of the log(2) splitting; folded into the low word below.
const LOG2_TINY: f64 = f64::from_bits(0x398f97b57a079a19);
// yh = x_dd.hi - k*LOG2_H, computed with a single fused multiply-add.
let yh = dd_fmla(-k as f64, LOG2_H, x_dd.hi);
/* since |xh+xl| >= 2.92 we have |k| >= 4;
(|k|-1/2)*log(2) <= |x| <= (|k|+1/2)*log(2) thus
1-1/(2|k|) <= |x/(k*log(2))| <= 1+1/(2|k|) thus by Sterbenz theorem
yh is exact too */
// t = x_dd.lo - k*LOG2_L as a double-double, then merged with yh.
let mut t = DoubleDouble::from_full_exact_add(-k as f64 * LOG2_L, x_dd.lo);
let mut y_dd = DoubleDouble::from_exact_add(yh, t.hi);
// Fold the remaining low parts and the -k*LOG2_TINY correction into y_dd.lo.
y_dd.lo = dd_fmla(-k as f64, LOG2_TINY, y_dd.lo + t.lo);
/* now y_dd.hi + y_dd.lo approximates xh + xl - k*log(2), and we approximate
p(y_dd.hi + y_dd.lo) in v.hi + v.lo */
/* Since |xh| <= 742, we assume |xl| <= ulp(742) = 2^-43. Then since
|k| <= round(742/log(2)) = 1070, |yl| <= 1070*LOG2_L + 2^-42 < 2^-42.7.
Since |yh| <= log(2)/2, the contribution of yl is negligible as long
as |i*p[i]*yh^(i-1)*yl| < 2^-104, which holds for i >= 16.
Thus for coefficients of degree 16 or more, we don't take yl into account.
*/
let mut h = f64::from_bits(E2[19 + 8]); // degree 19
for a in (16..=18).rev() {
h = dd_fmla(h, y_dd.hi, f64::from_bits(E2[a + 8])); // degree a, plain f64 Horner step
}
/* degree 15: h*(yh+yl)+E2[15 + 8] */
t = DoubleDouble::from_exact_mult(h, y_dd.hi);
t.lo = dd_fmla(h, y_dd.lo, t.lo);
let mut v = DoubleDouble::from_exact_add(f64::from_bits(E2[15 + 8]), t.hi);
v.lo += t.lo;
for a in (8..=14).rev() {
/* degree a: (v.hi+v.lo)*(yh+yl)+E2[a+8] */
t = DoubleDouble::quick_mult(v, y_dd);
v = DoubleDouble::from_exact_add(f64::from_bits(E2[a + 8]), t.hi);
v.lo += t.lo;
}
for a in (0..=7).rev() {
/* degree a: (v.hi+v.lo)*(yh+yl)+E2[2a]+E2[2a+1] */
t = DoubleDouble::quick_mult(v, y_dd);
v = DoubleDouble::from_exact_add(f64::from_bits(E2[2 * a]), t.hi);
v.lo += t.lo + f64::from_bits(E2[2 * a + 1]);
}
// k is returned alongside the polynomial value; no scaling by 2^k happens here.
Exp { e: k, result: v }
}
/// Accurate-path evaluation of `erfc(x)` through the asymptotic formula
/// `erfc(x) = exp(-x^2) * p(1/x)`.
///
/// `exp(-x^2)` comes from `exp_accurate` (value plus a separate power-of-two
/// exponent), `1/x` is refined to double-double with one Newton step, and `p`
/// is selected from `ASYMPTOTIC_POLY_ACCURATE` according to the size of `1/x`.
/// Results that land in the subnormal range receive an extra correction step.
#[cold]
fn erfc_asympt_accurate(x: f64) -> f64 {
/* subnormal exceptions */
// One hard-coded input whose result is returned directly instead of being computed.
if x == f64::from_bits(0x403a8f7bfbd15495) {
return dd_fmla(
f64::from_bits(0x0000000000000001),
-0.25,
f64::from_bits(0x000667bd620fd95b),
);
}
// x^2 as an exact double-double product, then exp(-x^2).
let u_dd = DoubleDouble::from_exact_mult(x, x);
let exp_result = exp_accurate(DoubleDouble::new(-u_dd.lo, -u_dd.hi));
/* compute 1/x as double-double */
let yh = 1.0 / x;
/* Newton's iteration for 1/x is y -> y + y*(1-x*y) */
let yl = yh * dd_fmla(-x, yh, 1.0);
// yh+yl approximates 1/x
// Increasing breakpoints on 1/x (raw f64 bits); the number of breakpoints
// that yh exceeds is the index of the polynomial to use.
static THRESHOLD: [u64; 10] = [
0x3fb4500000000000,
0x3fbe000000000000,
0x3fc3f00000000000,
0x3fc9500000000000,
0x3fcf500000000000,
0x3fd3100000000000,
0x3fd7100000000000,
0x3fdbc00000000000,
0x3fe0b00000000000,
0x3fe3000000000000,
];
let mut i = 0usize;
while i < THRESHOLD.len() && yh > f64::from_bits(THRESHOLD[i]) {
i += 1;
}
// i can reach THRESHOLD.len() (= 10), so the table is indexed with 0..=10.
let p = ASYMPTOTIC_POLY_ACCURATE[i];
// u_dd now holds (yh+yl)^2, neglecting the yl^2 term.
let mut u_dd = DoubleDouble::from_exact_mult(yh, yh);
u_dd.lo = dd_fmla(2.0 * yh, yl, u_dd.lo);
/* the polynomial p has degree 29+2i, and its coefficient of largest
degree is p[14+6+i] */
let mut z_dd = DoubleDouble::new(0., f64::from_bits(p[14 + 6 + i]));
// Odd degrees 27+2i down to 13: single-word coefficients.
for a in (13..=27 + 2 * i).rev().step_by(2) {
/* degree a: (zh+zl)*(uh+ul)+p[(a-1)/2+6] */
let v = DoubleDouble::quick_mult(z_dd, u_dd);
z_dd = DoubleDouble::from_full_exact_add(f64::from_bits(p[(a - 1) / 2 + 6]), v.hi);
z_dd.lo += v.lo;
}
// Odd degrees 11 down to 1: two-word coefficients p[a-1] (high) + p[a] (low).
for a in (1..=11).rev().step_by(2) {
let v = DoubleDouble::quick_mult(z_dd, u_dd);
z_dd = DoubleDouble::from_full_exact_add(f64::from_bits(p[a - 1]), v.hi);
z_dd.lo += v.lo + f64::from_bits(p[a]);
}
/* multiply by yh+yl */
u_dd = DoubleDouble::quick_mult(z_dd, DoubleDouble::new(yl, yh));
/* now uh+ul approximates p(1/x), i.e., erfc(x)*exp(x^2) */
/* now multiply (uh+ul)*(eh+el), after normalizing uh+ul to reduce the
number of exceptional cases */
u_dd = DoubleDouble::from_exact_add(u_dd.hi, u_dd.lo);
let v = DoubleDouble::quick_mult(u_dd, exp_result.result);
/* multiply by 2^e */
let mut res = ldexp(v.to_f64(), exp_result.e);
// 0x0010000000000000 is the smallest positive normal double.
if res < f64::from_bits(0x0010000000000000) {
/* for erfc(x) in the subnormal range, we have to perform a special
rounding */
let mut corr = v.hi - ldexp(res, -exp_result.e);
corr += v.lo;
/* add corr*2^e */
res += ldexp(corr, exp_result.e);
}
res
}
/// Slow, high-accuracy `erfc(x)`; `f_erfc` falls back to it when the fast
/// path's rounding test fails.
///
/// * `x < 0`: uses `erfc(x) = 1 + erf(-x)`.
/// * `0 <= x <= 0x1.b59ffb450828cp+0` (where `erfc(x) >= 2^-6`): uses
///   `erfc(x) = 1 - erf(x)`.
/// * otherwise (`0x1.b59ffb450828cp+0 < x < 0x1.b39dc41e48bfdp+4`): delegates
///   to `erfc_asympt_accurate`.
#[cold]
fn erfc_accurate(x: f64) -> f64 {
    if x < 0. {
        let erf_neg = erf_accurate(-x);
        // Exact sum 1 + hi, then fold the low parts together.
        let head = DoubleDouble::from_exact_add(1.0, erf_neg.hi);
        DoubleDouble::new(erf_neg.lo + head.lo, head.hi).to_f64()
    } else if x <= f64::from_bits(0x3ffb59ffb450828c) {
        let erf_pos = erf_accurate(x);
        // Exact difference 1 - hi, then subtract the low part of erf(x).
        let head = DoubleDouble::from_exact_add(1.0, -erf_pos.hi);
        DoubleDouble::new(head.lo - erf_pos.lo, head.hi).to_f64()
    } else {
        erfc_asympt_accurate(x)
    }
}
/// Fast path for 0x1.713786d9c7c09p+1 < x < 0x1.b39dc41e48bfdp+4,
/// using the asymptotic formula erfc(x) = exp(-x^2) * p(1/x).
///
/// Returns a double-double approximation in `Erf::result` together with an
/// absolute error bound in `Erf::err`. For `x >= 0x1.9db1bb14e15cap+4` it
/// returns a zero result with `err = 1.0` instead of computing anything.
fn erfc_asympt_fast(x: f64) -> Erf {
/* for x >= 0x1.9db1bb14e15cap+4, erfc(x) < 2^-970, and we might encounter
underflow issues in the computation of l, thus we delegate this case
to the accurate path */
// The err = 1.0 sentinel makes the rounding test in f_erfc fail, which sends
// the input to the accurate path.
if x >= f64::from_bits(0x4039db1bb14e15ca) {
return Erf {
err: 1.0,
result: DoubleDouble::default(),
};
}
let mut u = DoubleDouble::from_exact_mult(x, x);
let e_dd = exp_1(DoubleDouble::new(-u.lo, -u.hi));
/* the assumptions from exp_1 are satisfied:
* the exact product ensures |ul| <= ulp(uh), thus |ul/uh| <= 2^-52
* since |x| < 0x1.9db1bb14e15cap+4 we have
|ul| < ulp(0x1.9db1bb14e15cap+4^2) = 2^-43 */
/* eh+el approximates exp(-x^2) with maximal relative error 2^-74.139 */
/* compute 1/x as double-double */
let yh = 1.0 / x;
/* Assume 1 <= x < 2, then 0.5 <= yh <= 1,
and yh = 1/x + eps with |eps| <= 2^-53. */
/* Newton's iteration for 1/x is y -> y + y*(1-x*y) */
let yl = yh * dd_fmla(-x, yh, 1.0);
/* x*yh-1 = x*(1/x+eps)-1 = x*eps
with |x*eps| <= 2^-52, thus the error on the FMA is bounded by
ulp(2^-52.1) = 2^-105.
Now |yl| <= |yh| * 2^-52 <= 2^-52, thus the rounding error on
yh * fma(-x, yh, 1.0) is bounded also by ulp(2^-52.1) = 2^-105.
From [6], Lemma 3.7, if yl was computed exactly, then yh+yl would differ
from 1/x by at most yh^2/theta^3*(1/x-yh)^2 for some theta in [yh,1/x]
or [1/x,yh].
Since yh, 1/x <= 1, this gives eps^2 <= 2^-106.
Adding the rounding errors, we have:
|yh + yl - 1/x| <= 2^-105 + 2^-105 + 2^-106 < 2^-103.67.
For the relative error, since |yh| >= 1/2, this gives:
|yh + yl - 1/x| < 2^-102.67 * |yh+yl|
*/
// Increasing breakpoints on 1/x (raw f64 bits); the number of breakpoints
// that yh exceeds is the index of the polynomial to use (0..=6).
const THRESHOLD: [u64; 6] = [
0x3fbd500000000000,
0x3fc59da6ca291ba6,
0x3fcbc00000000000,
0x3fd0c00000000000,
0x3fd3800000000000,
0x3fd6300000000000,
];
let mut i = 0usize;
while i < THRESHOLD.len() && yh > f64::from_bits(THRESHOLD[i]) {
i += 1;
}
let p = ASYMPTOTIC_POLY[i];
u = DoubleDouble::from_exact_mult(yh, yh);
/* Since |yh| <= 1, we have |uh| <= 1 and |ul| <= 2^-53. */
u.lo = dd_fmla(2.0 * yh, yl, u.lo);
/* uh+ul approximates (yh+yl)^2, with absolute error bounded by
ulp(ul) + yl^2, where ulp(ul) is the maximal rounding error in
the FMA, and yl^2 is the neglected term.
Since |ul| <= 2^-53, ulp(ul) <= 2^-105, and since |yl| <= 2^-52,
this yields |uh + ul - yh^2| <= 2^-105 + 2^-104 < 2^-103.41.
For the relative error, since |(yh+yl)^2| >= 1/4:
|uh + ul - yh^2| < 2^-101.41 * |uh+ul|.
And relatively to 1/x^2:
yh + yl = 1/x * (1 + eps1) with |eps1| < 2^-102.67
uh + ul = (yh+yl)^2 * (1 + eps2) with |eps2| < 2^-101.41
This yields:
|uh + ul - 1/x^2| < 2^-100.90 * |uh+ul|.
*/
/* evaluate p(uh+ul) */
// The three highest-degree terms use plain double precision on uh only.
let mut zh: f64 = f64::from_bits(p[12]); // degree 23
zh = dd_fmla(zh, u.hi, f64::from_bits(p[11])); // degree 21
zh = dd_fmla(zh, u.hi, f64::from_bits(p[10])); // degree 19
/* degree 17: zh*(uh+ul)+p[9] */
let mut v = DoubleDouble::quick_f64_mult(zh, u);
let mut z_dd = DoubleDouble::from_exact_add(f64::from_bits(p[9]), v.hi);
z_dd.lo += v.lo;
// Odd degrees 15 down to 3: double-double Horner steps with the single-word
// coefficient p[(a + 1) / 2], i.e. p[8] down to p[2].
for a in (3..=15).rev().step_by(2) {
v = DoubleDouble::quick_mult(z_dd, u);
z_dd = DoubleDouble::from_exact_add(f64::from_bits(p[((a + 1) / 2) as usize]), v.hi);
z_dd.lo += v.lo;
}
/* degree 1: (zh+zl)*(uh+ul)+p[0]+p[1] */
v = DoubleDouble::quick_mult(z_dd, u);
z_dd = DoubleDouble::from_exact_add(f64::from_bits(p[0]), v.hi);
z_dd.lo += v.lo + f64::from_bits(p[1]);
/* multiply by yh+yl */
u = DoubleDouble::quick_mult(z_dd, DoubleDouble::new(yl, yh));
/* now uh+ul approximates p(1/x) */
/* now multiply (uh+ul)*(eh+el) */
v = DoubleDouble::quick_mult(u, e_dd);
/* Write y = 1/x. We have the following errors:
* the maximal mathematical error is:
|erfc(x)*exp(x^2) - p(y)| < 2^-71.804 * |p(y)| (for i=3) thus
|erfc(x) - exp(-x^2)*p(y)| < 2^-71.804 * |exp(-x^2)*p(y)|
* the error in approximating exp(-x^2) by eh+el:
|eh + el - exp(-x^2)| < 2^-74.139 * |eh + el|
* the fact that we evaluate p on yh+yl instead of 1/x
this error is bounded by |p'| * |yh+yl - 1/x|, where
|yh+yl - 1/x| < 2^-102.67 * |yh+yl|, and the relative
error is bounded by |p'/p| * |yh+yl - 1/x|.
Since the maximal value of |p'/p| is bounded by 27.2 (for i=0),
this yields 27.2 * 2^-102.67 < 2^-97.9
* the rounding errors when evaluating p on yh+yl: this error is bounded
(relatively) by 2^-67.184 (for i=5), see analyze_erfc_asympt_fast()
in erfc.sage
* the rounding error in (uh+ul)*(eh+el): we assume this error is bounded
by 2^-80 (relatively)
This yields a global relative bound of:
(1+2^-71.804)*(1+2^-74.139)*(1+2^-97.9)*(1+2^-67.184)*(1+2^-80)-1
< 2^-67.115
*/
// Convert the relative bound into an absolute one. For very small results a
// fixed bound (the smallest positive normal double) is used instead.
if v.hi >= f64::from_bits(0x044151b9a3fdd5c9) {
Erf {
err: f64::from_bits(0x3bbd900000000000) * v.hi,
result: v,
} /* 2^-67.115 < 0x1.d9p-68 */
} else {
Erf {
result: v,
err: f64::from_bits(0x0010000000000000),
} // this overestimates 0x1.d9p-68 * h
}
}
/// Fast double-double approximation of `erfc(x)` with an absolute error bound.
///
/// * `x < 0`: `erfc(x) = 1 + erf(-x)`.
/// * `0 <= x <= 0x1.713786d9c7c09p+1`: `erfc(x) = 1 - erf(x)`.
/// * larger `x`: delegated to `erfc_asympt_fast`.
///
/// In the first two cases `erf_fast` supplies a relative error, which is
/// converted to an absolute one and enlarged to cover the extra rounding
/// performed here.
#[inline]
fn erfc_fast(x: f64) -> Erf {
    if x < 0. {
        let erf_neg = erf_fast(-x);
        // erf_fast reports a relative error (at most 0x1.78p-69); make it absolute.
        let abs_err = erf_neg.err * erf_neg.result.hi;
        let mut sum = DoubleDouble::from_exact_add(1.0, erf_neg.result.hi);
        sum.lo += erf_neg.result.lo;
        // With h <= 2 the exact-add error is at most 2^-104, and folding in the
        // low word costs at most 2^-102; together that is 0x1.4p-102. Since
        // abs_err is at most 0x1.78p-70, adding 0x1.4p-102 to it is exact.
        return Erf {
            result: sum,
            err: abs_err + f64::from_bits(0x3994000000000000),
        };
    }

    if x <= f64::from_bits(0x400713786d9c7c09) {
        let erf_pos = erf_fast(x);
        // Relative error (at most 0x1.78p-69) converted into an absolute one.
        let abs_err = erf_pos.err * erf_pos.result.hi;
        let mut diff = DoubleDouble::from_exact_add(1.0, -erf_pos.result.hi);
        diff.lo -= erf_pos.result.lo;
        let err = if x >= f64::from_bits(0x3fde861fbb24c00a) {
            // For x >= 0x1.e861fbb24c00ap-2 we have erf(x) >= 1/2, so 1 - h is
            // exact by Sterbenz' theorem and no extra error is introduced.
            abs_err
        } else {
            // Here h <= 1/2: the exact-add error is at most 2^-106 and the low-word
            // subtraction costs at most 2^-104, i.e. 0x1.4p-104 in total. abs_err
            // is at most 0x1.78p-70, so this addition is exact.
            abs_err + f64::from_bits(0x3974000000000000)
        };
        return Erf { result: diff, err };
    }

    // Now 0x1.713786d9c7c09p+1 < x < 0x1.b39dc41e48bfdp+4, thus erfc(x) < 0.000046.
    erfc_asympt_fast(x)
}
/// Complementary error function
///
/// Max ulp 0.5
pub fn f_erfc(x: f64) -> f64 {
    const SIGN_BIT: u64 = 0x8000000000000000;
    const POS_INF_BITS: u64 = 0x7ff0000000000000;
    const NEG_INF_BITS: u64 = 0xfff0000000000000;
    // 2^-54
    const TWO_POW_M54: f64 = f64::from_bits(0x3c90000000000000);

    let bits: u64 = x.to_bits();
    let abs_bits: u64 = bits & 0x7fff_ffff_ffff_ffff;

    // Handle the inputs whose result is known without any real computation.
    // Each half either returns directly or reports whether erfc(x) rounds to 1.
    let rounds_to_one = if bits >= SIGN_BIT {
        // Sign bit set: x = -NaN or x <= 0 (excluding +0).
        if bits >= 0xc017744f8f74e94bu64 {
            // x = NaN or x <= -0x1.7744f8f74e94bp2, where erfc(x) rounds to 2.
            return if bits == NEG_INF_BITS {
                2.0 // -Inf
            } else if bits > NEG_INF_BITS {
                x + x // NaN
            } else {
                // rounds to 2 or just below 2, depending on the rounding mode
                black_box(2.0) - black_box(TWO_POW_M54)
            };
        }
        // for -0x1.c5bf891b4ef6ap-54 <= x <= 0, erfc(x) rounds to 1 (to nearest)
        f64::from_bits(0xbc9c5bf891b4ef6a) <= x
    } else {
        // Sign bit clear: x = +NaN or x >= 0 (excluding -0).
        if abs_bits >= 0x403b39dc41e48bfdu64 {
            // x = NaN or x >= 0x1.b39dc41e48bfdp+4, where erfc(x) < 2^-1075.
            return if abs_bits == POS_INF_BITS {
                0.0 // +Inf
            } else if abs_bits > POS_INF_BITS {
                x + x // NaN
            } else {
                // 0 or 2^-1074, depending on the rounding mode
                black_box(f64::from_bits(0x0000000000000001)) * black_box(0.25)
            };
        }
        // for 0 <= x <= 0x1.c5bf891b4ef6ap-55, erfc(x) rounds to 1 (to nearest)
        x <= f64::from_bits(0x3c8c5bf891b4ef6a)
    };
    if rounds_to_one {
        return dd_fmla(-x, TWO_POW_M54, 1.0);
    }

    // Now -0x1.7744f8f74e94bp+2 < x < -0x1.c5bf891b4ef6ap-54
    // or 0x1.c5bf891b4ef6ap-55 < x < 0x1.b39dc41e48bfdp+4.
    let fast = erfc_fast(x);
    // Rounding test: if both ends of the error interval round to the same
    // double, that double is the answer; otherwise use the accurate path.
    let lower = fast.result.hi + (fast.result.lo - fast.err);
    let upper = fast.result.hi + (fast.result.lo + fast.err);
    if lower == upper {
        lower
    } else {
        erfc_accurate(x)
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Spot checks for f_erfc: ordinary arguments, arguments very close to zero
// (both signs), and the NaN / +Inf / -Inf special cases.
#[test]
fn test_erfc() {
assert_eq!(f_erfc(1.0), 0.15729920705028513);
assert_eq!(f_erfc(0.5), 0.4795001221869535);
assert_eq!(f_erfc(0.000000005), 0.9999999943581042);
assert_eq!(f_erfc(-0.00000000000065465465423305), 1.0000000000007387);
assert!(f_erfc(f64::NAN).is_nan());
assert_eq!(f_erfc(f64::INFINITY), 0.0);
assert_eq!(f_erfc(f64::NEG_INFINITY), 2.0);
}
}

421
vendor/pxfm/src/err/erfcx.rs vendored Normal file
View File

@@ -0,0 +1,421 @@
/*
* // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::double_double::DoubleDouble;
use crate::pow_exec::exp_dd_fast;
/// Double-double evaluation of `erfcx(x) = exp(x^2) * erfc(x)`.
///
/// * `x <= 8`: degree-11/11 rational approximant, numerator and denominator
///   each evaluated with an Estrin-style scheme in `x`.
/// * `x > 8`: `1/sqrt(pi) * (1/x) * R(1/x)` with a degree-8/8 rational `R`
///   evaluated by Horner's rule in `r = 1/x`.
///
/// NOTE(review): the approximants were fitted on `[1, 8]` and on
/// `1/x` in `[2^-23, 1/8]`; the caller visible in this file only passes `|x|`
/// after handling `|x| <= 1` itself.
#[inline]
fn core_erfcx(x: f64) -> DoubleDouble {
if x <= 8. {
// Rational approximant for erfcx generated by Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=Exp[x^2]Erfc[x]
// {err0,approx,err1}=MiniMaxApproximation[f[z],{z,{1, 8},11,11},WorkingPrecision->75,MaxIterations->100]
// num=Numerator[approx];
// den=Denominator[approx];
// coeffs=CoefficientList[num,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// coeffs=CoefficientList[den,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// Numerator coefficients, lowest degree first, as bit-pattern pairs
// (the first word looks like the low part, the second the high part).
const P: [(u64, u64); 12] = [
(0xbc836faeb9a312bb, 0x3ff000000000ee8e),
(0x3c91842f891bec6a, 0x4002ca20a78aaf8f),
(0x3c7916e8a1c30681, 0x4005e955f70aed5b),
(0x3cabad150d828d82, 0x4000646f5807ad07),
(0xbc6f482680d66d9c, 0x3ff1449e03ed381c),
(0xbc7188796156ae19, 0x3fdaa7e997e3b034),
(0xbc5c8af0642761e3, 0x3fbe836282058d4a),
(0xbc372829be2d072f, 0x3f99a2b2adc2ec05),
(0x3c020cc8b96000ab, 0x3f6e6cc3d120a955),
(0x3bdd138e6c136806, 0x3f3743d6735eaf13),
(0xbb9fbd22f0675122, 0x3ef1c1d36ebe29a2),
(0xb89093cc981c934c, 0xbc43c18bc6385c74),
];
let x2 = DoubleDouble::from_exact_mult(x, x);
let x4 = x2 * x2;
let x8 = x4 * x4;
// Estrin scheme, level 1: pair adjacent coefficients as P[2j+1]*x + P[2j].
let e0 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[1]),
x,
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[3]),
x,
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[5]),
x,
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[7]),
x,
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[9]),
x,
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[11]),
x,
DoubleDouble::from_bit_pair(P[10]),
);
// Levels 2-4: combine the pairs with x^2, then x^4, then x^8.
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
// Denominator coefficients, same layout; Q[0] is exactly 1.
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc95d65be031374e, 0x400bd10c4fb1dbe5),
(0x3cb2d8f661db08a0, 0x4016a649ff973199),
(0x3ca32cbcfdc0ea93, 0x4016daab399c1ffc),
(0xbca2982868536578, 0x400fd61ab892d14c),
(0xbca2e29199e17fd9, 0x40001f56c4d495a3),
(0x3c412ce623a1790a, 0x3fe852b582135164),
(0x3c61152eaf4b0dc5, 0x3fcb760564da7cde),
(0xbc1b57ff91d81959, 0x3fa6e146988df835),
(0x3c17183d8445f19a, 0x3f7b06599b5e912f),
(0xbbd0ada61b85ff98, 0x3f449e39467b73d0),
(0xbb658d84fc735e67, 0x3eff794442532b51),
];
// Q[0] == 1.0, so the constant term is added as a plain f64.
let e0 = DoubleDouble::mul_f64_add_f64(
DoubleDouble::from_bit_pair(Q[1]),
x,
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[3]),
x,
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[5]),
x,
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[7]),
x,
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[9]),
x,
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[11]),
x,
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
return DoubleDouble::div(p_num, p_den);
}
// for large x erfcx(x) ~ 1/sqrt(pi) / x * R(1/x)
const ONE_OVER_SQRT_PI: DoubleDouble =
DoubleDouble::from_bit_pair((0x3c61ae3a914fed80, 0x3fe20dd750429b6d));
// r approximates 1/x in double-double.
let r = DoubleDouble::from_quick_recip(x);
// Rational approximant generated by Wolfram:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=Exp[1/x^2]Erfc[1/x]/x*Sqrt[Pi]
// {err0,approx}=MiniMaxApproximation[f[z],{z,{2^-23,1/8},8,8},WorkingPrecision->75,MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[num,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// coeffs=CoefficientList[den,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const P: [(u64, u64); 9] = [
(0xbb1d2ee37e46a4cd, 0x3ff0000000000000),
(0x3ca2e575a4ce3d30, 0x4001303ab00c8bac),
(0xbccf38381e5ee521, 0x4030a97aeed54c9f),
(0xbcc3a2842df0dd3d, 0x4036f7733c9fd2f9),
(0xbcfeaf46506f16ed, 0x4051c5f382750553),
(0x3ccbb9f5e11d176a, 0x404ac0081e0749e0),
(0xbcf374f8966ae2a5, 0x4052082526d99a5c),
(0x3cbb5530b924f224, 0x402feabbf6571c29),
(0xbcbcdd50a3ca4776, 0x40118726e1f2d204),
];
const Q: [(u64, u64); 9] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3ca2e4613c9e0017, 0x4001303ab00c8bac),
(0xbcce5f17cf14e51d, 0x4031297aeed54c9f),
(0xbcdf7e0fed176f92, 0x40380a76e7a09bb2),
(0x3cfc57b67a2797af, 0x4053bb22e04faf3e),
(0xbcd3e63b7410b46b, 0x404ff46317ae9483),
(0xbce122c15db2653f, 0x405925ef8a428c36),
(0x3ce174ebe3e52c8e, 0x4040f49acfe692e1),
(0xbcda0e267ce6e2e6, 0x40351a07878bfbd3),
];
// Horner evaluation of the numerator in r, highest degree first.
let mut p_num = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[8]),
r,
DoubleDouble::from_bit_pair(P[7]),
);
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[6]));
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[5]));
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[4]));
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[3]));
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[2]));
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[1]));
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[0]));
// Horner evaluation of the denominator in r; the constant term Q[0] == 1.0
// is added as a plain f64 in the last step.
let mut p_den = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[8]),
r,
DoubleDouble::from_bit_pair(Q[7]),
);
p_den = DoubleDouble::mul_add(p_den, r, DoubleDouble::from_bit_pair(Q[6]));
p_den = DoubleDouble::mul_add(p_den, r, DoubleDouble::from_bit_pair(Q[5]));
p_den = DoubleDouble::mul_add(p_den, r, DoubleDouble::from_bit_pair(Q[4]));
p_den = DoubleDouble::mul_add(p_den, r, DoubleDouble::from_bit_pair(Q[3]));
p_den = DoubleDouble::mul_add(p_den, r, DoubleDouble::from_bit_pair(Q[2]));
p_den = DoubleDouble::mul_add(p_den, r, DoubleDouble::from_bit_pair(Q[1]));
p_den = DoubleDouble::mul_add_f64(p_den, r, f64::from_bits(0x3ff0000000000000));
// (1/sqrt(pi) * r) * (P(r) / Q(r))
let v0 = DoubleDouble::quick_mult(ONE_OVER_SQRT_PI, r);
let v1 = DoubleDouble::div(p_num, p_den);
DoubleDouble::quick_mult(v0, v1)
}
/// Scaled complementary error function (exp(x^2)*erfc(x))
///
/// Special cases handled before any approximation:
/// * NaN returns NaN;
/// * `±0` and `|x| <= 2.2204460492503131e-18` return `1`;
/// * `+Inf` returns `0`, `-Inf` returns `+Inf`;
/// * `x <= -26.6417...` returns `+Inf`.
///
/// Otherwise: a linear approximation for `|x| <= f64::EPSILON`, a rational
/// approximant for `|x| <= 1`, and `core_erfcx(|x|)` beyond that, reflected
/// for negative `x`.
pub fn f_erfcx(x: f64) -> f64 {
// Bit pattern with the sign bit shifted out, so comparisons act on |x|.
let ux = x.to_bits().wrapping_shl(1);
if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 {
// x == NaN, x == inf, x == 0, |x| <= f64::EPSILON
if x.is_nan() {
return f64::NAN;
}
if x.to_bits().wrapping_shl(1) == 0 {
return 1.;
}
if x.is_infinite() {
return if x.is_sign_positive() {
0.
} else {
f64::INFINITY
};
}
if ux <= 0x7888f5c28f5c28f6u64 {
// |x| <= 2.2204460492503131e-18
return 1.;
}
// |x| <= f64::EPSILON
// Linear approximation 1 + M_TWO_OVER_SQRT_PI * x, with the constant split
// into high and low words.
use crate::common::f_fmla;
const M_TWO_OVER_SQRT_PI: DoubleDouble =
DoubleDouble::from_bit_pair((0xbc71ae3a914fed80, 0xbff20dd750429b6d));
return f_fmla(
M_TWO_OVER_SQRT_PI.lo,
x,
f_fmla(M_TWO_OVER_SQRT_PI.hi, x, 1.),
);
}
// Negative NaN and -Inf were handled above, so this bit comparison only
// matches finite negative x of large magnitude.
if x.to_bits() >= 0xc03aa449ebc84dd6 {
// x <= -sqrt(709.783) ~ -26.6417
return f64::INFINITY;
}
let ax = x.to_bits() & 0x7fff_ffff_ffff_ffffu64;
if ax <= 0x3ff0000000000000u64 {
// |x| <= 1
// Rational approximant generated by Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=Exp[x^2]Erfc[x]
// {err0,approx}=MiniMaxApproximation[f[z],{z,{-1, 1},10,10},WorkingPrecision->75,MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[num,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// coeffs=CoefficientList[den,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// Numerator coefficients, lowest degree first, as bit-pattern pairs.
const P: [(u64, u64); 11] = [
(0xbb488611350b1950, 0x3ff0000000000000),
(0xbc86ae482c7f2342, 0x3ff9c5d39e89602f),
(0x3c6702d70b807254, 0x3ff5a4c406d6468b),
(0x3c7fe41fc43cfed5, 0x3fe708e7f401bd0c),
(0x3c73a4a355172c6d, 0x3fd0d9a0c1a7126c),
(0x3c5f4c372faa270f, 0x3fb154722e30762e),
(0xbc04c0227976379e, 0x3f88ecebb62ce646),
(0xbbdc9ea151b9eb33, 0x3f580ea84143877b),
(0xbb6dae7001a91491, 0x3f1c3c5f95579b0a),
(0x3b6aca5e82c52897, 0x3ecea4db51968d9e),
(0x3a41c4edd175d2af, 0x3dbc0dccea7fc8ed),
];
let x2 = DoubleDouble::from_exact_mult(x, x);
let x4 = x2 * x2;
let x8 = x4 * x4;
// Estrin scheme: pair adjacent coefficients, then combine with x^2, x^4, x^8.
let q0 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[1]),
x,
DoubleDouble::from_bit_pair(P[0]),
);
let q1 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[3]),
x,
DoubleDouble::from_bit_pair(P[2]),
);
let q2 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[5]),
x,
DoubleDouble::from_bit_pair(P[4]),
);
let q3 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[7]),
x,
DoubleDouble::from_bit_pair(P[6]),
);
let q4 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[9]),
x,
DoubleDouble::from_bit_pair(P[8]),
);
let r0 = DoubleDouble::mul_add(x2, q1, q0);
let r1 = DoubleDouble::mul_add(x2, q3, q2);
let s0 = DoubleDouble::mul_add(x4, r1, r0);
// The degree-10 coefficient has no partner, so it is folded in with x^2.
let s1 = DoubleDouble::mul_add(x2, DoubleDouble::from_bit_pair(P[10]), q4);
let p_num = DoubleDouble::mul_add(x8, s1, s0);
// Denominator coefficients, same layout; Q[0] is exactly 1.
const Q: [(u64, u64); 11] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc7bae414cad99c8, 0x4005e9d57765fdce),
(0x3c8fa553bed15758, 0x400b8c670b3fbcda),
(0x3ca6c7ad610f1019, 0x4004f2ca59958153),
(0x3c87787f336cc4e6, 0x3ff55c267090315a),
(0xbc6ef55d4b2c4150, 0x3fde8b84b64b6f4e),
(0x3c570d63c94be3a3, 0x3fbf0d5e36017482),
(0x3c1882a745ef572e, 0x3f962f73633506c1),
(0xbc0850bb6fc82764, 0x3f65593e0dc46acd),
(0xbbb9dc0097d7d776, 0x3f290545603e2f94),
(0xbb776e5781e3889d, 0x3edb29c49d18cf89),
];
// Q[0] == 1.0, so the constant term is added as a plain f64.
let q0 = DoubleDouble::mul_f64_add_f64(
DoubleDouble::from_bit_pair(Q[1]),
x,
f64::from_bits(0x3ff0000000000000),
);
let q1 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[3]),
x,
DoubleDouble::from_bit_pair(Q[2]),
);
let q2 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[5]),
x,
DoubleDouble::from_bit_pair(Q[4]),
);
let q3 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[7]),
x,
DoubleDouble::from_bit_pair(Q[6]),
);
let q4 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[9]),
x,
DoubleDouble::from_bit_pair(Q[8]),
);
let r0 = DoubleDouble::mul_add(x2, q1, q0);
let r1 = DoubleDouble::mul_add(x2, q3, q2);
let s0 = DoubleDouble::mul_add(x4, r1, r0);
let s1 = DoubleDouble::mul_add(x2, DoubleDouble::from_bit_pair(Q[10]), q4);
let p_den = DoubleDouble::mul_add(x8, s1, s0);
let v = DoubleDouble::div(p_num, p_den);
return v.to_f64();
}
// |x| > 1: evaluate on |x| and reflect for negative arguments.
let mut erfcx_abs_x = core_erfcx(f64::from_bits(ax));
if x < 0. {
// exp(x^2)erfc(-x) = 2*exp(x^2) - erfcx(|x|)
erfcx_abs_x = DoubleDouble::from_exact_add(erfcx_abs_x.hi, erfcx_abs_x.lo);
let d2x = DoubleDouble::from_exact_mult(x, x);
let expd2x = exp_dd_fast(d2x);
return DoubleDouble::mul_f64_add(expd2x, 2., -erfcx_abs_x).to_f64();
}
erfcx_abs_x.to_f64()
}
#[cfg(test)]
mod tests {
use crate::f_erfcx;
// Spot checks for f_erfcx: tiny arguments around the early-return thresholds,
// matched positive/negative pairs, large negative arguments that overflow to
// +Inf, and the NaN / ±Inf special cases.
#[test]
fn test_erfcx() {
assert_eq!(f_erfcx(2.2204460492503131e-18), 1.0);
assert_eq!(f_erfcx(-2.2204460492503131e-18), 1.0);
assert_eq!(f_erfcx(-f64::EPSILON), 1.0000000000000002);
assert_eq!(f_erfcx(f64::EPSILON), 0.9999999999999998);
assert_eq!(f_erfcx(-173.), f64::INFINITY);
assert_eq!(f_erfcx(-9.4324165432), 8.718049147018359e38);
assert_eq!(f_erfcx(9.4324165432), 0.059483265496416374);
assert_eq!(f_erfcx(-1.32432512125), 11.200579112797806);
assert_eq!(f_erfcx(1.32432512125), 0.3528722004785406);
assert_eq!(f_erfcx(-0.532431235), 2.0560589406595384);
assert_eq!(f_erfcx(0.532431235), 0.5994337293294584);
assert_eq!(f_erfcx(1e-26), 1.0);
assert_eq!(f_erfcx(-0.500000000023073), 1.952360489253639);
assert_eq!(f_erfcx(-175.), f64::INFINITY);
assert_eq!(f_erfcx(f64::INFINITY), 0.);
assert_eq!(f_erfcx(f64::NEG_INFINITY), f64::INFINITY);
assert!(f_erfcx(f64::NAN).is_nan());
}
}

229
vendor/pxfm/src/err/erfcxf.rs vendored Normal file
View File

@@ -0,0 +1,229 @@
/*
* // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::exponents::core_expdf;
use crate::polyeval::{f_estrin_polyeval8, f_polyeval6};
/// Evaluates `erfcx(x) = exp(x^2) * erfc(x)` in `f64` for a single-precision
/// argument.
///
/// The argument is expected to be greater than 1. Below 8 a degree-7/7
/// rational approximant in `x` is used; from 8 upwards the result is
/// `1/sqrt(pi) * (1/x) * R(1/x)` with a degree-5/5 rational `R`.
#[inline]
fn core_erfcx(x: f32) -> f64 {
    // x here is already always > 1
    let xd = x as f64;
    if x < 8. {
        // Rational approximant generated by Wolfram Mathematica:
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=Exp[x^2]Erfc[x]
        // {err0,approx,err1}=MiniMaxApproximation[f[z],{z,{1,8},7,7},WorkingPrecision->75,MaxIterations->100]
        // num=Numerator[approx];
        // den=Denominator[approx];
        // coeffs=CoefficientList[num,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        // coeffs=CoefficientList[den,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        const NUM: [f64; 8] = [
            f64::from_bits(0x3ff00000804c8f8f),
            f64::from_bits(0x3ffb7307ea8fdbeb),
            f64::from_bits(0x3ff7081ba7bc735c),
            f64::from_bits(0x3fe767338b33532a),
            f64::from_bits(0x3fce3c8288507fd6),
            f64::from_bits(0x3fa7ca2cb4ae697f),
            f64::from_bits(0x3f72b11b0dfb2348),
            f64::from_bits(0xbd9f64f0c15c479b),
        ];
        const DEN: [f64; 8] = [
            f64::from_bits(0x3ff0000000000000),
            f64::from_bits(0x4006c071e850132e),
            f64::from_bits(0x400d30326bc347ee),
            f64::from_bits(0x40060d8d56bada75),
            f64::from_bits(0x3ff56643fc4580eb),
            f64::from_bits(0x3fdb0e194e72a513),
            f64::from_bits(0x3fb5154759b61be3),
            f64::from_bits(0x3f8090b063cce524),
        ];
        let numerator = f_estrin_polyeval8(
            xd, NUM[0], NUM[1], NUM[2], NUM[3], NUM[4], NUM[5], NUM[6], NUM[7],
        );
        let denominator = f_estrin_polyeval8(
            xd, DEN[0], DEN[1], DEN[2], DEN[3], DEN[4], DEN[5], DEN[6], DEN[7],
        );
        return numerator / denominator;
    }

    // for large x erfcx(x) ~ 1/sqrt(pi) / x * R(1/x)
    const ONE_OVER_SQRT_PI: f64 = f64::from_bits(0x3fe20dd750429b6d);
    // Rational approximant generated by Wolfram Mathematica:
    // <<FunctionApproximations`
    // ClearAll["Global`*"]
    // f[x_]:=Exp[1/x^2]Erfc[1/x]/x*Sqrt[Pi]
    // {err0,approx}=MiniMaxApproximation[f[z],{z,{2^-12,1/8},5,5},WorkingPrecision->75,MaxIterations->100]
    // num=Numerator[approx][[1]];
    // den=Denominator[approx][[1]];
    // coeffs=CoefficientList[num,z];
    // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
    // coeffs=CoefficientList[den,z];
    // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
    const TAIL_NUM: [f64; 6] = [
        f64::from_bits(0x3ff0000000000002),
        f64::from_bits(0xbfd09caf2bb541c3),
        f64::from_bits(0x40132238367ae454),
        f64::from_bits(0xc0060bc62c3711b1),
        f64::from_bits(0x40024a90d229158d),
        f64::from_bits(0xc0013665d8ff3813),
    ];
    const TAIL_DEN: [f64; 6] = [
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfd09caf2bb5101d),
        f64::from_bits(0x4015223836772f2c),
        f64::from_bits(0xc00715911b5f5f5c),
        f64::from_bits(0x4010b66411ec4e1f),
        f64::from_bits(0xc00b325c767ed436),
    ];
    let inv_x = 1. / xd;
    let numerator = f_polyeval6(
        inv_x,
        TAIL_NUM[0],
        TAIL_NUM[1],
        TAIL_NUM[2],
        TAIL_NUM[3],
        TAIL_NUM[4],
        TAIL_NUM[5],
    );
    let denominator = f_polyeval6(
        inv_x,
        TAIL_DEN[0],
        TAIL_DEN[1],
        TAIL_DEN[2],
        TAIL_DEN[3],
        TAIL_DEN[4],
        TAIL_DEN[5],
    );
    let ratio = numerator / denominator;
    (inv_x * ONE_OVER_SQRT_PI) * ratio
}
/// Scaled complementary error function (exp(x^2)*erfc(x))
///
/// Special cases handled explicitly below:
/// - `|x| <= 1.19209290e-08` (including `+0`/`-0`) returns `1`;
/// - `+inf` returns `0`, `-inf` returns `+inf`, NaN returns NaN;
/// - `x <= -9.382415` returns `+inf`.
///
/// Every other input is evaluated in `f64` and converted to `f32` once at the end,
/// except the `|x| <= 2^-23` branch on FMA-capable targets, which stays in `f32`.
///
/// ulp 0.5
pub fn f_erfcxf(x: f32) -> f32 {
// Shifting the bit pattern left by one drops the sign bit, so `ux` orders inputs by |x|.
let ux = x.to_bits().wrapping_shl(1);
// `0xff << 24` is the shifted encoding of infinity; anything at or above it is inf or NaN.
if ux >= 0xffu32 << 24 || ux <= 0x6499_999au32 {
// |x| == 0, |x| == inf, |x| == NaN, |x| <= 1.19209290e-08
if ux <= 0x6499_999au32 {
// |x| == 0, |x| <= 1.19209290e-08
return 1.;
}
if x.is_infinite() {
return if x.is_sign_positive() {
0.
} else {
f32::INFINITY
};
}
return f32::NAN; // x == NaN
}
// Bit pattern of |x| (sign bit cleared).
let ax = x.to_bits() & 0x7fff_ffff;
if x <= -9.382415 {
// x <= -9.382415
return f32::INFINITY;
}
// 0x34000000 is the bit pattern of 2^-23 (f32::EPSILON).
if ax <= 0x34000000u32 {
// |x| < ulp(1) we use taylor series at 0
// erfcx(x) ~ 1-(2 x)/Sqrt[\[Pi]]+x^2-(4 x^3)/(3 Sqrt[\[Pi]])+x^4/2-(8 x^5)/(15 Sqrt[\[Pi]])+O[x]^6
// Only the linear term of that series is evaluated here: 1 - 2x/sqrt(pi).
// Targets with a hardware FMA evaluate it directly in f32.
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
use crate::common::f_fmlaf;
const M_TWO_OVER_SQRT_PI: f32 = f32::from_bits(0xbf906ebb);
return f_fmlaf(x, M_TWO_OVER_SQRT_PI, 1.);
}
// All other targets evaluate the same expression in f64 and narrow the result.
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
use crate::common::f_fmla;
const M_TWO_OVER_SQRT_PI: f64 = f64::from_bits(0xbff20dd750429b6d);
let dx = x as f64;
return f_fmla(dx, M_TWO_OVER_SQRT_PI, 1.) as f32;
}
}
// 0x3f800000 is the bit pattern of 1.0.
if ax <= 0x3f800000u32 {
// |x| <= 1
// The signed input is used here: the approximant is fitted on [-1, 1].
let dx = x as f64;
// Generated by Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=Exp[x^2]Erfc[x]
// {err0,approx}=MiniMaxApproximation[f[z],{z,{-1,1},7,7},WorkingPrecision->75,MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[num,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// coeffs=CoefficientList[den,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// Numerator of the rational approximant (eight coefficients).
let p_num = f_estrin_polyeval8(
dx,
f64::from_bits(0x3feffffffffffff8),
f64::from_bits(0x3ff26c328bd2dc5f),
f64::from_bits(0x3fe6f91b9fa5f58c),
f64::from_bits(0x3fd09edf3fcf5ee1),
f64::from_bits(0x3faddb3bcedbff91),
f64::from_bits(0x3f7e43b5dd4b7587),
f64::from_bits(0x3f3baab6b3e61d7b),
f64::from_bits(0xbe83e7d629825321),
);
// Denominator of the rational approximant (eight coefficients).
let p_den = f_estrin_polyeval8(
dx,
f64::from_bits(0x3ff0000000000000),
f64::from_bits(0x40023d04ee0abc28),
f64::from_bits(0x400252b377263d61),
f64::from_bits(0x3ff510af7f826479),
f64::from_bits(0x3fddfc089c4731ed),
f64::from_bits(0x3fba79b040e28b0a),
f64::from_bits(0x3f8aea2f3579235a),
f64::from_bits(0x3f485d2875b4f88c),
);
return (p_num / p_den) as f32;
}
// |x| > 1: evaluate the kernel on |x|.
// NOTE(review): `core_erfcx` is defined outside this block; presumably it returns erfcx(|x|) as f64 — confirm.
let erfcx_abs_x = core_erfcx(f32::from_bits(ax));
if x < 0. {
// exp(x^2)erfc(-x) = 2*exp(x^2) - erfcx(|x|)
let dx = x as f64;
return f_fmla(2., core_expdf(dx * dx), -erfcx_abs_x) as f32;
}
erfcx_abs_x as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for `f_erfcxf`: tiny arguments, ordinary positive and negative
    /// arguments, the negative overflow region, and the IEEE special values.
    #[test]
    fn test_erfcx() {
        // (input, expected result) pairs compared for exact equality.
        let cases: [(f32, f32); 11] = [
            (5.19209290e-09, 1.0),
            (1.19209290e-08, 1.0),
            (f32::EPSILON, 0.9999999),
            (12.1, 0.046469606),
            (7.1, 0.07869752),
            (1.1, 0.40173045),
            (-0.23, 1.3232007),
            (-1.4325, 15.234794),
            (-10., f32::INFINITY),
            (f32::INFINITY, 0.),
            (f32::NEG_INFINITY, f32::INFINITY),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_erfcxf(input), expected);
        }
        // NaN never compares equal, so it is checked separately.
        assert!(f_erfcxf(f32::NAN).is_nan());
    }
}

416
vendor/pxfm/src/err/erff.rs vendored Normal file
View File

@@ -0,0 +1,416 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
// Polynomials approximating erf(x)/x on ( k/8, (k + 1)/8 ) generated by Sollya
// with:
// > P = fpminimax(erf(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|D...|],
// [k/8, (k + 1)/8]);
// for k = 0..31.
//
// Layout: row k holds the polynomial for the k-th interval; within a row, entry i
// is the IEEE-754 binary64 bit pattern of the coefficient of x^(2 i) (i = 0..7).
// `f_erff` picks the row from the integer part of 8 * |x| and decodes the entries
// with `f64::from_bits`.
static COEFFS: [[u64; 8]; 32] = [
[
0x3ff20dd750429b6d,
0xbfd812746b037753,
0x3fbce2f219e8596a,
0xbf9b82cdacb78fda,
0x3f756479297dfda5,
0xbf48b3ac5455ef02,
0xbf7126fcac367e3b,
0x3fb2d0bdb3ba4984,
],
[
0x3ff20dd750429b6d,
0xbfd812746b0379a8,
0x3fbce2f21a03cf2a,
0xbf9b82ce30de083e,
0x3f7565bcad3eb60f,
0xbf4c02c66f659256,
0x3f1f92f673385229,
0xbeedef402648ae90,
],
[
0x3ff20dd750429b34,
0xbfd812746b032dce,
0x3fbce2f219d84aae,
0xbf9b82ce22dcf139,
0x3f7565b9efcd4af1,
0xbf4c021f1af414bc,
0x3f1f7c6d177eff82,
0xbeec9e4410dcf865,
],
[
0x3ff20dd750426eab,
0xbfd812746ae592c7,
0x3fbce2f211525f14,
0xbf9b82ccc125e63f,
0x3f756596f261cfd3,
0xbf4bfde1ff8eeecf,
0x3f1f31a9d15dc5d8,
0xbeea5a4362844b3c,
],
[
0x3ff20dd75039c705,
0xbfd812746777e74d,
0x3fbce2f17af98a1b,
0xbf9b82be4b817cbe,
0x3f7564bec2e2962e,
0xbf4bee86f9da3558,
0x3f1e9443689dc0cc,
0xbee79c0f230805d8,
],
[
0x3ff20dd74f811211,
0xbfd81274371a3e8f,
0x3fbce2ec038262e5,
0xbf9b8265b82c5e1f,
0x3f75615a2e239267,
0xbf4bc63ae023dceb,
0x3f1d87c2102f7e06,
0xbee49584bea41d62,
],
[
0x3ff20dd746d063e3,
0xbfd812729a8a950f,
0x3fbce2cb0a2df232,
0xbf9b80eca1f51278,
0x3f75572e26c46815,
0xbf4b715e5638b65e,
0x3f1bfbb195484968,
0xbee177a565c15c52,
],
[
0x3ff20dd701b44486,
0xbfd812691145f237,
0x3fbce23a06b8cfd9,
0xbf9b7c1dc7245288,
0x3f753e92f7f397dd,
0xbf4ad97cc4acf0b2,
0x3f19f028b2b09b71,
0xbedcdc4da08da8c1,
],
[
0x3ff20dd5715ac332,
0xbfd8123e680bd0eb,
0x3fbce0457aded691,
0xbf9b6f52d52bed40,
0x3f750c291b84414c,
0xbf49ea246b1ad4a9,
0x3f177654674e0ca0,
0xbed737c11a1bcebb,
],
[
0x3ff20dce6593e114,
0xbfd811a59c02eadc,
0x3fbcdab53c7cd7d5,
0xbf9b526d2e321eed,
0x3f74b1d32cd8b994,
0xbf48963143ec0a1e,
0x3f14ad5700e4db91,
0xbed231e100e43ef2,
],
[
0x3ff20db48bfd5a62,
0xbfd80fdd84f9e308,
0x3fbccd340d462983,
0xbf9b196a29287680,
0x3f74210c2c13a0f7,
0xbf46dbdfb4ff71ae,
0x3f11bca2d17fbd71,
0xbecbca36f90c7cf5,
],
[
0x3ff20d64b2f8f508,
0xbfd80b4d4f19fa8b,
0x3fbcb088197262e3,
0xbf9ab51fd02e5b99,
0x3f734e1e5e81a632,
0xbf44c66377b502ce,
0x3f0d9ad25066213c,
0xbec4b0df7dd0cfa1,
],
[
0x3ff20c8fc1243576,
0xbfd8010cb2009e27,
0x3fbc7a47e9299315,
0xbf9a155be5683654,
0x3f7233502694997b,
0xbf426c94b7d81300,
0x3f08094f1de25fb9,
0xbebe0e3d776c6eef,
],
[
0x3ff20a9bd1611bc1,
0xbfd7ec7fbce83f90,
0x3fbc1d757d7317b7,
0xbf992c160cd589f0,
0x3f70d307269cc5c2,
0xbf3fda5b0d2d1879,
0x3f02fdd7b3b14a7f,
0xbeb54eed4a26af5a,
],
[
0x3ff20682834f943d,
0xbfd7c73f747bf5a9,
0x3fbb8c2db4a9ffd1,
0xbf97f0e4ffe989ec,
0x3f6e7061eae4166e,
0xbf3ad36e873fff2d,
0x3efd39222396128e,
0xbead83dacec5ea6b,
],
[
0x3ff1feb8d12676d7,
0xbfd7898347284afe,
0x3fbaba3466b34451,
0xbf9663adc573e2f9,
0x3f6ae99fb17c3e08,
0xbf3602f950ad5535,
0x3ef5e9717490609d,
0xbea3fca107bbc8d5,
],
[
0x3ff1f12fe3c536fa,
0xbfd72b1d1f22e6d3,
0x3fb99fc0eed4a896,
0xbf948db0a87bd8c6,
0x3f673e368895aa61,
0xbf319b35d5301fc8,
0x3ef007987e4bb033,
0xbe9a7edcd4c2dc70,
],
[
0x3ff1db7b0df84d5d,
0xbfd6a4e4a41cde02,
0x3fb83bbded16455d,
0xbf92809b3b36977e,
0x3f639c08bab44679,
0xbf2b7b45a70ed119,
0x3ee6e99b36410e7b,
0xbe913619bb7ebc0c,
],
[
0x3ff1bb1c85c4a527,
0xbfd5f23b99a249a3,
0x3fb694c91fa0d12c,
0xbf9053e1ce11c72d,
0x3f602bf72c50ea78,
0xbf24f478fb56cb02,
0x3ee005f80ecbe213,
0xbe85f2446bde7f5b,
],
[
0x3ff18dec3bd51f9d,
0xbfd5123f58346186,
0x3fb4b8a1ca536ab4,
0xbf8c4243015cc723,
0x3f5a1a8a01d351ef,
0xbf1f466b34f1d86b,
0x3ed5f835eea0bf6a,
0xbe7b83165b939234,
],
[
0x3ff152804c3369f4,
0xbfd4084cd4afd4bc,
0x3fb2ba2e836e47aa,
0xbf8800f2dfc6904b,
0x3f54a6daf0669c59,
0xbf16e326ab872317,
0x3ecd9761a6a755a5,
0xbe70fca33f9dd4b5,
],
[
0x3ff1087ad68356aa,
0xbfd2dbb044707459,
0x3fb0aea8ceaa0384,
0xbf840b516d52b3d2,
0x3f500c9e05f01d22,
0xbf1076afb0dc0ff7,
0x3ec39fadec400657,
0xbe64b5761352e7e3,
],
[
0x3ff0b0a7a8ba4a22,
0xbfd196990d22d4a1,
0x3fad5551e6ac0c4d,
0xbf807cce1770bd1a,
0x3f4890347b8848bf,
0xbf0757ec96750b6a,
0x3eb9b258a1e06bce,
0xbe58fc6d22da7572,
],
[
0x3ff04ce2be70fb47,
0xbfd0449e4b0b9cac,
0x3fa97f7424f4b0e7,
0xbf7ac825439c42f4,
0x3f428f5f65426dfb,
0xbf005b699a90f90f,
0x3eb0a888eecf4593,
0xbe4deace2b32bb31,
],
[
0x3fefbf9fb0e11cc8,
0xbfcde2640856545a,
0x3fa5f5b1f47f8510,
0xbf7588bc71eb41b9,
0x3f3bc6a0a772f56d,
0xbef6b9fad1f1657a,
0x3ea573204ba66504,
0xbe41d38065c94e44,
],
[
0x3feed8f18c99e031,
0xbfcb4cb6acd903b4,
0x3fa2c7f3dddd6fc1,
0xbf713052067df4e0,
0x3f34a5027444082f,
0xbeef672bab0e2554,
0x3e9b83c756348cc9,
0xbe3534f1a1079499,
],
[
0x3fedebd33044166d,
0xbfc8d7cd9053f7d8,
0x3f9ff9957fb3d6e7,
0xbf6b50be55de0f36,
0x3f2e92c8ec53a628,
0xbee5a4b88d508007,
0x3e91a27737559e26,
0xbe2942ae62cb2c14,
],
[
0x3fecfdbf0386f3bd,
0xbfc68e33d93b0dc4,
0x3f9b2683d58f53de,
0xbf65a9174e70d26f,
0x3f269ddd326d49cd,
0xbeddd8f397a8219c,
0x3e86a755016ad4dd,
0xbe1e366e0139187d,
],
[
0x3fec132adb8d7464,
0xbfc475a899f61b46,
0x3f970a431397a77c,
0xbf612e3d35beeee2,
0x3f20c16b05738333,
0xbed4a47f873e144e,
0x3e7d3d494c698c02,
0xbe12302c59547fe5,
],
[
0x3feb2f5fd05555e7,
0xbfc28feefbe03ec7,
0x3f93923acbb3a676,
0xbf5b4ff793cd6358,
0x3f18ea0eb8c913bc,
0xbeccb31ec2baceb1,
0x3e730011e7e80c04,
0xbe0617710635cb1d,
],
[
0x3fea54853cd9593e,
0xbfc0dbdbaea4dc8e,
0x3f90a93e2c20a0fd,
0xbf55c969ff401ea8,
0x3f129e0cc64fe627,
0xbec4160d8e9d3c2a,
0x3e68e7b67594624a,
0xbdfb1cf2c975b09b,
],
[
0x3fe983ceece09ff8,
0xbfbeacc78f7a2d00,
0x3f8c74418410655f,
0xbf51756a050e441e,
0x3f0bff3650f7f548,
0xbebc56c0217d3ada,
0x3e607b4918d0b489,
0xbdf0d4be8c1c50f8,
],
];
/// Error function `erf(x)` for `f32`.
///
/// * NaN inputs are returned unchanged.
/// * `+inf` / `-inf` give exactly `1` / `-1`.
/// * Finite inputs with `|x| >= 4` give `±1` plus a tiny correction of the opposite
///   sign, added at run time.
/// * Everything else is evaluated in `f64` as `x * P(x^2)`, where `P` is the degree-7
///   polynomial stored in the `COEFFS` row for the interval containing `|x|`.
///
/// Max ulp 0.5
#[inline]
pub fn f_erff(x: f32) -> f32 {
    const ABS_MASK: u32 = 0x7fff_ffff;
    // Bit patterns of 4.0 and +infinity.
    const FOUR_BITS: u32 = 0x4080_0000;
    const INF_BITS: u32 = 0x7f80_0000;
    // Adding 3 to the biased exponent field scales a normal float by 8.
    const EXPONENT_PLUS_THREE: u32 = 3 << 23;

    let magnitude = x.to_bits() & ABS_MASK;

    if magnitude >= FOUR_BITS {
        // Indexed by sign: slot 0 for non-negative inputs, slot 1 for negative ones.
        static LIMIT: [f32; 2] = [1.0, -1.0];
        static NUDGE: [f32; 2] = [f32::from_bits(0xb3000000), f32::from_bits(0x33000000)];

        if magnitude > INF_BITS {
            // NaN: hand the input straight back.
            return x;
        }
        let slot = x.is_sign_negative() as usize;
        if magnitude == INF_BITS {
            return LIMIT[slot];
        }
        return LIMIT[slot] + NUDGE[slot];
    }

    // Polynomial approximation:
    // erf(x) ~ x * (c0 + c1 * x^2 + c2 * x^4 + ... + c7 * x^14)
    let xd = f64::from(x);
    let x2 = xd * xd;

    // SAFETY: `magnitude < 0x4080_0000` on this path, so after the exponent bump the
    // bit pattern is below 0x4200_0000 (32.0): the value is finite, non-negative and
    // less than 32, hence its truncation fits in `usize` and lies in 0..=31.
    let interval = unsafe {
        f32::from_bits(magnitude.wrapping_add(EXPONENT_PLUS_THREE)).to_int_unchecked::<usize>()
    };
    let row = &COEFFS[interval];
    let coeff = |i: usize| f64::from_bits(row[i]);

    let x4 = x2 * x2;
    let x8 = x4 * x4;

    // Estrin scheme: pair neighbouring coefficients, then combine with x^4 and x^8.
    let pair01 = f_fmla(x2, coeff(1), coeff(0));
    let pair23 = f_fmla(x2, coeff(3), coeff(2));
    let pair45 = f_fmla(x2, coeff(5), coeff(4));
    let pair67 = f_fmla(x2, coeff(7), coeff(6));

    let low_half = f_fmla(x4, pair23, pair01);
    let high_half = f_fmla(x4, pair67, pair45);

    (xd * f_fmla(x8, high_half, low_half)) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for `f_erff`: zero, two interior points and the IEEE special values.
    #[test]
    fn f_erff_test() {
        // (input, expected result) pairs compared for exact equality.
        let cases: [(f32, f32); 5] = [
            (0.0, 0.0),
            (1.0, 0.8427008),
            (0.5, 0.5204999),
            (f32::INFINITY, 1.0),
            (f32::NEG_INFINITY, -1.0),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_erff(input), expected);
        }
        // NaN never compares equal, so it is checked separately.
        assert!(f_erff(f32::NAN).is_nan());
    }
}

345
vendor/pxfm/src/err/erffc.rs vendored Normal file
View File

@@ -0,0 +1,345 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{dd_fmla, f_fmla};
use std::hint::black_box;
// Table of IEEE-754 binary64 bit patterns: entry k is 2^(k/128) for k = 0..127
// (entry 0 is 1.0, entry 64 is sqrt(2)).
// `f_erfcf` indexes it with the low 7 bits of its reduced exponent `j`, and builds
// the remaining power of two from `j >> 7`.
static ERR0: [u64; 128] = [
0x3ff0000000000000,
0x3ff0163da9fb3335,
0x3ff02c9a3e778061,
0x3ff04315e86e7f85,
0x3ff059b0d3158574,
0x3ff0706b29ddf6de,
0x3ff0874518759bc8,
0x3ff09e3ecac6f383,
0x3ff0b5586cf9890f,
0x3ff0cc922b7247f7,
0x3ff0e3ec32d3d1a2,
0x3ff0fb66affed31b,
0x3ff11301d0125b51,
0x3ff12abdc06c31cc,
0x3ff1429aaea92de0,
0x3ff15a98c8a58e51,
0x3ff172b83c7d517b,
0x3ff18af9388c8dea,
0x3ff1a35beb6fcb75,
0x3ff1bbe084045cd4,
0x3ff1d4873168b9aa,
0x3ff1ed5022fcd91d,
0x3ff2063b88628cd6,
0x3ff21f49917ddc96,
0x3ff2387a6e756238,
0x3ff251ce4fb2a63f,
0x3ff26b4565e27cdd,
0x3ff284dfe1f56381,
0x3ff29e9df51fdee1,
0x3ff2b87fd0dad990,
0x3ff2d285a6e4030b,
0x3ff2ecafa93e2f56,
0x3ff306fe0a31b715,
0x3ff32170fc4cd831,
0x3ff33c08b26416ff,
0x3ff356c55f929ff1,
0x3ff371a7373aa9cb,
0x3ff38cae6d05d866,
0x3ff3a7db34e59ff7,
0x3ff3c32dc313a8e5,
0x3ff3dea64c123422,
0x3ff3fa4504ac801c,
0x3ff4160a21f72e2a,
0x3ff431f5d950a897,
0x3ff44e086061892d,
0x3ff46a41ed1d0057,
0x3ff486a2b5c13cd0,
0x3ff4a32af0d7d3de,
0x3ff4bfdad5362a27,
0x3ff4dcb299fddd0d,
0x3ff4f9b2769d2ca7,
0x3ff516daa2cf6642,
0x3ff5342b569d4f82,
0x3ff551a4ca5d920f,
0x3ff56f4736b527da,
0x3ff58d12d497c7fd,
0x3ff5ab07dd485429,
0x3ff5c9268a5946b7,
0x3ff5e76f15ad2148,
0x3ff605e1b976dc09,
0x3ff6247eb03a5585,
0x3ff6434634ccc320,
0x3ff6623882552225,
0x3ff68155d44ca973,
0x3ff6a09e667f3bcd,
0x3ff6c012750bdabf,
0x3ff6dfb23c651a2f,
0x3ff6ff7df9519484,
0x3ff71f75e8ec5f74,
0x3ff73f9a48a58174,
0x3ff75feb564267c9,
0x3ff780694fde5d3f,
0x3ff7a11473eb0187,
0x3ff7c1ed0130c132,
0x3ff7e2f336cf4e62,
0x3ff80427543e1a12,
0x3ff82589994cce13,
0x3ff8471a4623c7ad,
0x3ff868d99b4492ed,
0x3ff88ac7d98a6699,
0x3ff8ace5422aa0db,
0x3ff8cf3216b5448c,
0x3ff8f1ae99157736,
0x3ff9145b0b91ffc6,
0x3ff93737b0cdc5e5,
0x3ff95a44cbc8520f,
0x3ff97d829fde4e50,
0x3ff9a0f170ca07ba,
0x3ff9c49182a3f090,
0x3ff9e86319e32323,
0x3ffa0c667b5de565,
0x3ffa309bec4a2d33,
0x3ffa5503b23e255d,
0x3ffa799e1330b358,
0x3ffa9e6b5579fdbf,
0x3ffac36bbfd3f37a,
0x3ffae89f995ad3ad,
0x3ffb0e07298db666,
0x3ffb33a2b84f15fb,
0x3ffb59728de5593a,
0x3ffb7f76f2fb5e47,
0x3ffba5b030a1064a,
0x3ffbcc1e904bc1d2,
0x3ffbf2c25bd71e09,
0x3ffc199bdd85529c,
0x3ffc40ab5fffd07a,
0x3ffc67f12e57d14b,
0x3ffc8f6d9406e7b5,
0x3ffcb720dcef9069,
0x3ffcdf0b555dc3fa,
0x3ffd072d4a07897c,
0x3ffd2f87080d89f2,
0x3ffd5818dcfba487,
0x3ffd80e316c98398,
0x3ffda9e603db3285,
0x3ffdd321f301b460,
0x3ffdfc97337b9b5f,
0x3ffe264614f5a129,
0x3ffe502ee78b3ff6,
0x3ffe7a51fbc74c83,
0x3ffea4afa2a490da,
0x3ffecf482d8e67f1,
0x3ffefa1bee615a27,
0x3fff252b376bba97,
0x3fff50765b6e4540,
0x3fff7bfdad9cbe14,
0x3fffa7c1819e90d8,
0x3fffd3c22b8f71f1,
];
// Coefficient sets (IEEE-754 binary64 bit patterns) used by `f_erfcf` outside the
// small-|x| branch. Row 0 is used while the bit pattern of |x| is at most 0x40051000
// (about 2.08), row 1 above that.
//
// Layout of a row `ct`, as consumed by `f_erfcf`:
// - ct[0], ct[1]: offsets a and b of the change of variable z = (|x| - a) / (|x| + b);
// - ct[2]:        constant term of the polynomial in z;
// - ct[3..16]:    coefficients of z^1 .. z^13, lowest order first.
static ERFC_COEFFS: [[u64; 16]; 2] = [
[
0x3fec162355429b28,
0x400d99999999999a,
0x3fdda951cece2b85,
0xbff70ef6cff4bcc4,
0x4003d7f7b3d617de,
0xc009d0aa47537c51,
0x4009754ea9a3fcb1,
0xc0027a5453fcc015,
0x3ff1ef2e0531aeba,
0xbfceca090f5a1c06,
0xbfb7a3cd173a063c,
0x3fb30fa68a68fddd,
0x3f555ad9a326993a,
0xbf907e7b0bb39fbf,
0x3f52328706c0e950,
0x3f6d6aa0b7b19cfe,
],
[
0x401137c8983f8516,
0x400799999999999a,
0x3fc05b53aa241333,
0xbfca3f53872bf870,
0x3fbde4c30742c9d5,
0xbfacb24bfa591986,
0x3f9666aec059ca5f,
0xbf7a61250eb26b0b,
0x3f52b28b7924b34d,
0x3f041b13a9d45013,
0xbf16dd5e8a273613,
0x3ef09ce8ea5e8da5,
0x3ed33923b4102981,
0xbec1dfd161e3f984,
0xbe8c87618fcae3b3,
0x3e8e8a6ffa0ba2c7,
],
];
/// Complementary error function
///
/// Special cases handled explicitly below:
/// - `-inf` returns `2`, `+inf` returns `0`, NaN returns `x + x` (a NaN);
/// - `x < -0x1.ea8f94p+1` returns `2` minus a tiny amount, added at run time;
/// - `|x| >= 0x1.41bbf8p+3` returns the run-time product `2^-149 * 0.25`;
/// - `x == 0` returns `1`; `|x| <= 0x1.c5bf88p-26` returns `1` plus a tiny signed amount.
///
/// Every other input is evaluated in `f64` and converted to `f32` once at the end.
///
/// Max ULP 0.5
pub fn f_erfcf(x: f32) -> f32 {
// |x| as f32, then widened to f64, and its square.
let ax = f32::from_bits(x.to_bits() & 0x7fff_ffff);
let axd = ax as f64;
let x2 = axd * axd;
// Raw bits of x, bits of |x|, and the sign bit (0 for non-negative, 1 for negative).
let t = x.to_bits();
let at = t & 0x7fff_ffff;
let sgn = t >> 31;
// Row of ERFC_COEFFS to use: 1 when the bit pattern of |x| exceeds 0x40051000, else 0.
let i: i64 = (at > 0x40051000) as i64;
/* for x < -0x1.ea8f94p+1, erfc(x) rounds to 2 (to nearest) */
// Unsigned comparison on the raw bits: only inputs with the sign bit set can exceed
// 0xc07547ca, so this branch is taken for sufficiently negative x, -inf and
// sign-bit-set NaNs.
if t > 0xc07547cau32 {
// x < -0x1.ea8f94p+1
if t >= 0xff800000u32 {
// -Inf or NaN
if t == 0xff800000u32 {
return 2.0;
} // -Inf
return x + x; // NaN
}
// `black_box` keeps both operands opaque to the optimizer, so the subtraction is
// carried out at run time rather than folded to a constant.
return black_box(2.0) - black_box(f32::from_bits(0x33000000)); // rounds to 2 or nextbelow(2)
}
/* at is the absolute value of x
for x >= 0x1.41bbf8p+3, erfc(x) < 2^-150, thus rounds to 0 or to 2^-149
depending on the rounding mode */
if at >= 0x4120ddfcu32 {
// |x| >= 0x1.41bbf8p+3
if at >= 0x7f800000u32 {
// +Inf or NaN
if at == 0x7f800000u32 {
return 0.0;
} // +Inf
return x + x; // NaN
}
// 0x1p-149f * 0.25f rounds to 0 or 2^-149 depending on rounding
return black_box(f32::from_bits(0x00000001)) * black_box(0.25);
}
if at <= 0x3db80000u32 {
// |x| <= 0x1.7p-4
if t == 0xb76c9f62u32 {
// x = -0x1.d93ec4p-17
return black_box(f32::from_bits(0x3f800085)) + black_box(f32::from_bits(0x33000000)); // exceptional case
}
/* for |x| <= 0x1.c5bf88p-26. erfc(x) rounds to 1 (to nearest) */
if at <= 0x32e2dfc4u32 {
// |x| <= 0x1.c5bf88p-26
if at == 0 {
return 1.0;
}
// Indexed by sign bit: -2^-26 for positive x, +2^-25 for negative x.
static D: [f32; 2] = [f32::from_bits(0xb2800000), f32::from_bits(0x33000000)];
return 1.0 + D[sgn as usize];
}
/* around 0, erfc(x) behaves as 1 - (odd polynomial) */
// Coefficients of x, x^3, x^5, x^7, x^9 (as f64 bit patterns); C[0] is 2/sqrt(pi).
const C: [u64; 5] = [
0x3ff20dd750429b6d,
0xbfd812746b03610b,
0x3fbce2f218831d2f,
0xbf9b82c609607dcb,
0x3f7553af09b8008e,
];
// Horner evaluation in x^2, then one multiplication by the signed x.
let fw0 = f_fmla(x2, f64::from_bits(C[4]), f64::from_bits(C[3]));
let fw1 = f_fmla(x2, fw0, f64::from_bits(C[2]));
let fw2 = f_fmla(x2, fw1, f64::from_bits(C[1]));
let f0 = x as f64 * f_fmla(x2, fw2, f64::from_bits(C[0]));
return (1.0 - f0) as f32;
}
/* now -0x1.ea8f94p+1 <= x <= 0x1.41bbf8p+3, with |x| > 0x1.7p-4 */
// ILN2 is 1/ln(2); LN2H + LN2L is a two-part value of ln(2)/128.
const ILN2: f64 = f64::from_bits(0x3ff71547652b82fe);
const LN2H: f64 = f64::from_bits(0x3f762e42fefa0000);
const LN2L: f64 = f64::from_bits(0x3d0cf79abd6f5dc8);
// Argument reduction for exp(-x^2): the integer j is read out of the top 16 fraction
// bits of `jt` (the left shift by 12 discards sign and exponent, the arithmetic right
// shift by 48 sign-extends what remains).
let jt = dd_fmla(x2, ILN2, -(1024. + f64::from_bits(0x3f70000000000000))).to_bits();
let j: i64 = ((jt << 12) as i64) >> 48;
// Bit pattern of a signed power of two: biased exponent 0x3ff + (j >> 7), with the
// sign of x placed in bit 11 so that the shift by 52 moves it into the f64 sign bit.
let sf = ((j >> 7) as u64)
.wrapping_add(0x3ffu64 | (sgn as u64) << 11)
.wrapping_shl(52);
// Coefficients close to -1/2, 1/6, -1/24, 1/120 (as f64 bit patterns).
const CH: [u64; 4] = [
0xbfdffffffffff333,
0x3fc5555555556a14,
0xbfa55556666659b4,
0x3f81111074cc7b22,
];
// Reduced argument d = x^2 + j * ln(2)/128.
let d = f_fmla(LN2L, j as f64, f_fmla(LN2H, j as f64, x2));
let d2 = d * d;
// 2^((j mod 128)/128) from the table.
let e0 = f64::from_bits(ERR0[(j & 127) as usize]);
// f = d + d^2 * (CH[0] + CH[1] d + CH[2] d^2 + CH[3] d^3), i.e. approximately 1 - exp(-d).
let fw0 = f_fmla(d, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
let fw1 = f_fmla(d, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
let fw2 = f_fmla(d2, fw0, fw1);
let f = f_fmla(d2, fw2, d);
// Change of variable z = (|x| - ct[0]) / (|x| + ct[1]) and polynomial in z with
// constant term ct[2] and higher-order coefficients ct[3..].
let ct = ERFC_COEFFS[i as usize];
let z = (axd - f64::from_bits(ct[0])) / (axd + f64::from_bits(ct[1]));
let z2 = z * z;
let z4 = z2 * z2;
let z8 = z4 * z4;
let c = &ct[3..];
// Estrin-style evaluation of c[0] + c[1] z + ... + c[12] z^12.
let sw0 = f_fmla(z, f64::from_bits(c[1]), f64::from_bits(c[0]));
let sw1 = f_fmla(z, f64::from_bits(c[3]), f64::from_bits(c[2]));
let sw2 = f_fmla(z, f64::from_bits(c[5]), f64::from_bits(c[4]));
let sw3 = f_fmla(z, f64::from_bits(c[7]), f64::from_bits(c[6]));
let zw0 = f_fmla(z2, sw1, sw0);
let zw1 = f_fmla(z2, sw3, sw2);
let sw4 = f_fmla(z, f64::from_bits(c[9]), f64::from_bits(c[8]));
let sw5 = f_fmla(z, f64::from_bits(c[11]), f64::from_bits(c[10]));
let zw2 = f_fmla(z4, zw1, zw0);
let zw3 = f_fmla(z2, sw5, sw4);
let zw4 = f_fmla(z4, f64::from_bits(c[12]), zw3);
let mut s = f_fmla(z8, zw4, zw2);
// One more Horner step brings in the constant term ct[2].
s = f_fmla(z, s, f64::from_bits(ct[2]));
// Added to the result: 0 for non-negative x, 2 for negative x (where `sf` is negative).
static OFF: [f64; 2] = [0., 2.];
// sf * (e0 - f * e0) is the signed exponential factor; s is the polynomial factor.
let r = (f64::from_bits(sf) * f_fmla(-f, e0, e0)) * s;
let y = OFF[sgn as usize] + r;
y as f32
}
#[cfg(test)]
mod tests {
    use crate::f_erfcf;

    /// Spot checks for `f_erfcf`: zero, two interior points and the IEEE special values.
    #[test]
    fn test_erfc() {
        // (input, expected result) pairs compared for exact equality.
        let cases: [(f32, f32); 5] = [
            (0.0, 1.0),
            (0.5, 0.47950011),
            (1.0, 0.1572992),
            (f32::INFINITY, 0.0),
            (f32::NEG_INFINITY, 2.0),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_erfcf(input), expected);
        }
        // NaN never compares equal, so it is checked separately.
        assert!(f_erfcf(f32::NAN).is_nan());
    }
}

692
vendor/pxfm/src/err/inverf.rs vendored Normal file
View File

@@ -0,0 +1,692 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::logs::fast_log_dd;
use crate::polyeval::{f_polyeval4, f_polyeval5};
// Evaluates `x * num(v) / den(v)` in double-double arithmetic, where `v = x^2 - 0.5625`
// and `num`, `den` are degree-9 polynomials given by the tables `P` and `Q` below.
// According to the generation scripts, the ratio approximates InverseErf[x]/x as a
// function of x^2, re-expanded around 0.5625.
// NOTE(review): the name and the scripts suggest the fit covers 0.06..0.75 in the
// fitted variable (z = x^2 in the script) — confirm the input range against the caller.
// The result is rounded to a single f64 at the very end.
#[cold]
fn inverf_0p06_to_0p75(x: f64) -> f64 {
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
// Numerator coefficients, lowest order first. Each entry is a pair of f64 bit
// patterns: the first is the small correction term, the second the leading term.
const P: [(u64, u64); 10] = [
(0xbc3e06eda42202a0, 0x3f93c2fc5d00e0c8),
(0xbc6eb374406b33b4, 0xbfc76fcfd022e3ff),
(0xbc857822d7ffd282, 0x3fe6f8443546010a),
(0x3c68269c66dfb28a, 0xbff80996754ceb79),
(0x3c543dce8990a9f9, 0x3ffcf778d5ef0504),
(0xbc72fc55f73765f6, 0xbff433be821423d0),
(0xbc66d05fb37c8592, 0x3fdf15f19e9d8da4),
(0x3c56dfb85e83a2c5, 0xbfb770b6827e0829),
(0x3bff1472ecdfa403, 0x3f7a98a2980282bb),
(0x3baffb33d69d6276, 0xbf142a246fd2c07c),
];
// x^2 as a double-double, then the expansion variable vz = x^2 - 0.5625 and its powers.
let x2 = DoubleDouble::from_exact_mult(x, x);
let vz = DoubleDouble::full_add_f64(x2, -0.5625);
let vx2 = vz * vz;
let vx4 = vx2 * vx2;
let vx8 = vx4 * vx4;
// Estrin scheme for the numerator: pair neighbouring coefficients first
// (assuming `mul_add(a, b, c)` computes a * b + c — TODO confirm).
let p0 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let p1 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let p2 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let p3 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let p4 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
// Combine the pairs with vz^2, vz^4 and vz^8.
let q0 = DoubleDouble::mul_add(vx2, p1, p0);
let q1 = DoubleDouble::mul_add(vx2, p3, p2);
let r0 = DoubleDouble::mul_add(vx4, q1, q0);
let num = DoubleDouble::mul_add(vx8, p4, r0);
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
// Denominator coefficients, same layout as `P`.
const Q: [(u64, u64); 10] = [
(0xbc36337f24e57cb9, 0x3f92388d5d757e3a),
(0xbc63dfae43d60e0b, 0xbfc6ca7da581358c),
(0xbc77656389bd0e62, 0x3fe7c82ce417b4e0),
(0xbc93679667bef2f0, 0xbffad58651fd1a51),
(0x3ca2c6cb9eb17fb4, 0x4001bdb67e93a242),
(0xbc9b58961ba253bc, 0xbffbdaeff6fbb81c),
(0x3c7861f549c6aa61, 0x3fe91b12cf47da3a),
(0xbc696dfd665b2f5e, 0xbfc7c5d0ffb7f1da),
(0x3c1552b0ec0ba7b3, 0x3f939ada247f7609),
(0xbbcaa226fb7b30a8, 0xbf41be65038ccfe6),
];
// Same Estrin scheme for the denominator (the temporaries shadow the numerator ones).
let p0 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[1]),
DoubleDouble::from_bit_pair(Q[0]),
);
let p1 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let p2 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let p3 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let p4 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let q0 = DoubleDouble::mul_add(vx2, p1, p0);
let q1 = DoubleDouble::mul_add(vx2, p3, p2);
let r0 = DoubleDouble::mul_add(vx4, q1, q0);
let den = DoubleDouble::mul_add(vx8, p4, r0);
// The ratio approximates InverseErf[x]/x, so multiply by x to obtain the result.
let r = DoubleDouble::div(num, den);
let k = DoubleDouble::quick_mult_f64(r, x);
k.to_f64()
}
// Evaluates `copysign(zeta_sqrt * num(z) / den(z), x)` in double-double arithmetic,
// where `num` and `den` are the degree-10 polynomials given by the tables `P` and `Q`.
//
// Parameters:
// - `z`: the variable the rational approximant is evaluated at;
// - `zeta_sqrt`: the factor the ratio is multiplied by;
// - `x`: only its sign is used, for the final `copysign`.
// NOTE(review): from the generation script, `zeta_sqrt` is presumably sqrt(-log(1 - |x|))
// and `z` its reciprocal — confirm against the caller.
#[inline]
fn inverf_asympt_small(z: DoubleDouble, zeta_sqrt: DoubleDouble, x: f64) -> f64 {
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
// {err0, approx,err1}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},10,10},WorkingPrecision->90]
// num=Numerator[approx];
// den=Denominator[approx];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// Numerator coefficients, lowest order first. Each entry is a pair of f64 bit
// patterns: the first is the small correction term, the second the leading term.
const P: [(u64, u64); 11] = [
(0x3c936555853a8b2c, 0x3ff0001df06a2515),
(0x3cea488e802db3c3, 0x404406ba373221da),
(0xbce27d42419754e3, 0x407b0442e38a9597),
(0xbd224a407624cbdf, 0x409c9277e31ef446),
(0x3d4f16ce65d6fea0, 0x40aec3ec005b1d8a),
(0x3d105bc37bc61b58, 0x40b46be8f860f4d9),
(0x3d5ca133dcdecaa0, 0x40b3826e6a32dad7),
(0x3d1d52013ba8aa38, 0x40aae93a603cf3ea),
(0xbd07a75306df0fc3, 0x4098ab8357dc2e51),
(0x3d1bb6770bb7a27e, 0x407ebead00879010),
(0xbbfcbff4a9737936, 0x3f8936117ccbff83),
];
// Powers of z shared by the numerator and the denominator.
let z2 = DoubleDouble::quick_mult(z, z);
let z4 = DoubleDouble::quick_mult(z2, z2);
let z8 = DoubleDouble::quick_mult(z4, z4);
// Estrin scheme for the numerator: pair neighbouring coefficients first
// (assuming `mul_add(a, b, c)` computes a * b + c — TODO confirm).
let q0 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[1]),
z,
DoubleDouble::from_bit_pair(P[0]),
);
let q1 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[3]),
z,
DoubleDouble::from_bit_pair(P[2]),
);
let q2 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[5]),
z,
DoubleDouble::from_bit_pair(P[4]),
);
let q3 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[7]),
z,
DoubleDouble::from_bit_pair(P[6]),
);
let q4 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[9]),
z,
DoubleDouble::from_bit_pair(P[8]),
);
// Combine the pairs with z^2, z^4 and z^8; the odd top coefficient P[10] joins q4.
let r0 = DoubleDouble::mul_add(z2, q1, q0);
let r1 = DoubleDouble::mul_add(z2, q3, q2);
let s0 = DoubleDouble::mul_add(z4, r1, r0);
let s1 = DoubleDouble::mul_add(z2, DoubleDouble::from_bit_pair(P[10]), q4);
let num = DoubleDouble::mul_add(z8, s1, s0);
// See numerator generation above:
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// Denominator coefficients, same layout as `P`. Q[0] is exactly 1.0; the code below
// passes that constant as a plain f64 literal instead of reading Q[0].
const Q: [(u64, u64); 11] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc75b1109d4a3262, 0x40440782efaab17f),
(0x3d1f7775b207d84f, 0x407b2da74b0d39f2),
(0xbd3291fdbab49501, 0x409dac8d9e7c90b2),
(0xbd58d8fdd27707a9, 0x40b178dfeffa3192),
(0xbd57fc74ad705ce0, 0x40bad19b686f219f),
(0x3d4075510031f2cd, 0x40be70a598208cea),
(0xbd5442e109152efb, 0x40b9683ef36ae330),
(0x3d5398192933962e, 0x40b04b7c4c3ca8ee),
(0x3d2d04d03598e303, 0x409bd0080799fbf1),
(0x3d2a988eb552ef44, 0x40815a46f12bafe3),
];
// Same Estrin scheme for the denominator (the temporaries shadow the numerator ones).
let q0 = DoubleDouble::mul_add_f64(
DoubleDouble::from_bit_pair(Q[1]),
z,
f64::from_bits(0x3ff0000000000000),
);
let q1 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[3]),
z,
DoubleDouble::from_bit_pair(Q[2]),
);
let q2 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[5]),
z,
DoubleDouble::from_bit_pair(Q[4]),
);
let q3 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[7]),
z,
DoubleDouble::from_bit_pair(Q[6]),
);
let q4 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[9]),
z,
DoubleDouble::from_bit_pair(Q[8]),
);
let r0 = DoubleDouble::mul_add(z2, q1, q0);
let r1 = DoubleDouble::mul_add(z2, q3, q2);
let s0 = DoubleDouble::mul_add(z4, r1, r0);
let s1 = DoubleDouble::mul_add(z2, DoubleDouble::from_bit_pair(Q[10]), q4);
let den = DoubleDouble::mul_add(z8, s1, s0);
// Scale the ratio by zeta_sqrt, round to f64, and apply the sign of x.
let r = DoubleDouble::div(num, den);
let k = DoubleDouble::quick_mult(r, zeta_sqrt);
f64::copysign(k.to_f64(), x)
}
// branch for |x| > 0.9999 for extreme tail
#[cold]
fn inverf_asympt_long(z: DoubleDouble, zeta_sqrt: DoubleDouble, x: f64) -> f64 {
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
// {err0, approx}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},13,13},WorkingPrecision->90]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const P: [(u64, u64); 14] = [
(0x3c97612f9b24a614, 0x3ff0000ba84cc7a5),
(0xbcee8fe2da463412, 0x40515246546f5d88),
(0x3d2fa4a2b891b526, 0x40956b6837159b11),
(0x3d5d673ffad4f817, 0x40c5a1aa3be58652),
(0x3d8867a1e5506f88, 0x40e65ebb1e1e7c75),
(0xbd9bbc0764ed8f5b, 0x40fd2064a652e5c2),
(0xbda78e569c0d237f, 0x410a385c627c461c),
(0xbdab3123ebc465d7, 0x4110f05ca2b65fe5),
(0x3d960def35955192, 0x4110bb079af2fe08),
(0xbd97904816054836, 0x410911c24610c11c),
(0xbd937745e9192593, 0x40fc603244adca35),
(0xbd65fbc476d63050, 0x40e6399103188c21),
(0xbd61016ef381cce6, 0x40c6482b44995b89),
(0x3c326105c49e5a1a, 0xbfab44bd8b4e3138),
];
let z2 = z * z;
let z4 = z2 * z2;
let z8 = z4 * z4;
let g0 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let g1 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let g2 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let g3 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let g4 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let g5 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let g6 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[13]),
DoubleDouble::from_bit_pair(P[12]),
);
let h0 = DoubleDouble::mul_add(z2, g1, g0);
let h1 = DoubleDouble::mul_add(z2, g3, g2);
let h2 = DoubleDouble::mul_add(z2, g5, g4);
let q0 = DoubleDouble::mul_add(z4, h1, h0);
let q1 = DoubleDouble::mul_add(z4, g6, h2);
let num = DoubleDouble::mul_add(z8, q1, q0);
// See numerator generation above:
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const Q: [(u64, u64); 14] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcfc7b886ee61417, 0x405152838f711f3c),
(0xbd33f933c14e831a, 0x409576cb78cab36e),
(0x3d33fb09e2c4898a, 0x40c5e8a2c7602ced),
(0x3d7be430c664bf7e, 0x40e766fdc8c7638c),
(0x3dac662e74cdfc0e, 0x4100276b5f47b5f1),
(0x3da67d06e82a8495, 0x410f843887f8a24a),
(0x3dbbf2e22fc2550a, 0x4116d04271703e08),
(0xbdb2fb3aed100853, 0x4119aff4ed32b74b),
(0x3dba75e7b7171c3c, 0x4116b5eb8bf386bd),
(0x3dab2d8b8c1937eb, 0x410f71c38e84cb34),
(0xbda4e2e8a50b7370, 0x4100ca04b0f36b94),
(0xbd86ed6df34fdaf9, 0x40e9151ded4cf4b7),
(0x3d6938ea702c0328, 0x40c923ee1ab270c4),
];
let g0 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[1]),
DoubleDouble::from_bit_pair(Q[0]),
);
let g1 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let g2 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let g3 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let g4 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let g5 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let g6 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[13]),
DoubleDouble::from_bit_pair(Q[12]),
);
let h0 = DoubleDouble::mul_add(z2, g1, g0);
let h1 = DoubleDouble::mul_add(z2, g3, g2);
let h2 = DoubleDouble::mul_add(z2, g5, g4);
let q0 = DoubleDouble::mul_add(z4, h1, h0);
let q1 = DoubleDouble::mul_add(z4, g6, h2);
let den = DoubleDouble::mul_add(z8, q1, q0);
let r = DoubleDouble::div(num, den);
let k = DoubleDouble::quick_mult(r, zeta_sqrt);
f64::copysign(k.to_f64(), x)
}
/// Inverse error function
///
/// ulp 0.5
///
/// Special cases, all resolved before any approximation is evaluated:
/// - `±0` is returned unchanged, so the sign of zero is preserved (erfinv is odd);
/// - `|x| <= f64::EPSILON` returns `x * sqrt(pi) / 2`;
/// - `x == ±1` returns `±infinity`;
/// - `|x| > 1` and NaN return NaN.
///
/// Every other input is reduced to `z = |x|`, evaluated by one of the range-specific
/// approximations below, and the sign of `x` is copied onto the result.
pub fn f_erfinv(x: f64) -> f64 {
    let ax = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    if ax >= 0x3ff0000000000000u64 || ax <= 0x3cb0000000000000u64 {
        // |x| >= 1, |x| == 0, |x| <= f64::EPSILON
        if ax == 0 {
            // |x| == 0: hand the input back so that -0.0 maps to -0.0, matching the
            // sign-preserving behaviour of every other branch of this odd function.
            return x;
        }
        if ax <= 0x3cb0000000000000u64 {
            // |x| <= f64::EPSILON
            // inverf(x) ~ Sqrt[Pi]x/2+O[x]^3
            const SQRT_PI_OVER_2: f64 = f64::from_bits(0x3fec5bf891b4ef6b);
            return x * SQRT_PI_OVER_2;
        }
        // |x| >= 1 from here on
        if ax == 0x3ff0000000000000u64 {
            // |x| == 1
            return if x.is_sign_negative() {
                f64::NEG_INFINITY
            } else {
                f64::INFINITY
            };
        }
        return f64::NAN; // x == NaN, x = Inf, |x| > 1
    }
    let z = f64::from_bits(ax);
    if ax <= 0x3f8374bc6a7ef9db {
        // |x| <= 0.0095
        // Truncated Taylor series in odd powers of z: the two highest coefficients are
        // combined in plain f64, the remaining two are applied in double-double.
        // Generated by SageMath:
        // from mpmath import mp, erf
        //
        // mp.prec = 100
        //
        // def inverf_series(n_terms):
        //     from mpmath import taylor
        //     series_erf = taylor(mp.erfinv, 0, n_terms)
        //     return series_erf
        //
        // ser = inverf_series(10)
        // for i in range(1, len(ser), 2):
        //     k = ser[i]
        //     print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
        let z2 = DoubleDouble::from_exact_mult(z, z);
        let p = f_fmla(
            z2.hi,
            f64::from_bits(0x3fb62847c47dda48),
            f64::from_bits(0x3fc053c2c0ab91c5),
        );
        let mut r = DoubleDouble::mul_f64_add(
            z2,
            p,
            DoubleDouble::from_bit_pair((0xbc33ea2ef8dde075, 0x3fcdb29fb2fee5e4)),
        );
        r = DoubleDouble::mul_add(
            z2,
            r,
            DoubleDouble::from_bit_pair((0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b)),
        );
        // (rh + rl) * z = rh * z + rl*z
        let v = DoubleDouble::quick_mult_f64(r, z);
        return f64::copysign(v.to_f64(), x);
    } else if ax <= 0x3faeb851eb851eb8 {
        // 0.0095 < |x| <= 0.06
        // Longer Taylor series: the four highest coefficients go through f_polyeval4 in
        // plain f64, the remaining three are applied in double-double.
        // Generated by the same SageMath script as the branch above.
        let z2 = DoubleDouble::from_exact_mult(z, z);
        let p = f_polyeval4(
            z2.hi,
            f64::from_bits(0x3fb62847c47dda48),
            f64::from_bits(0x3fb0a13189c6ef7a),
            f64::from_bits(0x3faa7c85c89bb08b),
            f64::from_bits(0x3fa5eeb1d488e312),
        );
        let mut r = DoubleDouble::mul_f64_add(
            z2,
            p,
            DoubleDouble::from_bit_pair((0x3c2cec68daff0d80, 0x3fc053c2c0ab91c5)),
        );
        r = DoubleDouble::mul_add(
            z2,
            r,
            DoubleDouble::from_bit_pair((0xbc33ea2ef8dde075, 0x3fcdb29fb2fee5e4)),
        );
        r = DoubleDouble::mul_add(
            z2,
            r,
            DoubleDouble::from_bit_pair((0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b)),
        );
        // (rh + rl) * z = rh * z + rl*z
        let v = DoubleDouble::quick_mult_f64(r, z);
        return f64::copysign(v.to_f64(), x);
    }
    if ax <= 0x3fe8000000000000u64 {
        // 0.06 < |x| <= 0.75
        // First step rational approximant is generated, but it's ill-conditioned, thus
        // we're using taylor expansion to create Newton form at the point.
        // Generated in Wolfram Mathematica:
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=InverseErf[x]/x
        // g[x_] =f[Sqrt[x]];
        // {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // poly=den;
        // coeffs=CoefficientList[poly,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        // x0=SetPrecision[0.5625,75];
        // NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
        // coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
        const P: [(u64, u64); 5] = [
            (0xbc3e06eda42202a0, 0x3f93c2fc5d00e0c8),
            (0xbc6eb374406b33b4, 0xbfc76fcfd022e3ff),
            (0xbc857822d7ffd282, 0x3fe6f8443546010a),
            (0x3c68269c66dfb28a, 0xbff80996754ceb79),
            (0x3c543dce8990a9f9, 0x3ffcf778d5ef0504),
        ];
        // Expansion variable: x^2 shifted by the expansion point 0.5625.
        let x2 = DoubleDouble::from_exact_mult(x, x);
        let vz = DoubleDouble::full_add_f64(x2, -0.5625);
        // Fast path: the five highest-order numerator coefficients in plain f64 ...
        let ps_num = f_polyeval5(
            vz.hi,
            f64::from_bits(0xbff433be821423d0),
            f64::from_bits(0x3fdf15f19e9d8da4),
            f64::from_bits(0xbfb770b6827e0829),
            f64::from_bits(0x3f7a98a2980282bb),
            f64::from_bits(0xbf142a246fd2c07c),
        );
        // ... followed by Horner steps in double-double over P[4]..P[0].
        let mut num = DoubleDouble::mul_f64_add(vz, ps_num, DoubleDouble::from_bit_pair(P[4]));
        num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[3]));
        num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[2]));
        num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[1]));
        num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[0]));
        // Generated in Wolfram Mathematica:
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=InverseErf[x]/x
        // g[x_] =f[Sqrt[x]];
        // {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // coeffs=CoefficientList[poly,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        // x0=SetPrecision[0.5625,75];
        // NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
        // coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
        const Q: [(u64, u64); 5] = [
            (0xbc36337f24e57cb9, 0x3f92388d5d757e3a),
            (0xbc63dfae43d60e0b, 0xbfc6ca7da581358c),
            (0xbc77656389bd0e62, 0x3fe7c82ce417b4e0),
            (0xbc93679667bef2f0, 0xbffad58651fd1a51),
            (0x3ca2c6cb9eb17fb4, 0x4001bdb67e93a242),
        ];
        // Denominator: same split between plain f64 and double-double as the numerator.
        let ps_den = f_polyeval5(
            vz.hi,
            f64::from_bits(0xbffbdaeff6fbb81c),
            f64::from_bits(0x3fe91b12cf47da3a),
            f64::from_bits(0xbfc7c5d0ffb7f1da),
            f64::from_bits(0x3f939ada247f7609),
            f64::from_bits(0xbf41be65038ccfe6),
        );
        let mut den = DoubleDouble::mul_f64_add(vz, ps_den, DoubleDouble::from_bit_pair(Q[4]));
        den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[3]));
        den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[2]));
        den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[1]));
        den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[0]));
        let r = DoubleDouble::div(num, den);
        let k = DoubleDouble::quick_mult_f64(r, z);
        // Rounding test: if the result perturbed by +err and by -err rounds to the same
        // f64, the fast path is accepted; otherwise the slow path recomputes it.
        let err = f_fmla(
            k.hi,
            f64::from_bits(0x3c70000000000000), // 2^-56
            f64::from_bits(0x3c40000000000000), // 2^-59
        );
        let ub = k.hi + (k.lo + err);
        let lb = k.hi + (k.lo - err);
        if ub == lb {
            return f64::copysign(k.to_f64(), x);
        }
        return inverf_0p06_to_0p75(x);
    }
    // 0.75 < |x| < 1: work with zeta = -log(1 - |x|); the asymptotic kernels take
    // 1/sqrt(zeta) as their polynomial argument and sqrt(zeta) as the final scale.
    let q = DoubleDouble::from_full_exact_add(1.0, -z);
    let mut zeta = fast_log_dd(q);
    zeta = DoubleDouble::from_exact_add(zeta.hi, zeta.lo);
    zeta = -zeta;
    let zeta_sqrt = zeta.fast_sqrt();
    let rz = zeta_sqrt.recip();
    if z < 0.9999 {
        inverf_asympt_small(rz, zeta_sqrt, x)
    } else {
        inverf_asympt_long(rz, zeta_sqrt, x)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for `f_erfinv`: invalid inputs, the tiny-argument shortcut, and a set
    /// of reference values verified for both signs of the argument.
    #[test]
    fn test_erfinv() {
        // Infinities and NaN are outside the domain and must produce NaN.
        for invalid in [f64::NEG_INFINITY, f64::INFINITY, f64::NAN] {
            assert!(f_erfinv(invalid).is_nan());
        }

        assert_eq!(f_erfinv(f64::EPSILON), 1.9678190753608283e-16);

        // (argument, expected result); each pair is also checked with both values negated.
        const REFERENCE: [(f64, f64); 10] = [
            (0.5435340000000265, 0.5265673336010599),
            (0.001000000000084706, 0.0008862271575416209),
            (0.71, 0.7482049711849852),
            (0.41, 0.381014610957532),
            (0.32, 0.29165547581744206),
            (0.82, 0.9480569762323499),
            (0.05, 0.044340387910005497),
            (0.99, 1.8213863677184494),
            (0.9900000000867389, 1.8213863698392927),
            (0.99999, 3.123413274341571),
        ];
        for (arg, expected) in REFERENCE {
            assert_eq!(f_erfinv(arg), expected);
            assert_eq!(f_erfinv(-arg), -expected);
        }
    }
}

704
vendor/pxfm/src/err/inverfc.rs vendored Normal file
View File

@@ -0,0 +1,704 @@
/*
* // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::logs::{fast_log_d_to_dd, fast_log_dd};
use crate::polyeval::{f_polyeval4, f_polyeval5};
#[cold]
fn inverf_0p06_to_0p75(x: DoubleDouble) -> DoubleDouble {
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
const P: [(u64, u64); 10] = [
(0xbc3e06eda42202a0, 0x3f93c2fc5d00e0c8),
(0xbc6eb374406b33b4, 0xbfc76fcfd022e3ff),
(0xbc857822d7ffd282, 0x3fe6f8443546010a),
(0x3c68269c66dfb28a, 0xbff80996754ceb79),
(0x3c543dce8990a9f9, 0x3ffcf778d5ef0504),
(0xbc72fc55f73765f6, 0xbff433be821423d0),
(0xbc66d05fb37c8592, 0x3fdf15f19e9d8da4),
(0x3c56dfb85e83a2c5, 0xbfb770b6827e0829),
(0x3bff1472ecdfa403, 0x3f7a98a2980282bb),
(0x3baffb33d69d6276, 0xbf142a246fd2c07c),
];
let x2 = DoubleDouble::quick_mult(x, x);
let vz = DoubleDouble::full_add_f64(x2, -0.5625);
let vx2 = vz * vz;
let vx4 = vx2 * vx2;
let vx8 = vx4 * vx4;
let p0 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let p1 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let p2 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let p3 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let p4 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let q0 = DoubleDouble::mul_add(vx2, p1, p0);
let q1 = DoubleDouble::mul_add(vx2, p3, p2);
let r0 = DoubleDouble::mul_add(vx4, q1, q0);
let num = DoubleDouble::mul_add(vx8, p4, r0);
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
const Q: [(u64, u64); 10] = [
(0xbc36337f24e57cb9, 0x3f92388d5d757e3a),
(0xbc63dfae43d60e0b, 0xbfc6ca7da581358c),
(0xbc77656389bd0e62, 0x3fe7c82ce417b4e0),
(0xbc93679667bef2f0, 0xbffad58651fd1a51),
(0x3ca2c6cb9eb17fb4, 0x4001bdb67e93a242),
(0xbc9b58961ba253bc, 0xbffbdaeff6fbb81c),
(0x3c7861f549c6aa61, 0x3fe91b12cf47da3a),
(0xbc696dfd665b2f5e, 0xbfc7c5d0ffb7f1da),
(0x3c1552b0ec0ba7b3, 0x3f939ada247f7609),
(0xbbcaa226fb7b30a8, 0xbf41be65038ccfe6),
];
let p0 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[1]),
DoubleDouble::from_bit_pair(Q[0]),
);
let p1 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let p2 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let p3 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let p4 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let q0 = DoubleDouble::mul_add(vx2, p1, p0);
let q1 = DoubleDouble::mul_add(vx2, p3, p2);
let r0 = DoubleDouble::mul_add(vx4, q1, q0);
let den = DoubleDouble::mul_add(vx8, p4, r0);
let r = DoubleDouble::div(num, den);
DoubleDouble::quick_mult(r, x)
}
#[inline]
fn inverf_asympt_small(z: DoubleDouble, zeta_sqrt: DoubleDouble) -> DoubleDouble {
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
// {err0, approx,err1}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},10,10},WorkingPrecision->90]
// num=Numerator[approx];
// den=Denominator[approx];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const P: [(u64, u64); 11] = [
(0x3c936555853a8b2c, 0x3ff0001df06a2515),
(0x3cea488e802db3c3, 0x404406ba373221da),
(0xbce27d42419754e3, 0x407b0442e38a9597),
(0xbd224a407624cbdf, 0x409c9277e31ef446),
(0x3d4f16ce65d6fea0, 0x40aec3ec005b1d8a),
(0x3d105bc37bc61b58, 0x40b46be8f860f4d9),
(0x3d5ca133dcdecaa0, 0x40b3826e6a32dad7),
(0x3d1d52013ba8aa38, 0x40aae93a603cf3ea),
(0xbd07a75306df0fc3, 0x4098ab8357dc2e51),
(0x3d1bb6770bb7a27e, 0x407ebead00879010),
(0xbbfcbff4a9737936, 0x3f8936117ccbff83),
];
let z2 = DoubleDouble::quick_mult(z, z);
let z4 = DoubleDouble::quick_mult(z2, z2);
let z8 = DoubleDouble::quick_mult(z4, z4);
let q0 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[1]),
z,
DoubleDouble::from_bit_pair(P[0]),
);
let q1 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[3]),
z,
DoubleDouble::from_bit_pair(P[2]),
);
let q2 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[5]),
z,
DoubleDouble::from_bit_pair(P[4]),
);
let q3 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[7]),
z,
DoubleDouble::from_bit_pair(P[6]),
);
let q4 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[9]),
z,
DoubleDouble::from_bit_pair(P[8]),
);
let r0 = DoubleDouble::mul_add(z2, q1, q0);
let r1 = DoubleDouble::mul_add(z2, q3, q2);
let s0 = DoubleDouble::mul_add(z4, r1, r0);
let s1 = DoubleDouble::mul_add(z2, DoubleDouble::from_bit_pair(P[10]), q4);
let num = DoubleDouble::mul_add(z8, s1, s0);
// See numerator generation above:
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const Q: [(u64, u64); 11] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc75b1109d4a3262, 0x40440782efaab17f),
(0x3d1f7775b207d84f, 0x407b2da74b0d39f2),
(0xbd3291fdbab49501, 0x409dac8d9e7c90b2),
(0xbd58d8fdd27707a9, 0x40b178dfeffa3192),
(0xbd57fc74ad705ce0, 0x40bad19b686f219f),
(0x3d4075510031f2cd, 0x40be70a598208cea),
(0xbd5442e109152efb, 0x40b9683ef36ae330),
(0x3d5398192933962e, 0x40b04b7c4c3ca8ee),
(0x3d2d04d03598e303, 0x409bd0080799fbf1),
(0x3d2a988eb552ef44, 0x40815a46f12bafe3),
];
let q0 = DoubleDouble::mul_add_f64(
DoubleDouble::from_bit_pair(Q[1]),
z,
f64::from_bits(0x3ff0000000000000),
);
let q1 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[3]),
z,
DoubleDouble::from_bit_pair(Q[2]),
);
let q2 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[5]),
z,
DoubleDouble::from_bit_pair(Q[4]),
);
let q3 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[7]),
z,
DoubleDouble::from_bit_pair(Q[6]),
);
let q4 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[9]),
z,
DoubleDouble::from_bit_pair(Q[8]),
);
let r0 = DoubleDouble::mul_add(z2, q1, q0);
let r1 = DoubleDouble::mul_add(z2, q3, q2);
let s0 = DoubleDouble::mul_add(z4, r1, r0);
let s1 = DoubleDouble::mul_add(z2, DoubleDouble::from_bit_pair(Q[10]), q4);
let den = DoubleDouble::mul_add(z8, s1, s0);
let r = DoubleDouble::div(num, den);
DoubleDouble::quick_mult(r, zeta_sqrt)
}
// branch for |x| > 0.9999 for extreme tail
#[cold]
fn inverf_asympt_long(z: DoubleDouble, zeta_sqrt: DoubleDouble) -> DoubleDouble {
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
// {err0, approx}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},13,13},WorkingPrecision->90]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const P: [(u64, u64); 14] = [
(0x3c97612f9b24a614, 0x3ff0000ba84cc7a5),
(0xbcee8fe2da463412, 0x40515246546f5d88),
(0x3d2fa4a2b891b526, 0x40956b6837159b11),
(0x3d5d673ffad4f817, 0x40c5a1aa3be58652),
(0x3d8867a1e5506f88, 0x40e65ebb1e1e7c75),
(0xbd9bbc0764ed8f5b, 0x40fd2064a652e5c2),
(0xbda78e569c0d237f, 0x410a385c627c461c),
(0xbdab3123ebc465d7, 0x4110f05ca2b65fe5),
(0x3d960def35955192, 0x4110bb079af2fe08),
(0xbd97904816054836, 0x410911c24610c11c),
(0xbd937745e9192593, 0x40fc603244adca35),
(0xbd65fbc476d63050, 0x40e6399103188c21),
(0xbd61016ef381cce6, 0x40c6482b44995b89),
(0x3c326105c49e5a1a, 0xbfab44bd8b4e3138),
];
let z2 = z * z;
let z4 = z2 * z2;
let z8 = z4 * z4;
let g0 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let g1 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let g2 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let g3 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let g4 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let g5 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let g6 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[13]),
DoubleDouble::from_bit_pair(P[12]),
);
let h0 = DoubleDouble::mul_add(z2, g1, g0);
let h1 = DoubleDouble::mul_add(z2, g3, g2);
let h2 = DoubleDouble::mul_add(z2, g5, g4);
let q0 = DoubleDouble::mul_add(z4, h1, h0);
let q1 = DoubleDouble::mul_add(z4, g6, h2);
let num = DoubleDouble::mul_add(z8, q1, q0);
// See numerator generation above:
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const Q: [(u64, u64); 14] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcfc7b886ee61417, 0x405152838f711f3c),
(0xbd33f933c14e831a, 0x409576cb78cab36e),
(0x3d33fb09e2c4898a, 0x40c5e8a2c7602ced),
(0x3d7be430c664bf7e, 0x40e766fdc8c7638c),
(0x3dac662e74cdfc0e, 0x4100276b5f47b5f1),
(0x3da67d06e82a8495, 0x410f843887f8a24a),
(0x3dbbf2e22fc2550a, 0x4116d04271703e08),
(0xbdb2fb3aed100853, 0x4119aff4ed32b74b),
(0x3dba75e7b7171c3c, 0x4116b5eb8bf386bd),
(0x3dab2d8b8c1937eb, 0x410f71c38e84cb34),
(0xbda4e2e8a50b7370, 0x4100ca04b0f36b94),
(0xbd86ed6df34fdaf9, 0x40e9151ded4cf4b7),
(0x3d6938ea702c0328, 0x40c923ee1ab270c4),
];
let g0 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[1]),
DoubleDouble::from_bit_pair(Q[0]),
);
let g1 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let g2 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let g3 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let g4 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let g5 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let g6 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[13]),
DoubleDouble::from_bit_pair(Q[12]),
);
let h0 = DoubleDouble::mul_add(z2, g1, g0);
let h1 = DoubleDouble::mul_add(z2, g3, g2);
let h2 = DoubleDouble::mul_add(z2, g5, g4);
let q0 = DoubleDouble::mul_add(z4, h1, h0);
let q1 = DoubleDouble::mul_add(z4, g6, h2);
let den = DoubleDouble::mul_add(z8, q1, q0);
let r = DoubleDouble::div(num, den);
DoubleDouble::quick_mult(r, zeta_sqrt)
}
#[inline]
fn erf_core(x: DoubleDouble) -> DoubleDouble {
// x is always positive, here, should be filtered out before the call
if x.hi <= 0.0095 {
// 0.0095
// for small |x| using taylor series first 3 terms
// Generated by SageMath:
// from mpmath import mp, erf
//
// mp.prec = 100
//
// def inverf_series(n_terms):
// from mpmath import taylor
// series_erf = taylor(mp.erfinv, 0, n_terms)
// return series_erf
//
// ser = inverf_series(10)
// for i in range(1, len(ser), 2):
// k = ser[i]
// print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
let z2 = DoubleDouble::quick_mult(x, x);
let p = f_fmla(
z2.hi,
f64::from_bits(0x3fb62847c47dda48),
f64::from_bits(0x3fc053c2c0ab91c5),
);
let mut r = DoubleDouble::mul_f64_add(
z2,
p,
DoubleDouble::from_bit_pair((0xbc33ea2ef8dde075, 0x3fcdb29fb2fee5e4)),
);
r = DoubleDouble::mul_add(
z2,
r,
DoubleDouble::from_bit_pair((0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b)),
);
// (rh + rl) * z = rh * z + rl*z
let v = DoubleDouble::quick_mult(r, x);
return v;
} else if x.hi <= 0.06 {
// 0.06
// for |x| < 0.06 using taylor series first 5 terms
// Generated by SageMath:
// from mpmath import mp, erf
//
// mp.prec = 100
//
// def inverf_series(n_terms):
// from mpmath import taylor
// series_erf = taylor(mp.erfinv, 0, n_terms)
// return series_erf
//
// ser = inverf_series(10)
// for i in range(1, len(ser), 2):
// k = ser[i]
// print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
let z2 = DoubleDouble::quick_mult(x, x);
let p = f_polyeval4(
z2.hi,
f64::from_bits(0x3fb62847c47dda48),
f64::from_bits(0x3fb0a13189c6ef7a),
f64::from_bits(0x3faa7c85c89bb08b),
f64::from_bits(0x3fa5eeb1d488e312),
);
let mut r = DoubleDouble::mul_f64_add(
z2,
p,
DoubleDouble::from_bit_pair((0x3c2cec68daff0d80, 0x3fc053c2c0ab91c5)),
);
r = DoubleDouble::mul_add(
z2,
r,
DoubleDouble::from_bit_pair((0xbc33ea2ef8dde075, 0x3fcdb29fb2fee5e4)),
);
r = DoubleDouble::mul_add(
z2,
r,
DoubleDouble::from_bit_pair((0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b)),
);
// (rh + rl) * z = rh * z + rl*z
let v = DoubleDouble::quick_mult(r, x);
return v;
}
if x.hi <= 0.75 {
// |x| < 0.75
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
const P: [(u64, u64); 5] = [
(0xbc3e06eda42202a0, 0x3f93c2fc5d00e0c8),
(0xbc6eb374406b33b4, 0xbfc76fcfd022e3ff),
(0xbc857822d7ffd282, 0x3fe6f8443546010a),
(0x3c68269c66dfb28a, 0xbff80996754ceb79),
(0x3c543dce8990a9f9, 0x3ffcf778d5ef0504),
];
let x2 = DoubleDouble::quick_mult(x, x);
let vz = DoubleDouble::full_add_f64(x2, -0.5625);
let ps_num = f_polyeval5(
vz.hi,
f64::from_bits(0xbff433be821423d0),
f64::from_bits(0x3fdf15f19e9d8da4),
f64::from_bits(0xbfb770b6827e0829),
f64::from_bits(0x3f7a98a2980282bb),
f64::from_bits(0xbf142a246fd2c07c),
);
let mut num = DoubleDouble::mul_f64_add(vz, ps_num, DoubleDouble::from_bit_pair(P[4]));
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[3]));
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[2]));
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[1]));
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[0]));
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
const Q: [(u64, u64); 5] = [
(0xbc36337f24e57cb9, 0x3f92388d5d757e3a),
(0xbc63dfae43d60e0b, 0xbfc6ca7da581358c),
(0xbc77656389bd0e62, 0x3fe7c82ce417b4e0),
(0xbc93679667bef2f0, 0xbffad58651fd1a51),
(0x3ca2c6cb9eb17fb4, 0x4001bdb67e93a242),
];
let ps_den = f_polyeval5(
vz.hi,
f64::from_bits(0xbffbdaeff6fbb81c),
f64::from_bits(0x3fe91b12cf47da3a),
f64::from_bits(0xbfc7c5d0ffb7f1da),
f64::from_bits(0x3f939ada247f7609),
f64::from_bits(0xbf41be65038ccfe6),
);
let mut den = DoubleDouble::mul_f64_add(vz, ps_den, DoubleDouble::from_bit_pair(Q[4]));
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[3]));
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[2]));
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[1]));
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[0]));
let r = DoubleDouble::div(num, den);
let k = DoubleDouble::quick_mult(r, x);
let err = f_fmla(
k.hi,
f64::from_bits(0x3c70000000000000), // 2^-56
f64::from_bits(0x3c40000000000000), // 2^-59
);
let ub = k.hi + (k.lo + err);
let lb = k.hi + (k.lo - err);
if ub == lb {
return k;
}
return inverf_0p06_to_0p75(x);
}
let q = DoubleDouble::full_add_f64(-x, 1.0);
let mut zeta = fast_log_dd(q);
zeta = DoubleDouble::from_exact_add(zeta.hi, zeta.lo);
zeta = -zeta;
let zeta_sqrt = zeta.fast_sqrt();
let rz = zeta_sqrt.recip();
if x.hi < 0.9999 {
inverf_asympt_small(rz, zeta_sqrt)
} else {
inverf_asympt_long(rz, zeta_sqrt)
}
}
/// Small-argument path of the complementary inverse error function.
///
/// The inverse-erf tail code forms `1 - x` before taking the logarithm; here the
/// logarithm is taken of `x` itself, so no subtraction (and no cancellation) is needed.
/// The value `sqrt(-ln(x))` and its reciprocal are then handed to one of the two
/// asymptotic evaluators, selected by the size of `x`.
#[cold]
fn inverfc_extra_small(x: f64) -> DoubleDouble {
    // ln(x) in double-double, renormalised so that `hi` carries the leading part.
    let ln_x = fast_log_d_to_dd(x);
    let neg_ln_x = -DoubleDouble::from_exact_add(ln_x.hi, ln_x.lo);
    let root = neg_ln_x.fast_sqrt();
    let inv_root = root.recip();
    // Same split point as the `x.hi < 0.9999` test used on the erfinv side (1 - 0.9999).
    if x >= 0.0001 {
        return inverf_asympt_small(inv_root, root);
    }
    inverf_asympt_long(inv_root, root)
}
/// Complementary inverse error function
///
/// Returns `y` such that `erfc(y) = x` for `x` in `[0, 2]`.
///
/// Special values:
/// * `+0` and `-0` return `+inf`, `2` returns `-inf`, `1` returns `0`;
/// * negative inputs, inputs above 2, infinities and NaN return NaN.
pub fn f_erfcinv(x: f64) -> f64 {
    const TWO_BITS: u64 = 0x4000000000000000u64;
    let bits = x.to_bits();
    if bits == 0 || bits >= TWO_BITS {
        // Compared as unsigned integers, every value >= 2.0 and every value with the
        // sign bit set (including -0.0) ends up in this branch.
        return if bits.wrapping_shl(1) == 0 {
            // +0 or -0
            f64::INFINITY
        } else if bits == TWO_BITS {
            // exactly 2
            f64::NEG_INFINITY
        } else {
            // x < 0, x > 2, x == Inf, x == NaN
            f64::NAN
        };
    }
    if x == 1. {
        return 0.;
    }
    if x < 0.1 {
        return inverfc_extra_small(x).to_f64();
    }
    // erfcinv(x) = erfinv(1 - x), and erfcinv(2 - x) = -erfcinv(x):
    // inputs above 1 are reflected to 2 - x and the result is negated.
    let (folded, sign) = if x > 1. {
        (DoubleDouble::from_full_exact_sub(2., x), -1.0)
    } else {
        (DoubleDouble::new(0., x), 1.0)
    };
    // 1 - folded, kept in double-double and renormalised before the core evaluation.
    let arg = DoubleDouble::full_add_f64(-folded, 1.);
    let arg = DoubleDouble::from_exact_add(arg.hi, arg.lo);
    erf_core(arg).to_f64() * sign
}
#[cfg(test)]
mod tests {
use super::*;
// Spot checks for `f_erfcinv`: finite results on both sides of 1, the zero at 1,
// and NaN for every input outside [0, 2].
#[test]
fn test_inverfc() {
// Inputs below 1 give positive results, inputs above 1 give negative results.
assert_eq!(f_erfcinv(0.12), 1.0993909519492193);
// Very small input, served by the `x < 0.1` path.
assert_eq!(f_erfcinv(1.0000000000027623e-13), 5.261512368864527);
assert_eq!(f_erfcinv(1.0001200000182189), -0.00010634724760131264);
assert_eq!(f_erfcinv(0.7001200000182189), 0.2723481758403576);
assert_eq!(f_erfcinv(1.5231200000182189), -0.502985998867995);
assert_eq!(f_erfcinv(1.99545434324323243), -2.0064739778442213);
// erfcinv(1) is exactly zero.
assert_eq!(f_erfcinv(1.), 0.);
// Out-of-domain inputs: above 2, below 0, NaN and both infinities.
assert!(f_erfcinv(2.05).is_nan());
assert!(f_erfcinv(-0.01).is_nan());
assert!(f_erfcinv(f64::NAN).is_nan());
assert!(f_erfcinv(f64::NEG_INFINITY).is_nan());
assert!(f_erfcinv(f64::INFINITY).is_nan());
}
}

80
vendor/pxfm/src/err/inverfcf.rs vendored Normal file
View File

@@ -0,0 +1,80 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::err::inverff::erfinv_core;
/// Complementary inverse error function
///
/// Max ulp 0.5
///
/// Returns `y` such that `erfc(y) = x` for `x` in `[0, 2]`, evaluated through
/// `erfcinv(x) = erfinv(1 - x)`.
///
/// Special values:
/// * `+0` and `-0` return `+inf`, `2` returns `-inf`;
/// * negative inputs, inputs above 2, infinities and NaN return NaN.
pub fn f_erfcinvf(x: f32) -> f32 {
    const TWO: u32 = 0x4000_0000;
    let ix = x.to_bits();
    let ux = ix.wrapping_shl(1);
    if ix >= TWO || ux == 0 {
        // Compared as unsigned integers, every value >= 2.0 and every value with the
        // sign bit set (including -0.0) ends up in this branch.
        if ux == 0 {
            // +0 or -0
            return f32::INFINITY;
        }
        if ix == TWO {
            // x == 2.
            return f32::NEG_INFINITY;
        }
        // x < 0, x > 2, x == +/-Inf, x == NaN: outside the domain of erfcinv.
        return f32::NAN;
    }
    let z = x as f64;
    // erfinv is odd: the core works on |1 - x| and the sign is applied separately.
    let sign = if x > 1. { -1.0f32 } else { 1.0f32 };
    // The core's tail branch evaluates log(1 - z) on its first argument, so it has to
    // receive the magnitude; a negative value there makes the logarithm positive and
    // the following square root NaN (this affected x in (1.9375, 2)).
    //
    // The second argument is only used for interval selection, so the subtraction
    // is done in f32.
    //
    // NOTE(review): for x (or 2 - x) below roughly 2^-54 the f64 difference `1. - z`
    // rounds to exactly 1, and the core then takes the logarithm of 0 — confirm how
    // such tiny inputs are expected to behave (the f64 version has a dedicated
    // small-argument path).
    erfinv_core((1. - z).abs(), (1. - x).abs().to_bits(), sign)
}
#[cfg(test)]
mod tests {
use super::f_erfcinvf;
// Spot checks for `f_erfcinvf`: domain endpoints, out-of-domain inputs and a few
// finite values on both sides of 1.
#[test]
fn m_test() {
// Endpoints of the domain map to the infinities.
assert_eq!(f_erfcinvf(2.), f32::NEG_INFINITY);
assert!(f_erfcinvf(-1.).is_nan());
assert_eq!(f_erfcinvf(0.), f32::INFINITY);
assert!(f_erfcinvf(2.1).is_nan());
// 0.5 and 1.5 are mirror images around 1, so the results differ only in sign.
assert_eq!(f_erfcinvf(0.5), 0.47693628);
assert_eq!(f_erfcinvf(1.5), -0.47693628);
assert_eq!(f_erfcinvf(0.002), 2.1851242);
assert_eq!(f_erfcinvf(1.002), -0.0017724329);
assert!(f_erfcinvf(f32::NAN).is_nan());
}
}

359
vendor/pxfm/src/err/inverff.rs vendored Normal file
View File

@@ -0,0 +1,359 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::logs::simple_fast_log;
use crate::polyeval::{
f_estrin_polyeval7, f_estrin_polyeval8, f_estrin_polyeval9, f_polyeval3, f_polyeval5,
f_polyeval10, f_polyeval11,
};
/// Shared single-precision core of the inverse error function.
///
/// Picks one of five approximations from the bit pattern `ax`, evaluates it in `f64`
/// and returns the result rounded to `f32` with the sign copied from `sign`.
///
/// * `z` - the argument widened to `f64`.
///   NOTE(review): the last branch evaluates `log(1 - z)`, so `z` presumably has to be
///   the non-negative magnitude of the argument — confirm for every caller.
/// * `ax` - `f32` bit pattern of the argument's magnitude; only used to select the interval.
/// * `sign` - value whose sign is transferred to the result through `f32::copysign`.
#[inline]
pub(crate) fn erfinv_core(z: f64, ax: u32, sign: f32) -> f32 {
if ax <= 0x3c1ba5e3u32 {
// 0.0095
// for small |x| using taylor series first 3 terms
let z2 = z * z;
// Generated by SageMath:
// from mpmath import mp, erf
//
// mp.prec = 100
//
// def inverf_series(n_terms):
// from mpmath import taylor
// series_erf = taylor(mp.erfinv, 0, n_terms)
// return series_erf
//
// ser = inverf_series(10)
// for i in range(1, len(ser), 2):
// k = ser[i]
// print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
// Odd series: a polynomial in z^2 multiplied by z.
let p = f_polyeval3(
z2,
f64::from_bits(0x3fec5bf891b4ef6b),
f64::from_bits(0x3fcdb29fb2fee5e4),
f64::from_bits(0x3fc053c2c0ab91c5),
) * z;
return f32::copysign(p as f32, sign);
} else if ax <= 0x3d75c28fu32 {
// 0.06
// for |x| < 0.06 using taylor series first 5 terms
let z2 = z * z;
// Generated by SageMath:
// from mpmath import mp, erf
//
// mp.prec = 100
//
// def inverf_series(n_terms):
// from mpmath import taylor
// series_erf = taylor(mp.erfinv, 0, n_terms)
// return series_erf
//
// ser = inverf_series(10)
// for i in range(1, len(ser), 2):
// k = ser[i]
// print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
// Same leading coefficients as the branch above, extended by two more terms.
let p = f_polyeval5(
z2,
f64::from_bits(0x3fec5bf891b4ef6b),
f64::from_bits(0x3fcdb29fb2fee5e4),
f64::from_bits(0x3fc053c2c0ab91c5),
f64::from_bits(0x3fb62847c47dda48),
f64::from_bits(0x3fb0a13189c6ef7a),
) * z;
return f32::copysign(p as f32, sign);
}
if ax <= 0x3f400000u32 {
// |x| <= 0.75
let z2 = z * z;
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
//
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Sqrt[x]]/Sqrt[x]
// {err0, approx}=MiniMaxApproximation[f[z],{z,{0.06,0.75},8,7},WorkingPrecision->70]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// Both polynomials below are evaluated in the shifted variable r = z^2 - 0.5625.
let r = z2 - 0.5625;
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,8}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let p_num = f_estrin_polyeval9(
r,
f64::from_bits(0x3fa329348a73d9d4),
f64::from_bits(0xbfd2cb089b644580),
f64::from_bits(0x3fed229149f732d6),
f64::from_bits(0xbff6a233d2028bff),
f64::from_bits(0x3ff268adbfbb6023),
f64::from_bits(0xbfddac401c7d70f4),
f64::from_bits(0x3fb3b1bd759d5046),
f64::from_bits(0xbf67aeb45bad547e),
f64::from_bits(0xbf01ccc7434d381b),
);
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,7}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let p_den = f_estrin_polyeval8(
r,
f64::from_bits(0x3fa1aac2ee4b1413),
f64::from_bits(0xbfd279342e281c99),
f64::from_bits(0x3feef89a353c6d1b),
f64::from_bits(0xbffa8f1b7cd6d0a7),
f64::from_bits(0x3ff89ce6289819a1),
f64::from_bits(0xbfe7db5282a4a2e1),
f64::from_bits(0x3fc543f9a928db4a),
f64::from_bits(0xbf888fd2990e88db),
);
// The rational function approximates erfinv(z)/z, hence the final multiplication by z.
let k = (p_num / p_den) * z;
f32::copysign(k as f32, sign)
} else if ax <= 0x3f580000u32 {
// |x| <= 0.84375
let z2 = z * z;
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
//
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Sqrt[x]]/Sqrt[x]
// {err0, approx}=MiniMaxApproximation[f[z],{z,{0.75,0.84375},6,6},WorkingPrecision->70]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// Both polynomials below are evaluated in the shifted variable r = z^2 - 0.84375.
let r = z2 - 0.84375;
// x0=SetPrecision[0.84375,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let p_num = f_polyeval10(
r,
f64::from_bits(0x3f116d07e62cbb74),
f64::from_bits(0xbf5c38d390052412),
f64::from_bits(0x3f92d6f96f84efe3),
f64::from_bits(0xbfbac9189cae446b),
f64::from_bits(0x3fd5dd124fb25677),
f64::from_bits(0xbfe49845d46b80ab),
f64::from_bits(0x3fe556c4913f60f8),
f64::from_bits(0xbfd59e527704e33b),
f64::from_bits(0x3fb07614a5e6c9f1),
f64::from_bits(0xbf60ce54a2d8a789),
);
// x0=SetPrecision[0.84375,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let p_den = f_polyeval10(
r,
f64::from_bits(0x3f09fbdd1c987d1e),
f64::from_bits(0xbf5602ad17d419f4),
f64::from_bits(0x3f8efe31ea5bc71d),
f64::from_bits(0xbfb77e5f1bd26730),
f64::from_bits(0x3fd4c3f03e4f5478),
f64::from_bits(0xbfe5aa87dfc5e757),
f64::from_bits(0x3fe9c6406f9abc0b),
f64::from_bits(0xbfdff2f008b4db05),
f64::from_bits(0x3fc1123be5319800),
f64::from_bits(0xbf83be49c2d5cb9e),
);
let k = (p_num / p_den) * z;
f32::copysign(k as f32, sign)
} else if ax <= 0x3f700000u32 {
// |x| <= 0.9375
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
//
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Sqrt[x]]/Sqrt[x]
// {err0, approx}=MiniMaxApproximation[f[z],{z,{0.84375,0.9375},10,9},WorkingPrecision->70]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let x2 = z * z;
// Both polynomials below are evaluated in the shifted variable r = z^2 - 0.87890625.
let r = x2 - 0.87890625;
// x0=SetPrecision[0.87890625,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let p_num = f_polyeval11(
r,
f64::from_bits(0x3ec70f1cbf8a758b),
f64::from_bits(0xbf1c9dff87b698d0),
f64::from_bits(0x3f5dfe7be00cc21c),
f64::from_bits(0xbf913fd09c5a3682),
f64::from_bits(0x3fb7ab0095693976),
f64::from_bits(0xbfd3b3ca6a3c9919),
f64::from_bits(0x3fe3533be6d1d8c8),
f64::from_bits(0xbfe48208ef308ac7),
f64::from_bits(0x3fd361a82dab69d1),
f64::from_bits(0xbfa2401965a98195),
f64::from_bits(0xbf54ba4d14ca54e3),
);
// x0=SetPrecision[0.87890625,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let p_den = f_polyeval10(
r,
f64::from_bits(0x3ec0699f391e2327),
f64::from_bits(0xbf151ec184941078),
f64::from_bits(0x3f5717bb379a3c6e),
f64::from_bits(0xbf8beed3755c3484),
f64::from_bits(0x3fb46148b4a431ef),
f64::from_bits(0xbfd25690b7bc76fa),
f64::from_bits(0x3fe3f1b2f4ee0d9d),
f64::from_bits(0xbfe888a7a4511975),
f64::from_bits(0x3fdd84db18f2a240),
f64::from_bits(0xbfb844807521be56),
);
let f = z * (p_num / p_den);
f32::copysign(f as f32, sign)
} else {
// Rational approximation generated by Wolfram Mathematica:
// for inverf(x) = sqrt(-log(1-x))*R(1/sqrt(-log(1-x)))
//
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
// {err0, approx,err1}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},7,6},WorkingPrecision->90]
// num=Numerator[approx];
// den=Denominator[approx];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// zeta = -log(1 - z); a negative `z` would make zeta negative and its square root NaN.
let zeta = -simple_fast_log(1. - z);
let zeta_sqrt = zeta.sqrt();
// (1 / zeta) * sqrt(zeta) == 1 / sqrt(zeta), the variable of the rational function.
let rcp_zeta = (1. / zeta) * zeta_sqrt;
let p_num = f_estrin_polyeval8(
rcp_zeta,
f64::from_bits(0x3ff00072876c578e),
f64::from_bits(0x40314e00c10282da),
f64::from_bits(0x404f4a1412af03f6),
f64::from_bits(0x404c895cc0d9b1b3),
f64::from_bits(0x404545794620bfaf),
f64::from_bits(0x403264d21ea21354),
f64::from_bits(0x3fc5a5141dd19237),
f64::from_bits(0xbf8c2e49707c21ec),
);
let p_den = f_estrin_polyeval7(
rcp_zeta,
f64::from_bits(0x3ff0000000000000),
f64::from_bits(0x403151312c313d77),
f64::from_bits(0x405032345fa3d0cd),
f64::from_bits(0x4053e0a81d4c5f09),
f64::from_bits(0x4054fa20c5e0731c),
f64::from_bits(0x404620d7f94d4804),
f64::from_bits(0x4035d7400867b81f),
);
let r = zeta_sqrt * (p_num / p_den);
f32::copysign(r as f32, sign)
}
}
/// Inverse error function
///
/// Max ulp 0.5
///
/// Returns `y` such that `erf(y) = x` for `x` in `[-1, 1]`.
///
/// Special values:
/// * `+0` / `-0` return a zero of the same sign (the function is odd);
/// * `+1` / `-1` return `+inf` / `-inf`;
/// * `|x| > 1`, infinities and NaN return NaN.
pub fn f_erfinvf(x: f32) -> f32 {
    const ONE: u32 = 0x3f80_0000;
    const EPSILON: u32 = 0x3400_0000;
    // Bit pattern of |x|.
    let ax = x.to_bits() & 0x7fff_ffff;
    if ax >= ONE || ax <= EPSILON {
        // |x| >= 1, |x| == 0, |x| <= f32::EPSILON
        if ax == 0 {
            // |x| == 0: hand the input back so that -0.0 stays -0.0, in line with the
            // sign-preserving linear term used for the other tiny inputs below.
            return x;
        }
        if ax <= EPSILON {
            // |x| <= f32::EPSILON
            // inverf(x) ~ Sqrt[Pi]x/2+O[x]^3
            const SQRT_PI_OVER_2: f64 = f64::from_bits(0x3fec5bf891b4ef6b);
            return (x as f64 * SQRT_PI_OVER_2) as f32;
        }
        if ax == ONE {
            // |x| == 1
            return f32::copysign(f32::INFINITY, x);
        }
        // |x| > 1, |x| == Inf, x == NaN
        return f32::NAN;
    }
    // The core works on the magnitude and copies the sign back from `x`.
    let z = f32::from_bits(ax) as f64;
    erfinv_core(z, ax, x)
}
#[cfg(test)]
mod tests {
use super::*;
// Spot checks for `f_erfinvf`: out-of-domain inputs, the endpoints, and mirrored
// pairs +x / -x across the range of the argument.
#[test]
fn f_test_inv_erff() {
// Inputs outside [-1, 1] yield NaN.
assert!(f_erfinvf(f32::NEG_INFINITY).is_nan());
assert!(f_erfinvf(f32::INFINITY).is_nan());
assert!(f_erfinvf(-1.1).is_nan());
assert!(f_erfinvf(1.1).is_nan());
// Largest input handled by the linear tiny-|x| shortcut.
assert_eq!(f_erfinvf(f32::EPSILON), 1.05646485e-7);
// Endpoints map to the infinities of matching sign.
assert_eq!(f_erfinvf(-1.), f32::NEG_INFINITY);
assert_eq!(f_erfinvf(1.), f32::INFINITY);
// Mirrored pairs: the result only changes sign.
assert_eq!(f_erfinvf(0.002), 0.0017724558);
assert_eq!(f_erfinvf(-0.002), -0.0017724558);
assert_eq!(f_erfinvf(0.02), 0.017726395);
assert_eq!(f_erfinvf(-0.02), -0.017726395);
assert_eq!(f_erfinvf(0.05), 0.044340387);
assert_eq!(f_erfinvf(-0.05), -0.044340387);
assert_eq!(f_erfinvf(0.5), 0.47693628);
assert_eq!(f_erfinvf(-0.5), -0.47693628);
assert_eq!(f_erfinvf(0.76), 0.8308411);
assert_eq!(f_erfinvf(-0.76), -0.8308411);
assert_eq!(f_erfinvf(0.92), 1.2379221);
assert_eq!(f_erfinvf(-0.92), -1.2379221);
assert_eq!(f_erfinvf(0.97), 1.5344859);
assert_eq!(f_erfinvf(-0.97), -1.5344859);
assert_eq!(f_erfinvf(0.99), 1.8213866);
assert_eq!(f_erfinvf(-0.99), -1.8213866);
assert_eq!(f_erfinvf(0.7560265), 0.82385886);
}
}

56
vendor/pxfm/src/err/mod.rs vendored Normal file
View File

@@ -0,0 +1,56 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Reject `pub` items in this module tree that cannot be reached from outside the crate.
#![deny(unreachable_pub)]
// Implementation submodules. All of them are private; the public entry points are
// re-exported individually below.
mod erf;
mod erf_poly;
mod erfc;
mod erfcx;
mod erfcxf;
mod erff;
mod erffc;
mod inverf;
mod inverfc;
mod inverfcf;
mod inverff;
mod rerf;
mod rerf_poly;
mod rerff;
// Public API of the error-function family. Names ending in `f` are the `f32`
// variants of the functions visible here (e.g. `f_erfinvf`, `f_erfcinvf`).
pub use erf::f_erf;
pub use erfc::f_erfc;
pub use erfcx::f_erfcx;
pub use erfcxf::f_erfcxf;
pub use erff::f_erff;
pub use erffc::f_erfcf;
pub use inverf::f_erfinv;
pub use inverfc::f_erfcinv;
pub use inverfcf::f_erfcinvf;
pub use inverff::f_erfinvf;
pub use rerf::f_rerf;
pub use rerff::f_rerff;

233
vendor/pxfm/src/err/rerf.rs vendored Normal file
View File

@@ -0,0 +1,233 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::err::rerf_poly::RERF_HARD;
use crate::polyeval::f_polyeval7;
/// Accurate fallback for `1/erf(x)` with `|x| < 1/16`.
///
/// Evaluates the full polynomial in `z2` (the caller passes `|x|^2` as a double-double)
/// entirely in double-double arithmetic and divides the result by `x`.
#[cold]
#[inline(never)]
fn rerf_poly_tiny_hard(x: f64, z2: DoubleDouble) -> f64 {
    // Polynomial for x/erf(x)
    // Generated by Sollya.
    // d = [0, 1/16];
    // f = x/erf(x);
    // Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18|], [|107...|], d);
    // See ./notes/r_erf_tiny_hard.sollya
    //
    // Each entry is a (lo, hi) bit pair; entry k is the coefficient of z2^k.
    const C: [(u64, u64); 10] = [
        (0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b),
        (0xbc6d7696fe4a7cd0, 0x3fd2e7fb0bcdf4f2),
        (0xbc0cb8b926064434, 0x3f842aa561ecc102),
        (0x3c1cd94c2f3e6f09, 0xbf75207c7ef80727),
        (0xbbb35c4effe3c87c, 0x3f2db4a8d7c32472),
        (0x3bbf1d1edd1e109a, 0x3f20faa7a99a4d3d),
        (0xbb9e05d21f4e1755, 0xbef3adb84631c39c),
        (0x3b6ee5dc31565280, 0xbec366647cacdcc9),
        (0x3b3698f8162c5fac, 0x3eaabb9db9f3b048),
        (0xbb026f5401fce891, 0xbe66cd40349520b6),
    ];
    // Horner scheme from the highest coefficient down: the first step combines C[9]
    // and C[8], the remaining coefficients C[7]..C[0] are folded in one by one.
    let leading = DoubleDouble::mul_add(
        z2,
        DoubleDouble::from_bit_pair(C[9]),
        DoubleDouble::from_bit_pair(C[8]),
    );
    let horner = C[..8].iter().rev().fold(leading, |acc, &pair| {
        DoubleDouble::mul_add(z2, acc, DoubleDouble::from_bit_pair(pair))
    });
    // Renormalise before the division: (x/erf(x)) / x = 1/erf(x).
    let numerator = DoubleDouble::from_exact_add(horner.hi, horner.lo);
    DoubleDouble::div_dd_f64(numerator, x).to_f64()
}
/// Fast path for `1/erf(x)` with `|x| < 1/16`.
///
/// `z` is `|x|`; `x` keeps the sign and is used as the final divisor. The high-order
/// part of the polynomial is evaluated in plain `f64`, the three leading coefficients
/// in double-double. If the error-bound check cannot decide the rounding, the
/// computation is repeated by `rerf_poly_tiny_hard`.
#[inline]
fn rerf_poly_tiny(z: f64, x: f64) -> f64 {
    let z2 = DoubleDouble::from_exact_mult(z, z);
    // Polynomial for x/erf(x)
    // Generated by Sollya.
    // d = [0, 1/16];
    // f = x/erf(x);
    // Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18|], [|107, 107, 107, D...|], d);
    // See ./notes/r_erf_tiny.sollya
    let high_terms = f_polyeval7(
        z2.hi,
        f64::from_bits(0xbf75207c7ef80727),
        f64::from_bits(0x3f2db4a8d7c36a03),
        f64::from_bits(0x3f20faa7a8db7f27),
        f64::from_bits(0xbef3adae94983bb2),
        f64::from_bits(0xbec3b05fe5c49f32),
        f64::from_bits(0x3ed67902690892be),
        f64::from_bits(0xbf3090033375e5ee),
    );
    // The three double-double coefficients, applied from the highest one down.
    let c2 = DoubleDouble::from_bit_pair((0xbc0cb29fd910c494, 0x3f842aa561ecc102));
    let c1 = DoubleDouble::from_bit_pair((0xbc6d7696ff4f712a, 0x3fd2e7fb0bcdf4f2));
    let c0 = DoubleDouble::from_bit_pair((0xbc8618f13eb7ca11, 0x3fec5bf891b4ef6b));
    let step2 = DoubleDouble::quick_mul_f64_add(z2, high_terms, c2);
    let step1 = DoubleDouble::quick_mul_add(z2, step2, c1);
    let step0 = DoubleDouble::quick_mul_add(z2, step1, c0);
    let numerator = DoubleDouble::from_exact_add(step0.hi, step0.lo);
    let quotient = DoubleDouble::div_dd_f64(numerator, x);
    // Error bound: |hi| * 2^-62 + 2^-70.
    let err = f_fmla(
        quotient.hi,
        f64::from_bits(0x3c10000000000000), // 2^-62
        f64::from_bits(0x3b90000000000000), // 2^-70
    );
    // Accept the fast result only when both ends of the error interval round to
    // the same f64.
    let upper = quotient.hi + (quotient.lo + err);
    let lower = quotient.hi + (quotient.lo - err);
    if upper == lower {
        quotient.to_f64()
    } else {
        rerf_poly_tiny_hard(x, z2)
    }
}
/// Evaluates `1/erf(x)` from row `idx` of the `RERF_HARD` coefficient table.
///
/// The row holds eleven (lo, hi) bit pairs that are combined by a Horner scheme in
/// `z2` using double-double arithmetic; the result is divided by `x`.
#[inline]
fn rerf_poly_hard(x: f64, z2: DoubleDouble, idx: usize) -> f64 {
    let row = &RERF_HARD[idx];
    // First Horner step joins the two highest coefficients.
    let mut acc = DoubleDouble::mul_add(
        z2,
        DoubleDouble::from_bit_pair(row[10]),
        DoubleDouble::from_bit_pair(row[9]),
    );
    // Remaining coefficients row[8] down to row[0].
    for k in (0..9).rev() {
        acc = DoubleDouble::mul_add(z2, acc, DoubleDouble::from_bit_pair(row[k]));
    }
    // Renormalise, then divide by x.
    let numerator = DoubleDouble::from_exact_add(acc.hi, acc.lo);
    DoubleDouble::div_dd_f64(numerator, x).to_f64()
}
/// Computes 1/erf(x)
///
/// Max ulp 0.5001
///
/// Special values: NaN returns NaN, `+/-Inf` returns `+/-1`, `+/-0` returns `+/-Inf`.
pub fn f_rerf(x: f64) -> f64 {
    const ABS_MASK: u64 = 0x7fff_ffff_ffff_ffff;
    const INF_BITS: u64 = 0x7ff0000000000000u64;
    // 1/erf(x) rounds to +/-1 for RNDN for |x| above this bit pattern.
    const SATURATION_BITS: u64 = 0x4017afb48dc96626;

    let ux = x.to_bits() & ABS_MASK;
    let z = f64::from_bits(ux); // |x|

    if ux > SATURATION_BITS {
        if ux > INF_BITS {
            return x + x; /* NaN */
        }
        let os = f64::copysign(1.0, x);
        if ux == INF_BITS {
            return os; /* +/-Inf */
        }
        // os - 2^-54 * os
        return f_fmla(-f64::from_bits(0x3c90000000000000), os, os);
    }

    /* now |x| <= 0x4017afb48dc96626 */
    if z < f64::from_bits(0x3c20000000000000) {
        // |x| < 0.0000000000000000004336808689942018 (2^-61)
        // Zero, and the subnormals at or below this bit pattern, return an infinity
        // carrying the sign of x.
        if x == 0. || ux <= 0x38b7f12369dedu64 {
            return f64::copysign(f64::INFINITY, x);
        }
        /* double-double approximation of sqrt(pi)/2 to nearest */
        const SQRT_PI_OVER_2: DoubleDouble = DoubleDouble::new(
            f64::from_bits(0xbc8618f13eb7ca89),
            f64::from_bits(0x3fec5bf891b4ef6b),
        );
        /* tiny x is Taylor Series: 1/erf(x) ~ sqrt(pi)/(2 * x) + O(x^3), where the ratio of the O(x^3)
        term to the main term is in x^2/3, thus less than 2^-123 */
        let two_pow_106 = f64::from_bits(0x4690000000000000);
        /* scale x by 2^106 to get out of the subnormal range */
        let scaled_x = x * two_pow_106;
        let quotient = DoubleDouble::div_dd_f64(SQRT_PI_OVER_2, scaled_x);
        // the divisor was 2^106 times too large, so multiply the quotient by 2^106
        return DoubleDouble::quick_mult_f64(quotient, two_pow_106).to_f64();
    }

    if ux < 0x3fb0000000000000u64 {
        // |x| < 1/16
        return rerf_poly_tiny(z, x);
    }

    // Adding 4 to the exponent field multiplies |x| by 16; the integer part of that
    // product selects the table row.
    const SIXTEEN: u64 = 4 << 52;
    // SAFETY: at this point 2^-4 <= |x| <= 0x4017afb48dc96626 (about 5.92), so the
    // scaled value is finite, lies in [1, 95) and its truncation fits in `usize`.
    let idx = unsafe { f64::from_bits(ux.wrapping_add(SIXTEEN)).to_int_unchecked::<usize>() };
    rerf_poly_hard(x, DoubleDouble::from_exact_mult(z, z), idx)
}
#[cfg(test)]
mod tests {
use super::*;
// Spot checks for `f_rerf` (1/erf(x)): the saturated range, mid-range values of both
// signs, a tiny argument, and the special inputs zero and NaN.
#[test]
fn test_erf() {
// Large |x|: the result has rounded to 1.
assert_eq!(f_rerf(65.), 1.0);
// The function is odd: mirrored inputs give mirrored results.
assert_eq!(f_rerf(3.), 1.0000220909849995);
assert_eq!(f_rerf(-3.), -1.0000220909849995);
assert_eq!(f_rerf(-0.03723630312089732), -23.811078627277197);
// Argument below the 2^-61 threshold of the tiny-argument branch.
assert_eq!(
f_rerf(0.0000000000000000002336808689942018),
3.7924667486354975e18
);
assert_eq!(f_rerf(2.000225067138672), 1.004695025872889);
// Zero maps to an infinity carrying the sign of the zero.
assert_eq!(f_rerf(0.), f64::INFINITY);
assert_eq!(f_rerf(-0.), f64::NEG_INFINITY);
assert!(f_rerf(f64::NAN).is_nan());
}
}

1331
vendor/pxfm/src/err/rerf_poly.rs vendored Normal file

File diff suppressed because it is too large Load Diff

468
vendor/pxfm/src/err/rerff.rs vendored Normal file
View File

@@ -0,0 +1,468 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
// Polynomials approximating x/erf(x) on ( k/8, (k + 1)/8 ) generated by Sollya and SageMath
// ```text
// def build_sollya_script(idx):
// return f"""
// d = [{idx}/8, {idx + 1}/8];
//
// f = x/erf(x);
// Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14|], [|D...|], d);
//
// for i from 0 to degree(Q) by 2 do {{
// write(coeff(Q, i)) >> "coefficients.txt";
// write("\\n") >> "coefficients.txt";
// }};
// """
//
// def load_coefficients(filename):
// with open(filename, "r") as f:
// return [RealField(500)(line.strip()) for line in f if line.strip()]
//
// def call_sollya_on_interval(idx):
// sollya_script = build_sollya_script(idx)
// with open("tmp_interval.sollya", "w") as f:
// f.write(sollya_script)
// import subprocess
// if os.path.exists("coefficients.txt"):
// os.remove("coefficients.txt")
// try:
// result = subprocess.run(
// ["sollya", "tmp_interval.sollya"],
// check=True,
// capture_output=True,
// text=True
// )
// except subprocess.CalledProcessError as e:
// return
//
// def print_coeffs(poly):
// print("[")
// for i in range(len(poly)):
// coeff = poly[i]
// print(double_to_hex(coeff), ",")
// print("],")
//
// print(f"static COEFFS: [[u64; 8]; 32] = [")
// for i in range(0, 32):
// call_sollya_on_interval(i)
// coeffs = load_coefficients(f"coefficients.txt")
// print_coeffs(coeffs)
// print("];")
// ```
// Coefficient table consumed by `f_rerff`.
// Row `k` is selected when 8 * |x| truncates to `k`, i.e. |x| lies in [k/8, (k + 1)/8).
// Each row stores the raw bit patterns of eight `f64` coefficients c0..c7 of the even
// polynomial c0 + c1 * x^2 + c2 * x^4 + ... + c7 * x^14 that approximates x / erf(x).
static COEFFS: [[u64; 8]; 32] = [
[
0x3fec5bf891b4ef6b,
0x3fd2e7fb0bcdee7f,
0x3f842aa5641a200a,
0xbf752081ae81d16e,
0x3f2e1a191fb85592,
0xbf203a20ad500043,
0x3f861a864f719e76,
0xbfc79f68bad20bd1,
],
[
0x3fec5bf891b4ef6b,
0x3fd2e7fb0bcdf45b,
0x3f842aa561f35512,
0xbf75207c8167ac1d,
0x3f2db4b119b4ce20,
0x3f20fa28737c4219,
0xbef38e74cca2219a,
0xbec5d70713fc621e,
],
[
0x3fec5bf891b4ef30,
0x3fd2e7fb0bce1c0f,
0x3f842aa56138541f,
0xbf75207c6197eb7c,
0x3f2db4799120e074,
0x3f20fc28d915a6e9,
0xbef3ea5b479dc053,
0xbebbffe6df8ec372,
],
[
0x3fec5bf891b4bf18,
0x3fd2e7fb0bde166f,
0x3f842aa53c721766,
0xbf7520796733bbec,
0x3f2db21eebf4144f,
0x3f210545cc78d0f0,
0xbef48ad7e4aa2d10,
0xbeb24a043ad31907,
],
[
0x3fec5bf891ab16e9,
0x3fd2e7fb0dc7b919,
0x3f842aa29d8381e7,
0xbf7520592585d601,
0x3f2da30df1566e43,
0x3f212780ff325aa6,
0xbef5e98ea9819e42,
0xbe9849d52099dcb9,
],
[
0x3fec5bf890ddfa8d,
0x3fd2e7fb28aab312,
0x3f842a8a461f0eb7,
0xbf751f93b2d27114,
0x3f2d66789eed5f95,
0x3f21818ff1832f50,
0xbef84264724049ef,
0x3e9df12b02e82a5a,
],
[
0x3fec5bf887f64fa4,
0x3fd2e7fbfcc05f75,
0x3f842a02323e2099,
0xbf751c86d291ced6,
0x3f2cbd5653cde433,
0x3f223299b32b8583,
0xbefb7fc6e286cd94,
0x3eb49676cb3da393,
],
[
0x3fec5bf84f8e2488,
0x3fd2e7ffe83d2974,
0x3f842821c5cc659c,
0xbf7514805a6196e3,
0x3f2b723680f64bb5,
0x3f233416dcfcd366,
0xbefefe55300afaa7,
0x3ebf0c475fb71e7a,
],
[
0x3fec5bf7999e6afe,
0x3fd2e809c6d4caa7,
0x3f84247256be4a56,
0xbf750838db0c0cf5,
0x3f29e7e867267388,
0x3f24226adee5ce74,
0xbf00c0830af2bf01,
0x3ec26fb6b18e628b,
],
[
0x3fec5bf801fc5ad5,
0x3fd2e80618e8941e,
0x3f84254c04b0b234,
0xbf7509d7cf351201,
0x3f2a01829944820c,
0x3f241d7bb0c7e2de,
0xbf00c2d844916d26,
0x3ec2817d39abc26b,
],
[
0x3fec5c0938a12f13,
0x3fd2e7706c510d79,
0x3f8448392db86aae,
0xbf75526e9c6046f0,
0x3f2facd0bc0e7862,
0x3f21fc4093e1e6b7,
0xbefdf54af68ba968,
0x3ebfe348fc246c15,
],
[
0x3fec5c6dcdadc5d8,
0x3fd2e495072afff3,
0x3f84d6f390564d4d,
0xbf764a7e85749c85,
0x3f37effb62caee80,
0x3f19cb39bc236ae6,
0xbef6d7035785e8f3,
0x3eb755aa2e58fc52,
],
[
0x3fec5dd74381acff,
0x3fd2dbe68140f116,
0x3f86459e1acfda0f,
0xbf7865203923a03d,
0x3f43665053a48049,
0x3f0409e353b761ea,
0xbeeb0b00f567c9f8,
0x3eabc33000611b25,
],
[
0x3fec6175431226d1,
0x3fd2c8dcbb0babcc,
0x3f88f5bfd61e5d2e,
0xbf7bc60de8dff620,
0x3f4d9b7076c7767c,
0xbf0106584fac3547,
0xbed0a56cd1030deb,
0x3e970ee11e7beb48,
],
[
0x3fec68445d99a8e9,
0x3fd2a9d608dbfea2,
0x3f8cc072ddf22cb6,
0xbf7fe5f2efdc5f5c,
0x3f5431d1deff38bc,
0xbf197220e4a1dda8,
0x3ec9e2469e6c1c67,
0x3e4be72535d53d7b,
],
[
0x3fec713c415bf088,
0x3fd28610e83aa38c,
0x3f9049ee1942f46b,
0xbf81c513d165d6fd,
0x3f585bc13e0fcaba,
0xbf22715362e30768,
0x3ede6bfa3c69e8e3,
0xbe852cd85f8dea5b,
],
[
0x3fec770e08b47107,
0x3fd2716324b22047,
0x3f91460d403e6b9c,
0xbf829ab46375f10d,
0x3f5a0e7f00c76fb5,
0xbf2484890f2d7eeb,
0x3ee207b21bbd8496,
0xbe8bbee036671b6a,
],
[
0x3fec6f4a2d01088d,
0x3fd288e494bc89b7,
0x3f905203788a2821,
0xbf81eab2727ce365,
0x3f58ddba75a3c100,
0xbf2347c9a317a175,
0x3ee099c93ce5f44f,
0xbe88e9f9c064f833,
],
[
0x3fec4c9bbce50c7d,
0x3fd2e8175b0e1837,
0x3f89a2d1518c7a4c,
0xbf7f3fa91859127e,
0x3f55431c495b1077,
0xbf1fc1af665bb1f8,
0x3eda0f1d735195cb,
0xbe827b8d6fa224ed,
],
[
0x3fec03cce39d7213,
0x3fd39c2316e290bf,
0x3f7b674438899313,
0xbf783644c88c71fb,
0x3f5047a3da485180,
0xbf1748b54f823d57,
0x3ed20c86d3302f22,
0xbe77f94cafe045a8,
],
[
0x3feb913f0adf6c4b,
0x3fd49c4cedae09fc,
0xbf4a6dea9778f474,
0xbf7006dc4e6c8125,
0x3f461483c254fa5f,
0xbf0e75052760bf18,
0x3ec65425869bc096,
0xbe6bc2df9fbc0f82,
],
[
0x3feafbeda3b7d400,
0x3fd5cb900ee1fb5e,
0xbf8228d16e87de3d,
0xbf6011d44e155bf5,
0x3f3993b736442257,
0xbf017c7ee5efa6ad,
0x3eb886e337d2e3c2,
0xbe5cba4b79e90043,
],
[
0x3fea54849d309eba,
0x3fd701afa55e3d21,
0xbf90c72bb2e2799f,
0xbf33c92573294e34,
0x3f265284f7a6d53a,
0xbef09f09298ed1e8,
0x3ea7153a46cb2e27,
0xbe49ef6ec79265fd,
],
[
0x3fe9b128df667870,
0x3fd816d295a867cb,
0xbf9713f11ea84a26,
0x3f4edcbdd63903bb,
0x3ef44f54fc7a6024,
0xbed45da547d2fcb8,
0x3e9049754d57a9a7,
0xbe32aba05ca26a69,
],
[
0x3fe927f49edf4ace,
0x3fd8ecd207c6a7d1,
0xbf9b8cd215124008,
0x3f5cbab209dd389d,
0xbf12a8920ea6230f,
0x3eb442dfce60b0e2,
0x3e52494e415c7728,
0xbe09a1b1bbb9cee4,
],
[
0x3fe8ca3d7437d06f,
0x3fd973c08b6d33fb,
0xbf9e272ca1fccc06,
0x3f61efd00e2016b6,
0xbf1e6dab18e9d45a,
0x3ed0b446e3469be1,
0xbe7503c584488bed,
0x3e069968660290a4,
],
[
0x3fe8a1f4b154f663,
0x3fd9a9a8b81692d4,
0xbf9f1e9312dd4501,
0x3f632b4d20599404,
0xbf2119c1b5e43b24,
0x3ed42b9874284d56,
0xbe7c17cc1eef4b9d,
0x3e117f0a9057a689,
],
[
0x3fe8b15bfcf78f33,
0x3fd99720c884ab33,
0xbf9ed2265979f5a6,
0x3f62d3c30432692b,
0xbf20a17346c37362,
0x3ed36538f2d21c31,
0xbe7aac6bb10f8b90,
0x3e1061d3a1737044,
],
[
0x3fe8f479e98cb825,
0x3fd94ab3f8d0c80c,
0xbf9da7afe85abf94,
0x3f618fe28f71a3d4,
0xbf1df723b2a63e38,
0x3ed0d190252a7f7c,
0xbe7631fdd49272b0,
0x3e0a17567cab4a94,
],
[
0x3fe9636d647b61c0,
0x3fd8d4aaba0e0212,
0xbf9bf904574e56ea,
0x3f5fb68684d8555d,
0xbf19d06f9cf17bbf,
0x3ecb92b9f0b8acf3,
0xbe7145bde0c499ae,
0x3e033cf1cb08ce4c,
],
[
0x3fe9f4c3301b6d33,
0x3fd844100b4598b3,
0xbf9a0b94e19be990,
0x3f5c0ed55c70532f,
0xbf15a786c9e62b23,
0x3ec5e3f05a04f5c6,
0xbe69ea9db2e37883,
0x3dfb3e5ad2cd0fb2,
],
[
0x3fea9f469c75536c,
0x3fd7a51b3d9eda10,
0xbf980f63a2cb486c,
0x3f5887f72a9f07e0,
0xbf11e4d454f2f994,
0x3ec113d0aed8bdef,
0xbe6311f84083acf4,
0x3df2e4dc2e50e3fa,
],
];
/// Computes the reciprocal of the error function, `1 / erf(x)`.
///
/// Explicitly handled inputs:
/// * `+0` / `-0` give `+inf` / `-inf`;
/// * NaN is returned unchanged;
/// * `+inf` / `-inf` give `1.0` / `-1.0`;
/// * finite `|x| >= 4` gives the sum of `±1` and a `2^-25` term of opposite sign.
///
/// Max ulp 0.5
pub fn f_rerff(x: f32) -> f32 {
    let magnitude_bits = x.to_bits() & 0x7fff_ffffu32;
    let negative = x.is_sign_negative();

    // Pole at the origin: the sign of zero picks the sign of the infinity.
    if x == 0. {
        return if negative {
            f32::NEG_INFINITY
        } else {
            f32::INFINITY
        };
    }

    // |x| >= 4 (bit pattern of 4.0f32), infinities and NaN.
    if magnitude_bits >= 0x4080_0000u32 {
        if magnitude_bits > 0x7f80_0000 {
            // NaN: hand the input back as is.
            return x;
        }
        static ONE: [f32; 2] = [1.0, -1.0];
        static SMALL: [f32; 2] = [f32::from_bits(0xb3000000), f32::from_bits(0x33000000)];
        let sign = negative as usize;
        if magnitude_bits == 0x7f80_0000 {
            return ONE[sign];
        }
        return ONE[sign] + SMALL[sign];
    }

    // Polynomial approximation see [COEFFS] for generation:
    // 1/erf(x) ~ (c0 + c1 * x^2 + c2 * x^4 + ... + c7 * x^14) / x
    const EIGHT: u32 = 3 << 23;
    // Adding 3 to the exponent field multiplies |x| by 8; truncating that to an integer
    // selects the 1/8-wide interval.
    // SAFETY: `magnitude_bits` is below the pattern of 4.0 here, so the scaled value is
    // finite, non-negative and below 32, hence representable as `usize`.
    let idx = unsafe {
        f32::from_bits(magnitude_bits.wrapping_add(EIGHT)).to_int_unchecked::<usize>()
    };
    let [k0, k1, k2, k3, k4, k5, k6, k7] = COEFFS[idx].map(f64::from_bits);

    let xd = x as f64;
    let x2 = xd * xd;
    let x4 = x2 * x2;
    let x8 = x4 * x4;

    // Estrin-style evaluation: pairs in x^2, then quads in x^4, then the final x^8 join.
    let pair01 = f_fmla(x2, k1, k0);
    let pair23 = f_fmla(x2, k3, k2);
    let pair45 = f_fmla(x2, k5, k4);
    let pair67 = f_fmla(x2, k7, k6);
    let low_half = f_fmla(x4, pair23, pair01);
    let high_half = f_fmla(x4, pair67, pair45);
    ((f_fmla(x8, high_half, low_half)) / xd) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for `f_rerff`: special inputs first, then a few ordinary values.
    #[test]
    fn f_rerff_test() {
        // NaN propagates.
        assert!(f_rerff(f32::NAN).is_nan());
        // Signed zeros map to the matching infinity.
        assert_eq!(f_rerff(0.0), f32::INFINITY);
        assert_eq!(f_rerff(-0.0), f32::NEG_INFINITY);
        // Infinities map to +1 / -1.
        assert_eq!(f_rerff(f32::INFINITY), 1.0);
        assert_eq!(f_rerff(f32::NEG_INFINITY), -1.0);
        // Values that go through the polynomial path.
        assert_eq!(f_rerff(0.015255669), 58.096153);
        assert_eq!(f_rerff(1.0), 1.1866608);
        assert_eq!(f_rerff(0.5), 1.9212301);
    }
}

89
vendor/pxfm/src/exponents/auxiliary.rs vendored Normal file
View File

@@ -0,0 +1,89 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::dyadic_float::{DyadicSign, f64_from_parts};
/// Computes `d * 2^i` with `scalbn`-style exponent handling.
///
/// When `i` lies outside `[-1022, 1023]`, up to two preliminary scaling multiplications
/// absorb part of the exponent; whatever still exceeds the range afterwards is clamped,
/// and one final multiplication by a power of two produces the result.
#[inline]
pub(crate) fn ldexp(d: f64, i: i32) -> f64 {
    const EXP_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
    let exp_max = 1023;
    let exp_min = -1022;

    let mut value = d;
    let mut exponent = i;

    if exponent < exp_min {
        // Biased exponent 1 with a zero significand: the minimum positive normal.
        let two_pow_emin = f64_from_parts(DyadicSign::Pos, 1, 0);
        // Biased exponent 52 + bias with a zero significand, used together with the
        // value above as the per-step down-scaling factor.
        let two_pow_52 = f64_from_parts(DyadicSign::Pos, 52 + EXP_BIAS, 0);
        let step_scale = two_pow_emin * two_pow_52;
        let step = -exp_min - 52i32;
        // Worst case negative exponent: the input is the maximum positive value and the
        // result is the smallest one. That must be reachable with three scaling
        // multiplications (two here and the final one).
        debug_assert!(-exp_min + 52i32 + exp_max <= step * 2 + -exp_min);
        for _ in 0..2 {
            if exponent >= exp_min {
                break;
            }
            value *= step_scale;
            exponent += step;
        }
        exponent = exponent.max(exp_min);
    } else if exponent > exp_max {
        // Biased exponent 2 * bias with a zero significand: the largest power of two.
        let two_pow_emax = f64_from_parts(DyadicSign::Pos, EXP_BIAS << 1, 0);
        for _ in 0..2 {
            if exponent <= exp_max {
                break;
            }
            value *= two_pow_emax;
            exponent -= exp_max;
        }
        exponent = exponent.min(exp_max);
    }

    // The remaining exponent is now within [exp_min, exp_max].
    let scale = f64_from_parts(DyadicSign::Pos, (EXP_BIAS as i32 + exponent) as u64, 0);
    value * scale
}
/// Scales `d` by `2^i` by adding `i` straight into the exponent field of its bit pattern.
///
/// The addition wraps and nothing is range-checked, so the result equals `d * 2^i` only
/// while both `d` and the scaled value are normal numbers.
#[inline]
pub(crate) fn fast_ldexp(d: f64, i: i32) -> f64 {
    // `i as u64` sign-extends, so negative exponents subtract after the wrapping add.
    let exponent_delta = (i as u64) << 52;
    f64::from_bits(d.to_bits().wrapping_add(exponent_delta))
}

411
vendor/pxfm/src/exponents/exp.rs vendored Normal file
View File

@@ -0,0 +1,411 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{f_fmla, fmla, pow2i, rintk};
use crate::double_double::DoubleDouble;
use crate::exponents::auxiliary::fast_ldexp;
use crate::round::RoundFinite;
/// Approximates `e^d`; callable in const context.
///
/// This is a simplified routine that only aims at a good approximation for const
/// evaluation. It contains no explicit clamping for arguments whose result would
/// overflow or underflow.
#[inline]
pub const fn exp(d: f64) -> f64 {
    // ln(2) split into an upper and a lower part, and its reciprocal.
    const L2_U: f64 = 0.693_147_180_559_662_956_511_601_805_686_950_683_593_75;
    const L2_L: f64 = 0.282_352_905_630_315_771_225_884_481_750_134_360_255_254_120_68_e-12;
    const R_LN2: f64 =
        1.442_695_040_888_963_407_359_924_681_001_892_137_426_645_954_152_985_934_135_449_406_931;
    // Poly for u = r*(exp(r)+1)/(exp(r)-1), in powers of r^2, highest degree first.
    const POLY: [f64; 6] = [
        4.17535139757361979584e-8f64,
        -1.6534391534392554e-6f64,
        6.613756613755705e-5f64,
        -0.0027777777777777614f64,
        0.16666666666666674f64,
        2f64,
    ];

    // d = k * ln(2) + r
    let k = rintk(d * R_LN2);
    let k_int = k as i32;
    let r = fmla(k, -L2_L, fmla(k, -L2_U, d));
    let r2 = r * r;

    // Horner evaluation in r^2.
    let mut u = POLY[0];
    let mut term = 1;
    while term < POLY.len() {
        u = fmla(u, r2, POLY[term]);
        term += 1;
    }

    // exp(r) = 1 + 2r / (u - r), then scale by 2^k.
    let exp_r = 1f64 + 2f64 * r / (u - r);
    exp_r * pow2i(k_int)
}
// Coarse reduction table: entry `i` approximates 2^(i/64) for i = 0..63
// (e.g. entry 32 has leading part 0x3ff6a09e667f3bcd, the bit pattern of sqrt(2)).
// Each entry is a pair of `f64` bit patterns handed to `DoubleDouble::from_bit_pair`:
// the second element is the leading value, the first a small correction term.
// `f_exp` / `f_exp10` index it with bits 6..11 of the rounded reduction integer.
pub(crate) static EXP_REDUCE_T0: [(u64, u64); 64] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc719083535b085e, 0x3ff02c9a3e778061),
(0x3c8d73e2a475b466, 0x3ff059b0d3158574),
(0x3c6186be4bb28500, 0x3ff0874518759bc8),
(0x3c98a62e4adc610a, 0x3ff0b5586cf9890f),
(0x3c403a1727c57b52, 0x3ff0e3ec32d3d1a2),
(0xbc96c51039449b3a, 0x3ff11301d0125b51),
(0xbc932fbf9af1369e, 0x3ff1429aaea92de0),
(0xbc819041b9d78a76, 0x3ff172b83c7d517b),
(0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75),
(0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
(0x3c8dc775814a8494, 0x3ff2063b88628cd6),
(0x3c99b07eb6c70572, 0x3ff2387a6e756238),
(0x3c82bd339940e9da, 0x3ff26b4565e27cdd),
(0x3c8612e8afad1256, 0x3ff29e9df51fdee1),
(0x3c90024754db41d4, 0x3ff2d285a6e4030b),
(0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
(0x3c932721843659a6, 0x3ff33c08b26416ff),
(0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
(0xbc75e436d661f5e2, 0x3ff3a7db34e59ff7),
(0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
(0xbc5ef3691c309278, 0x3ff4160a21f72e2a),
(0x3c489b7a04ef80d0, 0x3ff44e086061892d),
(0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0),
(0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
(0xbc94b309d25957e4, 0x3ff4f9b2769d2ca7),
(0xbc807abe1db13cac, 0x3ff5342b569d4f82),
(0x3c99bb2c011d93ac, 0x3ff56f4736b527da),
(0x3c96324c054647ac, 0x3ff5ab07dd485429),
(0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148),
(0xbc9383c17e40b496, 0x3ff6247eb03a5585),
(0xbc9bb60987591c34, 0x3ff6623882552225),
(0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
(0xbc6bbe3a683c88aa, 0x3ff6dfb23c651a2f),
(0xbc816e4786887a9a, 0x3ff71f75e8ec5f74),
(0xbc90245957316dd4, 0x3ff75feb564267c9),
(0xbc841577ee049930, 0x3ff7a11473eb0187),
(0x3c705d02ba15797e, 0x3ff7e2f336cf4e62),
(0xbc9d4c1dd41532d8, 0x3ff82589994cce13),
(0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed),
(0x3c96e9f156864b26, 0x3ff8ace5422aa0db),
(0x3c85cc13a2e3976c, 0x3ff8f1ae99157736),
(0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5),
(0xbc9d185b7c1b85d0, 0x3ff97d829fde4e50),
(0x3c7c7c46b071f2be, 0x3ff9c49182a3f090),
(0xbc9359495d1cd532, 0x3ffa0c667b5de565),
(0xbc9d2f6edb8d41e2, 0x3ffa5503b23e255d),
(0x3c90fac90ef7fd32, 0x3ffa9e6b5579fdbf),
(0x3c97a1cd345dcc82, 0x3ffae89f995ad3ad),
(0xbc62805e3084d708, 0x3ffb33a2b84f15fb),
(0xbc75584f7e54ac3a, 0x3ffb7f76f2fb5e47),
(0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2),
(0x3c811065895048de, 0x3ffc199bdd85529c),
(0x3c92884dff483cac, 0x3ffc67f12e57d14b),
(0x3c7503cbd1e949dc, 0x3ffcb720dcef9069),
(0xbc9cbc3743797a9c, 0x3ffd072d4a07897c),
(0x3c82ed02d75b3706, 0x3ffd5818dcfba487),
(0x3c9c2300696db532, 0x3ffda9e603db3285),
(0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f),
(0x3c839e8980a9cc90, 0x3ffe502ee78b3ff6),
(0xbc9e9c23179c2894, 0x3ffea4afa2a490da),
(0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27),
(0x3c99d3e12dd8a18a, 0x3fff50765b6e4540),
(0x3c874853f3a5931e, 0x3fffa7c1819e90d8),
];
// Fine reduction table: entry `i` approximates 2^(i/4096) for i = 0..63
// (entry 1 has leading part 0x3ff000b175effdc7, roughly 1 + ln(2)/4096).
// Same pair layout as `EXP_REDUCE_T0`: the second element is the leading value and the
// first a small correction term, both as `f64` bit patterns.
// `f_exp` / `f_exp10` index it with the low 6 bits of the rounded reduction integer.
pub(crate) static EXP_REDUCE_T1: [(u64, u64); 64] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3c9ae8e38c59c72a, 0x3ff000b175effdc7),
(0xbc57b5d0d58ea8f4, 0x3ff00162f3904052),
(0x3c94115cb6b16a8e, 0x3ff0021478e11ce6),
(0xbc8d7c96f201bb2e, 0x3ff002c605e2e8cf),
(0x3c984711d4c35ea0, 0x3ff003779a95f959),
(0xbc80484245243778, 0x3ff0042936faa3d8),
(0xbc94b237da2025fa, 0x3ff004dadb113da0),
(0xbc75e00e62d6b30e, 0x3ff0058c86da1c0a),
(0x3c9a1d6cedbb9480, 0x3ff0063e3a559473),
(0xbc94acf197a00142, 0x3ff006eff583fc3d),
(0xbc6eaf2ea42391a6, 0x3ff007a1b865a8ca),
(0x3c7da93f90835f76, 0x3ff0085382faef83),
(0xbc86a79084ab093c, 0x3ff00905554425d4),
(0x3c986364f8fbe8f8, 0x3ff009b72f41a12b),
(0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd),
(0xbc84f6b2a7609f72, 0x3ff00b1afa5abcbf),
(0xbc7e1a258ea8f71a, 0x3ff00bcceb7707ec),
(0x3c74362ca5bc26f2, 0x3ff00c7ee448ee02),
(0x3c9095a56c919d02, 0x3ff00d30e4d0c483),
(0xbc6406ac4e81a646, 0x3ff00de2ed0ee0f5),
(0x3c9b5a6902767e08, 0x3ff00e94fd0398e0),
(0xbc991b2060859320, 0x3ff00f4714af41d3),
(0x3c8427068ab22306, 0x3ff00ff93412315c),
(0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11),
(0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b),
(0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63),
(0xbc734104ee7edae8, 0x3ff012c1fecd613b),
(0xbc72b6aeb6176892, 0x3ff0137444c9b5b5),
(0x3c7a8cd33b8a1bb2, 0x3ff01426927f5278),
(0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f),
(0x3c857ba2dc7e0c72, 0x3ff0158b4517bb88),
(0x3c9b61299ab8cdb8, 0x3ff0163da9fb3335),
(0xbc990565902c5f44, 0x3ff016f0169949ed),
(0x3c870fc41c5c2d54, 0x3ff017a28af25567),
(0x3c94b9a6e145d76c, 0x3ff018550706ab62),
(0xbc7008eff5142bfa, 0x3ff019078ad6a19f),
(0xbc977669f033c7de, 0x3ff019ba16628de2),
(0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3),
(0x3c9371231477ece6, 0x3ff01b1f44af9f9e),
(0x3c75e7626621eb5a, 0x3ff01bd1e77170b4),
(0xbc9bc72b100828a4, 0x3ff01c8491f08f08),
(0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070),
(0x3c816996709da2e2, 0x3ff01de9fe280ac8),
(0xbc8c11f5239bf536, 0x3ff01e9cbfe113ef),
(0x3c8e1d4eb5edc6b4, 0x3ff01f4f8958c1c6),
(0xbc9afb99946ee3f0, 0x3ff020025a8f6a35),
(0xbc98f06d8a148a32, 0x3ff020b533856324),
(0xbc82bf310fc54eb6, 0x3ff02168143b0281),
(0xbc9c95a035eb4176, 0x3ff0221afcb09e3e),
(0xbc9491793e46834c, 0x3ff022cdece68c4f),
(0xbc73e8d0d9c49090, 0x3ff02380e4dd22ad),
(0xbc9314aa16278aa4, 0x3ff02433e494b755),
(0x3c848daf888e9650, 0x3ff024e6ec0da046),
(0x3c856dc8046821f4, 0x3ff02599fb483385),
(0x3c945b42356b9d46, 0x3ff0264d1244c719),
(0xbc7082ef51b61d7e, 0x3ff027003103b10e),
(0x3c72106ed0920a34, 0x3ff027b357854772),
(0xbc9fd4cf26ea5d0e, 0x3ff0286685c9e059),
(0xbc909f8775e78084, 0x3ff02919bbd1d1d8),
(0x3c564cbba902ca28, 0x3ff029ccf99d720a),
(0x3c94383ef231d206, 0x3ff02a803f2d170d),
(0x3c94a47a505b3a46, 0x3ff02b338c811703),
(0x3c9e471202234680, 0x3ff02be6e199c811),
];
/// Keeps only the 52 mantissa bits of `x`, clearing the exponent (and sign) fields, so
/// the returned binary64 value lies in the subnormal range (or is zero).
#[inline]
pub(crate) fn to_denormal(x: f64) -> f64 {
    const MANTISSA_MASK: u64 = (1u64 << 52) - 1;
    f64::from_bits(x.to_bits() & MANTISSA_MASK)
}
/// Evaluates, in double-double arithmetic, the degree-6 polynomial
/// `1 + z * (C[1] + z * (C[2] + ... + z * C[6]))` by Horner's scheme.
///
/// The leading coefficients are 1/2, 1/6, 1/24, ... so that `z * exp_poly_dd(z)`
/// is the quantity the accurate path of `exp` adds on top of the table value.
#[inline]
fn exp_poly_dd(z: DoubleDouble) -> DoubleDouble {
    // Bit-pair coefficients in ascending degree. C[0] (= 1.0) is not read from the table:
    // the final step adds the constant 1.0 as a plain f64.
    const C: [(u64, u64); 7] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x39c712f72ecec2cf, 0x3fe0000000000000),
        (0x3c65555555554d07, 0x3fc5555555555555),
        (0x3c455194d28275da, 0x3fa5555555555555),
        (0x3c012faa0e1c0f7b, 0x3f81111111111111),
        (0xbbf4ba45ab25d2a3, 0x3f56c16c16da6973),
        (0xbbc9091d845ecd36, 0x3f2a01a019eb7f31),
    ];

    let mut acc = DoubleDouble::from_bit_pair(C[6]);
    // Fold in C[5], C[4], ..., C[1].
    for &pair in C[1..6].iter().rev() {
        acc = DoubleDouble::quick_mul_add(acc, z, DoubleDouble::from_bit_pair(pair));
    }
    DoubleDouble::quick_mul_add_f64(acc, z, f64::from_bits(0x3ff0000000000000))
}
/// Accurate (slow) path of `f_exp`, taken when the fast path cannot decide the rounding.
///
/// The arguments are intermediates already computed by `f_exp`:
/// * `x`  - the original argument;
/// * `t`  - the rounded value of `x * 2^12 / ln(2)`;
/// * `tz` - the double-double product of the two reduction-table entries;
/// * `ie` - the integer power of two to apply to the result.
///
/// Returns the final `exp(x)` value.
#[cold]
fn as_exp_accurate(x: f64, t: f64, tz: DoubleDouble, ie: i64) -> f64 {
    let mut ix = x.to_bits();
    // Tiny |x| (biased exponent below 0x3c9): 1 + x already rounds correctly.
    if ((ix >> 52) & 0x7ff) < 0x3c9 {
        return 1. + x;
    }

    /* Use Cody-Waite argument reduction: since |x| < 745, we have |t| < 2^23,
    thus since l2h is exactly representable on 29 bits, l2h*t is exact. */
    const L2: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3d0718432a1b0e26),
        f64::from_bits(0x3f262e42ff000000),
    );
    const L2LL: f64 = f64::from_bits(0x3999ff0342542fc3);

    // Reduced argument dz = x - t * ln(2)/2^12, carried as a double-double.
    let dx = f_fmla(-L2.hi, t, x);
    let dx_dd = DoubleDouble::quick_mult_f64(DoubleDouble::new(L2LL, L2.lo), t);
    let dz = DoubleDouble::full_add_f64(dx_dd, dx);

    // f = dz * P(dz), the amount to add on top of 1 for exp(dz).
    let mut f = exp_poly_dd(dz);
    f = DoubleDouble::quick_mult(dz, f);

    if ix > 0xc086232bdd7abcd2u64 {
        // x < -708.396: the result is subnormal.
        ix = 1i64.wrapping_sub(ie).wrapping_shl(52) as u64;
        f = DoubleDouble::quick_mult(f, tz);
        f = DoubleDouble::add(tz, f);

        // Adding the power of two encoded in `ix` shifts the significand into the
        // position it occupies in the subnormal result; the sum must therefore replace
        // `f.hi` before the exponent field is stripped (same sequence as in `f_exp`).
        let new_f = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi);
        f.hi = new_f.hi;
        f.lo += new_f.lo;
        f.hi = to_denormal(f.hi + f.lo);
    } else {
        if tz.hi == 1.0 {
            // Table value is exactly 1: add it with exact additions and, if the low
            // part lands exactly on a power of two, nudge it by one unit in the
            // direction of the remaining error so the final rounding is not a tie.
            let fhe = DoubleDouble::from_exact_add(tz.hi, f.hi);
            let fhl = DoubleDouble::from_exact_add(fhe.lo, f.lo);
            f.hi = fhe.hi;
            f.lo = fhl.hi;
            ix = f.lo.to_bits();
            if (ix & 0x000fffffffffffff) == 0 {
                let v = fhl.lo.to_bits();
                let d: i64 = (((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64).wrapping_shl(1)
                    as i64)
                    .wrapping_add(1);
                ix = ix.wrapping_add(d as u64);
                f.lo = f64::from_bits(ix);
            }
        } else {
            // General case: tz * (1 + f) = tz + tz * f.
            f = DoubleDouble::quick_mult(f, tz);
            f = DoubleDouble::add(tz, f);
        }
        f = DoubleDouble::from_exact_add(f.hi, f.lo);
        // Apply the power of two directly on the exponent field.
        f.hi = fast_ldexp(f.hi, ie as i32);
    }
    f.hi
}
/// Computes the exponential function `e^x`.
///
/// NaN inputs return NaN, `+inf` returns `+inf`, `-inf` returns `0`; arguments beyond
/// the representable range overflow or underflow through an explicit multiplication.
///
/// Max found ULP 0.5
pub fn f_exp(x: f64) -> f64 {
    let bits = x.to_bits();
    let abs_bits = bits & 0x7fffffffffffffff;
    let negative = (bits >> 63) != 0;

    // exp(x) rounds to 1 to nearest for |x| <= 5.55112e-17
    if abs_bits <= 0x3c90000000000000u64 {
        return 1.0 + x;
    }

    if abs_bits >= 0x40862e42fefa39f0u64 {
        // |x| >= 709.783
        if abs_bits > 0x7ff0000000000000u64 {
            // nan
            return x + x;
        }
        if abs_bits == 0x7ff0000000000000u64 {
            // |x| = inf: exp(-inf) = 0, exp(+inf) = +inf
            return if negative { 0.0 } else { x };
        }
        if !negative {
            // x >= 709.783: overflow
            let huge = std::hint::black_box(f64::from_bits(0x7fe0000000000000));
            return huge * huge;
        }
        if abs_bits >= 0x40874910d52d3052u64 {
            // x <= -745.133: underflow
            return f64::from_bits(0x18000000000000) * f64::from_bits(0x3c80000000000000);
        }
    }

    // t = round(x * 2^12 / ln(2)); its low 12 bits index the two tables and the rest is
    // the binary exponent of the result.
    const S: f64 = f64::from_bits(0x40b71547652b82fe);
    let t = (x * S).round_finite();
    // SAFETY: x is finite with |x| below about 745.2 here, so `t` is a finite integer
    // value far inside the i64 range.
    let jt: i64 = unsafe { t.to_int_unchecked::<i64>() };
    let coarse = ((jt >> 6) & 0x3f) as usize;
    let fine = (jt & 0x3f) as usize;
    let ie: i64 = jt >> 12;
    let tz = DoubleDouble::quick_mult(
        DoubleDouble::from_bit_pair(EXP_REDUCE_T0[coarse]),
        DoubleDouble::from_bit_pair(EXP_REDUCE_T1[fine]),
    );

    /* Use Cody-Waite argument reduction: since |x| < 745, we have |t| < 2^23,
    thus since l2h is exactly representable on 29 bits, l2h*t is exact. */
    const L2: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3d0718432a1b0e26),
        f64::from_bits(0x3f262e42ff000000),
    );
    let dx = f_fmla(L2.lo, t, f_fmla(-L2.hi, t, x));
    let dx2 = dx * dx;

    // Degree-3 polynomial in dx, split into two halves joined through dx^2.
    const CH: [u64; 4] = [
        0x3ff0000000000000,
        0x3fe0000000000000,
        0x3fc55555557e54ff,
        0x3fa55555553a12f4,
    ];
    let upper = f_fmla(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
    let lower = f_fmla(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
    let p = f_fmla(dx2, upper, lower);

    let mut f = DoubleDouble::new(f_fmla(tz.hi * dx, p, tz.lo), tz.hi);

    // subnormal case: x < -708.396
    let subnormal = bits > 0xc086232bdd7abcd2u64;
    if subnormal {
        // Add the power of two that aligns the significand with the subnormal grid.
        let align = f64::from_bits(1u64.wrapping_sub(ie as u64).wrapping_shl(52));
        let sums = DoubleDouble::from_exact_add(align, f.hi);
        f.hi = sums.hi;
        f.lo += sums.lo;
    }

    // Rounding test: if both ends of the error interval round identically the fast
    // result is final, otherwise defer to the accurate path.
    const EPS: f64 = f64::from_bits(0x3c0833beace2b6fe);
    let ub = f.hi + (f.lo + EPS);
    let lb = f.hi + (f.lo - EPS);
    if ub != lb {
        return as_exp_accurate(x, t, tz, ie);
    }

    if subnormal {
        to_denormal(lb)
    } else {
        fast_ldexp(lb, ie as i32)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The const-context approximation only needs to be close, so compare with a tolerance.
    #[test]
    fn exp_test() {
        let at_zero = exp(0f64);
        assert!((at_zero - 1f64).abs() < 1e-8, "Invalid result {}", at_zero);

        let at_five = exp(5f64);
        assert!(
            (at_five - 148.4131591025766034211155800405522796f64).abs() < 1e-8,
            "Invalid result {}",
            at_five
        );
    }

    /// `f_exp` is expected to match the reference values exactly.
    #[test]
    fn f_exp_test() {
        // Ordinary arguments.
        assert_eq!(f_exp(0.000000014901161193847656), 1.0000000149011614);
        assert_eq!(f_exp(0.), 1.);
        assert_eq!(f_exp(5f64), 148.4131591025766034211155800405522796f64);
        // Special inputs.
        assert_eq!(f_exp(f64::INFINITY), f64::INFINITY);
        assert_eq!(f_exp(f64::NEG_INFINITY), 0.);
        assert!(f_exp(f64::NAN).is_nan());
    }
}

238
vendor/pxfm/src/exponents/exp10.rs vendored Normal file
View File

@@ -0,0 +1,238 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::exponents::auxiliary::fast_ldexp;
use crate::exponents::exp::{EXP_REDUCE_T0, EXP_REDUCE_T1, to_denormal};
use crate::round_ties_even::RoundTiesEven;
use std::hint::black_box;
/// Evaluates, in double-double arithmetic, the degree-5 polynomial
/// `C[0] + z * (C[1] + z * (C[2] + ... + z * C[5]))` by Horner's scheme.
#[inline]
fn exp10_poly_dd(z: DoubleDouble) -> DoubleDouble {
    // Bit-pair coefficients in ascending degree.
    const C: [(u64, u64); 6] = [
        (0xbcaf48ad494ea102, 0x40026bb1bbb55516),
        (0xbcae2bfab318d399, 0x40053524c73cea69),
        (0x3ca81f50779e162b, 0x4000470591de2ca4),
        (0x3c931a5cc5d3d313, 0x3ff2bd7609fd98c4),
        (0x3c8910de8c68a0c2, 0x3fe1429ffd336aa3),
        (0xbc605e703d496537, 0x3fca7ed7086882b4),
    ];

    let mut acc = DoubleDouble::from_bit_pair(C[5]);
    // Fold in C[4], C[3], ..., C[0].
    for &pair in C[..5].iter().rev() {
        acc = DoubleDouble::quick_mul_add(acc, z, DoubleDouble::from_bit_pair(pair));
    }
    acc
}
/// Accurate (slow) path of `f_exp10`, taken when the fast path cannot decide the rounding.
///
/// Redoes the argument reduction from `x` with a three-part constant and evaluates the
/// polynomial in double-double arithmetic.
#[cold]
fn as_exp10_accurate(x: f64) -> f64 {
    // Three-part split of the reduction constant, largest part first.
    const L0: f64 = f64::from_bits(0x3f13441350800000);
    const L1: f64 = f64::from_bits(0xbd1f79fef311f12b);
    const L2: f64 = f64::from_bits(0xb9aac0b7c917826b);

    let x_bits = x.to_bits();
    let t = (f64::from_bits(0x40ca934f0979a371) * x).round_ties_even_finite();
    // SAFETY: `t` is already an integer-valued finite float (the only caller filters out
    // NaN, infinities and out-of-range arguments), this is just a conversion.
    let jt: i64 = unsafe { t.to_int_unchecked::<i64>() };
    let fine = (jt & 0x3f) as usize;
    let coarse = ((jt >> 6) & 0x3f) as usize;
    let ie = jt >> 12;
    let dt = DoubleDouble::quick_mult(
        DoubleDouble::from_bit_pair(EXP_REDUCE_T0[coarse]),
        DoubleDouble::from_bit_pair(EXP_REDUCE_T1[fine]),
    );

    // Reduced argument as a double-double.
    let dx = x - L0 * t;
    let dz = DoubleDouble::full_add_f64(
        DoubleDouble::quick_mult_f64(DoubleDouble::new(L2, L1), t),
        dx,
    );
    let mut f = DoubleDouble::quick_mult(dz, exp10_poly_dd(dz));

    if x_bits >= 0xc0733a7146f72a42u64 {
        // Subnormal result: align the significand by adding a power of two, then strip
        // the exponent field.
        let align = f64::from_bits((1u64.wrapping_sub(ie as u64)) << 52);
        f = DoubleDouble::quick_mult(f, dt);
        f = DoubleDouble::add(dt, f);
        let zt = DoubleDouble::from_exact_add(align, f.hi);
        f.hi = zt.hi;
        f.lo += zt.lo;
        return to_denormal(f.to_f64());
    }

    let unscaled = if (jt & 0xfff) == 0 {
        // Both table indices are zero: add the table value with exact additions.
        f = DoubleDouble::from_exact_add(f.hi, f.lo);
        let zt = DoubleDouble::from_exact_add(dt.hi, f.hi);
        f = DoubleDouble::from_exact_add(zt.lo, f.lo);
        let mut tail_bits = f.hi.to_bits();
        if (tail_bits.wrapping_shl(12)) == 0 {
            // The tail has a zero mantissa field: perturb it according to the sign of
            // the remaining low part before the final addition.
            let l = f.lo.to_bits();
            let sfh: i64 = ((tail_bits as i64) >> 63) ^ ((l as i64) >> 63);
            tail_bits = tail_bits.wrapping_add(((1i64 << 51) ^ sfh) as u64);
        }
        zt.hi + f64::from_bits(tail_bits)
    } else {
        // General case: dt * (1 + f) = dt + dt * f, renormalised.
        f = DoubleDouble::quick_mult(f, dt);
        f = DoubleDouble::add(dt, f);
        f = DoubleDouble::from_exact_add(f.hi, f.lo);
        f.hi
    };
    fast_ldexp(unscaled, ie as i32)
}
/// Computes `10^x`.
///
/// NaN inputs return NaN, `+inf` returns `+inf`, `-inf` returns `0`; arguments beyond
/// the representable range overflow or underflow through an explicit multiplication.
/// Small non-negative integer arguments are computed by repeated multiplication by 10.
///
/// Max found ULP 0.5
pub fn f_exp10(x: f64) -> f64 {
    let mut ix = x.to_bits();
    let aix = ix & 0x7fff_ffff_ffff_ffff;
    if aix > 0x40734413509f79feu64 {
        // |x| > 0x40734413509f79fe
        if aix > 0x7ff0000000000000u64 {
            return x + x;
        } // nan
        if aix == 0x7ff0000000000000u64 {
            // exp10(-inf) = 0, exp10(+inf) = +inf
            return if (ix >> 63) != 0 { 0.0 } else { x };
        }
        if (ix >> 63) == 0 {
            return f64::from_bits(0x7fe0000000000000) * 2.0; // x > 308.255
        }
        if aix > 0x407439b746e36b52u64 {
            // x < -323.607
            return black_box(f64::from_bits(0x0018000000000000))
                * black_box(f64::from_bits(0x3c80000000000000));
        }
    }

    // check x integer to avoid a spurious inexact exception
    if ix.wrapping_shl(16) == 0 && (aix >> 48) <= 0x4036 {
        let kx = x.round_ties_even_finite();
        if kx == x {
            let k = kx as i64;
            if k >= 0 {
                let mut r = 1.0;
                for _ in 0..k {
                    r *= 10.0;
                }
                return r;
            }
        }
    }
    /* avoid spurious underflow: for |x| <= 2.41082e-17
    exp10(x) rounds to 1 to nearest */
    if aix <= 0x3c7bcb7b1526e50eu64 {
        return 1.0 + x; // |x| <= 2.41082e-17
    }

    // t = round(x * 2^12 * log2(10)); its low 12 bits index the two tables and the rest
    // is the binary exponent of the result.
    let t = (f64::from_bits(0x40ca934f0979a371) * x).round_ties_even_finite();
    // SAFETY: t is already a finite integer value here, this is just a conversion.
    let jt: i64 = unsafe { t.to_int_unchecked::<i64>() };
    let i1 = jt & 0x3f;
    let i0 = (jt >> 6) & 0x3f;
    let ie = jt >> 12;
    let t00 = EXP_REDUCE_T0[i0 as usize];
    let t01 = EXP_REDUCE_T1[i1 as usize];
    let t0 = DoubleDouble::from_bit_pair(t00);
    let t1 = DoubleDouble::from_bit_pair(t01);
    let mut tz = DoubleDouble::quick_mult(t0, t1);

    // Two-part reduction constant; dx is the reduced argument.
    const L0: f64 = f64::from_bits(0x3f13441350800000);
    const L1: f64 = f64::from_bits(0x3d1f79fef311f12b);
    let dx = f_fmla(-L1, t, f_fmla(-L0, t, x));
    let dx2 = dx * dx;

    // Degree-3 polynomial in dx, split into two halves joined through dx^2.
    const CH: [u64; 4] = [
        0x40026bb1bbb55516,
        0x40053524c73cea69,
        0x4000470591fd74e1,
        0x3ff2bd760a1f32a5,
    ];
    let p0 = f_fmla(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
    let p1 = f_fmla(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
    let p = f_fmla(dx2, p1, p0);

    let mut fh = tz.hi;
    let fx = tz.hi * dx;
    let mut fl = f_fmla(fx, p, tz.lo);
    const EPS: f64 = 1.63e-19;
    if ix < 0xc0733a7146f72a42u64 {
        // x > -307.653
        // x > -0x1.33a7146f72a42p+8
        let ub = fh + (fl + EPS);
        let lb = fh + (fl - EPS);
        if lb != ub {
            return as_exp10_accurate(x);
        }
        fh = fast_ldexp(fh + fl, ie as i32);
    } else {
        // x <= -307.653: exp10(x) < 2^-1022
        ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
        // Adding the power of two encoded in `ix` aligns the significand with the
        // subnormal grid. The sum has to replace `fh`, otherwise both the rounding test
        // and `to_denormal` below would operate on the unshifted value.
        tz = DoubleDouble::from_exact_add(f64::from_bits(ix), fh);
        fh = tz.hi;
        fl += tz.lo;
        let ub = fh + (fl + EPS);
        let lb = fh + (fl - EPS);
        if lb != ub {
            return as_exp10_accurate(x);
        }
        fh = to_denormal(fh + fl);
    }
    fh
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for the f64 `f_exp10`.
    #[test]
    fn test_exp10() {
        // A generic argument that goes through the table + polynomial path.
        assert_eq!(f_exp10(-3.370739843267434), 0.00042585343701025656);
        // Small non-negative integers take the repeated-multiplication shortcut.
        assert_eq!(f_exp10(0.), 1.0);
        assert_eq!(f_exp10(1.), 10.0);
        assert_eq!(f_exp10(2.), 100.0);
        assert_eq!(f_exp10(3.), 1000.0);
        assert_eq!(f_exp10(4.), 10000.0);
        assert_eq!(f_exp10(5.), 100000.0);
        assert_eq!(f_exp10(6.), 1000000.0);
        assert_eq!(f_exp10(7.), 10000000.0);
        // Special inputs.
        assert_eq!(f_exp10(f64::INFINITY), f64::INFINITY);
        assert_eq!(f_exp10(f64::NEG_INFINITY), 0.);
        assert!(f_exp10(f64::NAN).is_nan());
    }
}

225
vendor/pxfm/src/exponents/exp10f.rs vendored Normal file
View File

@@ -0,0 +1,225 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{f_fmla, f_fmlaf};
use crate::polyeval::f_polyeval7;
use crate::round::RoundFinite;
/// Result of the base-10 range reduction performed by `exp_b_range_reduc`.
///
/// The pair represents `10^x = hi * 10^lo`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub(crate) struct ExpBReduc {
    /// Power-of-two scale factor `2^(hi + mid)` assembled from the exponent
    /// bits and the `EXP_2_MID` table.
    pub(crate) hi: f64,
    /// Small residual exponent left after removing the power-of-two part.
    pub(crate) lo: f64,
}
// Number of low bits of the rounded exponent used to index EXP_2_MID
// (2^5 = 32 table entries).
const MID_BITS: u32 = 5;
// Mask selecting those low MID_BITS bits.
const MID_MASK: usize = (1 << MID_BITS) - 1;
// log2(10) scaled by 2^MID_BITS; multiplying x by it and rounding gives the
// integer k used by the range reduction.
const LOG2_B: f64 = f64::from_bits(0x400a934f0979a371) * (1 << MID_BITS) as f64;
// High and low parts of -log10(2) / 2^MID_BITS, applied in two FMA steps to
// form the residual x - k * log10(2) / 2^MID_BITS.
// NOTE(review): the high part has trailing zero bits, presumably so that
// k * M_LOGB_2_HI is exact for the k range in use - confirm.
const M_LOGB_2_HI: f64 = f64::from_bits(0xbfd34413509f8000) / (1 << MID_BITS) as f64;
const M_LOGB_2_LO: f64 = f64::from_bits(0x3d380433b83b532a) / (1 << MID_BITS) as f64;
// Raw f64 bit patterns of 2^(i / 32) for i = 0..31 (entry 0 is 1.0, entry 16
// is sqrt(2)). The range reduction adds the integer exponent directly into the
// exponent field of the selected entry to build 2^hi * 2^mid.
const EXP_2_MID: [u64; 32] = [
0x3ff0000000000000,
0x3ff059b0d3158574,
0x3ff0b5586cf9890f,
0x3ff11301d0125b51,
0x3ff172b83c7d517b,
0x3ff1d4873168b9aa,
0x3ff2387a6e756238,
0x3ff29e9df51fdee1,
0x3ff306fe0a31b715,
0x3ff371a7373aa9cb,
0x3ff3dea64c123422,
0x3ff44e086061892d,
0x3ff4bfdad5362a27,
0x3ff5342b569d4f82,
0x3ff5ab07dd485429,
0x3ff6247eb03a5585,
0x3ff6a09e667f3bcd,
0x3ff71f75e8ec5f74,
0x3ff7a11473eb0187,
0x3ff82589994cce13,
0x3ff8ace5422aa0db,
0x3ff93737b0cdc5e5,
0x3ff9c49182a3f090,
0x3ffa5503b23e255d,
0x3ffae89f995ad3ad,
0x3ffb7f76f2fb5e47,
0x3ffc199bdd85529c,
0x3ffcb720dcef9069,
0x3ffd5818dcfba487,
0x3ffdfc97337b9b5f,
0x3ffea4afa2a490da,
0x3fff50765b6e4540,
];
// Approximating 10^dx with degree-5 minimax polynomial generated by Sollya:
// > Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]);
// Then:
// 10^dx ~ P(dx) = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
//
// The coefficients are stored as raw f64 bit patterns and decoded with
// `f64::from_bits` at the point of use. COEFFS[0] is within one ulp of the
// double nearest ln(10). The table is `pub(crate)` so that other modules of
// the crate can reuse the same polynomial.
pub(crate) const EXP10F_COEFFS: [u64; 5] = [
0x40026bb1bbb55515,
0x40053524c73bd3ea,
0x4000470591dff149,
0x3ff2bd7c0a9fbc4d,
0x3fe1429e74a98f43,
];
/// Base-10 range reduction: splits `10^x` into `hi * 10^lo`.
///
/// With `k = round(2^MID_BITS * log2(10) * x)`, the returned `hi` is
/// `2^(k >> MID_BITS) * 2^((k & MID_MASK) / 2^MID_BITS)` and `lo` is the
/// residual `x - k * log10(2) / 2^MID_BITS`.
#[inline]
pub(crate) fn exp_b_range_reduc(x: f32) -> ExpBReduc {
    let x_wide = x as f64;
    // Nearest integer to the scaled exponent, still held as a float.
    let scaled = (LOG2_B * x_wide).round_finite();
    // SAFETY: relies on callers having filtered out NaN, infinities and
    // magnitudes whose scaled exponent would not fit in an i32.
    // NOTE(review): confirm this holds at every call site.
    let n = unsafe { scaled.to_int_unchecked::<i32>() };

    // Low MID_BITS bits select the fractional power of two from the table...
    let table_slot = (n as usize) & MID_MASK;
    // ...while the remaining (signed) bits are moved into the f64 exponent
    // field; 52 is the number of fraction bits in an f64.
    let exponent_bits = (n.wrapping_shr(MID_BITS) as u64).wrapping_shl(52);
    let scale = f64::from_bits(EXP_2_MID[table_slot].wrapping_add(exponent_bits));

    // Residual computed with a two-step (high, then low) FMA reduction.
    let residual = f_fmla(scaled, M_LOGB_2_LO, f_fmla(scaled, M_LOGB_2_HI, x_wide));

    ExpBReduc {
        hi: scale,
        lo: residual,
    }
}
/// Computes exp10
///
/// Returns `10^x` for an `f32` argument.
///
/// Special cases:
/// * NaN (either sign) returns NaN,
/// * `+inf` and any `x >= log10(2^128)` return `+inf`,
/// * `-inf` and any `x < log10(2^-150)` return `0`.
///
/// Max found ULP 0.49999508
#[inline]
pub fn f_exp10f(x: f32) -> f32 {
    let x_u = x.to_bits();
    let x_abs = x_u & 0x7fffffff;
    // When |x| >= log10(2^128), or x is nan
    if x_abs >= 0x421a209bu32 {
        // When x < log10(2^-150) or x is a negative nan
        if x_u > 0xc2349e35u32 {
            // exp10(nan) = nan
            if x.is_nan() {
                return x;
            }
            // exp10(-inf) = 0, and every finite x this small underflows to 0
            return 0.0;
        }
        // x >= log10(2^128), x is +inf, or x is a positive nan.
        // The sign bit is tested instead of `x > 0.`: that comparison is
        // false for NaN, which let a positive NaN fall through to the range
        // reduction below, where `to_int_unchecked` on NaN is undefined
        // behaviour. `x + inf` yields +inf for numbers and NaN for NaN.
        if x.is_sign_positive() {
            return x + f32::INFINITY;
        }
    }
    if x_abs <= 0x3d000000u32 {
        // |x| <= 1/32
        if x_abs <= 0x3b9a209bu32 {
            if x_u == 0xb25e5bd9u32 {
                // x = -1.2943e-08
                return 1.;
            }
            // |x| <= 2^-26
            // 10^x ~ 1 + log(10) * x
            if x_abs <= 0x32800000u32 {
                return f_fmlaf(x, f32::from_bits(0x40135da2), 1.0);
            }
        }
        let xd = x as f64;
        // Special polynomial for small x.
        // Generated by Sollya:
        // d = [-1/32, 1/32];
        // f_exp10f = (10^y - 1)/y;
        // Q = fpminimax(f_exp10f, 6, [|D...|], d, relative, floating);
        // See ./notes/exp10f_small.sollya
        let p = f_polyeval7(
            xd,
            f64::from_bits(0x40026bb1bbb55516),
            f64::from_bits(0x40053524c73cfbf6),
            f64::from_bits(0x4000470591de0b07),
            f64::from_bits(0x3ff2bd760599f3a5),
            f64::from_bits(0x3fe142a001511a6f),
            f64::from_bits(0x3fca7feffa781d53),
            f64::from_bits(0x3fb16e53492c0f0e),
        );
        return f_fmla(p, xd, 1.) as f32;
    }
    // Range reduction: 10^x = 2^(mid + hi) * 10^lo
    // rr = (2^(mid + hi), lo)
    let rr = exp_b_range_reduc(x);
    // The low part is approximated by a degree-5 minimax polynomial.
    // 10^lo ~ 1 + COEFFS[0] * lo + ... + COEFFS[4] * lo^5
    let lo2 = rr.lo * rr.lo;
    // c0 = 1 + COEFFS[0] * lo
    let c0 = f_fmla(rr.lo, f64::from_bits(EXP10F_COEFFS[0]), 1.0);
    // c1 = COEFFS[1] + COEFFS[2] * lo
    let c1 = f_fmla(
        rr.lo,
        f64::from_bits(EXP10F_COEFFS[2]),
        f64::from_bits(EXP10F_COEFFS[1]),
    );
    // c2 = COEFFS[3] + COEFFS[4] * lo
    let c2 = f_fmla(
        rr.lo,
        f64::from_bits(EXP10F_COEFFS[4]),
        f64::from_bits(EXP10F_COEFFS[3]),
    );
    // p = c1 + c2 * lo^2
    //   = COEFFS[1] + COEFFS[2] * lo + COEFFS[3] * lo^2 + COEFFS[4] * lo^3
    let p = f_fmla(lo2, c2, c1);
    // 10^lo ~ c0 + p * lo^2
    // 10^x = 2^(mid + hi) * 10^lo
    //      ~ mh * (c0 + p * lo^2)
    //      = (mh * c0) + p * (mh * lo^2)
    f_fmla(p, lo2 * rr.hi, c0 * rr.hi) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for `f_exp10f`: small arguments, exact powers of ten and
    /// the infinity / NaN special cases.
    #[test]
    fn test_exp10f() {
        let finite_cases: [(f32, f32); 5] = [
            (-1. / 64., 0.9646616),
            (1. / 64., 1.0366329),
            (1., 10.0),
            (2., 100.0),
            (3., 1000.0),
        ];
        for (input, expected) in finite_cases {
            assert_eq!(f_exp10f(input), expected);
        }

        assert_eq!(f_exp10f(f32::INFINITY), f32::INFINITY);
        assert_eq!(f_exp10f(f32::NEG_INFINITY), 0.);
        assert!(f_exp10f(f32::NAN).is_nan());
    }
}

591
vendor/pxfm/src/exponents/exp10m1.rs vendored Normal file
View File

@@ -0,0 +1,591 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{dd_fmla, dyad_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::exponents::exp2m1::{EXP_M1_2_TABLE1, EXP_M1_2_TABLE2};
use crate::floor::FloorFinite;
use crate::round_ties_even::RoundTiesEven;
// ln(10) split into a high part (the nearest double) and a low correction
// term; the pair is passed around as DoubleDouble::new(LN10L, LN10H).
const LN10H: f64 = f64::from_bits(0x40026bb1bbb55516);
const LN10L: f64 = f64::from_bits(0xbcaf48ad494ea3e9);
// Result of a fast-path evaluation: an approximation plus its error bound.
struct Exp10m1 {
// Double-double approximation of 10^x - 1.
exp: DoubleDouble,
// Error bound for `exp`; the caller adds and subtracts it from the low part
// to check that both candidates round to the same double.
err: f64,
}
// Fast-path approximation of exp(z) for a double-double z = zh + zl,
// with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6
// (assuming x^y does not overflow or underflow).
//
// Evaluates 1 + z * (1 + z * (1/2 + z * (c3 + c4 * zh))): the three
// high-order terms are computed in plain double precision, the two final
// steps in double-double.
#[inline]
fn q_1(dz: DoubleDouble) -> DoubleDouble {
    const ONE: f64 = f64::from_bits(0x3ff0000000000000);
    const HALF: f64 = f64::from_bits(0x3fe0000000000000);
    const C3: f64 = f64::from_bits(0x3fc5555555995d37);
    const C4: f64 = f64::from_bits(0x3fa55555558489dc);

    let z = dz.to_f64();
    // Horner steps in double precision; the innermost one uses only the high word.
    let inner = f_fmla(C4, dz.hi, C3);
    let tail = f_fmla(inner, z, HALF);
    // 1 + tail * z, keeping the rounding error of the addition.
    let linear = DoubleDouble::from_exact_add(ONE, tail * z);
    // z * (1 + ...) followed by the leading 1.
    let product = DoubleDouble::quick_mult(dz, linear);
    DoubleDouble::f64_add(ONE, product)
}
// Fast double-double evaluation of exp(x) for x = x.hi + x.lo.
//
// x is reduced as x = n * log(2) / 2^12 + r with n an integer. 2^(n / 2^12)
// is rebuilt from two 64-entry tables and an exponent scaling, and exp(r)
// comes from `q_1`.
#[inline]
fn exp1(x: DoubleDouble) -> DoubleDouble {
    /* |INVLOG2-2^12/log(2)| < 2^-43.4 */
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);
    // log(2) / 2^12 as a high/low pair.
    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);

    let n = (x.hi * INVLOG2).round_ties_even_finite();

    // n * log(2) / 2^12 in double-double.
    let mut reduction = DoubleDouble::from_exact_mult(LOG2H, n);
    reduction.lo = f_fmla(n, LOG2L, reduction.lo);

    // r = x - n * log(2) / 2^12.
    let mut residual = DoubleDouble::from_exact_add(x.hi - reduction.hi, x.lo);
    residual.lo -= reduction.lo;

    /* Note: n is an integer, this is just a conversion. */
    let n_int: i64 = unsafe { n.to_int_unchecked::<i64>() };
    let biased_exp: i64 = (n_int >> 12).wrapping_add(0x3ff);
    let coarse_idx = ((n_int >> 6) & 0x3f) as usize;
    let fine_idx = (n_int & 0x3f) as usize;

    let coarse = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[coarse_idx]);
    let fine = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[fine_idx]);
    let table_product = DoubleDouble::quick_mult(fine, coarse);

    let mut result = DoubleDouble::quick_mult(table_product, q_1(residual));

    /* Scale by 2^k. Warning: for x near 1024, we can have k=2^22, thus
    M = 2047, which encodes Inf */
    let scale_bits = if biased_exp == 0x7ff {
        // Pre-double the value and use the largest finite exponent instead.
        result.hi *= 2.0;
        result.lo *= 2.0;
        (biased_exp.wrapping_sub(1) as u64).wrapping_shl(52)
    } else {
        (biased_exp as u64).wrapping_shl(52)
    };
    let scale = f64::from_bits(scale_bits);
    result.hi *= scale;
    result.lo *= scale;
    result
}
// Fast path for 10^x - 1: returns a double-double approximation together
// with an error bound. When `tiny` is set the work is delegated to the
// dedicated small-argument polynomial.
#[inline]
fn exp10m1_fast(x: f64, tiny: bool) -> Exp10m1 {
    if tiny {
        return exp10m1_fast_tiny(x);
    }

    // x * log(10) as a double-double, then exp() of it.
    // NOTE(review): the detailed error analysis that accompanied this code
    // was written in terms of 2^x / LN2H / LN2L (|h + l - x*log(2)| < 2^-94.9
    // for |x| <= 1024); it looks inherited from the exp2m1 routine and should
    // be re-derived for base 10.
    let scaled = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN10L, LN10H), x);
    let mut value = exp1(scaled);

    // Subtract 1 with an exact addition. The operand of larger magnitude goes
    // first so that the fast_two_sum pre-condition holds: for x >= 0 the high
    // word is >= 1, otherwise it is below 1.
    let shifted: DoubleDouble = if x >= 0. {
        DoubleDouble::from_exact_add(value.hi, -1.0)
    } else {
        DoubleDouble::from_exact_add(-1.0, value.hi)
    };
    value.lo += shifted.lo;
    value.hi = shifted.hi;

    // Relative error bound scaled by the new high word.
    // NOTE(review): the original comment quoted "2^-70.67 < 0x1.42p-71" for
    // this constant, but the bit pattern decodes to 0x1.7ap-72 - confirm
    // which is intended.
    let err = f64::from_bits(0x3b77a00000000000) * value.hi;
    Exp10m1 { exp: value, err }
}
// Accurate-path approximation of exp(z) for a double-double z = zh + zl,
// with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6
// (assuming x^y does not overflow or underflow).
#[inline]
fn q_2(dz: DoubleDouble) -> DoubleDouble {
    /* Degree-7 polynomial generated by Sollya for exp(z) over
    [-0.000130273,0.000130273] with absolute error < 2^-113.218
    (see file exp_accurate.sollya).
    Entries 3 and 4 are the high and low words of the degree-3 coefficient,
    so entries 5..8 hold degrees 4..7. The ulps of the degree 5, 6 and 7
    terms are at most 2^-124, 2^-139 and 2^-155 respectively, which is why
    they can be evaluated in double precision only. */
    const Q_2: [u64; 9] = [
        0x3ff0000000000000,
        0x3ff0000000000000,
        0x3fe0000000000000,
        0x3fc5555555555555,
        0x3c655555555c4d26,
        0x3fa5555555555555,
        0x3f81111111111111,
        0x3f56c16c3fbb4213,
        0x3f2a01a023ede0d7,
    ];
    let coeff = |i: usize| f64::from_bits(Q_2[i]);

    let z = dz.to_f64();
    // High-order tail in double precision (innermost step uses zh only).
    let tail = dd_fmla(
        dd_fmla(dd_fmla(coeff(8), dz.hi, coeff(7)), z, coeff(6)),
        z,
        coeff(5),
    );

    // tail * z plus the double-double degree-3 coefficient.
    let mut acc = DoubleDouble::from_exact_mult(tail, z);
    let sum = DoubleDouble::from_exact_add(coeff(3), acc.hi);
    acc.hi = sum.hi;
    acc.lo += sum.lo + coeff(4);

    // Remaining Horner steps in double-double: multiply by zh+zl, then add
    // the coefficients of degree 2, 1 and 0 in turn.
    for idx in [2usize, 1, 0] {
        acc = DoubleDouble::quick_mult(acc, dz);
        let sum = DoubleDouble::from_exact_add(coeff(idx), acc.hi);
        acc.hi = sum.hi;
        acc.lo += sum.lo;
    }
    acc
}
// Accurate path: returns a double-double approximation hi+lo of exp(x*log(10)),
// assuming -0x1.041704c068efp+4 < x <= 0x1.34413509f79fep+8.
#[inline]
fn exp_2(x: f64) -> DoubleDouble {
    // log10(2) / 2^12 split into three parts of decreasing magnitude.
    const LOG2_10H: f64 = f64::from_bits(0x3f134413509f79ff);
    const LOG2_10M: f64 = f64::from_bits(0xbb89dc1da9800000);
    const LOG2_10L: f64 = f64::from_bits(0xb984fd20dba1f655);

    // n = round(x * 2^12 * log2(10)).
    let mut n = (x * f64::from_bits(0x40ca934f0979a371)).round_ties_even_finite();
    if n == 4194304. {
        n = 4194303.; // ensures M < 2047 below
    }

    // Residual x - n * log10(2) / 2^12, accumulated part by part.
    let head = dd_fmla(-n, LOG2_10H, x); // exact, |head| <= 2^-13
    let mut reduced = DoubleDouble::from_exact_add(head, -n * LOG2_10M);
    reduced.lo = dd_fmla(-n, LOG2_10L, reduced.lo);

    /* The residual is multiplied by log(10) so that the accurate exp()
    polynomial can be used on it. */
    let arg = DoubleDouble::quick_mult(reduced, DoubleDouble::new(LN10L, LN10H));

    // n is already an integer, this is just a conversion.
    let n_int = unsafe { n.to_int_unchecked::<i64>() };
    let biased_exp = (n_int >> 12).wrapping_add(0x3ff);
    let coarse_idx = ((n_int >> 6) & 0x3f) as usize;
    let fine_idx = (n_int & 0x3f) as usize;

    let coarse = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[coarse_idx]);
    let fine = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[fine_idx]);
    let table_product = DoubleDouble::quick_mult(fine, coarse);

    let mut result = DoubleDouble::quick_mult(table_product, q_2(arg));

    // Scale by the power of two. A biased exponent of 0x7ff would encode
    // infinity, so in that case the value is doubled and scaled by one
    // exponent less.
    let scale_bits: u64 = if biased_exp == 0x7ff {
        result.hi *= 2.0;
        result.lo *= 2.0;
        (biased_exp.wrapping_sub(1) as u64).wrapping_shl(52)
    } else {
        (biased_exp as u64).wrapping_shl(52)
    };
    let scale = f64::from_bits(scale_bits);
    result.hi *= scale;
    result.lo *= scale;
    result
}
// Accurate path of 10^x - 1 for small |x|: a double-double Horner evaluation
// of a high-degree polynomial, rounded to a single double at the end.
#[cold]
fn exp10m1_accurate_tiny(x: f64) -> f64 {
    /* The following is a degree-17 polynomial generated by Sollya
    (file exp10m1_accurate.sollya),
    which approximates exp10m1(x) with relative error bounded by 2^-107.506
    for |x| <= 0.0625.
    Entries 0..=15 are eight (high, low) pairs; entries 16..=24 are single
    doubles. */
    const Q: [u64; 25] = [
        0x40026bb1bbb55516,
        0xbcaf48ad494ea3e9,
        0x40053524c73cea69,
        0xbcae2bfab318d696,
        0x4000470591de2ca4,
        0x3ca823527cebf918,
        0x3ff2bd7609fd98c4,
        0x3c931ea51f6641df,
        0x3fe1429ffd1d4d76,
        0x3c7117195be7f232,
        0x3fca7ed70847c8b6,
        0xbc54260c5e23d0c8,
        0x3fb16e4dfc333a87,
        0xbc533fd284110905,
        0x3f94116b05fdaa5d,
        0xbc20721de44d79a8,
        0x3f74897c45d93d43,
        0x3f52ea52b2d182ac,
        0x3f2facfd5d905b22,
        0x3f084fe12df8bde3,
        0x3ee1398ad75d01bf,
        0x3eb6a9e96fbf6be7,
        0x3e8bd456a29007c2,
        0x3e6006cf8378cf9b,
        0x3e368862b132b6e2,
    ];
    let coeff = |i: usize| f64::from_bits(Q[i]);

    let x2 = x * x;
    let x4 = x2 * x2;

    // Highest-order terms (entries 19..=24) in plain double precision.
    let c11 = dd_fmla(coeff(21), x, coeff(20));
    let c13 = dd_fmla(coeff(24), x2, dd_fmla(coeff(23), x, coeff(22)));
    let tail = f_fmla(coeff(19), x, f_fmla(c11, x2, c13 * x4));

    // Start the double-double accumulator from entry 18.
    let mut acc = DoubleDouble::from_exact_add(coeff(18), tail);

    // Entries 17 and 16: multiply by x, add a single-double coefficient.
    for idx in [17usize, 16] {
        acc = DoubleDouble::quick_f64_mult(x, acc);
        let sum = DoubleDouble::from_exact_add(coeff(idx), acc.hi);
        acc.lo += sum.lo;
        acc.hi = sum.hi;
    }

    // Pairs (14,15), (12,13), ..., (0,1): multiply by x, add a double-double
    // coefficient whose low word is folded into the low part.
    for hi_idx in (0..=14usize).rev().step_by(2) {
        acc = DoubleDouble::quick_f64_mult(x, acc);
        let sum = DoubleDouble::from_exact_add(coeff(hi_idx), acc.hi);
        acc.lo += sum.lo + coeff(hi_idx + 1);
        acc.hi = sum.hi;
    }

    // The polynomial has no constant term: one last multiplication by x.
    acc = DoubleDouble::quick_f64_mult(x, acc);
    acc.to_f64()
}
// Accurate (slow) path of 10^x - 1, used when the fast path cannot decide
// the rounding. Small arguments go to the dedicated polynomial; otherwise
// 10^x is computed in double-double and 1 is subtracted exactly.
#[cold]
fn exp10m1_accurate(x: f64) -> f64 {
    let abs_bits = x.to_bits() & 0x7fffffffffffffffu64;
    if abs_bits <= 0x3fc0000000000000u64 {
        // |x| <= 0.125
        return exp10m1_accurate_tiny(x);
    }

    let mut value = exp_2(x);
    // Exact addition that is valid regardless of operand magnitudes.
    let shifted: DoubleDouble = DoubleDouble::from_full_exact_add(value.hi, -1.0);
    value.lo += shifted.lo;
    value.hi = shifted.hi;
    value.to_f64()
}
/* Fast path for small |x|: returns a double-double approximation of
exp10m1(x) together with an error bound.
NOTE(review): the original header comment described this routine in terms
of exp2m1 (|x| <= 0.125, |x| > 0x1.0527dbd87e24dp-51, relative error
2^-65.62 proved with exp2m1.sage); it looks inherited from the exp2m1
implementation and should be re-checked for base 10. */
#[inline]
fn exp10m1_fast_tiny(x: f64) -> Exp10m1 {
    /* The following is a degree-11 polynomial generated by Sollya
    (file exp10m1_fast.sollya),
    which approximates exp10m1(x) with relative error bounded by 2^-69.58
    for |x| <= 0.0625.
    Entries 0..=5 are three (high, low) pairs; entries 6..=13 are single
    doubles. */
    const P: [u64; 14] = [
        0x40026bb1bbb55516,
        0xbcaf48abcf79e094,
        0x40053524c73cea69,
        0xbcae1badf796d704,
        0x4000470591de2ca4,
        0x3ca7db8caacb2cea,
        0x3ff2bd7609fd98ba,
        0x3fe1429ffd1d4d98,
        0x3fca7ed7084998e1,
        0x3fb16e4dfc30944b,
        0x3f94116ae4b57526,
        0x3f74897c6a90f61c,
        0x3f52ec689c32b3a0,
        0x3f2faced20d698fe,
    ];
    let coeff = |i: usize| f64::from_bits(P[i]);

    let x2 = x * x;
    let x4 = x2 * x2;

    // High-order part (entries 7..=13) in plain double precision, grouped in
    // three sub-polynomials that are then combined with x^2 and x^4.
    let c7 = dd_fmla(coeff(10), x, coeff(9));
    let c9 = dd_fmla(coeff(13), x2, dd_fmla(coeff(12), x, coeff(11)));
    let mut c5 = dd_fmla(coeff(8), x, coeff(7));
    c5 = dd_fmla(c7, x2, c5);
    c5 = dd_fmla(c9, x4, c5);

    // Switch to double-double: c5 * x + P[6].
    let mut acc = DoubleDouble::from_exact_mult(c5, x);
    let sum = DoubleDouble::from_exact_add(coeff(6), acc.hi);
    acc.lo += sum.lo;
    acc.hi = sum.hi;

    // Pairs (4,5), (2,3), (0,1): multiply by x and add a double-double coefficient.
    for hi_idx in [4usize, 2, 0] {
        acc = DoubleDouble::quick_f64_mult(x, acc);
        let sum = DoubleDouble::from_exact_add(coeff(hi_idx), acc.hi);
        acc.lo += sum.lo + coeff(hi_idx + 1);
        acc.hi = sum.hi;
    }

    // No constant term: final multiplication by x.
    acc = DoubleDouble::quick_f64_mult(x, acc);

    // NOTE(review): the original comment quoted "2^-65.62 < 0x1.4ep-66" for
    // this constant, but the bit pattern decodes to 0x1.0ap-68 - confirm
    // which is intended.
    let err = f64::from_bits(0x3bb0a00000000000) * acc.hi;
    Exp10m1 { exp: acc, err }
}
/// Computes 10^x - 1
///
/// Dispatch order: saturated / NaN inputs, tiny inputs handled with the
/// first terms of the Taylor series, exact small integer exponents, then the
/// fast double-double path with a fallback to the accurate path when the
/// fast result cannot be rounded unambiguously.
///
/// Max found ULP 0.5
pub fn f_exp10m1(d: f64) -> f64 {
let mut x = d;
let t = x.to_bits();
let ux = t;
// bits of |x|
let ax = ux & 0x7fffffffffffffffu64;
if ux >= 0xc03041704c068ef0u64 {
// x = -NaN or x <= -0x1.041704c068efp+4
if (ux >> 52) == 0xfff {
// -NaN or -Inf
return if ux > 0xfff0000000000000u64 {
x + x
} else {
-1.0
};
}
// for x <= -0x1.041704c068efp+4, exp10m1(x) rounds to -1 to nearest;
// adding 2^-54 makes the result depend on the rounding mode
return -1.0 + f64::from_bits(0x3c90000000000000);
} else if ax > 0x40734413509f79feu64 {
// x = +NaN, +Inf or x > 0x1.34413509f79fep+8 (about 308.25): overflow
if (ux >> 52) == 0x7ff {
// +NaN or +Inf
return x + x;
}
// finite overflow: largest finite double times x
return f64::from_bits(0x7fefffffffffffff) * x;
} else if ax <= 0x3c90000000000000u64
// |x| <= 2^-54
/* then the second term of the Taylor expansion of 10^x-1 at x=0 is
small compared with the first one:
log(10)*x + log(10)^2*x^2/2 + ... */
{
/* we use special code when log(10)*|x| is very small, in which case
the double-double approximation h+l has its lower part l
"truncated" */
return if ax <= 0x3970000000000000u64
// |x| <= 2^-104
{
// special case for 0
if x == 0. {
return x;
}
// two hard-coded inputs (given by their bit patterns) whose results
// are produced by a dedicated fused multiply-add
if x.abs() == f64::from_bits(0x000086c73059343c) {
return dd_fmla(
-f64::copysign(f64::from_bits(0x1e60010000000000), x),
f64::from_bits(0x1e50000000000000),
f64::copysign(f64::from_bits(0x000136568740cb56), x),
);
}
if x.abs() == f64::from_bits(0x00013a7b70d0248c) {
return dd_fmla(
f64::copysign(f64::from_bits(0x1e5ffe0000000000), x),
f64::from_bits(0x1e50000000000000),
f64::copysign(f64::from_bits(0x0002d41f3b972fc7), x),
);
}
// scale x by 2^106
x *= f64::from_bits(0x4690000000000000);
// z = log(10) * x in double-double
let mut z = DoubleDouble::from_exact_mult(LN10H, x);
z.lo = dd_fmla(LN10L, x, z.lo);
let mut h2 = z.to_f64(); // round to 53-bit precision
// scale back by 2^-106, hoping to avoid double rounding
h2 *= f64::from_bits(0x3950000000000000);
// now subtract back h2 * 2^106 from h to get the correction term
let mut h = dd_fmla(-h2, f64::from_bits(0x4690000000000000), z.hi);
// add l
h += z.lo;
/* add h2 + h * 2^-106. Warning: when h=0, 2^-106*h2 might be exact,
thus no underflow will be raised.
NOTE(review): the underflow thresholds quoted in the original comment
(0x1.71547652b82fep-1022 for RNDZ, 0x1.71547652b82fdp-1022 for
RNDN/RNDU) appear to be inherited from the exp2m1 analysis - confirm
the values for base 10. */
dyad_fmla(h, f64::from_bits(0x3950000000000000), h2)
} else {
const C2: f64 = f64::from_bits(0x40053524c73cea69); // log(10)^2/2
let mut z = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN10L, LN10H), x);
/* h+l approximates the first term x*log(10) */
/* we add C2*x^2 last, so that in case there is a cancellation in
LN10L*x+l, it will contribute more bits */
z.lo = dd_fmla(C2 * x, x, z.lo);
z.to_f64()
};
}
/* now -0x1.041704c068efp+4 < x < -2^-54
or 2^-54 < x <= 0x1.34413509f79fep+8 */
/* 10^x-1 is exact for x integer, 1 <= x <= 15 */
// quick filter: the low 49 bits of x must be zero before trying the table
if ux << 15 == 0 {
let i = x.floor_finite() as i32;
if x == i as f64 && 1 <= i && i <= 15 {
// bit patterns of 10^i - 1 for i = 1..=15 (entry 0 is unused)
static EXP10_1_15: [u64; 16] = [
0x0000000000000000,
0x4022000000000000,
0x4058c00000000000,
0x408f380000000000,
0x40c3878000000000,
0x40f869f000000000,
0x412e847e00000000,
0x416312cfe0000000,
0x4197d783fc000000,
0x41cdcd64ff800000,
0x4202a05f1ff80000,
0x42374876e7ff0000,
0x426d1a94a1ffe000,
0x42a2309ce53ffe00,
0x42d6bcc41e8fffc0,
0x430c6bf52633fff8,
];
return f64::from_bits(EXP10_1_15[i as usize]);
}
}
// fast path; the flag selects the small-argument polynomial for |x| <= 2^-4
let result = exp10m1_fast(x, ax <= 0x3fb0000000000000u64);
// rounding test: if both ends of the error interval round to the same
// double, that double is the result
let left = result.exp.hi + (result.exp.lo - result.err);
let right = result.exp.hi + (result.exp.lo + result.err);
if left != right {
return exp10m1_accurate(x);
}
left
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_exp10m1() {
assert_eq!(f_exp10m1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002364140972981833),
0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005443635762124408);
assert_eq!(99., f_exp10m1(2.0));
assert_eq!(315.22776601683796, f_exp10m1(2.5));
assert_eq!(-0.7056827241416722, f_exp10m1(-0.5311842449009418));
}
}

141
vendor/pxfm/src/exponents/exp10m1f.rs vendored Normal file
View File

@@ -0,0 +1,141 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::exponents::exp10f::EXP10F_COEFFS;
use crate::polyeval::f_polyeval3;
/// Evaluates `10^x - 1` for small `|x|` directly from the shared minimax
/// polynomial, without the constant term, so that no cancellation against 1
/// takes place.
#[cold]
fn exp10m1f_small(x: f32) -> f32 {
    let xd = x as f64;
    let xd_sq = xd * xd;

    // Pair up consecutive coefficients so the polynomial can be finished in x^2:
    //   linear = COEFFS[0] * x
    //   mid    = COEFFS[1] + COEFFS[2] * x
    //   high   = COEFFS[3] + COEFFS[4] * x
    let high = f_fmla(
        xd,
        f64::from_bits(EXP10F_COEFFS[4]),
        f64::from_bits(EXP10F_COEFFS[3]),
    );
    let mid = f_fmla(
        xd,
        f64::from_bits(EXP10F_COEFFS[2]),
        f64::from_bits(EXP10F_COEFFS[1]),
    );
    let linear = xd * f64::from_bits(EXP10F_COEFFS[0]);

    // 10^x - 1 ~ (1 + COEFFS[0] * x + ... + COEFFS[4] * x^5) - 1
    //          = COEFFS[0] * x + ... + COEFFS[4] * x^5
    f_polyeval3(xd_sq, linear, mid, high) as f32
}
/// Computes 10^x - 1
///
/// Special cases: positive NaN, `+inf` and `x >= log10(2^128)` yield
/// `x + inf`; negative NaN is returned unchanged; `-inf` and sufficiently
/// negative finite `x` yield `-1`.
///
/// Max ULP 0.5
#[inline]
pub fn f_exp10m1f(x: f32) -> f32 {
    // Bit pattern of log10(2^128): at or above this the result overflows.
    const OVERFLOW_BITS: u32 = 0x421a_209b;
    // |x| <= 0.004703594 is handled by the small-argument polynomial.
    const SMALL_BITS: u32 = 0x3b9a_209b;
    // Bit pattern of the negative threshold at or below which the result
    // saturates near -1.
    const SATURATE_BITS: u32 = 0xc0f0_d2f1;

    let bits = x.to_bits();
    let magnitude = bits & 0x7fff_ffffu32;

    // x >= log10(2^128) and 10^x - 1 rounds to +inf, or x is +inf or nan
    if x.is_sign_positive() && bits >= OVERFLOW_BITS {
        return x + f32::INFINITY;
    }

    if magnitude <= SMALL_BITS {
        return exp10m1f_small(x);
    }

    // Very negative x, -inf, or a negative nan.
    if bits >= SATURATE_BITS {
        // exp10m1(nan) = nan
        if x.is_nan() {
            return x;
        }
        // Exactly at the threshold the result is one step above -1.
        if bits == SATURATE_BITS {
            return f32::from_bits(0xbf7fffff); // -1.0f + 0x1.0p-24f
        }
        // exp10m1(-inf) = -1, and everything below the threshold rounds to -1.
        return -1.0;
    }

    // Exact outputs when x = 1, 2, ..., 10.
    // Quick check mask: 0x800f'ffffU = ~(bits of 1.0f | ... | bits of 10.0f)
    if bits & 0x800f_ffffu32 == 0 {
        let exact: Option<f32> = match bits {
            0x3f800000u32 => Some(9.0),             // x = 1.0f
            0x40000000u32 => Some(99.0),            // x = 2.0f
            0x40400000u32 => Some(999.0),           // x = 3.0f
            0x40800000u32 => Some(9_999.0),         // x = 4.0f
            0x40a00000u32 => Some(99_999.0),        // x = 5.0f
            0x40c00000u32 => Some(999_999.0),       // x = 6.0f
            0x40e00000u32 => Some(9_999_999.0),     // x = 7.0f
            0x41000000u32 => Some(99_999_999.0),    // x = 8.0f
            0x41100000u32 => Some(999_999_999.0),   // x = 9.0f
            0x41200000u32 => Some(9_999_999_999.0), // x = 10.0f
            _ => None,
        };
        if let Some(value) = exact {
            return value;
        }
    }

    // Range reduction: 10^x = 2^(mid + hi) * 10^lo
    // reduced = (2^(mid + hi), lo)
    let reduced = crate::exponents::exp10f::exp_b_range_reduc(x);
    let r = reduced.lo;
    let r_sq = r * r;

    // The low part is approximated by a degree-5 minimax polynomial.
    // 10^lo ~ 1 + COEFFS[0] * lo + ... + COEFFS[4] * lo^5
    let c0 = f_fmla(r, f64::from_bits(EXP10F_COEFFS[0]), 1.0);
    let c1 = f_fmla(
        r,
        f64::from_bits(EXP10F_COEFFS[2]),
        f64::from_bits(EXP10F_COEFFS[1]),
    );
    let c2 = f_fmla(
        r,
        f64::from_bits(EXP10F_COEFFS[4]),
        f64::from_bits(EXP10F_COEFFS[3]),
    );
    let exp10_lo = f_polyeval3(r_sq, c0, c1, c2);

    // 10^x - 1 = 2^(mid + hi) * 10^lo - 1
    //          ~ mh * exp10_lo - 1
    f_fmla(exp10_lo, reduced.hi, -1.0) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for `f_exp10m1f`: zero, an exact integer exponent and a
    /// generic value.
    #[test]
    fn test_exp10m1f() {
        let cases: [(f32, f32); 3] = [(0.0, 0.0), (1.0, 9.0), (1.5, 30.622776)];
        for (input, expected) in cases {
            assert_eq!(f_exp10m1f(input), expected);
        }
    }
}

234
vendor/pxfm/src/exponents/exp2.rs vendored Normal file
View File

@@ -0,0 +1,234 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{dd_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::exponents::auxiliary::fast_ldexp;
use crate::exponents::exp::{EXP_REDUCE_T0, EXP_REDUCE_T1, to_denormal};
use crate::round_ties_even::RoundTiesEven;
// Evaluates a degree-5 polynomial in `z` with double-double coefficients
// using Horner's scheme, every step being a double-double multiply-add.
#[inline]
fn exp2_poly_dd(z: f64) -> DoubleDouble {
    // Coefficients as bit-pattern pairs, lowest degree first.
    // NOTE(review): each pair appears to be (low word, high word), matching
    // what `DoubleDouble::from_bit_pair` expects - confirm.
    const C: [(u64, u64); 6] = [
        (0x3bbabc9e3b39873e, 0x3f262e42fefa39ef),
        (0xbae5e43a53e44950, 0x3e4ebfbdff82c58f),
        (0xba0d3a15710d3d83, 0x3d6c6b08d704a0c0),
        (0x3914dd5d2a5e025a, 0x3c83b2ab6fba4e77),
        (0xb83dc47e47beb9dd, 0x3b95d87fe7a66459),
        (0xb744fcd51fcb7640, 0x3aa430912f9fb79d),
    ];

    // Start from the leading coefficient and fold in the rest from high to
    // low degree: acc = acc * z + C[i].
    C[..5]
        .iter()
        .rev()
        .fold(DoubleDouble::from_bit_pair(C[5]), |acc, &pair| {
            DoubleDouble::quick_mul_f64_add(acc, z, DoubleDouble::from_bit_pair(pair))
        })
}
/// Slow double-double path of `f_exp2`, used when the fast path's rounding test fails.
///
/// Splits `4096 * x` into its nearest integer `k` and the remainder `z`, multiplies the
/// table entries selected by the two low 6-bit fields of `k`, evaluates
/// `exp2_poly_dd(z) * z` in double-double arithmetic, and finally scales by `2^(k >> 12)`.
/// Inputs whose bit pattern lies above that of -1022 (i.e. x < -1022) take the
/// `to_denormal` branch instead of `fast_ldexp`.
#[cold]
fn exp2_accurate(x: f64) -> f64 {
let mut ix = x.to_bits();
let sx = 4096.0 * x;
let fx = sx.round_ties_even_finite();
let z = sx - fx;
let k: i64 = unsafe {
// SAFETY: fx is an integral value; the only caller visible here (f_exp2) returns early
// for NaN, infinities, x >= 1024 and x <= -1075, so fx is finite and fits in i64.
fx.to_int_unchecked::<i64>() // this is already finite here
};
// k = ie * 4096 + i0 * 64 + i1
let i1 = k & 0x3f;
let i0 = (k >> 6) & 0x3f;
let ie = k >> 12;
// NOTE(review): the tables are defined in exponents::exp; presumably they hold
// 2^(i0/64) and 2^(i1/4096) as double-double bit pairs - confirm there.
let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
let dt = DoubleDouble::quick_mult(t0, t1);
let mut f = exp2_poly_dd(z);
f = DoubleDouble::quick_mult_f64(f, z);
if ix <= 0xc08ff00000000000u64 {
// x >= -1022
// for -0x1.71547652b82fep-54 <= x <= 0x1.71547652b82fdp-53,
// exp2(x) rounds to 1 to nearest (the upstream comment said "round to x")
if f64::from_bits(0xbc971547652b82fe) <= x && x <= f64::from_bits(0x3ca71547652b82fd) {
return dd_fmla(x, 0.5, 1.0);
} else if (k & 0xfff) == 0 {
// 4096*x rounds to 4096*integer
// Accumulate dt.hi + f.hi + f.lo with two exact additions, keeping the
// final residual in zfl.lo.
let zf = DoubleDouble::from_exact_add(dt.hi, f.hi);
let zfl = DoubleDouble::from_exact_add(zf.lo, f.lo);
f.hi = zf.hi;
f.lo = zfl.hi;
ix = zfl.hi.to_bits();
if ix & 0x000fffffffffffff == 0 {
// fl has an all-zero mantissa field: it is a power of 2 (or zero)
if ((ix >> 52) & 0x7ff) != 0 {
// the biased exponent field is non-zero, so fl is not +/-0
// (the upstream comment here read "|fl| is Inf")
let v = zfl.lo.to_bits();
// d = +1 when fl and the residual share a sign bit, -1 otherwise;
// adding it to the bit pattern moves fl by one step in magnitude
// towards the side the residual points to.
let d: i64 = ((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64)
.wrapping_shl(1)
.wrapping_add(1) as i64;
ix = ix.wrapping_add(d as u64);
f.lo = f64::from_bits(ix);
}
}
} else {
// general case: dt * (1 + f) = dt + dt * f
f = DoubleDouble::quick_mult(f, dt);
f = DoubleDouble::add(dt, f);
}
let hf = DoubleDouble::from_exact_add(f.hi, f.lo);
fast_ldexp(hf.hi, ie as i32)
} else {
// x < -1022: build the f64 whose biased exponent field is 1 - ie (mantissa zero)
// and add it to the result before handing over to `to_denormal`.
// NOTE(review): presumably this offset makes the sum round at the precision of the
// subnormal result - confirm against `to_denormal`.
ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
f = DoubleDouble::quick_mult(f, dt);
f = DoubleDouble::add(dt, f);
let zve = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi);
f.hi = zve.hi;
f.lo += zve.lo;
to_denormal(f.to_f64())
}
}
/// Computes exp2, i.e. 2^x, for `f64`.
///
/// Special cases are resolved first from the bit pattern of `x`:
/// `+/-0` gives 1, NaN gives NaN, `+inf` gives `+inf`, `-inf` gives 0,
/// `x >= 1024` returns `2^1023 * x` (overflow) and `x <= -1075` returns
/// `2^-1022 * 2^-1022` (underflow). Very small `|x|` returns `1 +/- 2^-54`.
///
/// Otherwise `4096 * x` is split into an integer `k` and a remainder `z`; the result is
/// a table value times a degree-3 polynomial correction in `z`. A rounding test with
/// the bound `EPS` decides whether the fast result can be returned or whether
/// `exp2_accurate` must be called. Integer `x` (`frac == 0`) skips the rounding test.
///
/// Max found ULP 0.5
pub fn f_exp2(x: f64) -> f64 {
let mut ix = x.to_bits();
// ax drops the sign bit, so comparisons on ax are comparisons on |x|
let ax = ix.wrapping_shl(1);
if ax == 0 {
// x = +/-0
return 1.0;
}
if ax >= 0x8120000000000000u64 {
// |x| >= 1024
if ax > 0xffe0000000000000u64 {
return x + x; // nan
}
// +/-inf: 2^-inf = 0, 2^+inf = +inf
if ax == 0xffe0000000000000u64 {
return if (ix >> 63) != 0 { 0.0 } else { x };
}
// finite with |x| >= 1024
if (ix >> 63) != 0 {
// x <= -1024
if ix >= 0xc090cc0000000000u64 {
// x <= -1075
const Z: f64 = f64::from_bits(0x0010000000000000);
return Z * Z;
}
// -1075 < x <= -1024 falls through to the subnormal handling below
} else {
// x >= 1024
return f64::from_bits(0x7fe0000000000000) * x;
}
}
// for |x| <= 0x1.71547652b82fep-54, 2^x rounds to 1 to nearest
// this avoids a spurious underflow in z*z below
if ax <= 0x792e2a8eca5705fcu64 {
return 1.0 + f64::copysign(f64::from_bits(0x3c90000000000000), x);
}
// m: mantissa field shifted to the top of the word; ex: unbiased exponent.
// frac is non-zero when |x| < 1 (ex negative) or when mantissa bits remain after
// shifting out the integer part, i.e. exactly when x is not an integer.
let m = ix.wrapping_shl(12);
let ex = (ax >> 53).wrapping_sub(0x3ff);
let frac = ex >> 63 | m << (ex & 63);
let sx = 4096.0 * x;
let fx = sx.round_ties_even_finite();
let z = sx - fx;
let z2 = z * z;
let k = unsafe {
// SAFETY: NaN, infinities, x >= 1024 and x <= -1075 have returned above, so fx is a
// finite integral value that fits in i64.
fx.to_int_unchecked::<i64>() // this already finite here
};
// k = ie * 4096 + i0 * 64 + i1
let i1 = k & 0x3f;
let i0 = (k >> 6) & 0x3f;
let ie = k >> 12;
let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
let ti0 = DoubleDouble::quick_mult(t0, t1);
// Polynomial coefficients (f64 bit patterns), lowest degree first.
const C: [u64; 4] = [
0x3f262e42fefa39ef,
0x3e4ebfbdff82c58f,
0x3d6c6b08d73b3e01,
0x3c83b2ab6fdda001,
];
let tz = ti0.hi * z;
let mut fh = ti0.hi;
// p2 = C[0] + C[1]*z + C[2]*z^2 + C[3]*z^3, evaluated as p0 + z^2 * p1
let p0 = f_fmla(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
let p1 = f_fmla(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
let p2 = f_fmla(z2, p1, p0);
// fh + fl approximates ti0 * (1 + z * p2)
let mut fl = f_fmla(tz, p2, ti0.lo);
// Error bound of the fast path used by the rounding test below.
const EPS: f64 = f64::from_bits(0x3c0833beace2b6fe);
if ix <= 0xc08ff00000000000u64 {
// x >= -1022
if frac != 0 {
// Rounding test: if adding and subtracting EPS round to different values,
// the fast path cannot decide the rounding and the accurate path is used.
let ub = fh + (fl + EPS);
fh += fl - EPS;
if ub != fh {
return exp2_accurate(x);
}
}
fh = fast_ldexp(fh, ie as i32);
} else {
// subnormal case
// f64 with biased exponent field 1 - ie and zero mantissa, added exactly to fh
ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
let rs = DoubleDouble::from_exact_add(f64::from_bits(ix), fh);
fl += rs.lo;
fh = rs.hi;
if frac != 0 {
// same rounding test as in the normal range
let ub = fh + (fl + EPS);
fh += fl - EPS;
if ub != fh {
return exp2_accurate(x);
}
}
// when 2^x is exact, no underflow should be raised
fh = to_denormal(fh);
}
fh
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_exp2` on small integers, two fractional inputs and the
    /// special values (infinities and NaN).
    #[test]
    fn test_exp2d() {
        // (input, expected) pairs compared for exact equality.
        let exact_cases: [(f64, f64); 7] = [
            (2.0, 4.0),
            (3.0, 8.0),
            (4.0, 16.0),
            (0.35f64, 1.2745606273192622),
            (-0.6f64, 0.6597539553864471),
            (f64::INFINITY, f64::INFINITY),
            (f64::NEG_INFINITY, 0.),
        ];
        for &(input, expected) in exact_cases.iter() {
            assert_eq!(f_exp2(input), expected);
        }
        assert!(f_exp2(f64::NAN).is_nan());
    }
}

323
vendor/pxfm/src/exponents/exp2f.rs vendored Normal file
View File

@@ -0,0 +1,323 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{f_fmla, f_fmlaf, pow2if};
use crate::polyeval::f_polyeval6;
use crate::round::RoundFinite;
use std::hint::black_box;
// Number of entries in the `EXP2FT` lookup table.
const TBLSIZE: usize = 64;
// 64-byte aligned wrapper around the table of `(u32, u32)` pairs. Each component is an
// `f32` bit pattern: `dirty_exp2f` decodes `.0` with `f32::from_bits`, and the
// commented-out "ULP 0.508 method" additionally decodes `.1`.
#[repr(align(64))]
struct Exp2Table([(u32, u32); TBLSIZE]);
#[rustfmt::skip]
static EXP2FT: Exp2Table = Exp2Table([(0x3F3504F3, 0xB2D4175E),(0x3F36FD92, 0x3268D5EF),(0x3F38FBAF, 0xB30E8719),(0x3F3AFF5B, 0x3319E7DA),(0x3F3D08A4, 0x333CD82F),(0x3F3F179A, 0x330E1902),(0x3F412C4D, 0x32CCF4D7),(0x3F4346CD, 0x328F330E),(0x3F45672A, 0xB201B5B7),(0x3F478D75, 0x32CCCE34),(0x3F49B9BE, 0x335E937C),(0x3F4BEC15, 0x2FF41909),(0x3F4E248C, 0xB21760EA),(0x3F506334, 0x3283628B),(0x3F52A81E, 0x3340F500),(0x3F54F35B, 0x331202BD),(0x3F5744FD, 0x32B66A3E),(0x3F599D16, 0x32D0D9B1),(0x3F5BFBB8, 0x332ED93F),(0x3F5E60F5, 0x3350A709),(0x3F60CCDF, 0x32025744),(0x3F633F89, 0xB33A7C4D),(0x3F65B907, 0x321DA4E9),(0x3F68396A, 0xB2FF36A7),(0x3F6AC0C7, 0x3217E40E),(0x3F6D4F30, 0xB2400CBB),(0x3F6FE4BA, 0x331A2ACC),(0x3F728177, 0xB2B7D3E5),(0x3F75257D, 0xB1FED2BE),(0x3F77D0DF, 0xB32B73BA),(0x3F7A83B3, 0x32579081),(0x3F7D3E0C, 0xB19726B5),(0x3F800000, 0x00000000),(0x3F8164D2, 0x320C09FB),(0x3F82CD87, 0x3391E031),(0x3F843A29, 0x33287EEF),(0x3F85AAC3, 0xB38F6665),(0x3F871F62, 0x339004AB),(0x3F88980F, 0x33AC4561),(0x3F8A14D5, 0xB39CDAEA),(0x3F8B95C2, 0x32949D5C),(0x3F8D1ADF, 0xB36F79FA),(0x3F8EA43A, 0x33971DC2),(0x3F9031DC, 0xB32BD022),(0x3F91C3D3, 0xB3928952),(0x3F935A2B, 0xB2EBFECF),(0x3F94F4F0, 0x3357B8BB),(0x3F96942D, 0xB307353B),(0x3F9837F0, 0xB345DFE9),(0x3F99E046, 0x3382A804),(0x3F9B8D3A, 0x3326993E),(0x3F9D3EDA, 0x3350A029),(0x3F9EF532, 0xB3605F62),(0x3FA0B051, 0xB210909B),(0x3FA27043, 0xB0DDC369),(0x3FA43516, 0x33385844),(0x3FA5FED7, 0x33400757),(0x3FA7CD94, 0x3325446E),(0x3FA9A15B, 0x33237A50),(0x3FAB7A3A, 0x33201CA4),(0x3FAD583F, 0x32394687),(0x3FAF3B79, 0x332E1225),(0x3FB123F6, 0x33838969),(0x3FB311C4, 0xB219F2BA)]);
/**
Generated by SageMath:
```python
print("[")
for k in range(64):
k = RealField(150)(2)**(RealField(150)(k) / RealField(150)(64))
print(double_to_hex(k) + ",")
print("];")
```
**/
pub(crate) static EXP2F_TABLE: [u64; 64] = [
0x3ff0000000000000,
0x3ff02c9a3e778061,
0x3ff059b0d3158574,
0x3ff0874518759bc8,
0x3ff0b5586cf9890f,
0x3ff0e3ec32d3d1a2,
0x3ff11301d0125b51,
0x3ff1429aaea92de0,
0x3ff172b83c7d517b,
0x3ff1a35beb6fcb75,
0x3ff1d4873168b9aa,
0x3ff2063b88628cd6,
0x3ff2387a6e756238,
0x3ff26b4565e27cdd,
0x3ff29e9df51fdee1,
0x3ff2d285a6e4030b,
0x3ff306fe0a31b715,
0x3ff33c08b26416ff,
0x3ff371a7373aa9cb,
0x3ff3a7db34e59ff7,
0x3ff3dea64c123422,
0x3ff4160a21f72e2a,
0x3ff44e086061892d,
0x3ff486a2b5c13cd0,
0x3ff4bfdad5362a27,
0x3ff4f9b2769d2ca7,
0x3ff5342b569d4f82,
0x3ff56f4736b527da,
0x3ff5ab07dd485429,
0x3ff5e76f15ad2148,
0x3ff6247eb03a5585,
0x3ff6623882552225,
0x3ff6a09e667f3bcd,
0x3ff6dfb23c651a2f,
0x3ff71f75e8ec5f74,
0x3ff75feb564267c9,
0x3ff7a11473eb0187,
0x3ff7e2f336cf4e62,
0x3ff82589994cce13,
0x3ff868d99b4492ed,
0x3ff8ace5422aa0db,
0x3ff8f1ae99157736,
0x3ff93737b0cdc5e5,
0x3ff97d829fde4e50,
0x3ff9c49182a3f090,
0x3ffa0c667b5de565,
0x3ffa5503b23e255d,
0x3ffa9e6b5579fdbf,
0x3ffae89f995ad3ad,
0x3ffb33a2b84f15fb,
0x3ffb7f76f2fb5e47,
0x3ffbcc1e904bc1d2,
0x3ffc199bdd85529c,
0x3ffc67f12e57d14b,
0x3ffcb720dcef9069,
0x3ffd072d4a07897c,
0x3ffd5818dcfba487,
0x3ffda9e603db3285,
0x3ffdfc97337b9b5f,
0x3ffe502ee78b3ff6,
0x3ffea4afa2a490da,
0x3ffefa1bee615a27,
0x3fff50765b6e4540,
0x3fffa7c1819e90d8,
];
/* ULP 0.508 method
let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32;
let ui = f32::to_bits(d + redux);
let mut i0 = ui;
i0 = i0.wrapping_add(TBLSIZE as u32 / 2);
let k = i0 / TBLSIZE as u32;
i0 &= TBLSIZE as u32 - 1;
let mut uf = f32::from_bits(ui);
uf -= redux;
let item = EXP2FT.0[i0 as usize];
let z0: f32 = f32::from_bits(item.0);
let z1: f32 = f32::from_bits(item.1);
let f: f32 = d - uf - z1;
let mut u = 0.055504108664458832;
u = f_fmlaf(u, f, 0.24022650695908768);
u = f_fmlaf(u, f, 0.69314718055994973);
u *= f;
let i2 = pow2if(k as i32);
f_fmlaf(u, z0, z0) * i2
*/
/// Computing exp2f, i.e. 2^x for `f32`.
///
/// Processing order:
/// 1. integer `x` with `1 <= |x| < 2^9` builds the result directly from the exponent
///    (normal or subnormal bit pattern);
/// 2. `|x| < 2^-26`, NaN, infinities, `x >= 128` and `x <= -149` are handled as
///    special cases;
/// 3. `|x| <= 1/32` uses a single degree-5 polynomial evaluated in `f64`;
/// 4. everything else reduces `x` to `k/64 + dx`, looks up `2^(k/64)` in
///    `EXP2F_TABLE` and applies a degree-4 polynomial correction in `f64`.
///
/// ULP 0.4999994
#[inline]
pub fn f_exp2f(x: f32) -> f32 {
let mut t = x.to_bits();
if (t & 0xffff) == 0 {
// x maybe integer
let k: i32 = (((t >> 23) & 0xff) as i32).wrapping_sub(127); // 2^k <= |x| < 2^(k+1)
if k >= 0 && k < 9 && (t << (9i32.wrapping_add(k))) == 0 {
// x integer, with 1 <= |x| < 2^9
// msk is 0 for positive x and -1 for negative x
let msk = (t as i32) >> 31;
// m = |x| as an integer, then negated if x < 0, then biased by 127
let mut m: i32 = (((t & 0x7fffff) | (1 << 23)) >> (23 - k)) as i32;
m = (m ^ msk).wrapping_sub(msk).wrapping_add(127);
if m > 0 && m < 255 {
// normal result: m is the biased exponent, mantissa is zero
t = (m as u32).wrapping_shl(23);
return f32::from_bits(t);
} else if m <= 0 && m > -23 {
// subnormal result: a single mantissa bit at position 22 + m
t = 1i32.wrapping_shl(22i32.wrapping_add(m) as u32) as u32;
return f32::from_bits(t);
}
}
}
// ux drops the sign bit, so comparisons on ux are comparisons on |x|
let ux = t.wrapping_shl(1);
if ux >= 0x86000000u32 || ux < 0x65000000u32 {
// |x| >= 128 or x=nan or |x| < 0x1p-26
if ux < 0x65000000u32 {
return 1.0 + x;
} // |x| < 0x1p-26
// x <= -149 or 128 <= x (or inf/nan) is special; -149 < x <= -128 falls through
// to the general path below
if !(t >= 0xc3000000u32 && t < 0xc3150000u32) {
if ux >= 0xffu32 << 24 {
// x is inf or nan
if ux > (0xffu32 << 24) {
return x + x;
} // x = nan
static IR: [f32; 2] = [f32::INFINITY, 0.];
return IR[(t >> 31) as usize]; // x = +-inf
}
if t >= 0xc3150000u32 {
// x <= -149
// y = (x + 149) * 2^-150 + 2^-149, clamped from below at 2^-151,
// then rounded to f32
let z = x;
let mut y = f_fmla(
z as f64 + 149.,
f64::from_bits(0x3690000000000000),
f64::from_bits(0x36a0000000000000),
);
y = y.max(f64::from_bits(0x3680000000000000));
return y as f32;
}
// now x >= 128
// 2^127 * 2^127 in f64, converted to f32; black_box keeps the product from
// being folded at compile time
let r = black_box(f64::from_bits(0x47e0000000000000))
* black_box(f64::from_bits(0x47e0000000000000));
return r as f32;
}
}
if ux <= 0x7a000000u32 {
// |x| <= 1/32
// Generated by Sollya exp2 on range [-1/32;1/32]:
// d = [-1/32, 1/32];
// f_exp2f = (2^y - 1)/y;
// Q = fpminimax(f_exp2f, 5, [|D...|], d, relative, floating);
// See ./notes/exp2f_small.sollya
const C: [u64; 6] = [
0x3fe62e42fefa39f3,
0x3fcebfbdff82c57b,
0x3fac6b08d6f2d7aa,
0x3f83b2ab6fc92f5d,
0x3f55d897cfe27125,
0x3f243090e61e6af1,
];
let xd = x as f64;
let p = f_polyeval6(
xd,
f64::from_bits(C[0]),
f64::from_bits(C[1]),
f64::from_bits(C[2]),
f64::from_bits(C[3]),
f64::from_bits(C[4]),
f64::from_bits(C[5]),
);
// 2^x ~ 1 + x * Q(x)
return f_fmla(p, xd, 1.) as f32;
}
let x_d = x as f64;
let kf = (x_d * 64.).round_finite();
// SAFETY: x is finite here with -149 < x < 128 (all other inputs returned above),
// so kf is an integral value well inside the i32 range.
let k = unsafe { kf.to_int_unchecked::<i32>() }; // it's already not indeterminate.
// dx = lo = x - (hi + mid) = x - kf * 2^(-6)
let dx = f_fmla(f64::from_bits(0xbf90000000000000), kf, x_d);
const TABLE_BITS: u32 = 6;
const TABLE_MASK: u64 = (1u64 << TABLE_BITS) - 1;
// hi = floor(kf * 2^(-6))
// exp_hi = shift hi to the exponent field of double precision.
let exp_hi: i64 = ((k >> TABLE_BITS) as i64).wrapping_shl(52);
// mh = 2^hi * 2^mid
// mh_bits = bit field of mh
let mh_bits = (EXP2F_TABLE[((k as u64) & TABLE_MASK) as usize] as i64).wrapping_add(exp_hi);
let mh = f64::from_bits(mh_bits as u64);
// Degree-4 polynomial approximating (2^x - 1)/x generated by Sollya with:
// > P = fpminimax((2^y - 1)/y, 4, [|D...|], [-1/64. 1/64]);
// see ./notes/exp2f.sollya
const C: [u64; 5] = [
0x3fe62e42fefa39ef,
0x3fcebfbdff8131c4,
0x3fac6b08d7061695,
0x3f83b2b1bee74b2a,
0x3f55d88091198529,
];
let dx_sq = dx * dx;
// c1 = 1 + C[0]*dx; p = C[1] + C[2]*dx + C[3]*dx^2 + C[4]*dx^3
let c1 = f_fmla(dx, f64::from_bits(C[0]), 1.0);
let c2 = f_fmla(dx, f64::from_bits(C[2]), f64::from_bits(C[1]));
let c3 = f_fmla(dx, f64::from_bits(C[4]), f64::from_bits(C[3]));
let p = f_fmla(dx_sq, c3, c2);
// 2^x = 2^(hi + mid + lo)
// = 2^(hi + mid) * 2^lo
// ~ mh * (1 + lo * P(lo))
// = mh + (mh*lo) * P(lo)
f_fmla(p, dx_sq * mh, c1 * mh) as f32
}
/// Fast, reduced-accuracy approximation of 2^d for `f32`.
///
/// Rounds `d` to a multiple of 1/64 with the add-a-large-constant trick, looks up the
/// table value for the low six bits of the rounded index, corrects it with a short
/// polynomial in the remainder and scales by the power of two taken from the remaining
/// index bits. Unlike the "ULP 0.508 method" kept in the comment above `f_exp2f`, the
/// second table component is not used.
#[inline]
pub(crate) fn dirty_exp2f(d: f32) -> f32 {
    const INDEX_MASK: u32 = TBLSIZE as u32 - 1;
    // 1.5 * 2^23 / 64: adding it pushes the 1/64 digit of `d` into the last mantissa bit.
    let shift = f32::from_bits(0x4b400000) / TBLSIZE as f32;
    let shifted_bits = f32::to_bits(d + shift);
    // Offset the index by half a table before splitting it into exponent and slot.
    let biased = shifted_bits.wrapping_add(TBLSIZE as u32 / 2);
    let exponent = biased / TBLSIZE as u32;
    let slot = biased & INDEX_MASK;
    // Subtracting the shift back yields `d` rounded to the table grid.
    let rounded = f32::from_bits(shifted_bits) - shift;
    let (base_bits, _) = EXP2FT.0[slot as usize];
    let base: f32 = f32::from_bits(base_bits);
    let rem: f32 = d - rounded;
    // rem * (0.693... + 0.240... * rem)
    let poly = f_fmlaf(0.24022650695908768, rem, 0.69314718055994973) * rem;
    let scale = pow2if(exponent as i32);
    f_fmlaf(poly, base, base) * scale
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_exp2f` on exact powers of two, a few fractional inputs and the
    /// special values (infinities and NaN).
    #[test]
    fn test_exp2f() {
        // (input, expected) pairs compared for exact equality.
        let exact_cases: [(f32, f32); 9] = [
            (1. / 64., 1.0108893),
            (2.0, 4.0),
            (3.0, 8.0),
            (4.0, 16.0),
            (10.0, 1024.0),
            (-10.0, 0.0009765625),
            (-0.35, 0.7845841),
            (0.35, 1.2745606),
            (f32::NEG_INFINITY, 0.0),
        ];
        for &(input, expected) in exact_cases.iter() {
            assert_eq!(f_exp2f(input), expected);
        }
        assert!(f_exp2f(f32::NAN).is_nan());
        assert!(f_exp2f(f32::INFINITY).is_infinite());
    }

    /// `dirty_exp2f` must stay within 1e-5 of the standard library's `exp2`.
    #[test]
    fn test_dirty_exp2f() {
        for &input in [0.35f32, -0.6f32].iter() {
            assert!((dirty_exp2f(input) - input.exp2()).abs() < 1e-5);
        }
    }
}

663
vendor/pxfm/src/exponents/exp2m1.rs vendored Normal file
View File

@@ -0,0 +1,663 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{dd_fmla, dyad_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::exponents::fast_ldexp;
use crate::floor::FloorFinite;
use crate::round_ties_even::RoundTiesEven;
// High and low parts of a double-double constant; the error analysis inside
// `exp2m1_fast` treats LN2H + LN2L as an approximation of log(2).
const LN2H: f64 = f64::from_bits(0x3fe62e42fefa39ef);
const LN2L: f64 = f64::from_bits(0x3c7abc9e3b39803f);
// Result of the fast exp2m1 path: a double-double value together with an error bound.
struct Exp2m1 {
// double-double approximation of the result
exp: DoubleDouble,
// error bound attached to `exp` (`exp2m1_fast` sets it to a constant times `exp.hi`)
err: f64,
}
/* For 0 <= i < 64, EXP_M1_2_TABLE1[i] encodes h+l, the best double-double
approximation of 2^(i/64). Each entry is stored as the bit pair (l, h): the low
word comes first, the high word second. The approximation error is bounded as follows:
|h + l - 2^(i/64)| < 2^-107. */
pub(crate) static EXP_M1_2_TABLE1: [(u64, u64); 64] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc719083535b085d, 0x3ff02c9a3e778061),
(0x3c8d73e2a475b465, 0x3ff059b0d3158574),
(0x3c6186be4bb284ff, 0x3ff0874518759bc8),
(0x3c98a62e4adc610b, 0x3ff0b5586cf9890f),
(0x3c403a1727c57b53, 0x3ff0e3ec32d3d1a2),
(0xbc96c51039449b3a, 0x3ff11301d0125b51),
(0xbc932fbf9af1369e, 0x3ff1429aaea92de0),
(0xbc819041b9d78a76, 0x3ff172b83c7d517b),
(0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75),
(0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
(0x3c8dc775814a8495, 0x3ff2063b88628cd6),
(0x3c99b07eb6c70573, 0x3ff2387a6e756238),
(0x3c82bd339940e9d9, 0x3ff26b4565e27cdd),
(0x3c8612e8afad1255, 0x3ff29e9df51fdee1),
(0x3c90024754db41d5, 0x3ff2d285a6e4030b),
(0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
(0x3c932721843659a6, 0x3ff33c08b26416ff),
(0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
(0xbc75e436d661f5e3, 0x3ff3a7db34e59ff7),
(0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
(0xbc5ef3691c309278, 0x3ff4160a21f72e2a),
(0x3c489b7a04ef80d0, 0x3ff44e086061892d),
(0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0),
(0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
(0xbc94b309d25957e3, 0x3ff4f9b2769d2ca7),
(0xbc807abe1db13cad, 0x3ff5342b569d4f82),
(0x3c99bb2c011d93ad, 0x3ff56f4736b527da),
(0x3c96324c054647ad, 0x3ff5ab07dd485429),
(0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148),
(0xbc9383c17e40b497, 0x3ff6247eb03a5585),
(0xbc9bb60987591c34, 0x3ff6623882552225),
(0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
(0xbc6bbe3a683c88ab, 0x3ff6dfb23c651a2f),
(0xbc816e4786887a99, 0x3ff71f75e8ec5f74),
(0xbc90245957316dd3, 0x3ff75feb564267c9),
(0xbc841577ee04992f, 0x3ff7a11473eb0187),
(0x3c705d02ba15797e, 0x3ff7e2f336cf4e62),
(0xbc9d4c1dd41532d8, 0x3ff82589994cce13),
(0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed),
(0x3c96e9f156864b27, 0x3ff8ace5422aa0db),
(0x3c85cc13a2e3976c, 0x3ff8f1ae99157736),
(0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5),
(0xbc9d185b7c1b85d1, 0x3ff97d829fde4e50),
(0x3c7c7c46b071f2be, 0x3ff9c49182a3f090),
(0xbc9359495d1cd533, 0x3ffa0c667b5de565),
(0xbc9d2f6edb8d41e1, 0x3ffa5503b23e255d),
(0x3c90fac90ef7fd31, 0x3ffa9e6b5579fdbf),
(0x3c97a1cd345dcc81, 0x3ffae89f995ad3ad),
(0xbc62805e3084d708, 0x3ffb33a2b84f15fb),
(0xbc75584f7e54ac3b, 0x3ffb7f76f2fb5e47),
(0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2),
(0x3c811065895048dd, 0x3ffc199bdd85529c),
(0x3c92884dff483cad, 0x3ffc67f12e57d14b),
(0x3c7503cbd1e949db, 0x3ffcb720dcef9069),
(0xbc9cbc3743797a9c, 0x3ffd072d4a07897c),
(0x3c82ed02d75b3707, 0x3ffd5818dcfba487),
(0x3c9c2300696db532, 0x3ffda9e603db3285),
(0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f),
(0x3c839e8980a9cc8f, 0x3ffe502ee78b3ff6),
(0xbc9e9c23179c2893, 0x3ffea4afa2a490da),
(0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27),
(0x3c99d3e12dd8a18b, 0x3fff50765b6e4540),
(0x3c874853f3a5931e, 0x3fffa7c1819e90d8),
];
/* For 0 <= i < 64, EXP_M1_2_TABLE2[i] encodes h+l, the best double-double
approximation of 2^(i/2^12). Each entry is stored as the bit pair (l, h): the low
word comes first, the high word second. The approximation error is bounded as follows:
|h + l - 2^(i/2^12)| < 2^-107. */
pub(crate) static EXP_M1_2_TABLE2: [(u64, u64); 64] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3c9ae8e38c59c72a, 0x3ff000b175effdc7),
(0xbc57b5d0d58ea8f4, 0x3ff00162f3904052),
(0x3c94115cb6b16a8e, 0x3ff0021478e11ce6),
(0xbc8d7c96f201bb2f, 0x3ff002c605e2e8cf),
(0x3c984711d4c35e9f, 0x3ff003779a95f959),
(0xbc80484245243777, 0x3ff0042936faa3d8),
(0xbc94b237da2025f9, 0x3ff004dadb113da0),
(0xbc75e00e62d6b30d, 0x3ff0058c86da1c0a),
(0x3c9a1d6cedbb9481, 0x3ff0063e3a559473),
(0xbc94acf197a00142, 0x3ff006eff583fc3d),
(0xbc6eaf2ea42391a5, 0x3ff007a1b865a8ca),
(0x3c7da93f90835f75, 0x3ff0085382faef83),
(0xbc86a79084ab093c, 0x3ff00905554425d4),
(0x3c986364f8fbe8f8, 0x3ff009b72f41a12b),
(0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd),
(0xbc84f6b2a7609f71, 0x3ff00b1afa5abcbf),
(0xbc7e1a258ea8f71b, 0x3ff00bcceb7707ec),
(0x3c74362ca5bc26f1, 0x3ff00c7ee448ee02),
(0x3c9095a56c919d02, 0x3ff00d30e4d0c483),
(0xbc6406ac4e81a645, 0x3ff00de2ed0ee0f5),
(0x3c9b5a6902767e09, 0x3ff00e94fd0398e0),
(0xbc991b2060859321, 0x3ff00f4714af41d3),
(0x3c8427068ab22306, 0x3ff00ff93412315c),
(0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11),
(0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b),
(0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63),
(0xbc734104ee7edae9, 0x3ff012c1fecd613b),
(0xbc72b6aeb6176892, 0x3ff0137444c9b5b5),
(0x3c7a8cd33b8a1bb3, 0x3ff01426927f5278),
(0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f),
(0x3c857ba2dc7e0c73, 0x3ff0158b4517bb88),
(0x3c9b61299ab8cdb7, 0x3ff0163da9fb3335),
(0xbc990565902c5f44, 0x3ff016f0169949ed),
(0x3c870fc41c5c2d53, 0x3ff017a28af25567),
(0x3c94b9a6e145d76c, 0x3ff018550706ab62),
(0xbc7008eff5142bf9, 0x3ff019078ad6a19f),
(0xbc977669f033c7de, 0x3ff019ba16628de2),
(0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3),
(0x3c9371231477ece5, 0x3ff01b1f44af9f9e),
(0x3c75e7626621eb5b, 0x3ff01bd1e77170b4),
(0xbc9bc72b100828a5, 0x3ff01c8491f08f08),
(0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070),
(0x3c816996709da2e2, 0x3ff01de9fe280ac8),
(0xbc8c11f5239bf535, 0x3ff01e9cbfe113ef),
(0x3c8e1d4eb5edc6b3, 0x3ff01f4f8958c1c6),
(0xbc9afb99946ee3f0, 0x3ff020025a8f6a35),
(0xbc98f06d8a148a32, 0x3ff020b533856324),
(0xbc82bf310fc54eb6, 0x3ff02168143b0281),
(0xbc9c95a035eb4175, 0x3ff0221afcb09e3e),
(0xbc9491793e46834d, 0x3ff022cdece68c4f),
(0xbc73e8d0d9c49091, 0x3ff02380e4dd22ad),
(0xbc9314aa16278aa3, 0x3ff02433e494b755),
(0x3c848daf888e9651, 0x3ff024e6ec0da046),
(0x3c856dc8046821f4, 0x3ff02599fb483385),
(0x3c945b42356b9d47, 0x3ff0264d1244c719),
(0xbc7082ef51b61d7e, 0x3ff027003103b10e),
(0x3c72106ed0920a34, 0x3ff027b357854772),
(0xbc9fd4cf26ea5d0f, 0x3ff0286685c9e059),
(0xbc909f8775e78084, 0x3ff02919bbd1d1d8),
(0x3c564cbba902ca27, 0x3ff029ccf99d720a),
(0x3c94383ef231d207, 0x3ff02a803f2d170d),
(0x3c94a47a505b3a47, 0x3ff02b338c811703),
(0x3c9e47120223467f, 0x3ff02be6e199c811),
];
// Fast-path approximation of exp(z) for a double-double argument z = zh + zl,
// valid for |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6
// (assuming x^y does not overflow or underflow).
//
// Evaluates 1 + z * (1 + z * (Q2 + z * (Q3 + Q4 * zh))): the three highest terms are
// computed in plain double precision, the last two steps in double-double.
#[inline]
fn q_1(dz: DoubleDouble) -> DoubleDouble {
    // Coefficients of degree 0..4.
    const Q0: f64 = f64::from_bits(0x3ff0000000000000);
    const Q1: f64 = f64::from_bits(0x3ff0000000000000);
    const Q2: f64 = f64::from_bits(0x3fe0000000000000);
    const Q3: f64 = f64::from_bits(0x3fc5555555995d37);
    const Q4: f64 = f64::from_bits(0x3fa55555558489dc);
    let z = dz.to_f64();
    // Degrees 2..4 in double precision; the innermost step multiplies by the high part only.
    let tail = f_fmla(f_fmla(Q4, dz.hi, Q3), z, Q2);
    // Q1 + tail * z as an exact sum, then multiply by z and add Q0 in double-double.
    let linear = DoubleDouble::from_exact_add(Q1, tail * z);
    DoubleDouble::f64_add(Q0, DoubleDouble::quick_mult(dz, linear))
}
// Fast-path double-double approximation of exp(x) for a double-double argument x.
//
// Reduces x to k * log(2)/2^12 + y with k an integer, splits k into a power-of-two
// exponent and two 6-bit table indices, and returns
// 2^(k >> 12) * EXP_M1_2_TABLE1[i2] * EXP_M1_2_TABLE2[i1] * q_1(y).
#[inline]
fn exp1(x: DoubleDouble) -> DoubleDouble {
const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe); /* |INVLOG2-2^12/log(2)| < 2^-43.4 */
let k = (x.hi * INVLOG2).round_ties_even_finite();
// LOG2H + LOG2L: double-double value of log(2)/2^12 (the inverse of INVLOG2)
const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);
const LOG2DD: DoubleDouble = DoubleDouble::new(LOG2L, LOG2H);
// yz = x - k * log(2)/2^12, the reduced argument
let zk = DoubleDouble::quick_mult_f64(LOG2DD, k);
let mut yz = DoubleDouble::from_exact_add(x.hi - zk.hi, x.lo);
yz.lo -= zk.lo;
// SAFETY: k is an integral value produced by round_ties_even_finite.
// NOTE(review): that it fits in i64 relies on the caller's bound on x
// (exp2m1_fast documents -54 < x < 1024) - confirm for any new caller.
let ik: i64 = unsafe { k.to_int_unchecked::<i64>() }; /* Note: k is an integer, this is just a conversion. */
// im: biased exponent of 2^(ik >> 12); i2, i1: table indices from the low 12 bits
let im: i64 = (ik >> 12).wrapping_add(0x3ff);
let i2: i64 = (ik >> 6) & 0x3f;
let i1: i64 = ik & 0x3f;
let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]);
let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]);
let p0 = DoubleDouble::quick_mult(t2, t1);
let mut q = q_1(yz);
q = DoubleDouble::quick_mult(p0, q);
/* Scale by 2^k. Warning: for x near 1024, we can have k=2^22, thus
M = 2047, which encodes Inf */
let mut du = (im as u64).wrapping_shl(52);
if im == 0x7ff {
// split the scaling into a factor 2 and 2^(im - 1 - 0x3ff) so that the
// scale factor itself stays finite
q.hi *= 2.0;
q.lo *= 2.0;
du = (im.wrapping_sub(1) as u64).wrapping_shl(52);
}
q.hi *= f64::from_bits(du);
q.lo *= f64::from_bits(du);
q
}
// Fast path of exp2m1: returns a double-double approximation of 2^x - 1 together with
// an error bound.
//
// When `tiny` is set the work is delegated to `exp2m1_fast_tiny`. Otherwise x * log(2)
// is formed as a double-double, `exp1` computes its exponential, and 1 is subtracted
// with an exact addition.
#[inline]
fn exp2m1_fast(x: f64, tiny: bool) -> Exp2m1 {
if tiny {
return exp2m1_fast_tiny(x);
}
/* now -54 < x < -0.125 or 0.125 < x < 1024: we approximate exp(x*log(2))
and subtract 1 */
// v = x * (LN2H + LN2L): exact product with LN2H, low part corrected with an fma
let mut v = DoubleDouble::from_exact_mult(LN2H, x);
v.lo = f_fmla(x, LN2L, v.lo);
/*
The a_mul() call is exact, and the error of the fma() is bounded by
ulp(l).
We have |t| <= ulp(h) <= ulp(LN2H*1024) = 2^-43,
|t+x*LN2L| <= 2^-43 * 1024*LN2L < 2^-42.7,
thus |l| <= |t| + |x*LN2L| + ulp(t+x*LN2L)
<= 2^-42.7 + 2^-95 <= 2^-42.6, and ulp(l) <= 2^-95.
Thus:
|h + l - x*log(2)| <= |h + l - x*(LN2H+LN2L)| + |x|*|LN2H+LN2L-log(2)|
<= 2^-95 + 1024*2^-110.4 < 2^-94.9 */
let mut p = exp1(v);
// Subtract 1 with the operand of larger magnitude passed first.
let zf: DoubleDouble = if x >= 0. {
// implies h >= 1 and the fast_two_sum pre-condition holds
DoubleDouble::from_exact_add(p.hi, -1.0)
} else {
DoubleDouble::from_exact_add(-1.0, p.hi)
};
p.lo += zf.lo;
p.hi = zf.hi;
/* The error in the above fast_two_sum is bounded by 2^-105*|h|,
with the new value of h, thus the total absolute error is bounded
by eps1*|h_in|+2^-105*|h|.
Relatively to h this yields eps1*|h_in/h| + 2^-105, where the maximum
of |h_in/h| is obtained for x near -0.125, with |2^x/(2^x-1)| < 11.05.
We get a relative error bound of 2^-74.138*11.05 + 2^-105 < 2^-70.67. */
Exp2m1 {
exp: p,
err: f64::from_bits(0x3b84200000000000) * p.hi, /* 2^-70.67 < 0x1.42p-71 */
}
}
// Approximation for the accurate path of exp(z) for z=zh+zl,
// with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6
// (assuming x^y does not overflow or underflow)
//
// Horner evaluation of a degree-7 polynomial: degrees 7..4 in double precision,
// degrees 3..0 in double-double arithmetic.
#[inline]
fn q_2(dz: DoubleDouble) -> DoubleDouble {
/* Let q[0]..q[7] be the coefficients of degree 0..7 of Q_2.
The ulp of q[7]*z^7 is at most 2^-155, thus we can compute q[7]*z^7
in double precision only.
The ulp of q[6]*z^6 is at most 2^-139, thus we can compute q[6]*z^6
in double precision only.
The ulp of q[5]*z^5 is at most 2^-124, thus we can compute q[5]*z^5
in double precision only. */
/* The following is a degree-7 polynomial generated by Sollya for exp(z)
over [-0.000130273,0.000130273] with absolute error < 2^-113.218
(see file exp_accurate.sollya). Since we use this code only for
|x| > 0.125 in exp2m1(x), the corresponding relative error for exp2m1
is about 2^-113.218/|exp2m1(-0.125)| which is about 2^-110. */
// Array layout: the degree-3 coefficient is split into a high part (index 3) and a
// low part (index 4), so indices 5..8 hold the coefficients of degree 4..7.
const Q_2: [u64; 9] = [
0x3ff0000000000000,
0x3ff0000000000000,
0x3fe0000000000000,
0x3fc5555555555555,
0x3c655555555c4d26,
0x3fa5555555555555,
0x3f81111111111111,
0x3f56c16c3fbb4213,
0x3f2a01a023ede0d7,
];
let z = dz.to_f64();
// degrees 7..4 in double precision (the first step uses the high part of z only)
let mut q = dd_fmla(f64::from_bits(Q_2[8]), dz.hi, f64::from_bits(Q_2[7]));
q = dd_fmla(q, z, f64::from_bits(Q_2[6]));
q = dd_fmla(q, z, f64::from_bits(Q_2[5]));
// multiply q by z and add Q_2[3] + Q_2[4]
let mut p = DoubleDouble::from_exact_mult(q, z);
let r0 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[3]), p.hi);
p.hi = r0.hi;
p.lo += r0.lo + f64::from_bits(Q_2[4]);
// multiply hi+lo by zh+zl and add Q_2[2]
p = DoubleDouble::quick_mult(p, dz);
let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[2]), p.hi);
p.hi = r1.hi;
p.lo += r1.lo;
// multiply hi+lo by zh+zl and add Q_2[1]
p = DoubleDouble::quick_mult(p, dz);
let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[1]), p.hi);
p.hi = r1.hi;
p.lo += r1.lo;
// multiply hi+lo by zh+zl and add Q_2[0]
p = DoubleDouble::quick_mult(p, dz);
let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[0]), p.hi);
p.hi = r1.hi;
p.lo += r1.lo;
p
}
// returns a double-double approximation hi+lo of exp(x*log(2)) for |x| < 745
//
// Splits x into k/2^12 + yh with k an integer, multiplies yh by log(2) in
// double-double, evaluates q_2 on it and combines the result with the two table
// entries selected by the low 12 bits of k and the power of two 2^(k >> 12).
#[inline]
fn exp_2(x: f64) -> DoubleDouble {
// k = round(x * 2^12)
let k = (x * f64::from_bits(0x40b0000000000000)).round_ties_even_finite();
// since |x| <= 745 we have k <= 3051520
let yhh = f_fmla(-k, f64::from_bits(0x3f30000000000000), x); // exact, |yh| <= 2^-13
/* now x = k + yh, thus 2^x = 2^k * 2^yh, and we multiply yh by log(2)
to use the accurate path of exp() */
let ky = DoubleDouble::quick_f64_mult(yhh, DoubleDouble::new(LN2L, LN2H));
let ik: i64 = unsafe {
// SAFETY: k is an integral value produced by round_ties_even_finite.
// NOTE(review): that it is finite and fits in i64 relies on the caller passing a
// finite, bounded x - confirm for any new caller.
k.to_int_unchecked::<i64>() // k is already integer, this is just a conversion
};
// im: biased exponent of 2^(ik >> 12); i2, i1: table indices from the low 12 bits
let im = (ik >> 12).wrapping_add(0x3ff);
let i2 = (ik >> 6) & 0x3f;
let i1 = ik & 0x3f;
let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]);
let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]);
let p = DoubleDouble::quick_mult(t2, t1);
let mut q = q_2(ky);
q = DoubleDouble::quick_mult(p, q);
let mut ud: u64 = (im as u64).wrapping_shl(52);
if im == 0x7ff {
// an exponent field of 0x7ff would encode Inf: scale by 2 first and use
// the exponent im - 1 for the remaining factor
q.hi *= 2.0;
q.lo *= 2.0;
ud = (im.wrapping_sub(1) as u64).wrapping_shl(52);
}
q.hi *= f64::from_bits(ud);
q.lo *= f64::from_bits(ud);
q
}
#[cold]
pub(crate) fn exp2m1_accurate_tiny(x: f64) -> f64 {
let x2 = x * x;
let x4 = x2 * x2;
const Q: [u64; 22] = [
0x3fe62e42fefa39ef,
0x3c7abc9e3b398040,
0x3fcebfbdff82c58f,
0xbc65e43a53e44dcf,
0x3fac6b08d704a0c0,
0xbc4d331627517168,
0x3f83b2ab6fba4e77,
0x3c14e65df0779f8c,
0x3f55d87fe78a6731,
0x3bd0717fbf4bd050,
0x3f2430912f86c787,
0x3bcbd2bdec9bcd42,
0x3eeffcbfc588b0c7,
0xbb8e60aa6d5e4aa9,
0x3eb62c0223a5c824,
0x3e7b5253d395e7d4,
0x3e3e4cf5158b9160,
0x3dfe8cac734c6058,
0x3dbc3bd64f17199d,
0x3d78161a17e05651,
0x3d33150b3d792231,
0x3cec184260bfad7e,
];
let mut c13 = dd_fmla(f64::from_bits(Q[20]), x, f64::from_bits(Q[19])); // degree 13
let c11 = dd_fmla(f64::from_bits(Q[18]), x, f64::from_bits(Q[17])); // degree 11
c13 = dd_fmla(f64::from_bits(Q[21]), x2, c13); // degree 13
// add Q[16]*x+c11*x2+c13*x4 to Q[15] (degree 9)
let mut p = DoubleDouble::from_exact_add(
f64::from_bits(Q[15]),
f_fmla(f64::from_bits(Q[16]), x, f_fmla(c11, x2, c13 * x4)),
);
// multiply h+l by x and add Q[14] (degree 8)
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[14]), p.hi);
p.lo += p0.lo;
p.hi = p0.hi;
// multiply h+l by x and add Q[12]+Q[13] (degree 7)
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[12]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[13]);
p.hi = p0.hi;
// multiply h+l by x and add Q[10]+Q[11] (degree 6)
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[10]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[11]);
p.hi = p0.hi;
// multiply h+l by x and add Q[8]+Q[9] (degree 5)
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[8]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[9]);
p.hi = p0.hi;
// multiply h+l by x and add Q[6]+Q[7] (degree 4)
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[6]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[7]);
p.hi = p0.hi;
// multiply h+l by x and add Q[4]+Q[5] (degree 3)
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[4]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[5]);
p.hi = p0.hi;
// multiply h+l by x and add Q[2]+Q[3] (degree 2)
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[2]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[3]);
p.hi = p0.hi;
// multiply h+l by x and add Q[0]+Q[1] (degree 1)
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[0]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[1]);
p.hi = p0.hi;
// multiply h+l by x
p = DoubleDouble::quick_f64_mult(x, p);
p.to_f64()
}
/// Slow-path evaluation of 2^x - 1, used when the fast path cannot decide
/// the rounding.
///
/// Inputs with |x| <= 0.125 are delegated to `exp2m1_accurate_tiny`; for all
/// other inputs the result of `exp_2(x)` is taken and 1 is subtracted from
/// its high part, folding the rounding error of that subtraction into the
/// low part before collapsing to a single `f64`.
#[cold]
fn exp2m1_accurate(x: f64) -> f64 {
    const ABS_MASK: u64 = 0x7fffffffffffffff;
    const ONE_EIGHTH_BITS: u64 = 0x3fc0000000000000; // bit pattern of 0.125

    let abs_bits = x.to_bits() & ABS_MASK;
    if abs_bits <= ONE_EIGHTH_BITS {
        return exp2m1_accurate_tiny(x);
    }

    let mut pow2 = exp_2(x);
    // hi - 1 as an exact sum; its error term is merged into the low part
    let minus_one: DoubleDouble = DoubleDouble::from_full_exact_add(pow2.hi, -1.0);
    pow2.lo += minus_one.lo;
    pow2.hi = minus_one.hi;
    pow2.to_f64()
}
/* Fast path for |x| <= 0.125: builds a double-double approximation of
   exp2m1(x) and pairs it with an error bound scaled by the high part of
   the result.
   The caller also ensures |x| > 0x1.0527dbd87e24dp-51.
   With xmin=RR("0x1.0527dbd87e24dp-51",16), the routine
   exp2m1_fast_tiny_all(xmin,0.125,2^-65.73) in exp2m1.sage returns
   1.63414352331297e-20 < 2^-65.73, and
   exp2m1_fast_tiny_all(-0.125,-xmin,2^-65.62) returns
   1.76283772822891e-20 < 2^-65.62, which proves the relative
   error is bounded by 2^-65.62. */
#[inline]
fn exp2m1_fast_tiny(x: f64) -> Exp2m1 {
    /* The maximal value of |c4*x^4/exp2m1(x)| over [-0.125,0.125]
    is less than 2^-15.109, where c4 is the degree-4 coefficient,
    thus the coefficients of degree 4 or higher are handled in
    double precision only.
    Layout: P[0]+P[1] (degree 1) and P[2]+P[3] (degree 2) are hi/lo
    pairs, P[4] is degree 3, P[5..=11] are degrees 4..=10. */
    const P: [u64; 12] = [
        0x3fe62e42fefa39ef,
        0x3c7abd1697afcaf8,
        0x3fcebfbdff82c58f,
        0xbc65e5a1d09e1599,
        0x3fac6b08d704a0bf,
        0x3f83b2ab6fba4e78,
        0x3f55d87fe78a84e6,
        0x3f2430912f86a480,
        0x3eeffcbfbc1f2b36,
        0x3eb62c0226c7f6d1,
        0x3e7b539529819e63,
        0x3e3e4d552bed5b9c,
    ];
    let coeff = |idx: usize| f64::from_bits(P[idx]);

    let x_sq = x * x;
    let x_quad = x_sq * x_sq;

    // Plain-double tail (degree 4 and above), evaluated in three groups
    // that are then merged with x^2 and x^4.
    let deg8_group = dd_fmla(coeff(11), x_sq, dd_fmla(coeff(10), x, coeff(9)));
    let deg6_group = dd_fmla(coeff(8), x, coeff(7));
    let deg4_group = dd_fmla(coeff(6), x, coeff(5));
    let tail = dd_fmla(deg8_group, x_quad, dd_fmla(deg6_group, x_sq, deg4_group));

    // degree 3: tail * x + P[4]
    let mut acc = DoubleDouble::from_exact_mult(tail, x);
    let sum3 = DoubleDouble::from_exact_add(coeff(4), acc.hi);
    acc.lo += sum3.lo;
    acc.hi = sum3.hi;

    // degree 2: acc * x + (P[2] + P[3])
    acc = DoubleDouble::quick_f64_mult(x, acc);
    let sum2 = DoubleDouble::from_exact_add(coeff(2), acc.hi);
    acc.lo += sum2.lo + coeff(3);
    acc.hi = sum2.hi;

    // degree 1: acc * x + (P[0] + P[1])
    acc = DoubleDouble::quick_f64_mult(x, acc);
    let sum1 = DoubleDouble::from_exact_add(coeff(0), acc.hi);
    acc.lo += sum1.lo + coeff(1);
    acc.hi = sum1.hi;

    // the polynomial has no constant term: one last multiplication by x
    let approx = DoubleDouble::quick_f64_mult(x, acc);
    let err = f64::from_bits(0x3bd4e00000000000) * approx.hi; // 2^-65.62 < 0x1.4ep-66
    Exp2m1 { exp: approx, err }
}
/// Computes 2^x - 1
///
/// Max found ULP 0.5
///
/// Dispatch overview:
/// - negative NaN propagates, `-Inf` gives `-1`, and `x <= -54` gives
///   `-1 + 2^-54`;
/// - positive NaN/`+Inf` propagate through `x + x`, and `x >= 1024` is
///   evaluated with an overflowing fused multiply-add;
/// - `|x| <= 0x1.0527dbd87e24dp-51` uses a short Taylor expansion, with
///   extra scaling when `|x| <= 2^-104`;
/// - integer `x` in `[-53, 53]` is computed exactly;
/// - everything else goes through `exp2m1_fast`, falling back to
///   `exp2m1_accurate` when the rounding test is inconclusive.
pub fn f_exp2m1(d: f64) -> f64 {
    const ABS_MASK: u64 = 0x7fffffffffffffff;
    const TWO_POW_106_BITS: u64 = 0x4690000000000000;
    const TWO_POW_M106_BITS: u64 = 0x3950000000000000;

    let bits = d.to_bits();
    let abs_bits = bits & ABS_MASK;

    // x = -NaN or x <= -54
    if bits >= 0xc04b000000000000u64 {
        if (bits >> 52) == 0xfff {
            // -NaN propagates through x + x, -Inf maps to -1
            return if bits > 0xfff0000000000000u64 {
                d + d
            } else {
                -1.0
            };
        }
        // for x <= -54, exp2m1(x) rounds to -1 to nearest
        return -1.0 + f64::from_bits(0x3c90000000000000);
    }

    // x = +NaN or x >= 1024
    if abs_bits >= 0x4090000000000000u64 {
        if (bits >> 52) == 0x7ff {
            // exponent field all ones: +NaN or +Inf, both preserved by x + x
            return d + d;
        }
        /* for x >= 1024, exp2m1(x) rounds to +Inf to nearest,
        but for RNDZ/RNDD, we should have no overflow for x=1024 */
        return f_fmla(
            d,
            f64::from_bits(0x7bffffffffffffff),
            f64::from_bits(0x7fefffffffffffff),
        );
    }

    // |x| <= 0x1.0527dbd87e24dp-51
    /* then the second term of the Taylor expansion of 2^x-1 at x=0 is
    smaller in absolute value than 1/2 ulp(first term):
    log(2)*x + log(2)^2*x^2/2 + ... */
    if abs_bits <= 0x3cc0527dbd87e24du64 {
        if abs_bits > 0x3970000000000000u64 {
            // 2^-104 < |x|: plain double-double product plus quadratic term
            const C2: f64 = f64::from_bits(0x3fcebfbdff82c58f); // log(2)^2/2
            let mut first_term = DoubleDouble::from_exact_mult(LN2H, d);
            first_term.lo = dyad_fmla(LN2L, d, first_term.lo);
            /* C2*x^2 is added last, so that in case there is a cancellation
            in LN2L*x+l, it will contribute more bits */
            first_term.lo += C2 * d * d;
            return first_term.to_f64();
        }

        /* |x| <= 2^-104: special code for when log(2)*|x| is very small, in
        which case the lower part of the double-double approximation would
        be "truncated" */
        if d == 0. {
            // special case for 0 (the signed zero is returned unchanged)
            return d;
        }
        // scale x by 2^106
        let scaled = d * f64::from_bits(TWO_POW_106_BITS);
        let prod = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN2L, LN2H), scaled);
        // round to 53-bit precision, then scale back, hoping to avoid
        // double rounding
        let rounded = prod.to_f64() * f64::from_bits(TWO_POW_M106_BITS);
        // subtract rounded * 2^106 from the high part to get the correction
        // term, then add the low part
        let correction = dd_fmla(-rounded, f64::from_bits(TWO_POW_106_BITS), prod.hi) + prod.lo;
        /* add rounded + correction * 2^-106. Warning: when the correction is
        0, the scaled-back product might be exact, thus no underflow will be
        raised. We have underflow for
        0 < x <= 0x1.71547652b82fep-1022 for RNDZ, and for
        0 < x <= 0x1.71547652b82fdp-1022 for RNDN/RNDU. */
        return dyad_fmla(correction, f64::from_bits(TWO_POW_M106_BITS), rounded);
    }

    /* now -54 < x < -0x1.0527dbd87e24dp-51
    or 0x1.0527dbd87e24dp-51 < x < 1024 */
    /* 2^x-1 is exact for x integer, -53 <= x <= 53 */
    if bits.wrapping_shl(17) == 0 {
        let i = d.floor_finite() as i32;
        if d == i as f64 && (-53..=53).contains(&i) {
            return if i >= 0 {
                ((1u64 << i) - 1) as f64
            } else {
                -1.0 + fast_ldexp(1.0, i)
            };
        }
    }

    // Fast path with rounding test: both ends of the error interval must
    // round to the same double, otherwise use the accurate path.
    let fast = exp2m1_fast(d, abs_bits <= 0x3fc0000000000000u64);
    let lower = fast.exp.hi + (fast.exp.lo - fast.err);
    let upper = fast.exp.hi + (fast.exp.lo + fast.err);
    if lower == upper {
        lower
    } else {
        exp2m1_accurate(d)
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_exp2m1() {
assert_eq!(f_exp2m1(5.4172231599824623E-312), 3.75493295981e-312);
assert_eq!(f_exp2m1( 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000017800593653177087), 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012338431302992956);
assert_eq!(3., f_exp2m1(2.0));
assert_eq!(4.656854249492381, f_exp2m1(2.5));
assert_eq!(-0.30801352040368324, f_exp2m1(-0.5311842449009418));
}
}

Some files were not shown because too many files have changed in this diff Show More