Vendor dependencies for 0.3.0 release

This commit is contained in:
2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

485
vendor/pxfm/src/compound/compound_d.rs vendored Normal file
View File

@@ -0,0 +1,485 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{f_fmla, is_integer, is_odd_integer};
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::logs::{log1p_f64_dyadic, log1p_fast_dd};
use crate::pow_exec::{exp_dyadic, pow_exp_dd};
use crate::triple_double::TripleDouble;
/// Computes (1+x)^y
///
/// Strategy: after filtering IEEE 754-2019 `compound` special cases, small
/// integer |y| is handled by exact squaring in double-double precision; the
/// general case evaluates exp(y * log1p(x)) in double-double with a Ziv
/// rounding test, falling back to the 128-bit dyadic path
/// [`compound_accurate`] when the test is inconclusive.
pub fn f_compound(x: f64, y: f64) -> f64 {
    /*
    Rules from IEEE 754-2019 for compound (x, n) with n integer:
    (a) compound (x, 0) is 1 for x >= -1 or quiet NaN
    (b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
    (c) compound (-1, n) is +0 for n > 0
    (d) compound (+/-0, n) is 1
    (e) compound (+Inf, n) is +Inf for n > 0
    (f) compound (+Inf, n) is +0 for n < 0
    (g) compound (x, n) is qNaN and signals the invalid exception for x < -1
    (h) compound (qNaN, n) is qNaN for n <> 0.
    */
    let x_sign = x.is_sign_negative();
    let y_sign = y.is_sign_negative();
    // Bit patterns of |x| and |y| (sign bit cleared).
    let x_abs = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    let y_abs = y.to_bits() & 0x7fff_ffff_ffff_ffff;
    const MANTISSA_MASK: u64 = (1u64 << 52) - 1;
    let y_mant = y.to_bits() & MANTISSA_MASK;
    let x_u = x.to_bits();
    let x_a = x_abs;
    let y_a = y_abs;
    // If x or y is signaling NaN
    if x.is_nan() || y.is_nan() {
        return f64::NAN;
    }
    // Sign applied to the result; set to -1.0 further below when x is negative
    // and y is an odd integer.
    let mut s = 1.0;
    let ax = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    let ay = y.to_bits() & 0x7fff_ffff_ffff_ffff;
    // The double precision number that is closest to 1 is (1 - 2^-53), which has
    // log2(1 - 2^-53) ~ -1.715...p-53.
    // So if |y| > |1075 / log2(1 - 2^-53)|, and x is finite:
    // |y * log2(x)| = 0 or > 1075.
    // Hence, x^y will either overflow or underflow if x is not zero.
    // NOTE(review): `x_u < f64::MIN.to_bits()` is an unsigned compare against
    // the bit pattern of -f64::MAX (0xffef...), so it holds for every
    // non-negative x and every negative x (including all bit patterns below
    // 0xffef...). This branch is therefore entered for almost all inputs;
    // non-special inputs fall out of it at its end — confirm that this filter
    // matches the upstream intent.
    if y_mant == 0
        || y_a > 0x43d7_4910_d52d_3052
        || x_u == 1f64.to_bits()
        || x_u >= f64::INFINITY.to_bits()
        || x_u < f64::MIN.to_bits()
    {
        // Exceptional exponents.
        if y == 0.0 {
            return 1.0;
        }
        // (h) compound(qNaN, n) is qNaN for n ≠ 0
        if x.is_nan() {
            if y != 0. {
                return x;
            } // propagate qNaN
            return 1.0;
        }
        // (d) compound(±0, n) is 1
        if x == 0.0 {
            return 1.0;
        }
        // (e, f) compound(+Inf, n)
        if x.is_infinite() && x > 0.0 {
            return if y > 0. { x } else { 0.0 };
        }
        // (g) compound(x, n) is qNaN and signals invalid for x < -1
        if x < -1.0 {
            // Optional: raise invalid explicitly
            return f64::NAN;
        }
        // (b, c) compound(-1, n)
        if x == -1.0 {
            return if y < 0. { f64::INFINITY } else { 0.0 };
        }
        // Dedicated evaluations for y = ±1/2, ±1, ±2.
        match y_a {
            0x3fe0_0000_0000_0000 => {
                // |y| = 1/2: (1+x)^(±1/2) via a double-double square root.
                // TODO: speed up x^(-1/2) with rsqrt(x) when available.
                if x == 0.0 {
                    return 1.0;
                }
                let z = DoubleDouble::from_full_exact_add(x, 1.0).sqrt();
                return if y_sign {
                    z.recip().to_f64()
                } else {
                    z.to_f64()
                };
            }
            0x3ff0_0000_0000_0000 => {
                // |y| = 1: (1+x) directly, or 1/(1+x) via a 128-bit dyadic
                // reciprocal for correct rounding.
                return if y_sign {
                    const ONES: DyadicFloat128 = DyadicFloat128 {
                        sign: DyadicSign::Pos,
                        exponent: -127,
                        mantissa: 0x80000000_00000000_00000000_00000000_u128,
                    };
                    let z = DyadicFloat128::new_from_f64(x) + ONES;
                    z.reciprocal().fast_as_f64()
                } else {
                    DoubleDouble::from_full_exact_add(x, 1.0).to_f64()
                };
            }
            0x4000_0000_0000_0000 => {
                // |y| = 2: square of the exact double-double (1+x).
                let z0 = DoubleDouble::from_full_exact_add(x, 1.0);
                let z = DoubleDouble::quick_mult(z0, z0);
                return if y_sign {
                    z.recip().to_f64()
                } else {
                    // NOTE(review): (1+x)^2 is never negative for x > -1, yet
                    // copysign forces x's sign onto the result, which would
                    // make e.g. compound(-0.5, 2) negative. Looks like a pow
                    // remnant — verify against upstream.
                    f64::copysign(z.to_f64(), x)
                };
            }
            _ => {}
        }
        // |y| > |1075 / log2(1 - 2^-53)|.
        if y_a >= 0x7ff0_0000_0000_0000 {
            // y is inf or nan
            if y_mant != 0 {
                // y is NaN
                // pow(1, NaN) = 1
                // pow(x, NaN) = NaN
                return if x_u == 1f64.to_bits() { 1.0 } else { y };
            }
            // Now y is +-Inf
            if f64::from_bits(x_abs).is_nan() {
                // pow(NaN, +-Inf) = NaN
                return x;
            }
            if x == 0.0 && y_sign {
                // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
                return f64::INFINITY;
            }
            // pow (|x| < 1, -inf) = +inf
            // pow (|x| < 1, +inf) = 0.0
            // pow (|x| > 1, -inf) = 0.0
            // pow (|x| > 1, +inf) = +inf
            // NOTE(review): this compares |x| with 1 as pow does, but the
            // base here is 1+x, which is < 1 exactly when x < 0; e.g.
            // x = 0.5, y = +Inf lands in the 0.0 arm while (1.5)^+Inf = +Inf.
            // Likely inherited from pow — verify against upstream.
            return if (x_a < 1f64.to_bits()) == y_sign {
                f64::INFINITY
            } else {
                0.0
            };
        }
        // y is finite and non-zero.
        // NOTE(review): x == 0, x == ±Inf and NaN x all returned earlier in
        // this branch, so the next three checks appear unreachable (pow
        // leftovers) — confirm before relying on their return values.
        if x == 0.0 {
            let out_is_neg = x_sign && is_odd_integer(y);
            if y_sign {
                // pow(0, negative number) = inf
                return if out_is_neg {
                    f64::NEG_INFINITY
                } else {
                    f64::INFINITY
                };
            }
            // pow(0, positive number) = 0
            return if out_is_neg { -0.0 } else { 0.0 };
        }
        if x_a == f64::INFINITY.to_bits() {
            let out_is_neg = x_sign && is_odd_integer(y);
            if y_sign {
                return if out_is_neg { -0.0 } else { 0.0 };
            }
            return if out_is_neg {
                f64::NEG_INFINITY
            } else {
                f64::INFINITY
            };
        }
        if x_a > f64::INFINITY.to_bits() {
            // x is NaN.
            // pow (aNaN, 0) is already taken care above.
            return x;
        }
        // x is finite and negative, and y is a finite integer.
        if x_sign {
            if is_integer(y) {
                if is_odd_integer(y) {
                    // sign = -1.0;
                    static CS: [f64; 2] = [1.0, -1.0];
                    // set sign to 1 for y even, to -1 for y odd
                    // Integers with |y| >= 2^53 are necessarily even, and the
                    // i64 cast below would be unsound for them.
                    let y_parity = if (y.abs()) >= f64::from_bits(0x4340000000000000) {
                        0usize
                    } else {
                        (y as i64 & 0x1) as usize
                    };
                    s = CS[y_parity];
                    // NOTE(review): (1+x)^y is positive for every x > -1, so
                    // carrying a negative sign here looks like a pow remnant;
                    // it is later passed to pow_exp_dd / compound_accurate —
                    // verify against upstream.
                }
            } else {
                // pow( negative, non-integer ) = NaN
                // (the in-source tests pin this NaN convention for negative x
                // with non-integer y)
                return f64::NAN;
            }
        }
        // y is finite and non-zero.
        if x_u == 1f64.to_bits() {
            // compound(1, y) = 1
            // NOTE(review): compound(1, y) = (1+1)^y = 2^y; the constant 2.0
            // is exact only for y == 1 (which is what the tests exercise) —
            // e.g. compound(1, 3) would return 2.0 instead of 8.0. Verify
            // against upstream.
            return 2.0;
        }
        // NOTE(review): the three checks below duplicate the ones above and
        // appear unreachable.
        if x == 0.0 {
            let out_is_neg = x_sign && is_odd_integer(y);
            if y_sign {
                // pow(0, negative number) = inf
                return if out_is_neg {
                    f64::NEG_INFINITY
                } else {
                    f64::INFINITY
                };
            }
            // pow(0, positive number) = 0
            return if out_is_neg { -0.0 } else { 0.0 };
        }
        if x_a == f64::INFINITY.to_bits() {
            let out_is_neg = x_sign && is_odd_integer(y);
            if y_sign {
                return if out_is_neg { -0.0 } else { 0.0 };
            }
            return if out_is_neg {
                f64::NEG_INFINITY
            } else {
                f64::INFINITY
            };
        }
        if x_a > f64::INFINITY.to_bits() {
            // x is NaN.
            // pow (aNaN, 0) is already taken care above.
            return x;
        }
        // Biased exponents of the smaller/larger magnitude operand; extreme
        // exponents (near-denormal or near-overflow) go straight to the
        // accurate path.
        let min_abs = f64::min(f64::from_bits(ax), f64::from_bits(ay)).to_bits();
        let max_abs = f64::max(f64::from_bits(ax), f64::from_bits(ay)).to_bits();
        let min_exp = min_abs.wrapping_shr(52);
        let max_exp = max_abs.wrapping_shr(52);
        if max_exp > 0x7ffu64 - 128u64 || min_exp < 128u64 {
            let scale_up = min_exp < 128u64;
            let scale_down = max_exp > 0x7ffu64 - 128u64;
            // At least one input is denormal, multiply both numerator and denominator
            // then will go with hard path
            if scale_up || scale_down {
                return compound_accurate(x, y, s);
            }
        }
    }
    // With hardware FMA the straight integer-power path is safe for both signs
    // of y; without it the reciprocal may overflow, so restrict to positive y.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    let straight_path_precondition: bool = true;
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    let straight_path_precondition: bool = y.is_sign_positive();
    // this is correct only for positive exponent number without FMA,
    // otherwise reciprocal may overflow.
    // y is integer and in [-102;102] and |x|<2^10
    if is_integer(y)
        && y_a <= 0x4059800000000000u64
        && x_a <= 0x4090000000000000u64
        && x_a > 0x3cc0_0000_0000_0000
        && straight_path_precondition
    {
        // (1+x) as an exact double-double. Shadows the sign variable `s`,
        // which this path does not use.
        let mut s = DoubleDouble::from_full_exact_add(1.0, x);
        // SAFETY-ish: y is an integer with |y| <= 102 here, so the unchecked
        // conversion to usize cannot overflow or truncate.
        let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
        // exponentiation by squaring: O(log(y)) complexity
        let mut acc = if iter_count % 2 != 0 {
            s
        } else {
            DoubleDouble::new(0., 1.)
        };
        while {
            iter_count >>= 1;
            iter_count
        } != 0
        {
            s = DoubleDouble::mult(s, s);
            if iter_count % 2 != 0 {
                acc = DoubleDouble::mult(acc, s);
            }
        }
        let dz = if y.is_sign_negative() {
            acc.recip()
        } else {
            acc
        };
        // Ziv rounding test with a 2^-59 relative error bound: if both error
        // endpoints round to the same double, that double is correct.
        let ub = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), -dz.hi, dz.lo); // 2^-59
        let lb = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), dz.hi, dz.lo); // 2^-59
        if ub == lb {
            return dz.to_f64();
        }
        // Inconclusive: redo the integer power in triple-double precision.
        return mul_fixed_power_hard(x, y);
    }
    // General path: (1+x)^y = exp(y * log1p(x)) in double-double.
    let l = log1p_fast_dd(x);
    // Biased exponent of y; extreme |y| goes to the 128-bit dyadic path.
    let ey = ((y.to_bits() >> 52) & 0x7ff) as i32;
    if ey < 0x36 || ey >= 0x7f5 {
        return compound_accurate(x, y, s);
    }
    let r = DoubleDouble::quick_mult_f64(l, y);
    let res = pow_exp_dd(r, s);
    // Rounding test with a 2^-64 bound; fall back to the accurate path when
    // inconclusive.
    let res_min = res.hi + f_fmla(f64::from_bits(0x3bf0000000000000), -res.hi, res.lo);
    let res_max = res.hi + f_fmla(f64::from_bits(0x3bf0000000000000), res.hi, res.lo);
    if res_min == res_max {
        return res_max;
    }
    compound_accurate(x, y, s)
}
#[cold]
fn compound_accurate(x: f64, y: f64, s: f64) -> f64 {
    // Accurate fallback: (1+x)^y = exp(y * log1p(x)), evaluated entirely in
    // 128-bit dyadic arithmetic. `s` is the sign (+1.0 / -1.0) chosen by the
    // caller.
    let product = log1p_f64_dyadic(x) * DyadicFloat128::new_from_f64(y);
    let mut z = exp_dyadic(product);
    // 2^z.exponent <= result < 2^(z.exponent + 1)
    if z.exponent < -1075 {
        /* Result below 2^-1075: underflow. Halving the smallest subnormal
        yields a correctly signed zero. Returning the larger rounding
        candidate here is an idea due to Laurent Théry: on underflow the
        upper bound is always +0. */
        return 0.5 * (s * f64::from_bits(0x0000000000000001));
    }
    if z.exponent >= 1025 {
        // NOTE(review): overflow branch returns 1.0 — preserved verbatim,
        // though an overflowing result would ordinarily be huge; confirm
        // against upstream.
        return 1.0;
    }
    // Stamp the caller-selected sign onto the dyadic result before rounding.
    z.sign = if s == -1.0 {
        DyadicSign::Neg
    } else {
        DyadicSign::Pos
    };
    z.fast_as_f64()
}
#[cold]
#[inline(never)]
fn mul_fixed_power_hard(x: f64, y: f64) -> f64 {
    // Recompute (1+x)^y for integer y in triple-double precision; used when
    // the double-double rounding test in the caller was inconclusive.
    let mut base = TripleDouble::from_full_exact_add(1.0, x);
    // SAFETY: callers only reach this path for integer y of small magnitude
    // (|y| <= 102 at the call site), so the conversion cannot misbehave.
    let mut exp = unsafe { y.abs().to_int_unchecked::<usize>() };
    // Square-and-multiply accumulator: seeded with the base when the low
    // exponent bit is set, with 1 otherwise. O(log(y)) iterations.
    let mut product = if exp & 1 == 1 {
        base
    } else {
        TripleDouble::new(0., 0., 1.)
    };
    loop {
        exp >>= 1;
        if exp == 0 {
            break;
        }
        base = TripleDouble::quick_mult(base, base);
        if exp & 1 == 1 {
            product = TripleDouble::quick_mult(product, base);
        }
    }
    // A negative exponent inverts the accumulated power.
    if y.is_sign_negative() {
        product.recip().to_f64()
    } else {
        product.to_f64()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Regression tests: the expected values pin the current behavior of this
    // implementation (including its special-case conventions), they are not
    // independently derived references.
    #[test]
    fn test_compound() {
        assert_eq!(f_compound(4831835136., -13.),0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012780345669344118 );
        assert_eq!(
            f_compound(11468322278342656., 2.9995136260713475),
            1481455956234813000000000000000000000000000000000.
        );
        assert_eq!(f_compound(0.9999999999999999, 3.), 7.999999999999999);
        assert_eq!(
            f_compound(1.0039215087890625, 10.000000000349134),
            1044.2562119607103
        );
        assert_eq!(f_compound(10., 18.0), 5559917313492231000.0);
        assert_eq!(
            f_compound(131071.65137729312, 2.000001423060894),
            17180328027.532265
        );
        assert_eq!(f_compound(2., 5.), 243.);
        assert_eq!(f_compound(126.4324324, 126.4324324), 1.4985383310514043e266);
        assert_eq!(f_compound(0.4324324, 126.4324324), 5.40545942023447e19);
        // Negative x with non-integer y is NaN by this library's convention.
        assert!(f_compound(-0.4324324, 126.4324324).is_nan());
        assert_eq!(f_compound(0.0, 0.0), 1.0);
        assert_eq!(f_compound(0.0, -1. / 2.), 1.0);
        assert_eq!(f_compound(-1., -1. / 2.), f64::INFINITY);
        assert_eq!(f_compound(f64::INFINITY, -1. / 2.), 0.0);
        assert_eq!(f_compound(f64::INFINITY, 1. / 2.), f64::INFINITY);
        assert_eq!(f_compound(46.3828125, 46.3828125), 5.248159634773675e77);
    }

    // Edge cases around x ≈ 1 and very large/small arguments.
    #[test]
    fn test_compound_exotic_cases() {
        assert_eq!(f_compound(0.9999999850987819, -1.), 0.5000000037253046);
        assert_eq!(
            f_compound(22427285907987670000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
            -1.),
            0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004458854290718438
        );
        assert_eq!(f_compound(0.786438105629145, 607.999512419221),
        1616461095392737200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
        assert_eq!(f_compound( 1.0000002381857613, 960.8218657970428),
        17228671476562465000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
        assert_eq!(f_compound(1., 1.0000000000000284), 2.);
        assert_eq!(f_compound(1., f64::INFINITY), f64::INFINITY);
        assert_eq!(
            f_compound(10.000000000000007, -8.),
            0.00000000466507380209731
        );
    }
}

573
vendor/pxfm/src/compound/compound_m1.rs vendored Normal file
View File

@@ -0,0 +1,573 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::*;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::logs::log1p_fast_dd;
use crate::pow_exec::pow_expm1_1;
/// Computes (1+x)^y - 1
///
/// max found ULP 0.56
///
/// Mirrors [`f_compound`]'s structure: IEEE special-case filtering, a
/// double-double square-and-multiply path for small integer |y|, and a
/// double-double exp(y*log1p(x)) - 1 path (`pow_expm1_1`) for the rest.
pub fn f_compound_m1(x: f64, y: f64) -> f64 {
    /*
    Rules from IEEE 754-2019 for compound (x, n) with n integer:
    (a) compound (x, 0) is 1 for x >= -1 or quiet NaN
    (b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
    (c) compound (-1, n) is +0 for n > 0
    (d) compound (+/-0, n) is 1
    (e) compound (+Inf, n) is +Inf for n > 0
    (f) compound (+Inf, n) is +0 for n < 0
    (g) compound (x, n) is qNaN and signals the invalid exception for x < -1
    (h) compound (qNaN, n) is qNaN for n <> 0.
    (each rule shifted by -1 here, since this function returns compound(x,y) - 1)
    */
    let x_sign = x.is_sign_negative();
    let y_sign = y.is_sign_negative();
    // Bit patterns of |x| and |y| (sign bit cleared).
    let x_abs = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    let y_abs = y.to_bits() & 0x7fff_ffff_ffff_ffff;
    const MANTISSA_MASK: u64 = (1u64 << 52) - 1;
    let y_mant = y.to_bits() & MANTISSA_MASK;
    let x_u = x.to_bits();
    let x_a = x_abs;
    let y_a = y_abs;
    // If x or y is signaling NaN
    if x.is_nan() || y.is_nan() {
        return f64::NAN;
    }
    // Sign passed to pow_expm1_1; set to -1.0 below for negative x with odd
    // integer y.
    let mut s = 1.0;
    // The double precision number that is closest to 1 is (1 - 2^-53), which has
    // log2(1 - 2^-53) ~ -1.715...p-53.
    // So if |y| > |1075 / log2(1 - 2^-53)|, and x is finite:
    // |y * log2(x)| = 0 or > 1075.
    // Hence, x^y will either overflow or underflow if x is not zero.
    // NOTE(review): as in f_compound, `x_u < f64::MIN.to_bits()` is an
    // unsigned bit compare that holds for almost every x, so this branch is
    // taken for nearly all inputs and non-special cases fall through at its
    // end — confirm the intent.
    if y_mant == 0
        || y_a > 0x43d7_4910_d52d_3052
        || x_u == 1f64.to_bits()
        || x_u >= f64::INFINITY.to_bits()
        || x_u < f64::MIN.to_bits()
    {
        // Exceptional exponents.
        if y == 0.0 {
            return 0.0;
        }
        // (h) compound(qNaN, n) is qNaN for n ≠ 0
        if x.is_nan() {
            if y != 0. {
                return x;
            } // propagate qNaN
            return 0.0;
        }
        // (d) compound(±0, n) is 1, so compound_m1(±0, n) is 0
        if x == 0.0 {
            return 0.0;
        }
        // (e, f) compound(+Inf, n)
        if x.is_infinite() && x > 0.0 {
            return if y > 0. { x } else { -1.0 };
        }
        // (g) compound(x, n) is qNaN and signals invalid for x < -1
        if x < -1.0 {
            // Optional: raise invalid explicitly
            return f64::NAN;
        }
        // (b, c) compound(-1, n)
        if x == -1.0 {
            return if y < 0. { f64::INFINITY } else { -1.0 };
        }
        // Dedicated evaluations for y = ±1, ±2 (the ±1/2 arm is disabled).
        match y_a {
            // 0x3fe0_0000_0000_0000 => {
            //     if x == 0.0 {
            //         return 0.0;
            //     }
            //     let z = Dekker::from_full_exact_add(x, 1.0).sqrt();
            //     if y_sign {
            //         const M_ONES: DyadicFloat128 = DyadicFloat128 {
            //             sign: DyadicSign::Neg,
            //             exponent: -127,
            //             mantissa: 0x80000000_00000000_00000000_00000000_u128,
            //         };
            //         let z = DyadicFloat128::new_from_f64(z.to_f64());
            //         (z.reciprocal() + M_ONES).fast_as_f64()
            //     } else {
            //         const M_ONES: DyadicFloat128 = DyadicFloat128 {
            //             sign: DyadicSign::Neg,
            //             exponent: -127,
            //             mantissa: 0x80000000_00000000_00000000_00000000_u128,
            //         };
            //         let z = DyadicFloat128::new_from_f64(z.to_f64());
            //         (z + M_ONES).fast_as_f64()
            //     };
            // }
            0x3ff0_0000_0000_0000 => {
                // |y| = 1: result is x itself, or 1/(1+x) - 1 computed in
                // 128-bit dyadic arithmetic for correct rounding.
                return if y_sign {
                    let z = DyadicFloat128::new_from_f64(x);
                    const ONES: DyadicFloat128 = DyadicFloat128 {
                        sign: DyadicSign::Pos,
                        exponent: -127,
                        mantissa: 0x80000000_00000000_00000000_00000000_u128,
                    };
                    const M_ONES: DyadicFloat128 = DyadicFloat128 {
                        sign: DyadicSign::Neg,
                        exponent: -127,
                        mantissa: 0x80000000_00000000_00000000_00000000_u128,
                    };
                    let p = (z + ONES).reciprocal() + M_ONES;
                    p.fast_as_f64()
                } else {
                    x
                };
            }
            0x4000_0000_0000_0000 => {
                // |y| = 2: (1+x)^2 - 1 (or its reciprocal variant) in 128-bit
                // dyadic arithmetic.
                const ONES: DyadicFloat128 = DyadicFloat128 {
                    sign: DyadicSign::Pos,
                    exponent: -127,
                    mantissa: 0x80000000_00000000_00000000_00000000_u128,
                };
                let z0 = DyadicFloat128::new_from_f64(x) + ONES;
                let z = z0 * z0;
                const M_ONES: DyadicFloat128 = DyadicFloat128 {
                    sign: DyadicSign::Neg,
                    exponent: -127,
                    mantissa: 0x80000000_00000000_00000000_00000000_u128,
                };
                return if y_sign {
                    (z.reciprocal() + M_ONES).fast_as_f64()
                } else {
                    // For -1 < x < 0 the result (1+x)^2 - 1 is genuinely
                    // negative, matching x's sign.
                    f64::copysign((z + M_ONES).fast_as_f64(), x)
                };
            }
            _ => {}
        }
        // |y| > |1075 / log2(1 - 2^-53)|.
        if y_a >= 0x7ff0_0000_0000_0000 {
            // y is inf or nan
            if y_mant != 0 {
                // y is NaN
                // pow(1, NaN) = 1
                // pow(x, NaN) = NaN
                // NOTE(review): pow-style shortcut; for compound_m1 the base
                // at x == 1 is 2, so returning 1.0 here looks inherited from
                // pow — verify against upstream.
                return if x_u == 1f64.to_bits() { 1.0 } else { y };
            }
            // Now y is +-Inf
            if f64::from_bits(x_abs).is_nan() {
                // pow(NaN, +-Inf) = NaN
                return x;
            }
            if x_a == 0x3ff0_0000_0000_0000 {
                // pow(+-1, +-Inf) = 1.0
                // NOTE(review): x == -1 already returned above, so this is
                // x == 1, i.e. base 2; 2^±Inf - 1 is +Inf or -1, not 0 —
                // looks like a pow remnant, verify against upstream.
                return 0.0;
            }
            if x == 0.0 && y_sign {
                // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
                return f64::INFINITY;
            }
            // pow (|x| < 1, -inf) = +inf
            // pow (|x| < 1, +inf) = 0.0
            // pow (|x| > 1, -inf) = 0.0
            // pow (|x| > 1, +inf) = +inf
            // NOTE(review): compares |x| with 1 as pow does, but the base is
            // 1+x, which is < 1 exactly when x < 0 (compare the f32 special
            // path, which tests x < 0 / x > 0) — verify against upstream.
            return if (x_a < 1f64.to_bits()) == y_sign {
                f64::INFINITY
            } else {
                -1.0
            };
        }
        // y is finite and non-zero.
        if x_u == 1f64.to_bits() {
            // pow(1, y) = 1
            // NOTE(review): compound_m1(1, y) = 2^y - 1; returning 0.0 for
            // every y reaching here looks like a pow remnant — verify.
            return 0.0;
        }
        // NOTE(review): x == 0, x == ±Inf and NaN x all returned earlier in
        // this branch; the next three checks appear unreachable.
        if x == 0.0 {
            let out_is_neg = x_sign && is_odd_integer(y);
            if y_sign {
                // pow(0, negative number) = inf
                return if out_is_neg {
                    f64::NEG_INFINITY
                } else {
                    f64::INFINITY
                };
            }
            // pow(0, positive number) = 0
            return -1.0;
        }
        if x_a == f64::INFINITY.to_bits() {
            let out_is_neg = x_sign && is_odd_integer(y);
            if y_sign {
                return if out_is_neg { -1.0 } else { 1.0 };
            }
            return if out_is_neg {
                f64::NEG_INFINITY
            } else {
                f64::INFINITY
            };
        }
        if x_a > f64::INFINITY.to_bits() {
            // x is NaN.
            // pow (aNaN, 0) is already taken care above.
            return x;
        }
        // x is finite and negative, and y is a finite integer.
        if x_sign {
            if is_integer(y) {
                if is_odd_integer(y) {
                    // sign = -1.0;
                    static CS: [f64; 2] = [1.0, -1.0];
                    // set sign to 1 for y even, to -1 for y odd
                    // Integers with |y| >= 2^53 are necessarily even, and the
                    // i64 cast below would be unsound for them.
                    let y_parity = if (y.abs()) >= f64::from_bits(0x4340000000000000) {
                        0usize
                    } else {
                        (y as i64 & 0x1) as usize
                    };
                    s = CS[y_parity];
                    // NOTE(review): (1+x)^y is positive for every x > -1, so
                    // a negative sign here looks like a pow remnant — verify.
                }
            } else {
                // pow( negative, non-integer ) = NaN
                // (the in-source tests pin this NaN convention)
                return f64::NAN;
            }
        }
        // NOTE(review): the four checks below duplicate earlier ones in this
        // branch and appear unreachable.
        // y is finite and non-zero.
        if x_u == 1f64.to_bits() {
            // pow(1, y) = 1
            return 0.0;
        }
        if x == 0.0 {
            let out_is_neg = x_sign && is_odd_integer(y);
            if y_sign {
                // pow(0, negative number) = inf
                return if out_is_neg {
                    f64::NEG_INFINITY
                } else {
                    f64::INFINITY
                };
            }
            // pow(0, positive number) = 0
            return if out_is_neg { -0.0 } else { 0.0 };
        }
        if x_a == f64::INFINITY.to_bits() {
            let out_is_neg = x_sign && is_odd_integer(y);
            if y_sign {
                return -1.;
            }
            return if out_is_neg {
                f64::NEG_INFINITY
            } else {
                f64::INFINITY
            };
        }
        if x_a > f64::INFINITY.to_bits() {
            // x is NaN.
            // pow (aNaN, 0) is already taken care above.
            return x;
        }
    }
    // evaluate (1+x)^y explicitly for integer y in [-1024,1024] range and |x|<2^64
    // NOTE(review): the masks below actually restrict to |y| <= 102 and
    // |x| <= 2^10, matching f_compound; the comment above looks stale.
    // With hardware FMA the path is safe for both signs of y; without it the
    // reciprocal may overflow, so restrict to positive y.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    let straight_path_precondition: bool = true;
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    let straight_path_precondition: bool = y.is_sign_positive();
    // this is correct only for positive exponent number without FMA,
    // otherwise reciprocal may overflow.
    if is_integer(y)
        && y_a <= 0x4059800000000000u64
        && x_a <= 0x4090000000000000u64
        && x_a > 0x3cc0_0000_0000_0000
        && straight_path_precondition
    {
        // (1+x) as an exact double-double; shadows the sign variable `s`,
        // which this path does not use.
        let mut s = DoubleDouble::from_full_exact_add(1.0, x);
        // SAFETY-ish: y is an integer with |y| <= 102 here, so the unchecked
        // conversion to usize cannot overflow or truncate.
        let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
        // exponentiation by squaring: O(log(y)) complexity
        let mut acc = if iter_count % 2 != 0 {
            s
        } else {
            DoubleDouble::new(0., 1.)
        };
        while {
            iter_count >>= 1;
            iter_count
        } != 0
        {
            s = DoubleDouble::mult(s, s);
            if iter_count % 2 != 0 {
                acc = DoubleDouble::mult(acc, s);
            }
        }
        let mut dz = if y.is_sign_negative() {
            acc.recip()
        } else {
            acc
        };
        // Subtract 1 in double-double to get the "-1" part of compound_m1.
        dz = DoubleDouble::full_add_f64(dz, -1.);
        // Ziv rounding test with a 2^-59 error bound.
        let ub = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), -dz.hi, dz.lo); // 2^-59
        let lb = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), dz.hi, dz.lo); // 2^-59
        if ub == lb {
            return dz.to_f64();
        }
        // Inconclusive: redo the integer power in 128-bit dyadic precision.
        return mul_fixed_power_hard(x, y);
    }
    // approximate log1p(x)
    let l = log1p_fast_dd(x);
    // Biased exponent of y.
    let ey = ((y.to_bits() >> 52) & 0x7ff) as i32;
    if ey < 0x36 || ey >= 0x7f5 {
        // NOTE(review): unlike f_compound (which routes these exponents to
        // the accurate path), this returns 0 for both tiny and huge |y|.
        // For tiny |y| the result is ~y*log1p(x), so 0 is only approximately
        // right; for huge |y| the true result can be -1 or +Inf, yet the
        // in-source tests pin 0.0 here — verify against upstream.
        return 0.;
    }
    let r = DoubleDouble::quick_mult_f64(l, y);
    let res = pow_expm1_1(r, s);
    res.to_f64()
}
#[cold]
#[inline(never)]
fn mul_fixed_power_hard(x: f64, y: f64) -> f64 {
    // Recompute (1+x)^y - 1 for integer y in 128-bit dyadic precision; used
    // when the double-double rounding test in the caller was inconclusive.
    // Dyadic constants for +1 and -1 (the top mantissa bit at exponent -127).
    const ONE: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -127,
        mantissa: 0x80000000_00000000_00000000_00000000_u128,
    };
    const M_ONE: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Neg,
        exponent: -127,
        mantissa: 0x80000000_00000000_00000000_00000000_u128,
    };
    // Base 1 + x in dyadic precision.
    let mut base = DyadicFloat128::new_from_f64(x) + ONE;
    // SAFETY: callers only reach this path for integer y of small magnitude,
    // so the conversion to usize cannot overflow or truncate.
    let mut exp = unsafe { y.abs().to_int_unchecked::<usize>() };
    // Square-and-multiply accumulator: seeded with the base when the low
    // exponent bit is set, with 1 otherwise. O(log(y)) iterations.
    let mut product = if exp & 1 == 1 { base } else { ONE };
    loop {
        exp >>= 1;
        if exp == 0 {
            break;
        }
        base = base * base;
        if exp & 1 == 1 {
            product = product * base;
        }
    }
    // Negative y inverts the power; the trailing "-1" of compound_m1 is
    // applied in dyadic precision before the final rounding.
    if y.is_sign_negative() {
        (product.reciprocal() + M_ONE).fast_as_f64()
    } else {
        (product + M_ONE).fast_as_f64()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Regression tests: expected values pin the current behavior of this
    // implementation (including its shortcut conventions), they are not
    // independently derived references.
    #[test]
    fn test_compound_exotic() {
        assert_eq!(
            f_compound_m1(0.000152587890625, -8.484374999999998),
            -0.0012936766014690006
        );
        assert_eq!(
            f_compound_m1(
                0.00000000000000799360578102344,
                -0.000000000000000000000001654361225106131
            ),
            -0.000000000000000000000000000000000000013224311452909338
        );
        assert_eq!(
            f_compound_m1( 4.517647064592699, 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000055329046628180653),
            0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009449932890153435
        );
        assert_eq!(f_compound_m1(
            11944758478933760000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
            -1242262631503757300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
        ), -1.);
    }

    #[test]
    fn test_compound_m1() {
        assert_eq!(
            f_compound_m1(0.0000000000000009991998751296936, -4.),
            -0.000000000000003996799500518764
        );
        assert_eq!(f_compound_m1(-0.003173828125, 25.), -0.0763960132649781);
        assert_eq!(f_compound_m1(3., 2.8927001953125), 54.154259038961406);
        assert_eq!(
            f_compound_m1(-0.43750000000000044, 19.),
            -0.9999821216263793
        );
        assert_eq!(
            f_compound_m1(127712., -2.0000000000143525),
            -0.9999999999386903
        );
        assert_eq!(
            f_compound_m1(-0.11718749767214207, 2893226081485815000000000000000.),
            -1.
        );
        assert_eq!(
            f_compound_m1(2418441935074801400000000., 512.),
            f64::INFINITY
        );
        assert_eq!(
            f_compound_m1(32.50198364245834, 128000.00000000093),
            f64::INFINITY
        );
        assert_eq!(
            f_compound_m1(1.584716796877785, 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004168916810703412),
            0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003958869879428553
        );
        assert_eq!(
            f_compound_m1(
                -0.000000000000000000000000000000001997076793037533,
                366577337071337140000000000000000f64
            ),
            -0.5190938261758579
        );
        // Note: "00.7628..." is a valid float literal with a redundant
        // leading zero, kept verbatim.
        assert_eq!(f_compound_m1(2.1075630259863374, 0.5), 00.7628281328553664);
        assert_eq!(f_compound_m1(2.1078916412661783, 0.5), 0.7629213372315222);
        assert_eq!(f_compound_m1(3.0000000000001115, -0.5), -0.500000000000007);
        assert_eq!(
            f_compound_m1(0.0004873839215895903, 3.),
            0.0014628645098045245
        );
        assert_eq!(f_compound_m1(-0.483765364602732, 3.), -0.862424399516842);
        assert_eq!(f_compound_m1(3.0000001192092896, -2.), -0.9375000037252902);
        assert_eq!(f_compound_m1(29.38323424607434, -1.), -0.9670871115332561);
        assert_eq!(f_compound_m1(-0.4375, 4.), -0.8998870849609375);
        assert_eq!(
            f_compound_m1(-0.0039033182037826464, 3.),
            -0.011664306402886494
        );
        assert_eq!(
            f_compound_m1(0.000000000000000000000000000000000000007715336350455947,
            -262034087537726030000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
            -1.,
        );
        assert_eq!(f_compound_m1(10.000000059604645, 10.), 25937426005.44638);
        assert_eq!(f_compound_m1(10., -308.25471555814863), -1.0);
        assert_eq!(
            f_compound_m1(5.4172231599824623E-312, 9.4591068440831498E+164),
            5.124209266851586e-147
        );
        // These pin the `return 0.` shortcut for extreme exponents of y.
        assert_eq!(
            f_compound_m1(5.8776567263633397E-39, 3.4223548116804511E-310),
            0.0
        );
        assert_eq!(
            f_compound_m1(5.8639503496997932E-148, -7.1936801558778956E+305),
            0.0
        );
        assert_eq!(
            f_compound_m1(0.9908447265624999,
            -19032028850336152000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
            -1.
        );
        assert_eq!(
            f_compound_m1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006952247559980936,
            5069789834563405000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
            3.524643400695958e-163
        );
        assert_eq!(
            f_compound_m1(1.000000000000341,
            -69261261804788370000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
            -1.
        );
        assert_eq!(
            f_compound_m1(
                0.0000000000000001053438024827798,
                0.0000000000000001053438024827798
            ),
            0.000000000000000000000000000000011097316721530923
        );
        assert_eq!(
            f_compound_m1(
                0.00000000000000010755285551056508,
                0.00000000000000010755285551056508
            ),
            0.00000000000000000000000000000001156761672847649
        );
        assert_eq!(f_compound_m1(2.4324324, 1.4324324), 4.850778380908823);
        assert_eq!(f_compound_m1(2., 5.), 242.);
        assert_eq!(f_compound_m1(0.4324324, 126.4324324), 5.40545942023447e19);
        // Negative x with non-integer y is NaN by this library's convention.
        assert!(f_compound_m1(-0.4324324, 126.4324324).is_nan());
        assert_eq!(f_compound_m1(0.0, 0.0), 0.0);
        assert_eq!(f_compound_m1(0.0, -1. / 2.), 0.0);
        assert_eq!(f_compound_m1(-1., -1. / 2.), f64::INFINITY);
        assert_eq!(f_compound_m1(f64::INFINITY, -1. / 2.), -1.0);
        assert_eq!(f_compound_m1(f64::INFINITY, 1. / 2.), f64::INFINITY);
        assert_eq!(f_compound_m1(46.3828125, 46.3828125), 5.248159634773675e77);
    }
}

438
vendor/pxfm/src/compound/compound_m1f.rs vendored Normal file
View File

@@ -0,0 +1,438 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::*;
use crate::compound::compoundf::{
COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, compoundf_exp2_poly2, compoundf_log2p1_accurate,
compoundf_log2p1_fast,
};
use crate::double_double::DoubleDouble;
use crate::exponents::exp2m1_accurate_tiny;
use crate::round_ties_even::RoundTiesEven;
use std::hint::black_box;
// INVLOG2 = 1/log(2) * (1 + eps1) with |eps1| < 2^-55.976
const INVLOG2: f64 = f64::from_bits(0x3ff71547652b82fe);
/// Cold path for `f_compound_m1f`: handles x or y in {±0, ±Inf, NaN} and x <= -1.
/// Rule letters in the comments refer to the IEEE 754-2019 compound() rules
/// listed in `f_compound_m1f`, with 1 subtracted from each result.
#[cold]
#[inline(never)]
fn as_compoundm1f_special(x: f32, y: f32) -> f32 {
    let nx = x.to_bits();
    let ny = y.to_bits();
    // Dropping the sign bit via <<1 gives an |value|-ordering of the encodings:
    // ax == 0 iff x == ±0; ax >= 0xff<<24 iff x is Inf or NaN.
    let ax: u32 = nx.wrapping_shl(1);
    let ay: u32 = ny.wrapping_shl(1);
    if ax == 0 || ay == 0 {
        // x or y is 0
        if ax == 0 {
            // compound(0,y) = 1 except for y = sNaN
            // (x + y quiets and propagates the NaN)
            return if y.is_nan() { x + y } else { 0.0 };
        }
        if ay == 0 {
            // compound (x, 0)
            if x.is_nan() {
                return x + y;
            } // x = sNaN
            return if x < -1.0 {
                f32::NAN // rule (g)
            } else {
                0.0
            }; // rule (a)
        }
    }
    // Bit pattern of -1.0; negative encodings compare above it (as u32) iff x < -1.
    let mone = (-1.0f32).to_bits();
    if ay >= 0xffu32 << 24 {
        // y=Inf/NaN
        // the case x=0 was already checked above
        if ax > 0xffu32 << 24 {
            return x + y;
        } // x=NaN
        if ay == 0xffu32 << 24 {
            // y = +/-Inf
            if nx > mone {
                return f32::NAN;
            } // x < -1: rule (g)
            let sy = ny >> 31; // sign bit of y
            if nx == mone {
                // x == -1 exactly
                return if sy == 0 {
                    -1. // Rule (c)
                } else {
                    f32::INFINITY // Rule (b)
                };
            }
            // -1 < x < 0: 0 < 1+x < 1, so (1+x)^(+Inf) = 0 and (1+x)^(-Inf) = +Inf
            if x < 0.0 {
                return if sy == 0 { -1. } else { f32::INFINITY };
            }
            // x > 0: 1+x > 1, the limits are swapped
            if x > 0.0 {
                return if sy != 0 { -1. } else { f32::INFINITY };
            }
            return 0.0;
        }
        return x + y; // case y=NaN
    }
    if nx >= mone || nx >= 0xffu32 << 23 {
        // x is Inf, NaN or <= -1
        if ax == 0xffu32 << 24 {
            // x is +Inf or -Inf
            if (nx >> 31) != 0 {
                return f32::NAN;
            } // x = -Inf, rule (g)
            // (1 + Inf)^y = +Inf for y > 0, +0 for y < 0
            return (if (ny >> 31) != 0 { 1.0 / x } else { x }) - 1.;
        }
        if ax > 0xffu32 << 24 {
            return x + y;
        } // x is NaN
        if nx > mone {
            return f32::NAN; // x < -1.0: rule (g)
        }
        // now x = -1
        return if (ny >> 31) != 0 {
            // y < 0
            f32::INFINITY
        } else {
            // y > 0
            -1.0
        };
    }
    // Fallback; not expected to be reached for the argument classes the
    // caller dispatches here.
    -1.
}
/* For |z| <= 2^-6, returns an approximation of 2^z - 1 scaled form used by
the fast paths; absolute error < 2^-43.540. */
#[inline]
pub(crate) fn compoundf_expf_poly(z: f64) -> f64 {
    /* Degree-4 polynomial generated by Sollya (cf compoundf_expf.sollya)
    with absolute error < 2^-43.549; coefficients stored as f64 bit patterns. */
    const Q: [u64; 5] = [
        0x3fe62e42fefa39ef,
        0x3fcebfbdff8098eb,
        0x3fac6b08d7045dc3,
        0x3f83b2b276ce985d,
        0x3f55d8849c67ace4,
    ];
    let q = |idx: usize| f64::from_bits(Q[idx]);
    // Estrin-style split: evaluate the low pair and the high triple
    // independently, then combine them with z^2. The fma pairing is the
    // same as the original, so the floating-point result is identical.
    let low_pair = dd_fmla(q(1), z, q(0));
    let high_top = dd_fmla(q(4), z, q(3));
    let high = dd_fmla(high_top, z, q(2));
    let z_sq = z * z;
    z * dd_fmla(high, z_sq, low_pair)
}
/* Fast path for 2^t - 1: returns a value whose f32 rounding equals the
correct rounding of (1+x)^y - 1, or the sentinel -1.0 when the rounding
test fails and the caller must take the accurate path.
t is an approximation of y*log2(1+x) with absolute error < 2^-40.680,
assuming 0x1.7154759a0df53p-24 <= |t| <= 150. */
fn exp2m1_fast(t: f64) -> f64 {
    let k = t.round_ties_even_finite(); // 0 <= |k| <= 150
    let mut r = t - k; // |r| <= 1/2, exact
    // Bias r into the binade [2,4) so the table index can be read from the
    // top bits of the encoding.
    let mut v: u64 = (3.015625 + r).to_bits(); // 2.5 <= v <= 3.5015625
    // we add 2^-6 so that i is rounded to nearest
    let i: i32 = (v >> 46) as i32 - 0x10010; // 0 <= i <= 32
    r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
    // now |r| <= 2^-6
    // 2^t = 2^k * exp2_U[i][0] * 2^r
    let mut s = f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1);
    // SAFETY: k is integer-valued with |k| <= 150, so it fits in i64; the
    // biased add/shift builds the IEEE-754 encoding of 2^k directly.
    let su = unsafe { ((k.to_int_unchecked::<i64>() as u64).wrapping_add(0x3ffu64)) << 52 }; // k is already integer
    s *= f64::from_bits(su);
    let q_poly = compoundf_expf_poly(r);
    v = q_poly.to_bits();
    /* the absolute error on exp2_U[i][0] is bounded by 2^-53.092, with
    exp2_U[i][0] < 2^0.5, and that on q1(r) is bounded by 2^-43.540,
    with |q1(r)| < 1.011, thus |v| < 1.43, and the absolute error on v is
    bounded by ulp(v) + 2^0.5 * 2^-43.540 + 2^-53.092 * 1.011 < 2^-43.035.
    Now t approximates u := y*log2(1+x) with |t-u| < 2^-40.680 thus
    2^u = 2^t * (1 + eps) with eps < 2^(2^-40.680)-1 < 2^-41.208.
    The total absolute error is thus bounded by 2^-43.035 + 2^-41.208
    < 2^-40.849. */
    let mut err: u64 = 0x3d61d00000000000; // 2^-40.849 < 0x1.1dp-41
    // With hardware FMA: v = v*s + (s - 1) in one fused step; otherwise the
    // same value is emulated below with double-double arithmetic.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        v = f_fmla(f64::from_bits(v), s, s - 1f64).to_bits();
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let p0 = DoubleDouble::from_full_exact_add(s, -1.);
        let z = DoubleDouble::from_exact_mult(f64::from_bits(v), s);
        v = DoubleDouble::add(z, p0).to_f64().to_bits();
    }
    // in case of potential underflow, we defer to the accurate path
    if f64::from_bits(v) < f64::from_bits(0x3d61d00000000000) {
        return -1.0;
    }
    // SAFETY: |k| <= 150, in range for the unchecked conversion.
    err = unsafe { err.wrapping_add((k.to_int_unchecked::<i64>() << 52) as u64) }; // scale the error by 2^k too
    // Rounding test: if v-err and v+err round to the same f32, the result is
    // correctly rounded; otherwise signal the caller with the -1.0 sentinel.
    let lb = (f64::from_bits(v) - f64::from_bits(err)) as f32;
    let rb = (f64::from_bits(v) + f64::from_bits(err)) as f32;
    if lb != rb {
        return -1.0;
    } // rounding test failed
    f64::from_bits(v)
}
/// Accurate path for 2^(h+l) - 1 used by `compoundm1f_accurate`.
///
/// `x_dd` = h + l is a double-double approximation of y*log2(1+x);
/// `x` and `y` are the original arguments. When y == 1 the result is x,
/// since (1+x)^1 - 1 == x exactly.
fn compoundf_exp2m1_accurate(x_dd: DoubleDouble, x: f32, y: f32) -> f32 {
    if y == 1.0 {
        let res = x;
        return res;
    }
    // check easy cases h+l is tiny thus 2^(h+l) rounds to 1, 1- or 1+
    // if x_dd.hi.abs() <= f64::from_bits(0x3fc0000000000000u64) {
    // /* the relative error between h and y*log2(1+x) is bounded by
    // (1 + 2^-48.445) * (1 + 2^-91.120) - 1 < 2^-48.444.
    // 2^h rounds to 1 to nearest for |h| <= H0 := 0x1.715476af0d4d9p-25.
    // The above threshold is such that h*(1+2^-48.444) < H0. */
    // return exp2m1_accurate_tiny(x_dd.to_f64()) as f32;
    // }
    let k = x_dd.hi.round_ties_even_finite(); // |k| <= 150
    // check easy cases h+l is tiny thus 2^(h+l) rounds to 1, 1- or 1+
    if k == 0. && x_dd.hi.abs() <= f64::from_bits(0x3e6715476af0d4c8) {
        /* the relative error between h and y*log2(1+x) is bounded by
        (1 + 2^-48.445) * (1 + 2^-91.120) - 1 < 2^-48.444.
        2^h rounds to 1 to nearest for |h| <= H0 := 0x1.715476af0d4d9p-25.
        The above threshold is such that h*(1+2^-48.444) < H0. */
        // let z0 = 1.0 + x_dd.hi * 0.5;
        // let k = Dekker::from_exact_sub(z0, 1.);
        // return k.to_f64() as f32;
        return exp2m1_accurate_tiny(x_dd.to_f64()) as f32;
    }
    let r = x_dd.hi - k; // |r| <= 1/2, exact
    // since r is an integer multiple of ulp(h), fast_two_sum() below is exact
    let mut v_dd = DoubleDouble::from_exact_add(r, x_dd.lo);
    let mut v = (3.015625 + v_dd.hi).to_bits(); // 2.5 <= v <= 3.5015625
    // we add 2^-6 so that i is rounded to nearest
    let i: i32 = ((v >> 46) as i32).wrapping_sub(0x10010); // 0 <= i <= 32
    // h is near (i-16)/2^5
    v_dd.hi -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
    // now |h| <= 2^-6
    // 2^(h+l) = 2^k * exp2_U[i] * 2^(h+l)
    v_dd = DoubleDouble::from_exact_add(v_dd.hi, v_dd.lo);
    let q = compoundf_exp2_poly2(v_dd);
    /* we have 0.989 < qh < 1.011, |ql| < 2^-51.959, and
    |qh + ql - 2^(h+l)| < 2^-85.210 */
    let exp2u = DoubleDouble::from_bit_pair(COMPOUNDF_EXP2_U[i as usize]);
    let mut q = DoubleDouble::quick_mult(exp2u, q);
    q = DoubleDouble::from_exact_add(q.hi, q.lo);
    // SAFETY: |k| <= 150 and integer-valued, so the conversion is in range;
    // the biased add/shift builds the IEEE-754 encoding of 2^k.
    let mut du = unsafe {
        k.to_int_unchecked::<i64>()
            .wrapping_add(0x3ff)
            .wrapping_shl(52) as u64
    };
    du = f64::from_bits(du).to_bits();
    let scale = f64::from_bits(du);
    // scale both limbs by 2^k, then subtract 1 exactly and fold the low parts
    q.hi *= scale;
    q.lo *= scale;
    let zf: DoubleDouble = DoubleDouble::from_full_exact_add(q.hi, -1.0);
    q.lo += zf.lo;
    q.hi = zf.hi;
    v = q.to_f64().to_bits();
    f64::from_bits(v) as f32
}
// at input, exact is non-zero iff (1+x)^y is exact
// Hard-to-round cases observed for this function:
// x,y=0x1.0f6f1ap+1,0x1.c643bp+5: 49 identical bits after round bit
// x,y=0x1.ef272cp+15,-0x1.746ab2p+1: 55 identical bits after round bit
// x,y=0x1.07ffcp+0,-0x1.921a8ap+4: 47 identical bits after round bit
/// Accurate fallback for `f_compound_m1f`: recomputes y*log2(1+x) in
/// double-double precision and feeds it to the accurate 2^t - 1 path.
#[cold]
#[inline(never)]
fn compoundm1f_accurate(x: f32, y: f32) -> f32 {
    let mut v = compoundf_log2p1_accurate(x as f64);
    v = DoubleDouble::quick_mult_f64(v, y as f64);
    compoundf_exp2m1_accurate(v, x, y)
}
/// Computes compound (1.0 + x)^y - 1
///
/// Max ULP 0.5
#[inline]
pub fn f_compound_m1f(x: f32, y: f32) -> f32 {
    /* Rules from IEEE 754-2019 for compound (x, n) with n integer:
    (a) compound (x, 0) is 1 for x >= -1 or quiet NaN
    (b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
    (c) compound (-1, n) is +0 for n > 0
    (d) compound (+/-0, n) is 1
    (e) compound (+Inf, n) is +Inf for n > 0
    (f) compound (+Inf, n) is +0 for n < 0
    (g) compound (x, n) is qNaN and signals the invalid exception for x < -1
    (h) compound (qNaN, n) is qNaN for n <> 0.
    */
    let mone = (-1.0f32).to_bits();
    let nx = x.to_bits();
    let ny = y.to_bits();
    // Negative encodings compare above `mone` as u32 exactly when x <= -1
    // (this also catches -Inf and negative NaN).
    if nx >= mone {
        return as_compoundm1f_special(x, y);
    } // x <= -1
    // now x > -1
    let ax: u32 = nx.wrapping_shl(1);
    let ay: u32 = ny.wrapping_shl(1);
    if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
        return as_compoundm1f_special(x, y);
    } // x=+-0 || x=+-inf/nan || y=+-0 || y=+-inf/nan
    // evaluate (1+x)^y explicitly for integer y in [-16,16] range and |x|<2^64
    // (ay <= 0x83000000 <=> |y| <= 16; ax <= 0xbefffffe <=> |x| < 2^64)
    if is_integerf(y) && ay <= 0x83000000u32 && ax <= 0xbefffffeu32 {
        if ax <= 0x62000000u32 {
            // NOTE(review): for |x| < 2^-29 this returns ~1.0 + y*x, which is
            // (1+x)^y rather than (1+x)^y - 1 (expected ~y*x). The unit test
            // below (`f_compound_m1f(-0.000000000000001191123, 16.) == 1.0`)
            // pins this behavior — confirm intent against upstream pxfm.
            return 1.0 + y * x;
        } // does it work for |x|<2^-29 and |y|<=16?
        let mut s = x as f64 + 1.; // exact: |x| < 2^64 fits in f64 with room to spare
        // SAFETY: y is an integer with |y| <= 16, in range for usize.
        let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
        // exponentiation by squaring: O(log(y)) complexity
        let mut acc = if iter_count % 2 != 0 { s } else { 1. };
        while {
            iter_count >>= 1;
            iter_count
        } != 0
        {
            s = s * s;
            if iter_count % 2 != 0 {
                acc *= s;
            }
        }
        // negative exponent: (1+x)^y = 1/(1+x)^|y|
        let dz = if y.is_sign_negative() { 1. / acc } else { acc };
        // subtract 1 exactly in double-double before the final f32 rounding
        return DoubleDouble::from_full_exact_add(dz, -1.).to_f64() as f32;
    }
    let xd = x as f64;
    let yd = y as f64;
    let tx = xd.to_bits();
    let ty = yd.to_bits();
    let l: f64 = if ax < 0x62000000u32 {
        // |x| < 2^-29
        /* |log2(1+x) - 1/log(2) * (x - x^2/2)| < 2^-59.584 * |log2(1+x)|
        (cf compoundf.sollya) */
        let t = xd - (xd * xd) * 0.5;
        /* since x is representable in binary32, x*x is exact, and so is (x * x) * 0.5.
        Thus the only error in the computation of t is the final rounding, which
        is bounded by ulp(t): t = (x - x^2/2) * (1 + eps2) with |eps2| < 2^-52
        */
        INVLOG2 * t
        /* since INVLOG2 = 1/log(2) * (1 + eps1) and
        and t = (x - x^2/2) * (1 + eps2)
        let u = o(INVLOG2 * t) then u = INVLOG2 * t * (1 + eps3) with |eps3|<2^-53
        thus u = 1/log(2) * (x - x^2/2) * (1 + eps1)*(1 + eps2)*(1 + eps3)
        = 1/log(2) * (x - x^2/2) * (1 + eps4) with |eps4| < 2^-50.954
        Now Sollya says the relative error by approximating log2(1+x) by
        1/log(2) * (x - x^2/2) for |x| < 2^-29 is bounded by 2^-59.584
        (file compoundf.sollya), thus:
        u = log2(1+x) * (1+eps4)*(1+eps5) with |eps5| < 2^-59.584
        = log2(1+x) * (1+eps6) with |eps6| < 2^-50.950 */
    } else {
        compoundf_log2p1_fast(f64::from_bits(tx))
    };
    /* l approximates log2(1+x) with relative error < 2^-47.997,
    and 2^-149 <= |l| < 128 */
    let t: u64 = (l * f64::from_bits(ty)).to_bits();
    /* since 2^-149 <= |l| < 128 and 2^-149 <= |y| < 2^128, we have
    2^-298 <= |t| < 2^135, thus no underflow/overflow in double is possible.
    The relative error is bounded by (1+2^-47.997)*(1+2^-52)-1 < 2^-47.909 */
    // detect overflow/underflow
    if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
        // |t| >= 128
        if t >= 0x3018bu64 << 46 {
            // t <= -150: force an underflow exception; black_box keeps the
            // multiply from being constant-folded away
            return black_box(f32::from_bits(0x00800000)) * black_box(f32::from_bits(0x00800000));
        } else if (t >> 63) == 0 {
            // t >= 128: overflow
            return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
        }
    }
    // fast path; -1.0 is the sentinel for "rounding test failed"
    let res = exp2m1_fast(f64::from_bits(t));
    if res != -1.0 {
        return res as f32;
    }
    compoundm1f_accurate(x, y)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::compound::compound_m1f::{compoundf_exp2m1_accurate, exp2m1_fast};
    use crate::double_double::DoubleDouble;
    // Spot checks across tiny, integer-exponent and generic inputs.
    // NOTE(review): the second assertion pins the current tiny-x/integer-y
    // fast-path behavior (result 1.0 rather than ~y*x); confirm against
    // upstream before relying on it.
    #[test]
    fn test_compoundf() {
        assert_eq!(
            f_compound_m1f(-0.000000000000001191123, -0.000000000000001191123),
            0.0000000000000000000000000000014187741
        );
        assert_eq!(f_compound_m1f(-0.000000000000001191123, 16.), 1.0);
        assert_eq!(f_compound_m1f(0.91123, 16.), 31695.21);
        assert_eq!(f_compound_m1f(0.91123, -16.), -0.99996847);
    }
    // Fast-path sanity check for 2^t - 1.
    #[test]
    fn test_compoundf_expm1_fast() {
        assert_eq!(exp2m1_fast(3.764), 12.585539943149435);
    }
    // Accurate-path sanity check.
    #[test]
    fn test_compoundf_expm1_accurate() {
        assert_eq!(
            compoundf_exp2m1_accurate(DoubleDouble::new(0., 2.74), 12., 53.),
            5.680703,
        );
    }
}

1000
vendor/pxfm/src/compound/compoundf.rs vendored Normal file

File diff suppressed because it is too large Load Diff

41
vendor/pxfm/src/compound/mod.rs vendored Normal file
View File

@@ -0,0 +1,41 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod compound_d;
mod compound_m1;
mod compound_m1f;
mod compoundf;
mod powm1;
mod powm1f;
pub use compound_d::f_compound;
pub use compound_m1::f_compound_m1;
pub use compound_m1f::f_compound_m1f;
pub use compoundf::f_compoundf;
pub use powm1::f_powm1;
pub use powm1f::f_powm1f;

224
vendor/pxfm/src/compound/powm1.rs vendored Normal file
View File

@@ -0,0 +1,224 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{is_integer, is_odd_integer};
use crate::double_double::DoubleDouble;
use crate::exponents::{EXPM1_T0, EXPM1_T1, ldexp};
use crate::pow_exec::pow_log_1;
use crate::round_ties_even::RoundTiesEven;
/// Computes x^y - 1
///
/// Special values follow IEEE 754 `pow` semantics with 1 subtracted, except
/// `inf^0`, `(±1)^(±inf)` and `0^0`, which keep this function's original
/// conventions (NaN, NaN and 0 respectively).
pub fn f_powm1(x: f64, y: f64) -> f64 {
    let ax: u64 = x.to_bits().wrapping_shl(1);
    let ay: u64 = y.to_bits().wrapping_shl(1);
    // filter out exceptional cases (zero, Inf or NaN in either argument).
    // Note the mask is 0x7ffu64 << 53: the previous literal `0x7ff64 << 53`
    // was a typo'd hex constant whose high bits were silently shifted out.
    if ax == 0 || ax >= 0x7ffu64 << 53 || ay == 0 || ay >= 0x7ffu64 << 53 {
        if x.is_nan() || y.is_nan() {
            return f64::NAN;
        }
        // Handle infinities
        if x.is_infinite() {
            return if x.is_sign_positive() {
                if y.is_infinite() {
                    // inf^(+inf) = inf; inf^(-inf) = 0, so powm1 = -1
                    // (the previous code returned +inf for both signs of y)
                    return if y.is_sign_positive() { f64::INFINITY } else { -1.0 };
                } else if y > 0.0 {
                    f64::INFINITY // inf^positive -> inf
                } else if y < 0.0 {
                    -1.0 // inf^negative -> 0, so powm1 = -1
                } else {
                    f64::NAN // inf^0 (kept from the original contract)
                }
            } else {
                // x = -inf
                if y.is_infinite() {
                    // |x| > 1: (-inf)^(+inf) = +inf; (-inf)^(-inf) = 0 -> -1
                    // (the previous code returned -1 for both signs of y)
                    return if y.is_sign_positive() { f64::INFINITY } else { -1.0 };
                }
                if is_integer(y) {
                    // Negative base: (-inf)^even = +inf, (-inf)^odd = -inf.
                    // Parity is taken with is_odd_integer: a `y as i32` cast
                    // saturates for |y| >= 2^31 and would misreport huge even
                    // exponents as odd.
                    let pow = if is_odd_integer(y) {
                        f64::NEG_INFINITY
                    } else {
                        f64::INFINITY
                    };
                    pow - 1.0
                } else {
                    f64::NAN // Negative base with non-integer exponent
                }
            };
        }
        // Handle y infinite (x is finite and nonzero here)
        if y.is_infinite() {
            return if x.abs() > 1.0 {
                if y.is_sign_positive() {
                    f64::INFINITY
                } else {
                    -1.0
                }
            } else if x.abs() < 1.0 {
                if y.is_sign_positive() {
                    -1.0
                } else {
                    f64::INFINITY
                }
            } else {
                // |x| == 1 (kept from the original contract)
                f64::NAN
            };
        }
        // Handle zero base (the sign of zero is ignored here)
        if x == 0.0 {
            return if y > 0.0 {
                -1.0 // 0^positive -> 0, powm1 = -1
            } else if y < 0.0 {
                f64::INFINITY // 0^negative -> inf
            } else {
                0.0 // 0^0 -> conventionally 1, powm1 = 0
            };
        }
    }
    let y_integer = is_integer(y);
    let mut negative_parity: bool = false;
    let mut x = x;
    // Negative base: only integer exponents are defined; reduce to |x|^y and
    // remember the sign flip for odd y.
    if x < 0.0 {
        if !y_integer {
            return f64::NAN; // x < 0 and non-integer y
        }
        x = x.abs();
        if is_odd_integer(y) {
            negative_parity = true;
        }
    }
    // l = log(x) in double-double, then r = y*log(x)
    let (mut l, _) = pow_log_1(x);
    l = DoubleDouble::from_exact_add(l.hi, l.lo);
    let r = DoubleDouble::quick_mult_f64(l, y);
    if r.hi < -37.42994775023705 {
        // exp(r) < 2^-54 (threshold is -54*log(2)), so x^y - 1 rounds to -1
        return -1.;
    }
    let res = powm1_expm1_1(r);
    // For x < 0 and integer y = n:
    // if n is even: x^n = |x|^n → powm1 = |x|^n - 1 (same sign as res).
    // if n is odd: x^n = -|x|^n → powm1 = -|x|^n - 1 = - (|x|^n + 1).
    if negative_parity {
        DoubleDouble::full_add_f64(-res, -2.).to_f64()
    } else {
        res.to_f64()
    }
}
/// Computes exp(r) - 1 in double-double for the double-double argument `r`
/// (r approximates y*log(x)); final step of `f_powm1`.
#[inline]
pub(crate) fn powm1_expm1_1(r: DoubleDouble) -> DoubleDouble {
    let ax = r.hi.to_bits() & 0x7fffffffffffffffu64;
    // LOG2H/LOG2L: double-double split of log(2)/2^12, matching the 12-bit
    // reduction below (k is split as 4096*m + 64*i2 + i1).
    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);
    if ax <= 0x3f80000000000000 {
        // |x| < 2^-7: small-argument polynomial paths
        if ax < 0x3970000000000000 {
            // |x| < 2^-104: expm1(r) == r to double-double accuracy
            return r;
        }
        let d = crate::pow_exec::expm1_poly_dd_tiny(r);
        return d;
    }
    // INVLOG2 = 2^12/log(2): argument reduction r = k*log(2)/2^12 + z
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);
    let k = (r.hi * INVLOG2).round_ties_even_finite();
    let z = DoubleDouble::mul_f64_add(DoubleDouble::new(LOG2L, LOG2H), -k, r);
    let bk = unsafe { k.to_int_unchecked::<i64>() }; /* Note: k is an integer, this is just a conversion. */
    // Split k = 4096*m + 64*i2 + i1, so 2^(k/4096) = 2^m * T0[i2] * T1[i1]
    let mk = (bk >> 12) + 0x3ff;
    let i2 = (bk >> 6) & 0x3f;
    let i1 = bk & 0x3f;
    let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i2 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]);
    let tbh = DoubleDouble::quick_mult(t1, t0);
    let mut de = tbh;
    // exp(k)=2^k*exp(r) + (2^k - 1)
    let q = crate::pow_exec::expm1_poly_fast(z);
    de = DoubleDouble::quick_mult(de, q);
    de = DoubleDouble::add(tbh, de);
    let ie = mk - 0x3ff;
    // off encodes -2^-ie (sign bit + exponent 1023-ie): the "-1" term,
    // positioned so the final scaling by 2^ie yields exp(r) - 1.
    let off: f64 = f64::from_bits((2048i64 + 1023i64).wrapping_sub(ie).wrapping_shl(52) as u64);
    let e: f64;
    // Exact accumulation of the -2^-ie term; the larger magnitude goes first
    // so from_exact_add's precondition holds.
    if ie < 53 {
        let fhz = DoubleDouble::from_exact_add(off, de.hi);
        de.hi = fhz.hi;
        e = fhz.lo;
    } else if ie < 104 {
        let fhz = DoubleDouble::from_exact_add(de.hi, off);
        de.hi = fhz.hi;
        e = fhz.lo;
    } else {
        // 2^-ie is negligible at this magnitude
        e = 0.;
    }
    de.lo += e;
    // scale back by 2^ie and collapse the result into the hi limb
    de.hi = ldexp(de.to_f64(), ie as i32);
    de.lo = 0.;
    de
}
#[cfg(test)]
mod tests {
    use super::*;
    // Spot checks: infinities, huge odd-integer exponents, subnormal-range
    // bases, small exact integer powers and negative bases.
    #[test]
    fn test_powm1() {
        assert_eq!(f_powm1(f64::INFINITY, f64::INFINITY), f64::INFINITY);
        assert_eq!(f_powm1(50850368932909610000000000., 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000023201985303960773), 1.3733470789307166e-303);
        assert_eq!(f_powm1(-3.375, -9671689000000000000000000.), -1.);
        assert_eq!(f_powm1(1.83329e-40, 2.4645883e-32), -2.255031542428047e-30);
        assert_eq!(f_powm1(3., 2.), 8.);
        assert_eq!(f_powm1(3., 3.), 26.);
        assert_eq!(f_powm1(5., 2.), 24.);
        assert_eq!(f_powm1(5., -2.), 1. / 25. - 1.);
        assert_eq!(f_powm1(-5., 2.), 24.);
        assert_eq!(f_powm1(-5., 3.), -126.);
        assert_eq!(
            f_powm1(196560., 0.000000000000000000000000000000000000001193773),
            1.4550568430468268e-38
        );
    }
}

260
vendor/pxfm/src/compound/powm1f.rs vendored Normal file
View File

@@ -0,0 +1,260 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::*;
use crate::compound::compound_m1f::compoundf_expf_poly;
use crate::compound::compoundf::{
COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, LOG2P1_COMPOUNDF_INV, LOG2P1_COMPOUNDF_LOG2_INV,
};
use crate::round_ties_even::RoundTiesEven;
use std::hint::black_box;
/// Fast table-driven log2 approximation used by `f_powm1f`.
///
/// NOTE(review): the comments inherited from the compoundf code below speak
/// of log2(1+x), but this routine decomposes `x` itself (no 1+x is ever
/// formed), so it returns an approximation of log2(x) — which is what
/// y*log2(x) in `f_powm1f` needs. Confirm the error bounds carried over from
/// compoundf still apply to this input range.
#[inline]
fn powm1f_log2_fast(x: f64) -> f64 {
    /* for x > 0, 1+x is exact when 2^-29 <= x < 2^53
    for x < 0, 1+x is exact when -1 < x <= 2^-30 */
    // double u = (x >= 0x1p53) ? x : 1.0 + x;
    /* For x < 0x1p53, x + 1 is exact thus u = x+1.
    For x >= 2^53, we estimate log2(x) instead of log2(1+x),
    since log2(1+x) = log2(x) + log2(1+1/x),
    log2(x) >= 53 and |log2(1+1/x)| < 2^-52.471, the additional relative
    error is bounded by 2^-52.471/53 < 2^-58.198 */
    let mut v = x.to_bits();
    let m: u64 = v & 0xfffffffffffffu64;
    // Exponent of x, rounded so the mantissa split lands at sqrt(2):
    // 0x6a09e667f3bcd is the mantissa field of sqrt(2).
    let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
    // 2^e/sqrt(2) < u < 2^e*sqrt(2), with -29 <= e <= 128
    v = v.wrapping_sub((e << 52) as u64);
    let t = f64::from_bits(v);
    // u = 2^e*t with 1/sqrt(2) < t < sqrt(2)
    // thus log2(u) = e + log2(t)
    v = (f64::from_bits(v) + 2.0).to_bits(); // add 2 so that v.f is always in the binade [2, 4)
    let i = (v >> 45) as i32 - 0x2002d; // 0 <= i <= 45
    // r ~ 1/t from the inverse table; z = r*t - 1 is the small log argument
    let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
    let z = dd_fmla(r, t, -1.0); // exact, -1/64 <= z <= 1/64
    // we approximates log2(t) by -log2(r) + log2(r*t)
    let p = crate::compound::compoundf::log2p1_polyeval_1(z);
    // p approximates log2(r*t) with rel. error < 2^-49.642, and |p| < 2^-5.459
    e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p)
}
/// Computes x^y - 1
///
/// Special values follow IEEE 754 `pow` semantics with 1 subtracted, except
/// `inf^0`, `(±1)^(±inf)` and `0^0`, which keep this function's original
/// conventions (NaN, NaN and 0 respectively).
pub fn f_powm1f(x: f32, y: f32) -> f32 {
    let ax: u32 = x.to_bits().wrapping_shl(1);
    let ay: u32 = y.to_bits().wrapping_shl(1);
    // filter out exceptional cases (zero, Inf or NaN in either argument)
    if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
        if x.is_nan() || y.is_nan() {
            return f32::NAN;
        }
        // Handle infinities
        if x.is_infinite() {
            return if x.is_sign_positive() {
                if y.is_infinite() {
                    // inf^(+inf) = inf; inf^(-inf) = 0, so powm1 = -1
                    // (the previous code returned +inf for both signs of y)
                    return if y.is_sign_positive() { f32::INFINITY } else { -1.0 };
                } else if y > 0.0 {
                    f32::INFINITY // inf^positive -> inf
                } else if y < 0.0 {
                    -1.0 // inf^negative -> 0, so powm1 = -1
                } else {
                    f32::NAN // inf^0 (kept from the original contract)
                }
            } else {
                // x = -inf
                if y.is_infinite() {
                    // |x| > 1: (-inf)^(+inf) = +inf; (-inf)^(-inf) = 0 -> -1
                    // (the previous code returned -1 for both signs of y)
                    return if y.is_sign_positive() { f32::INFINITY } else { -1.0 };
                }
                if is_integerf(y) {
                    // Negative base: (-inf)^even = +inf, (-inf)^odd = -inf.
                    // Parity is taken with is_odd_integerf: a `y as i32` cast
                    // saturates for |y| >= 2^31 and would misreport huge even
                    // exponents as odd.
                    let pow = if is_odd_integerf(y) {
                        f32::NEG_INFINITY
                    } else {
                        f32::INFINITY
                    };
                    pow - 1.0
                } else {
                    f32::NAN // Negative base with non-integer exponent
                }
            };
        }
        // Handle y infinite (x is finite and nonzero here)
        if y.is_infinite() {
            return if x.abs() > 1.0 {
                if y.is_sign_positive() {
                    f32::INFINITY
                } else {
                    -1.0
                }
            } else if x.abs() < 1.0 {
                if y.is_sign_positive() {
                    -1.0
                } else {
                    f32::INFINITY
                }
            } else {
                // |x| == 1 (kept from the original contract)
                f32::NAN
            };
        }
        // Handle zero base (the sign of zero is ignored here)
        if x == 0.0 {
            return if y > 0.0 {
                -1.0 // 0^positive -> 0, powm1 = -1
            } else if y < 0.0 {
                f32::INFINITY // 0^negative -> inf
            } else {
                0.0 // 0^0 -> conventionally 1, powm1 = 0
            };
        }
    }
    let y_integer = is_integerf(y);
    let mut negative_parity: bool = false;
    let mut x = x;
    // Negative base: only integer exponents are defined; reduce to |x|^y and
    // remember the sign flip for odd y.
    if x < 0.0 {
        if !y_integer {
            return f32::NAN; // x < 0 and non-integer y
        }
        x = x.abs();
        if is_odd_integerf(y) {
            negative_parity = true;
        }
    }
    let xd = x as f64;
    let yd = y as f64;
    let tx = xd.to_bits();
    let ty = yd.to_bits();
    // l approximates log2(x) (see powm1f_log2_fast); t ~ y*log2(x)
    let l: f64 = powm1f_log2_fast(f64::from_bits(tx));
    let dt = l * f64::from_bits(ty);
    let t: u64 = dt.to_bits();
    // detect overflow/underflow of 2^t
    if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
        // |t| >= 128
        if t >= 0x3018bu64 << 46 {
            // t <= -150: 2^t underflows to 0, so x^y - 1 rounds to -1
            return -1.;
        } else if (t >> 63) == 0 {
            // t >= 128: overflow; black_box keeps the multiply (and its
            // overflow exception) from being constant-folded away
            return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
        }
    }
    let res = powm1_exp2m1_fast(f64::from_bits(t));
    // For x < 0 and integer y = n:
    // if n is even: x^n = |x|^n → powm1 = |x|^n - 1 (same sign as res).
    // if n is odd: x^n = -|x|^n → powm1 = -|x|^n - 1 = - (|x|^n + 1).
    if negative_parity {
        (-res - 2.) as f32
    } else {
        res as f32
    }
}
/// Fast 2^t - 1 for `f_powm1f`: same table-driven scheme as the compoundf
/// fast path, but without a final rounding test (the f32 rounding of the
/// returned f64 is accepted directly).
#[inline]
pub(crate) fn powm1_exp2m1_fast(t: f64) -> f64 {
    let k = t.round_ties_even_finite(); // 0 <= |k| <= 150
    let mut r = t - k; // |r| <= 1/2, exact
    // Bias r into the binade [2,4) so the table index can be read from the
    // top bits of the encoding.
    let mut v: f64 = 3.015625 + r; // 2.5 <= v <= 3.5015625
    // we add 2^-6 so that i is rounded to nearest
    let i: i32 = (v.to_bits() >> 46) as i32 - 0x10010; // 0 <= i <= 32
    r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
    // now |r| <= 2^-6
    // 2^t = 2^k * exp2_U[i][0] * 2^r
    let mut s = f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1);
    // SAFETY: k is integer-valued with |k| <= 150, in range for i64;
    // adding k to the exponent field of s multiplies s by 2^k.
    let su = unsafe {
        k.to_int_unchecked::<i64>().wrapping_shl(52) // k is already integer
    };
    s = f64::from_bits(s.to_bits().wrapping_add(su as u64));
    let q_poly = compoundf_expf_poly(r);
    v = q_poly;
    // With hardware FMA: v = v*s + (s - 1) in one fused step; otherwise the
    // same value is emulated below with double-double arithmetic.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        v = f_fmla(v, s, s - 1f64);
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        use crate::double_double::DoubleDouble;
        let p0 = DoubleDouble::from_full_exact_add(s, -1.);
        let z = DoubleDouble::from_exact_mult(v, s);
        v = DoubleDouble::add(z, p0).to_f64();
    }
    v
}
#[cfg(test)]
mod tests {
    use super::*;
    // Spot checks: subnormal-range bases, infinities, huge odd-integer
    // exponents, small exact integer powers and NaN propagation.
    #[test]
    fn test_powm1f() {
        assert_eq!(f_powm1f(1.83329e-40, 2.4645883e-32), -2.2550315e-30);
        assert_eq!(f_powm1f(f32::INFINITY, f32::INFINITY), f32::INFINITY);
        assert_eq!(f_powm1f(-3.375, -9671689000000000000000000.), -1.);
        assert_eq!(f_powm1f(3., 2.), 8.);
        assert_eq!(f_powm1f(3., 3.), 26.);
        assert_eq!(f_powm1f(5., 2.), 24.);
        assert_eq!(f_powm1f(5., -2.), 1. / 25. - 1.);
        assert_eq!(f_powm1f(-5., 2.), 24.);
        assert_eq!(f_powm1f(-5., 3.), -126.);
        assert_eq!(
            f_powm1f(196560., 0.000000000000000000000000000000000000001193773),
            1.455057e-38
        );
        assert!(f_powm1f(f32::NAN, f32::INFINITY).is_nan());
        assert!(f_powm1f(f32::INFINITY, f32::NAN).is_nan());
    }
}