// File: another-boids-in-rust/vendor/pxfm/src/powf.rs (658 lines, 22 KiB, Rust)

/*
* // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bits::biased_exponent_f64;
use crate::common::*;
use crate::double_double::DoubleDouble;
use crate::exponents::expf;
use crate::logf;
use crate::logs::LOG2_R;
use crate::polyeval::{f_polyeval3, f_polyeval6, f_polyeval10};
use crate::pow_tables::EXP2_MID1;
use crate::powf_tables::{LOG2_R_TD, LOG2_R2_DD, POWF_R2};
use crate::round::RoundFinite;
/// Power function for given value for const context.
/// This is simplified version just to make a good approximation on const context.
///
/// Computes `|d|^n` via `exp(n * ln(|d|))`, then restores the sign for a
/// negative base using the parity of `n` truncated to an integer.
#[inline]
pub const fn powf(d: f32, n: f32) -> f32 {
    let magnitude = expf(n * logf(d.abs()));
    // Exponent of exactly one: hand the base back untouched.
    if n == 1. {
        return d;
    }
    if d < 0.0 {
        // Negative base: an even (truncated) exponent yields a positive
        // result, an odd one a negative result.
        let parity = n as i32;
        if parity % 2 == 0 {
            magnitude
        } else {
            -magnitude
        }
    } else {
        magnitude
    }
}
/// Returns `true` when `a`'s biased binary exponent is at least `b`'s.
///
/// Used below to pick the operand order for `DoubleDouble::add`: the callers
/// pass the operand with the larger exponent first (Fast2Sum-style ordering,
/// judged by exponent only, ignoring the mantissa).
#[inline]
const fn larger_exponent(a: f64, b: f64) -> bool {
    biased_exponent_f64(a) >= biased_exponent_f64(b)
}
// Calculate 2^(y * log2(x)) in double-double precision.
//
// Slow path of `f_powf`, taken only when the fast double-precision path's
// rounding test fails; marked cold/never-inline to keep the fast path lean.
//
// At this point we can reuse the following values:
//   idx_x: index for extra precision of log2 for the middle part of log2(x).
//   dx: the reduced argument for log2(x)
//   y6: 2^6 * y.
//   lo6_hi: the high part of 2^6 * (y - (hi + mid))
//   exp2_hi_mid: high part of 2^(hi + mid)
#[cold]
#[inline(never)]
fn powf_dd(idx_x: i32, dx: f64, y6: f64, lo6_hi: f64, exp2_hi_mid: DoubleDouble) -> f64 {
    // Perform a second range reduction step:
    //   idx2 = round(2^14 * (dx + 2^-8)) = round(dx * 2^14 + 2^6)
    //   dx2 = (1 + dx) * r2 - 1
    // Output range:
    //   -0x1.3ffcp-15 <= dx2 <= 0x1.3e3dp-15
    let idx2 = f_fmla(
        dx,
        f64::from_bits(0x40d0000000000000), // 2^14
        f64::from_bits(0x4050000000000000), // 2^6
    )
    .round_finite() as usize;
    let dx2 = f_fmla(1.0 + dx, f64::from_bits(POWF_R2[idx2]), -1.0); // Exact
    // Double-double polynomial coefficients for log2(1 + x)/x, stored as
    // (lo, hi) bit patterns and consumed via `from_bit_pair` below.
    const COEFFS: [(u64, u64); 6] = [
        (0x3c7777d0ffda25e0, 0x3ff71547652b82fe),
        (0xbc6777d101cf0a84, 0xbfe71547652b82fe),
        (0x3c7ce04b5140d867, 0x3fdec709dc3a03fd),
        (0x3c7137b47e635be5, 0xbfd71547652b82fb),
        (0xbc5b5a30b3bdb318, 0x3fd2776c516a92a2),
        (0x3c62d2fbd081e657, 0xbfcec70af1929ca6),
    ];
    let dx_dd = DoubleDouble::new(0.0, dx2);
    let p = f_polyeval6(
        dx_dd,
        DoubleDouble::from_bit_pair(COEFFS[0]),
        DoubleDouble::from_bit_pair(COEFFS[1]),
        DoubleDouble::from_bit_pair(COEFFS[2]),
        DoubleDouble::from_bit_pair(COEFFS[3]),
        DoubleDouble::from_bit_pair(COEFFS[4]),
        DoubleDouble::from_bit_pair(COEFFS[5]),
    );
    // log2(1 + dx2) ~ dx2 * P(dx2)
    let log2_x_lo = DoubleDouble::quick_mult_f64(p, dx2);
    // Lower parts of (e_x - log2(r1)) of the first range reduction constant.
    let log2_r_td = LOG2_R_TD[idx_x as usize];
    let log2_x_mid = DoubleDouble::new(f64::from_bits(log2_r_td.0), f64::from_bits(log2_r_td.1));
    // -log2(r2) + lower part of (e_x - log2(r1))
    let log2_x_m = DoubleDouble::add(DoubleDouble::from_bit_pair(LOG2_R2_DD[idx2]), log2_x_mid);
    // log2(1 + dx2) - log2(r2) + lower part of (e_x - log2(r1))
    // Since we don't know which one has larger exponent to apply Fast2Sum
    // algorithm, we need to check them before calling double-double addition.
    let log2_x = if larger_exponent(log2_x_m.hi, log2_x_lo.hi) {
        DoubleDouble::add(log2_x_m, log2_x_lo)
    } else {
        DoubleDouble::add(log2_x_lo, log2_x_m)
    };
    let lo6_hi_dd = DoubleDouble::new(0.0, lo6_hi);
    // 2^6 * y * (log2(1 + dx2) - log2(r2) + lower part of (e_x - log2(r1)))
    let prod = DoubleDouble::quick_mult_f64(log2_x, y6);
    // 2^6 * (y * log2(x) - (hi + mid)) = 2^6 * lo
    // Same exponent-ordering dance as above before the double-double add.
    let lo6 = if larger_exponent(prod.hi, lo6_hi) {
        DoubleDouble::add(prod, lo6_hi_dd)
    } else {
        DoubleDouble::add(lo6_hi_dd, prod)
    };
    // Double-double polynomial coefficients approximating 2^(x / 2^6),
    // evaluated at lo6; stored as (lo, hi) bit patterns.
    const EXP2_COEFFS: [(u64, u64); 10] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x3c1abc9e3b398024, 0x3f862e42fefa39ef),
        (0xbba5e43a5429bddb, 0x3f0ebfbdff82c58f),
        (0xbb2d33162491268f, 0x3e8c6b08d704a0c0),
        (0x3a94fb32d240a14e, 0x3e03b2ab6fba4e77),
        (0x39ee84e916be83e0, 0x3d75d87fe78a6731),
        (0xb989a447bfddc5e6, 0x3ce430912f86bfb8),
        (0xb8e31a55719de47f, 0x3c4ffcbfc588ded9),
        (0xb850ba57164eb36b, 0x3bb62c034beb8339),
        (0xb7b8483eabd9642d, 0x3b1b5251ff97bee1),
    ];
    let pp = f_polyeval10(
        lo6,
        DoubleDouble::from_bit_pair(EXP2_COEFFS[0]),
        DoubleDouble::from_bit_pair(EXP2_COEFFS[1]),
        DoubleDouble::from_bit_pair(EXP2_COEFFS[2]),
        DoubleDouble::from_bit_pair(EXP2_COEFFS[3]),
        DoubleDouble::from_bit_pair(EXP2_COEFFS[4]),
        DoubleDouble::from_bit_pair(EXP2_COEFFS[5]),
        DoubleDouble::from_bit_pair(EXP2_COEFFS[6]),
        DoubleDouble::from_bit_pair(EXP2_COEFFS[7]),
        DoubleDouble::from_bit_pair(EXP2_COEFFS[8]),
        DoubleDouble::from_bit_pair(EXP2_COEFFS[9]),
    );
    // 2^(hi + mid) * 2^lo in double-double.
    let rr = DoubleDouble::quick_mult(exp2_hi_mid, pp);
    // Make sure the sum is normalized:
    let r = DoubleDouble::from_exact_add(rr.hi, rr.lo);
    const FRACTION_MASK: u64 = (1u64 << 52) - 1;
    let mut r_bits = r.hi.to_bits();
    // NOTE(review): when the low 28 fraction bits of the high part are zero
    // and the tail is non-zero, the result sits near an f32 rounding boundary;
    // the code nudges the double by one ulp in the direction of the tail so
    // the final f64 -> f32 conversion rounds correctly — confirm against the
    // upstream correctly-rounded powf this was derived from.
    if ((r_bits & 0xfff_ffff) == 0) && (r.lo != 0.0) {
        let hi_sign = r.hi.to_bits() >> 63;
        let lo_sign = r.lo.to_bits() >> 63;
        if hi_sign == lo_sign {
            // Tail pushes in the same direction: bump away from zero.
            r_bits = r_bits.wrapping_add(1);
        } else if (r_bits & FRACTION_MASK) > 0 {
            // Tail pushes back: step toward zero (skip at an exponent boundary).
            r_bits = r_bits.wrapping_sub(1);
        }
    }
    f64::from_bits(r_bits)
}
/// Power function
///
/// Computes `x^y` for `f32` using double precision internally and escalating
/// to the double-double slow path (`powf_dd`) when the fast path's rounding
/// test is inconclusive.
///
/// Max found ULP 0.5
#[inline]
pub fn f_powf(x: f32, y: f32) -> f32 {
    let mut x_u = x.to_bits();
    let x_abs = x_u & 0x7fff_ffff;
    let mut y = y;
    let y_u = y.to_bits();
    let y_abs = y_u & 0x7fff_ffff;
    let mut x = x;
    // Early filter for exceptional exponents: y whose low 19 mantissa bits
    // are all zero (covers 0, +-inf, and other special bit patterns), or |y|
    // large enough (> 0x4f170000 ~ 2.5e9) that x^y over/underflows f32.
    if ((y_abs & 0x0007_ffff) == 0) || (y_abs > 0x4f170000) {
        if x.is_nan() || y.is_nan() {
            // IEEE 754: pow(x, +-0) = 1 and pow(1, y) = 1 even for NaN inputs.
            if y.abs() == 0. {
                return 1.;
            }
            if x == 1. {
                return 1.;
            }
            return f32::NAN;
        }
        // Exceptional exponents.
        if y == 0.0 {
            return 1.0;
        }
        match y_abs {
            0x7f80_0000 => {
                // y = +-inf.
                if x_abs > 0x7f80_0000 {
                    // pow(NaN, +-Inf) = NaN
                    return x;
                }
                if x_abs == 0x3f80_0000 {
                    // pow(+-1, +-Inf) = 1.0f
                    return 1.0;
                }
                if x == 0.0 && y_u == 0xff80_0000 {
                    // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
                    return f32::INFINITY;
                }
                // pow (|x| < 1, -inf) = +inf
                // pow (|x| < 1, +inf) = 0.0f
                // pow (|x| > 1, -inf) = 0.0f
                // pow (|x| > 1, +inf) = +inf
                return if (x_abs < 0x3f80_0000) == (y_u == 0xff80_0000) {
                    f32::INFINITY
                } else {
                    0.
                };
            }
            _ => {
                match y_u {
                    0x3f00_0000 => {
                        // pow(x, 1/2) = sqrt(x)
                        if x == 0.0 || x_u == 0xff80_0000 {
                            // pow(-0, 1/2) = +0
                            // pow(-inf, 1/2) = +inf
                            // Make sure it is correct for FTZ/DAZ.
                            return x * x;
                        }
                        let r = x.sqrt();
                        // Collapse sqrt's -0 to +0.
                        return if r.to_bits() != 0x8000_0000 { r } else { 0.0 };
                    }
                    0x3f80_0000 => {
                        return x;
                    } // y = 1.0f
                    0x4000_0000 => return x * x, // y = 2.0f
                    _ => {
                        let is_int = is_integerf(y);
                        if is_int && (y_u > 0x4000_0000) && (y_u <= 0x41c0_0000) {
                            // Check for exact cases when 2 < y < 25 and y is an integer:
                            // if x's significant bit-width times y fits in double
                            // precision, iterated multiplication is exact (or exactly
                            // half-way), so the final narrowing rounds correctly.
                            let mut msb: i32 = if x_abs == 0 {
                                32 - 2
                            } else {
                                x_abs.leading_zeros() as i32
                            };
                            msb = if msb > 8 { msb } else { 8 };
                            let mut lsb: i32 = if x_abs == 0 {
                                0
                            } else {
                                x_abs.trailing_zeros() as i32
                            };
                            lsb = if lsb > 23 { 23 } else { lsb };
                            let extra_bits: i32 = 32 - 2 - lsb - msb;
                            let iter = y as i32;
                            if extra_bits * iter <= 23 + 2 {
                                // The result is either exact or exactly half-way.
                                // But it is exactly representable in double precision.
                                let x_d = x as f64;
                                let mut result = x_d;
                                for _ in 1..iter {
                                    result *= x_d;
                                }
                                return result as f32;
                            }
                        }
                        if y_abs > 0x4f17_0000 {
                            // if y is NaN
                            if y_abs > 0x7f80_0000 {
                                if x_u == 0x3f80_0000 {
                                    // x = 1.0f
                                    // pow(1, NaN) = 1
                                    return 1.0;
                                }
                                // pow(x, NaN) = NaN
                                return y;
                            }
                            // x^y will be overflow / underflow in single precision. Set y to a
                            // large enough exponent but not too large, so that the computations
                            // won't be overflow in double precision.
                            y = f32::from_bits((y_u & 0x8000_0000).wrapping_add(0x4f800000u32));
                        }
                    }
                }
            }
        }
    }
    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
    let mut ex = -(E_BIAS as i32);
    let mut sign: u64 = 0;
    // Exceptional bases: exponent-field edge patterns, zero, inf/NaN,
    // subnormals, and negative x.
    if ((x_u & 0x801f_ffffu32) == 0) || x_u >= 0x7f80_0000u32 || x_u < 0x0080_0000u32 {
        if x.is_nan() {
            return f32::NAN;
        }
        if x_u == 0x3f80_0000 {
            return 1.;
        }
        let x_is_neg = x.to_bits() > 0x8000_0000;
        if x == 0.0 {
            let out_is_neg = x_is_neg && is_odd_integerf(f32::from_bits(y_u));
            if y_u > 0x8000_0000u32 {
                // pow(+-0, negative) = +inf, except pow(-0, negative odd
                // integer) = -inf (IEEE 754 / C Annex F). The sign must follow
                // `out_is_neg` (base sign AND odd-integer exponent), not the
                // base sign alone: pow(-0.0, -2.0) is +inf, not -inf. This
                // mirrors the +-inf base branch below.
                return if out_is_neg {
                    f32::NEG_INFINITY
                } else {
                    f32::INFINITY
                };
            }
            // pow(0, positive number) = 0
            return if out_is_neg { -0.0 } else { 0.0 };
        }
        if x_abs == 0x7f80_0000u32 {
            // x = +-Inf
            let out_is_neg = x_is_neg && is_odd_integerf(f32::from_bits(y_u));
            if y_u >= 0x7fff_ffff {
                // Negative y (NaN y was handled earlier): pow(+-inf, y < 0) = +-0.
                return if out_is_neg { -0.0 } else { 0.0 };
            }
            return if out_is_neg {
                f32::NEG_INFINITY
            } else {
                f32::INFINITY
            };
        }
        if x_abs > 0x7f80_0000 {
            // x is NaN.
            // pow (aNaN, 0) is already taken care above.
            return x;
        }
        // Normalize denormal inputs: scale by 2^64 and compensate the exponent.
        if x_abs < 0x0080_0000u32 {
            ex = ex.wrapping_sub(64);
            x *= f32::from_bits(0x5f800000);
        }
        // x is finite and negative, and y is a finite integer.
        if x.is_sign_negative() {
            if is_integerf(y) {
                x = -x;
                if is_odd_integerf(y) {
                    // (-x)^(odd integer) is negative: remember the sign bit.
                    sign = 0x8000_0000_0000_0000u64;
                }
            } else {
                // pow( negative, non-integer ) = NaN
                return f32::NAN;
            }
        }
    }
    // x^y = 2^( y * log2(x) )
    //     = 2^( y * ( e_x + log2(m_x) ) )
    // First we compute log2(x) = e_x + log2(m_x)
    x_u = x.to_bits();
    // Extract exponent field of x.
    ex = ex.wrapping_add((x_u >> 23) as i32);
    let e_x = ex as f64;
    // Use the highest 7 fractional bits of m_x as the index for look up tables.
    let x_mant = x_u & ((1u32 << 23) - 1);
    let idx_x = (x_mant >> (23 - 7)) as i32;
    // Add the hidden bit to the mantissa.
    // 1 <= m_x < 2
    let m_x = f32::from_bits(x_mant | 0x3f800000);
    // Reduced argument for log2(m_x):
    //   dx = r * m_x - 1.
    // The computation is exact, and -2^-8 <= dx < 2^-7.
    // Then m_x = (1 + dx) / r, and
    //   log2(m_x) = log2( (1 + dx) / r )
    //             = log2(1 + dx) - log2(r).
    let dx;
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        // Hardware FMA: the f32 reduction is exact.
        use crate::logs::LOG_REDUCTION_F32;
        dx = f_fmlaf(
            m_x,
            f32::from_bits(LOG_REDUCTION_F32.0[idx_x as usize]),
            -1.0,
        ) as f64; // Exact.
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        // No hardware FMA: do the reduction in f64 instead.
        use crate::logs::LOG_RANGE_REDUCTION;
        dx = f_fmla(
            m_x as f64,
            f64::from_bits(LOG_RANGE_REDUCTION[idx_x as usize]),
            -1.0,
        ); // Exact
    }
    // Degree-5 polynomial approximation:
    //   dx * P(dx) ~ log2(1 + dx)
    // Generated by Sollya with:
    // > P = fpminimax(log2(1 + x)/x, 5, [|D...|], [-2^-8, 2^-7]);
    // > dirtyinfnorm(log2(1 + x)/x - P, [-2^-8, 2^-7]);
    // 0x1.653...p-52
    const COEFFS: [u64; 6] = [
        0x3ff71547652b82fe,
        0xbfe71547652b7a07,
        0x3fdec709dc458db1,
        0xbfd715479c2266c9,
        0x3fd2776ae1ddf8f0,
        0xbfce7b2178870157,
    ];
    let dx2 = dx * dx; // Exact
    let c0 = f_fmla(dx, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0]));
    let c1 = f_fmla(dx, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2]));
    let c2 = f_fmla(dx, f64::from_bits(COEFFS[5]), f64::from_bits(COEFFS[4]));
    let p = f_polyeval3(dx2, c0, c1, c2);
    // s = e_x - log2(r) + dx * P(dx)
    // Approximation errors:
    //   |log2(x) - s| < ulp(e_x) + (bounds on dx) * (error bounds of P(dx))
    //                 = ulp(e_x) + 2^-7 * 2^-51
    //                 < 2^8 * 2^-52 + 2^-7 * 2^-43
    //                 ~ 2^-44 + 2^-50
    let s = f_fmla(dx, p, f64::from_bits(LOG2_R[idx_x as usize]) + e_x);
    // To compute 2^(y * log2(x)), we break the exponent into 3 parts:
    //   y * log(2) = hi + mid + lo, where
    //   hi is an integer
    //   mid * 2^6 is an integer
    //   |lo| <= 2^-7
    // Then:
    //   x^y = 2^(y * log2(x)) = 2^hi * 2^mid * 2^lo,
    // In which 2^mid is obtained from a look-up table of size 2^6 = 64 elements,
    // and 2^lo ~ 1 + lo * P(lo).
    // Thus, we have:
    //   hi + mid = 2^-6 * round( 2^6 * y * log2(x) )
    // If we restrict the output such that |hi| < 150, (hi + mid) uses (8 + 6)
    // bits, hence, if we use double precision to perform
    //   round( 2^6 * y * log2(x))
    // the lo part is bounded by 2^-7 + 2^(-(52 - 14)) = 2^-7 + 2^-38
    // In the following computations:
    //   y6 = 2^6 * y
    //   hm = 2^6 * (hi + mid) = round(2^6 * y * log2(x)) ~ round(y6 * s)
    //   lo6 = 2^6 * lo = 2^6 * (y - (hi + mid)) = y6 * log2(x) - hm.
    let y6 = (y * f32::from_bits(0x42800000)) as f64; // Exact.
    let hm = (s * y6).round_finite();
    // lo6 = 2^6 * lo.
    let lo6_hi = f_fmla(y6, e_x + f64::from_bits(LOG2_R_TD[idx_x as usize].2), -hm); // Exact
    // Error bounds:
    //   | (y*log2(x) - hm * 2^-6 - lo) / y| < err(dx * p) + err(LOG2_R_DD.lo)
    //                                       < 2^-51 + 2^-75
    let lo6 = f_fmla(
        y6,
        f_fmla(dx, p, f64::from_bits(LOG2_R_TD[idx_x as usize].1)),
        lo6_hi,
    );
    // |2^(hi + mid) - exp2_hi_mid| <= ulp(exp2_hi_mid) / 2
    // Clamp the exponent part into smaller range that fits double precision.
    // For those exponents that are out of range, the final conversion will round
    // them correctly to inf/max float or 0/min float accordingly.
    // SAFETY: `hm` is finite (product of finite `s` and `y6`, then rounded)
    // and, since huge |y| was capped at 0x4f800000 above, |s * y6| stays far
    // below i64::MAX, so the f64 -> i64 conversion is in range.
    let mut hm_i = unsafe { hm.to_int_unchecked::<i64>() };
    hm_i = if hm_i > (1i64 << 15) {
        1 << 15
    } else if hm_i < (-(1i64 << 15)) {
        -(1 << 15)
    } else {
        hm_i
    };
    let idx_y = hm_i & 0x3f;
    // 2^hi
    let exp_hi_i = (hm_i >> 6).wrapping_shl(52);
    // 2^mid
    let exp_mid_i = EXP2_MID1[idx_y as usize].1;
    // (-1)^sign * 2^hi * 2^mid
    // Error <= 2^hi * 2^-53
    let exp2_hi_mid_i = (exp_hi_i.wrapping_add(exp_mid_i as i64) as u64).wrapping_add(sign);
    let exp2_hi_mid = f64::from_bits(exp2_hi_mid_i);
    // Degree-5 polynomial approximation P(lo6) ~ 2^(lo6 / 2^6) = 2^(lo).
    // Generated by Sollya with:
    // > P = fpminimax(2^(x/64), 5, [|1, D...|], [-2^-1, 2^-1]);
    // > dirtyinfnorm(2^(x/64) - P, [-0.5, 0.5]);
    // 0x1.a2b77e618f5c4c176fd11b7659016cde5de83cb72p-60
    const EXP2_COEFFS: [u64; 6] = [
        0x3ff0000000000000,
        0x3f862e42fefa39ef,
        0x3f0ebfbdff82a23a,
        0x3e8c6b08d7076268,
        0x3e03b2ad33f8b48b,
        0x3d75d870c4d84445,
    ];
    let lo6_sqr = lo6 * lo6;
    let d0 = f_fmla(
        lo6,
        f64::from_bits(EXP2_COEFFS[1]),
        f64::from_bits(EXP2_COEFFS[0]),
    );
    let d1 = f_fmla(
        lo6,
        f64::from_bits(EXP2_COEFFS[3]),
        f64::from_bits(EXP2_COEFFS[2]),
    );
    let d2 = f_fmla(
        lo6,
        f64::from_bits(EXP2_COEFFS[5]),
        f64::from_bits(EXP2_COEFFS[4]),
    );
    let pp = f_polyeval3(lo6_sqr, d0, d1, d2);
    let r = pp * exp2_hi_mid;
    let r_u = r.to_bits();
    // Rounding test (Ziv-style): perturb the double result by the error bound
    // in each direction; if both narrow to the same f32, that f32 is correct.
    // With hardware FMA the fast path is tighter, so the bound is smaller.
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    const ERR: u64 = 64;
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    const ERR: u64 = 128;
    let r_upper = f64::from_bits(r_u + ERR) as f32;
    let r_lower = f64::from_bits(r_u - ERR) as f32;
    if r_upper == r_lower {
        return r_upper;
    }
    // Hard case: rebuild 2^(hi + mid) with its low part and take the
    // double-double slow path.
    let exp2_hi_mid_dd = DoubleDouble {
        lo: if idx_y != 0 {
            f64::from_bits((exp_hi_i as u64).wrapping_add(EXP2_MID1[idx_y as usize].0))
        } else {
            0.
        },
        hi: exp2_hi_mid,
    };
    let r_dd = powf_dd(idx_x, dx, y6, lo6_hi, exp2_hi_mid_dd);
    r_dd as f32
}
/// Dirty fast pow
///
/// Fast approximate `d^n` built from the crate's low-accuracy log2/exp2
/// helpers; a negative base is handled via the parity of `n` truncated to an
/// integer.
#[inline]
pub fn dirty_powf(d: f32, n: f32) -> f32 {
    use crate::exponents::dirty_exp2f;
    use crate::logs::dirty_log2f;
    // |d|^n = 2^(n * log2(|d|))
    let magnitude = dirty_exp2f(n * dirty_log2f(d.abs()));
    // Flip the sign only for a negative base raised to an odd (truncated)
    // exponent; everything else keeps the positive magnitude.
    let negate = d < 0.0 && (n as i32) % 2 != 0;
    if negate { -magnitude } else { magnitude }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn powf_test() {
        let cube = powf(2f32, 3f32);
        assert!((cube - 8f32).abs() < 1e-6, "Invalid result {}", cube);
        let square = powf(0.5f32, 2f32);
        assert!((square - 0.25f32).abs() < 1e-6, "Invalid result {}", square);
    }

    #[test]
    fn f_powf_test() {
        let cube = f_powf(2f32, 3f32);
        assert!((cube - 8f32).abs() < 1e-6, "Invalid result {}", cube);
        let square = f_powf(0.5f32, 2f32);
        assert!((square - 0.25f32).abs() < 1e-6, "Invalid result {}", square);
        // Known-good correctly-rounded value.
        assert_eq!(f_powf(0.5f32, 1.5432f32), 0.34312353);
        // pow(inf, tiny positive) = inf.
        assert_eq!(
            f_powf(f32::INFINITY, 0.00000000000000000000000000000000038518824),
            f32::INFINITY
        );
        // IEEE special cases: pow(NaN, 0) = 1 and pow(1, NaN) = 1.
        assert_eq!(f_powf(f32::NAN, 0.0), 1.);
        assert_eq!(f_powf(1., f32::NAN), 1.);
    }

    #[test]
    fn dirty_powf_test() {
        let cube = dirty_powf(2f32, 3f32);
        assert!((cube - 8f32).abs() < 1e-6, "Invalid result {}", cube);
        let square = dirty_powf(0.5f32, 2f32);
        assert!((square - 0.25f32).abs() < 1e-6, "Invalid result {}", square);
    }
}