/* * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::bits::biased_exponent_f64; use crate::common::*; use crate::double_double::DoubleDouble; use crate::exponents::expf; use crate::logf; use crate::logs::LOG2_R; use crate::polyeval::{f_polyeval3, f_polyeval6, f_polyeval10}; use crate::pow_tables::EXP2_MID1; use crate::powf_tables::{LOG2_R_TD, LOG2_R2_DD, POWF_R2}; use crate::round::RoundFinite; /// Power function for given value for const context. /// This is simplified version just to make a good approximation on const context. #[inline] pub const fn powf(d: f32, n: f32) -> f32 { let value = d.abs(); let c = expf(n * logf(value)); if n == 1. { return d; } if d < 0.0 { let y = n as i32; if y % 2 == 0 { c } else { -c } } else { c } } #[inline] const fn larger_exponent(a: f64, b: f64) -> bool { biased_exponent_f64(a) >= biased_exponent_f64(b) } // Calculate 2^(y * log2(x)) in double-double precision. // At this point we can reuse the following values: // idx_x: index for extra precision of log2 for the middle part of log2(x). // dx: the reduced argument for log2(x) // y6: 2^6 * y. // lo6_hi: the high part of 2^6 * (y - (hi + mid)) // exp2_hi_mid: high part of 2^(hi + mid) #[cold] #[inline(never)] fn powf_dd(idx_x: i32, dx: f64, y6: f64, lo6_hi: f64, exp2_hi_mid: DoubleDouble) -> f64 { // Perform a second range reduction step: // idx2 = round(2^14 * (dx + 2^-8)) = round ( dx * 2^14 + 2^6) // dx2 = (1 + dx) * r2 - 1 // Output range: // -0x1.3ffcp-15 <= dx2 <= 0x1.3e3dp-15 let idx2 = f_fmla( dx, f64::from_bits(0x40d0000000000000), f64::from_bits(0x4050000000000000), ) .round_finite() as usize; let dx2 = f_fmla(1.0 + dx, f64::from_bits(POWF_R2[idx2]), -1.0); // Exact const COEFFS: [(u64, u64); 6] = [ (0x3c7777d0ffda25e0, 0x3ff71547652b82fe), (0xbc6777d101cf0a84, 0xbfe71547652b82fe), (0x3c7ce04b5140d867, 0x3fdec709dc3a03fd), (0x3c7137b47e635be5, 0xbfd71547652b82fb), (0xbc5b5a30b3bdb318, 0x3fd2776c516a92a2), (0x3c62d2fbd081e657, 0xbfcec70af1929ca6), ]; let dx_dd = DoubleDouble::new(0.0, dx2); let p = f_polyeval6( dx_dd, DoubleDouble::from_bit_pair(COEFFS[0]), DoubleDouble::from_bit_pair(COEFFS[1]), DoubleDouble::from_bit_pair(COEFFS[2]), DoubleDouble::from_bit_pair(COEFFS[3]), DoubleDouble::from_bit_pair(COEFFS[4]), DoubleDouble::from_bit_pair(COEFFS[5]), ); // log2(1 + dx2) ~ dx2 * P(dx2) let log2_x_lo = DoubleDouble::quick_mult_f64(p, dx2); // Lower parts of (e_x - log2(r1)) of the first range reduction constant let log2_r_td = LOG2_R_TD[idx_x as usize]; let log2_x_mid = DoubleDouble::new(f64::from_bits(log2_r_td.0), f64::from_bits(log2_r_td.1)); // -log2(r2) + lower part of (e_x - log2(r1)) let log2_x_m = DoubleDouble::add(DoubleDouble::from_bit_pair(LOG2_R2_DD[idx2]), log2_x_mid); // log2(1 + dx2) - log2(r2) + lower part of (e_x - log2(r1)) // Since we don't know which one has larger exponent to apply Fast2Sum // algorithm, we need to check them before calling double-double addition. let log2_x = if larger_exponent(log2_x_m.hi, log2_x_lo.hi) { DoubleDouble::add(log2_x_m, log2_x_lo) } else { DoubleDouble::add(log2_x_lo, log2_x_m) }; let lo6_hi_dd = DoubleDouble::new(0.0, lo6_hi); // 2^6 * y * (log2(1 + dx2) - log2(r2) + lower part of (e_x - log2(r1))) let prod = DoubleDouble::quick_mult_f64(log2_x, y6); // 2^6 * (y * log2(x) - (hi + mid)) = 2^6 * lo let lo6 = if larger_exponent(prod.hi, lo6_hi) { DoubleDouble::add(prod, lo6_hi_dd) } else { DoubleDouble::add(lo6_hi_dd, prod) }; const EXP2_COEFFS: [(u64, u64); 10] = [ (0x0000000000000000, 0x3ff0000000000000), (0x3c1abc9e3b398024, 0x3f862e42fefa39ef), (0xbba5e43a5429bddb, 0x3f0ebfbdff82c58f), (0xbb2d33162491268f, 0x3e8c6b08d704a0c0), (0x3a94fb32d240a14e, 0x3e03b2ab6fba4e77), (0x39ee84e916be83e0, 0x3d75d87fe78a6731), (0xb989a447bfddc5e6, 0x3ce430912f86bfb8), (0xb8e31a55719de47f, 0x3c4ffcbfc588ded9), (0xb850ba57164eb36b, 0x3bb62c034beb8339), (0xb7b8483eabd9642d, 0x3b1b5251ff97bee1), ]; let pp = f_polyeval10( lo6, DoubleDouble::from_bit_pair(EXP2_COEFFS[0]), DoubleDouble::from_bit_pair(EXP2_COEFFS[1]), DoubleDouble::from_bit_pair(EXP2_COEFFS[2]), DoubleDouble::from_bit_pair(EXP2_COEFFS[3]), DoubleDouble::from_bit_pair(EXP2_COEFFS[4]), DoubleDouble::from_bit_pair(EXP2_COEFFS[5]), DoubleDouble::from_bit_pair(EXP2_COEFFS[6]), DoubleDouble::from_bit_pair(EXP2_COEFFS[7]), DoubleDouble::from_bit_pair(EXP2_COEFFS[8]), DoubleDouble::from_bit_pair(EXP2_COEFFS[9]), ); let rr = DoubleDouble::quick_mult(exp2_hi_mid, pp); // Make sure the sum is normalized: let r = DoubleDouble::from_exact_add(rr.hi, rr.lo); const FRACTION_MASK: u64 = (1u64 << 52) - 1; let mut r_bits = r.hi.to_bits(); if ((r_bits & 0xfff_ffff) == 0) && (r.lo != 0.0) { let hi_sign = r.hi.to_bits() >> 63; let lo_sign = r.lo.to_bits() >> 63; if hi_sign == lo_sign { r_bits = r_bits.wrapping_add(1); } else if (r_bits & FRACTION_MASK) > 0 { r_bits = r_bits.wrapping_sub(1); } } f64::from_bits(r_bits) } /// Power function /// /// Max found ULP 0.5 #[inline] pub fn f_powf(x: f32, y: f32) -> f32 { let mut x_u = x.to_bits(); let x_abs = x_u & 0x7fff_ffff; let mut y = y; let y_u = y.to_bits(); let y_abs = y_u & 0x7fff_ffff; let mut x = x; if ((y_abs & 0x0007_ffff) == 0) || (y_abs > 0x4f170000) { // y is signaling NaN if x.is_nan() || y.is_nan() { if y.abs() == 0. { return 1.; } if x == 1. { return 1.; } return f32::NAN; } // Exceptional exponents. if y == 0.0 { return 1.0; } match y_abs { 0x7f80_0000 => { if x_abs > 0x7f80_0000 { // pow(NaN, +-Inf) = NaN return x; } if x_abs == 0x3f80_0000 { // pow(+-1, +-Inf) = 1.0f return 1.0; } if x == 0.0 && y_u == 0xff80_0000 { // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO return f32::INFINITY; } // pow (|x| < 1, -inf) = +inf // pow (|x| < 1, +inf) = 0.0f // pow (|x| > 1, -inf) = 0.0f // pow (|x| > 1, +inf) = +inf return if (x_abs < 0x3f80_0000) == (y_u == 0xff80_0000) { f32::INFINITY } else { 0. }; } _ => { match y_u { 0x3f00_0000 => { // pow(x, 1/2) = sqrt(x) if x == 0.0 || x_u == 0xff80_0000 { // pow(-0, 1/2) = +0 // pow(-inf, 1/2) = +inf // Make sure it is correct for FTZ/DAZ. return x * x; } let r = x.sqrt(); return if r.to_bits() != 0x8000_0000 { r } else { 0.0 }; } 0x3f80_0000 => { return x; } // y = 1.0f 0x4000_0000 => return x * x, // y = 2.0f _ => { let is_int = is_integerf(y); if is_int && (y_u > 0x4000_0000) && (y_u <= 0x41c0_0000) { // Check for exact cases when 2 < y < 25 and y is an integer. let mut msb: i32 = if x_abs == 0 { 32 - 2 } else { x_abs.leading_zeros() as i32 }; msb = if msb > 8 { msb } else { 8 }; let mut lsb: i32 = if x_abs == 0 { 0 } else { x_abs.trailing_zeros() as i32 }; lsb = if lsb > 23 { 23 } else { lsb }; let extra_bits: i32 = 32 - 2 - lsb - msb; let iter = y as i32; if extra_bits * iter <= 23 + 2 { // The result is either exact or exactly half-way. // But it is exactly representable in double precision. let x_d = x as f64; let mut result = x_d; for _ in 1..iter { result *= x_d; } return result as f32; } } if y_abs > 0x4f17_0000 { // if y is NaN if y_abs > 0x7f80_0000 { if x_u == 0x3f80_0000 { // x = 1.0f // pow(1, NaN) = 1 return 1.0; } // pow(x, NaN) = NaN return y; } // x^y will be overflow / underflow in single precision. Set y to a // large enough exponent but not too large, so that the computations // won't be overflow in double precision. y = f32::from_bits((y_u & 0x8000_0000).wrapping_add(0x4f800000u32)); } } } } } } const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32; let mut ex = -(E_BIAS as i32); let mut sign: u64 = 0; if ((x_u & 0x801f_ffffu32) == 0) || x_u >= 0x7f80_0000u32 || x_u < 0x0080_0000u32 { if x.is_nan() { return f32::NAN; } if x_u == 0x3f80_0000 { return 1.; } let x_is_neg = x.to_bits() > 0x8000_0000; if x == 0.0 { let out_is_neg = x_is_neg && is_odd_integerf(f32::from_bits(y_u)); if y_u > 0x8000_0000u32 { // pow(0, negative number) = inf return if x_is_neg { f32::NEG_INFINITY } else { f32::INFINITY }; } // pow(0, positive number) = 0 return if out_is_neg { -0.0 } else { 0.0 }; } if x_abs == 0x7f80_0000u32 { // x = +-Inf let out_is_neg = x_is_neg && is_odd_integerf(f32::from_bits(y_u)); if y_u >= 0x7fff_ffff { return if out_is_neg { -0.0 } else { 0.0 }; } return if out_is_neg { f32::NEG_INFINITY } else { f32::INFINITY }; } if x_abs > 0x7f80_0000 { // x is NaN. // pow (aNaN, 0) is already taken care above. return x; } // Normalize denormal inputs. if x_abs < 0x0080_0000u32 { ex = ex.wrapping_sub(64); x *= f32::from_bits(0x5f800000); } // x is finite and negative, and y is a finite integer. if x.is_sign_negative() { if is_integerf(y) { x = -x; if is_odd_integerf(y) { sign = 0x8000_0000_0000_0000u64; } } else { // pow( negative, non-integer ) = NaN return f32::NAN; } } } // x^y = 2^( y * log2(x) ) // = 2^( y * ( e_x + log2(m_x) ) ) // First we compute log2(x) = e_x + log2(m_x) x_u = x.to_bits(); // Extract exponent field of x. ex = ex.wrapping_add((x_u >> 23) as i32); let e_x = ex as f64; // Use the highest 7 fractional bits of m_x as the index for look up tables. let x_mant = x_u & ((1u32 << 23) - 1); let idx_x = (x_mant >> (23 - 7)) as i32; // Add the hidden bit to the mantissa. // 1 <= m_x < 2 let m_x = f32::from_bits(x_mant | 0x3f800000); // Reduced argument for log2(m_x): // dx = r * m_x - 1. // The computation is exact, and -2^-8 <= dx < 2^-7. // Then m_x = (1 + dx) / r, and // log2(m_x) = log2( (1 + dx) / r ) // = log2(1 + dx) - log2(r). let dx; #[cfg(any( all( any(target_arch = "x86", target_arch = "x86_64"), target_feature = "fma" ), all(target_arch = "aarch64", target_feature = "neon") ))] { use crate::logs::LOG_REDUCTION_F32; dx = f_fmlaf( m_x, f32::from_bits(LOG_REDUCTION_F32.0[idx_x as usize]), -1.0, ) as f64; // Exact. } #[cfg(not(any( all( any(target_arch = "x86", target_arch = "x86_64"), target_feature = "fma" ), all(target_arch = "aarch64", target_feature = "neon") )))] { use crate::logs::LOG_RANGE_REDUCTION; dx = f_fmla( m_x as f64, f64::from_bits(LOG_RANGE_REDUCTION[idx_x as usize]), -1.0, ); // Exact } // Degree-5 polynomial approximation: // dx * P(dx) ~ log2(1 + dx) // Generated by Sollya with: // > P = fpminimax(log2(1 + x)/x, 5, [|D...|], [-2^-8, 2^-7]); // > dirtyinfnorm(log2(1 + x)/x - P, [-2^-8, 2^-7]); // 0x1.653...p-52 const COEFFS: [u64; 6] = [ 0x3ff71547652b82fe, 0xbfe71547652b7a07, 0x3fdec709dc458db1, 0xbfd715479c2266c9, 0x3fd2776ae1ddf8f0, 0xbfce7b2178870157, ]; let dx2 = dx * dx; // Exact let c0 = f_fmla(dx, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0])); let c1 = f_fmla(dx, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2])); let c2 = f_fmla(dx, f64::from_bits(COEFFS[5]), f64::from_bits(COEFFS[4])); let p = f_polyeval3(dx2, c0, c1, c2); // s = e_x - log2(r) + dx * P(dx) // Approximation errors: // |log2(x) - s| < ulp(e_x) + (bounds on dx) * (error bounds of P(dx)) // = ulp(e_x) + 2^-7 * 2^-51 // < 2^8 * 2^-52 + 2^-7 * 2^-43 // ~ 2^-44 + 2^-50 let s = f_fmla(dx, p, f64::from_bits(LOG2_R[idx_x as usize]) + e_x); // To compute 2^(y * log2(x)), we break the exponent into 3 parts: // y * log(2) = hi + mid + lo, where // hi is an integer // mid * 2^6 is an integer // |lo| <= 2^-7 // Then: // x^y = 2^(y * log2(x)) = 2^hi * 2^mid * 2^lo, // In which 2^mid is obtained from a look-up table of size 2^6 = 64 elements, // and 2^lo ~ 1 + lo * P(lo). // Thus, we have: // hi + mid = 2^-6 * round( 2^6 * y * log2(x) ) // If we restrict the output such that |hi| < 150, (hi + mid) uses (8 + 6) // bits, hence, if we use double precision to perform // round( 2^6 * y * log2(x)) // the lo part is bounded by 2^-7 + 2^(-(52 - 14)) = 2^-7 + 2^-38 // In the following computations: // y6 = 2^6 * y // hm = 2^6 * (hi + mid) = round(2^6 * y * log2(x)) ~ round(y6 * s) // lo6 = 2^6 * lo = 2^6 * (y - (hi + mid)) = y6 * log2(x) - hm. let y6 = (y * f32::from_bits(0x42800000)) as f64; // Exact. let hm = (s * y6).round_finite(); // let log2_rr = LOG2_R2_DD[idx_x as usize]; // // lo6 = 2^6 * lo. // let lo6_hi = f_fmla(y6, e_x + f64::from_bits(log2_rr.1), -hm); // Exact // // Error bounds: // // | (y*log2(x) - hm * 2^-6 - lo) / y| < err(dx * p) + err(LOG2_R_DD.lo) // // < 2^-51 + 2^-75 // let lo6 = f_fmla(y6, f_fmla(dx, p, f64::from_bits(log2_rr.0)), lo6_hi); // lo6 = 2^6 * lo. let lo6_hi = f_fmla(y6, e_x + f64::from_bits(LOG2_R_TD[idx_x as usize].2), -hm); // Exact // Error bounds: // | (y*log2(x) - hm * 2^-6 - lo) / y| < err(dx * p) + err(LOG2_R_DD.lo) // < 2^-51 + 2^-75 let lo6 = f_fmla( y6, f_fmla(dx, p, f64::from_bits(LOG2_R_TD[idx_x as usize].1)), lo6_hi, ); // |2^(hi + mid) - exp2_hi_mid| <= ulp(exp2_hi_mid) / 2 // Clamp the exponent part into smaller range that fits double precision. // For those exponents that are out of range, the final conversion will round // them correctly to inf/max float or 0/min float accordingly. let mut hm_i = unsafe { hm.to_int_unchecked::() }; hm_i = if hm_i > (1i64 << 15) { 1 << 15 } else if hm_i < (-(1i64 << 15)) { -(1 << 15) } else { hm_i }; let idx_y = hm_i & 0x3f; // 2^hi let exp_hi_i = (hm_i >> 6).wrapping_shl(52); // 2^mid let exp_mid_i = EXP2_MID1[idx_y as usize].1; // (-1)^sign * 2^hi * 2^mid // Error <= 2^hi * 2^-53 let exp2_hi_mid_i = (exp_hi_i.wrapping_add(exp_mid_i as i64) as u64).wrapping_add(sign); let exp2_hi_mid = f64::from_bits(exp2_hi_mid_i); // Degree-5 polynomial approximation P(lo6) ~ 2^(lo6 / 2^6) = 2^(lo). // Generated by Sollya with: // > P = fpminimax(2^(x/64), 5, [|1, D...|], [-2^-1, 2^-1]); // > dirtyinfnorm(2^(x/64) - P, [-0.5, 0.5]); // 0x1.a2b77e618f5c4c176fd11b7659016cde5de83cb72p-60 const EXP2_COEFFS: [u64; 6] = [ 0x3ff0000000000000, 0x3f862e42fefa39ef, 0x3f0ebfbdff82a23a, 0x3e8c6b08d7076268, 0x3e03b2ad33f8b48b, 0x3d75d870c4d84445, ]; let lo6_sqr = lo6 * lo6; let d0 = f_fmla( lo6, f64::from_bits(EXP2_COEFFS[1]), f64::from_bits(EXP2_COEFFS[0]), ); let d1 = f_fmla( lo6, f64::from_bits(EXP2_COEFFS[3]), f64::from_bits(EXP2_COEFFS[2]), ); let d2 = f_fmla( lo6, f64::from_bits(EXP2_COEFFS[5]), f64::from_bits(EXP2_COEFFS[4]), ); let pp = f_polyeval3(lo6_sqr, d0, d1, d2); let r = pp * exp2_hi_mid; let r_u = r.to_bits(); #[cfg(any( all( any(target_arch = "x86", target_arch = "x86_64"), target_feature = "fma" ), all(target_arch = "aarch64", target_feature = "neon") ))] const ERR: u64 = 64; #[cfg(not(any( all( any(target_arch = "x86", target_arch = "x86_64"), target_feature = "fma" ), all(target_arch = "aarch64", target_feature = "neon") )))] const ERR: u64 = 128; let r_upper = f64::from_bits(r_u + ERR) as f32; let r_lower = f64::from_bits(r_u - ERR) as f32; if r_upper == r_lower { return r_upper; } // Scale lower part of 2^(hi + mid) let exp2_hi_mid_dd = DoubleDouble { lo: if idx_y != 0 { f64::from_bits((exp_hi_i as u64).wrapping_add(EXP2_MID1[idx_y as usize].0)) } else { 0. }, hi: exp2_hi_mid, }; let r_dd = powf_dd(idx_x, dx, y6, lo6_hi, exp2_hi_mid_dd); r_dd as f32 } /// Dirty fast pow #[inline] pub fn dirty_powf(d: f32, n: f32) -> f32 { use crate::exponents::dirty_exp2f; use crate::logs::dirty_log2f; let value = d.abs(); let lg = dirty_log2f(value); let c = dirty_exp2f(n * lg); if d < 0.0 { let y = n as i32; if y % 2 == 0 { c } else { -c } } else { c } } #[cfg(test)] mod tests { use super::*; #[test] fn powf_test() { assert!( (powf(2f32, 3f32) - 8f32).abs() < 1e-6, "Invalid result {}", powf(2f32, 3f32) ); assert!( (powf(0.5f32, 2f32) - 0.25f32).abs() < 1e-6, "Invalid result {}", powf(0.5f32, 2f32) ); } #[test] fn f_powf_test() { assert!( (f_powf(2f32, 3f32) - 8f32).abs() < 1e-6, "Invalid result {}", f_powf(2f32, 3f32) ); assert!( (f_powf(0.5f32, 2f32) - 0.25f32).abs() < 1e-6, "Invalid result {}", f_powf(0.5f32, 2f32) ); assert_eq!(f_powf(0.5f32, 1.5432f32), 0.34312353); assert_eq!( f_powf(f32::INFINITY, 0.00000000000000000000000000000000038518824), f32::INFINITY ); assert_eq!(f_powf(f32::NAN, 0.0), 1.); assert_eq!(f_powf(1., f32::NAN), 1.); } #[test] fn dirty_powf_test() { assert!( (dirty_powf(2f32, 3f32) - 8f32).abs() < 1e-6, "Invalid result {}", dirty_powf(2f32, 3f32) ); assert!( (dirty_powf(0.5f32, 2f32) - 0.25f32).abs() < 1e-6, "Invalid result {}", dirty_powf(0.5f32, 2f32) ); } }