/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::common::{dd_fmla, dyad_fmla, f_fmla, is_odd_integer}; use crate::double_double::DoubleDouble; use crate::polyeval::{f_polyeval3, f_polyeval4}; use crate::round::RoundFinite; use crate::sin::SinCos; use crate::sincospi_tables::SINPI_K_PI_OVER_64; /** Cospi(x) on [0; 0.000244140625] Generated by Sollya: ```text d = [0, 0.000244140625]; f_cos = cos(y*pi); Q = fpminimax(f_cos, [|0, 2, 4, 6, 8, 10|], [|107...|], d, relative, floating); ``` See ./notes/cospi_zero_dd.sollya **/ #[cold] fn as_cospi_zero(x: f64) -> f64 { const C: [(u64, u64); 5] = [ (0xbcb692b71366cc04, 0xc013bd3cc9be45de), (0xbcb32b33fb803bd5, 0x40103c1f081b5ac4), (0xbc9f5b752e98b088, 0xbff55d3c7e3cbff9), (0x3c30023d540b9350, 0x3fce1f506446cb66), (0x3c1a5d47937787d2, 0xbf8a9b062a36ba1c), ]; let x2 = DoubleDouble::from_exact_mult(x, x); let mut p = DoubleDouble::quick_mul_add( x2, DoubleDouble::from_bit_pair(C[3]), DoubleDouble::from_bit_pair(C[3]), ); p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2])); p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1])); p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0])); p = DoubleDouble::mul_add_f64(x2, p, 1.); p.to_f64() } /** Sinpi on range [0.0, 0.03515625] Generated poly by Sollya: ```text d = [0, 0.03515625]; f_sin = sin(y*pi)/y; Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107...|], d, relative, floating); ``` See ./notes/sinpi_zero_dd.sollya **/ #[cold] fn as_sinpi_zero(x: f64) -> f64 { const C: [(u64, u64); 6] = [ (0x3ca1a626311d9056, 0x400921fb54442d18), (0x3cb055f12c462211, 0xc014abbce625be53), (0xbc9789ea63534250, 0x400466bc6775aae1), (0xbc78b86de6962184, 0xbfe32d2cce62874e), (0x3c4eddf7cd887302, 0x3fb507833e2b781f), (0x3bf180c9d4af2894, 0xbf7e2ea4e143707e), ]; let x2 = DoubleDouble::from_exact_mult(x, x); let mut p = DoubleDouble::quick_mul_add( x2, DoubleDouble::from_bit_pair(C[5]), DoubleDouble::from_bit_pair(C[4]), ); p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3])); p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2])); p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1])); p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0])); p = DoubleDouble::quick_mult_f64(p, x); p.to_f64() } // Return k and y, where // k = round(x * 64 / pi) and y = (x * 64 / pi) - k. #[inline] pub(crate) fn reduce_pi_64(x: f64) -> (f64, i64) { let kd = (x * 64.).round_finite(); let y = dd_fmla(kd, -1. / 64., x); (y, unsafe { kd.to_int_unchecked::() // indeterminate values is always filtered out before this call, as well only lowest bits are used }) } #[inline] pub(crate) fn sincospi_eval(x: f64) -> SinCos { let x2 = x * x; /* sinpi(pi*x) poly generated by Sollya: d = [0, 0.0078128]; f_sin = sin(y*pi)/y; Q = fpminimax(f_sin, [|0, 2, 4, 6|], [|107, D...|], d, relative, floating); See ./notes/sinpi.sollya */ let sin_lop = f_polyeval3( x2, f64::from_bits(0xc014abbce625be4d), f64::from_bits(0x400466bc6767f259), f64::from_bits(0xbfe32d176b0b3baf), ) * x2; // We're splitting polynomial in two parts, since first term dominates // we compute: (a0_lo + a0_hi) * x + x * (a1 * x^2 + a2 + x^4) ... let sin_lo = dd_fmla(f64::from_bits(0x3ca1a5c04563817a), x, sin_lop * x); let sin_hi = x * f64::from_bits(0x400921fb54442d18); /* cospi(pi*x) poly generated by Sollya: d = [0, 0.015625]; f_cos = cos(y*pi); Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, D...|], d, relative, floating); See ./notes/cospi.sollya */ let p = f_polyeval3( x2, f64::from_bits(0xc013bd3cc9be45cf), f64::from_bits(0x40103c1f08085ad1), f64::from_bits(0xbff55d1e43463fc3), ); // We're splitting polynomial in two parts, since first term dominates // we compute: (a0_lo + a0_hi) + (a1 * x^2 + a2 + x^4)... let cos_lo = dd_fmla(p, x2, f64::from_bits(0xbbdf72adefec0800)); let cos_hi = f64::from_bits(0x3ff0000000000000); let err = f_fmla( x2, f64::from_bits(0x3cb0000000000000), // 2^-52 f64::from_bits(0x3c40000000000000), // 2^-59 ); SinCos { v_sin: DoubleDouble::from_exact_add(sin_hi, sin_lo), v_cos: DoubleDouble::from_exact_add(cos_hi, cos_lo), err, } } #[inline] pub(crate) fn sincospi_eval_dd(x: f64) -> SinCos { let x2 = DoubleDouble::from_exact_mult(x, x); // Sin coeffs // poly sin(pi*x) generated by Sollya: // d = [0, 0.0078128]; // f_sin = sin(y*pi)/y; // Q = fpminimax(f_sin, [|0, 2, 4, 6, 8|], [|107...|], d, relative, floating); // see ./notes/sinpi_dd.sollya const SC: [(u64, u64); 5] = [ (0x3ca1a626330ccf19, 0x400921fb54442d18), (0x3cb05540f6323de9, 0xc014abbce625be53), (0xbc9050fdd1229756, 0x400466bc6775aadf), (0xbc780d406f3472e8, 0xbfe32d2cce5a7bf1), (0x3c4cfcf8b6b817f2, 0x3fb5077069d8a182), ]; let mut sin_y = DoubleDouble::quick_mul_add( x2, DoubleDouble::from_bit_pair(SC[4]), DoubleDouble::from_bit_pair(SC[3]), ); sin_y = DoubleDouble::quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[2])); sin_y = DoubleDouble::quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[1])); sin_y = DoubleDouble::quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[0])); sin_y = DoubleDouble::quick_mult_f64(sin_y, x); // Cos coeffs // d = [0, 0.0078128]; // f_cos = cos(y*pi); // Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107...|], d, relative, floating); // See ./notes/cospi_dd.sollya const CC: [(u64, u64); 5] = [ (0xbaaa70a580000000, 0x3ff0000000000000), (0xbcb69211d8dd1237, 0xc013bd3cc9be45de), (0xbcbd96cfd637eeb7, 0x40103c1f081b5abf), (0x3c994d75c577f029, 0xbff55d3c7e2e4ba5), (0xbc5c542d998a4e48, 0x3fce1f2f5f747411), ]; let mut cos_y = DoubleDouble::quick_mul_add( x2, DoubleDouble::from_bit_pair(CC[4]), DoubleDouble::from_bit_pair(CC[3]), ); cos_y = DoubleDouble::quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[2])); cos_y = DoubleDouble::quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[1])); cos_y = DoubleDouble::quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[0])); SinCos { v_sin: sin_y, v_cos: cos_y, err: 0., } } #[cold] fn sinpi_dd(x: f64, sin_k: DoubleDouble, cos_k: DoubleDouble) -> f64 { let r_sincos = sincospi_eval_dd(x); let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin); let rr = DoubleDouble::mul_add(sin_k, r_sincos.v_cos, cos_k_sin_y); rr.to_f64() } #[cold] fn sincospi_dd( x: f64, sin_sin_k: DoubleDouble, sin_cos_k: DoubleDouble, cos_sin_k: DoubleDouble, cos_cos_k: DoubleDouble, ) -> (f64, f64) { let r_sincos = sincospi_eval_dd(x); let cos_k_sin_y = DoubleDouble::quick_mult(sin_cos_k, r_sincos.v_sin); let rr_sin = DoubleDouble::mul_add(sin_sin_k, r_sincos.v_cos, cos_k_sin_y); let cos_k_sin_y = DoubleDouble::quick_mult(cos_cos_k, r_sincos.v_sin); let rr_cos = DoubleDouble::mul_add(cos_sin_k, r_sincos.v_cos, cos_k_sin_y); (rr_sin.to_f64(), rr_cos.to_f64()) } // [sincospi_eval] gives precision around 2^-66 what is not enough for DD case this method gives 2^-84 #[inline] fn sincospi_eval_extended(x: f64) -> SinCos { let x2 = DoubleDouble::from_exact_mult(x, x); /* sinpi(pi*x) poly generated by Sollya: d = [0, 0.0078128]; f_sin = sin(y*pi)/y; Q = fpminimax(f_sin, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating); See ./notes/sinpi.sollya */ let sin_lop = f_polyeval3( x2.hi, f64::from_bits(0x400466bc67763662), f64::from_bits(0xbfe32d2cce5aad86), f64::from_bits(0x3fb5077099a1f35b), ); let mut v_sin = DoubleDouble::mul_f64_add( x2, sin_lop, DoubleDouble::from_bit_pair((0x3cb0553d6ee5e8ec, 0xc014abbce625be53)), ); v_sin = DoubleDouble::mul_add( x2, v_sin, DoubleDouble::from_bit_pair((0x3ca1a626330dd130, 0x400921fb54442d18)), ); v_sin = DoubleDouble::quick_mult_f64(v_sin, x); /* cospi(pi*x) poly generated by Sollya: d = [0, 0.015625]; f_cos = cos(y*pi); Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating); See ./notes/cospi_fast_dd.sollya */ let p = f_polyeval3( x2.hi, f64::from_bits(0x40103c1f081b5abf), f64::from_bits(0xbff55d3c7e2edd89), f64::from_bits(0x3fce1f2fd9d79484), ); let mut v_cos = DoubleDouble::mul_f64_add( x2, p, DoubleDouble::from_bit_pair((0xbcb69236a9b3ed73, 0xc013bd3cc9be45de)), ); v_cos = DoubleDouble::mul_add_f64(x2, v_cos, f64::from_bits(0x3ff0000000000000)); SinCos { v_sin: DoubleDouble::from_exact_add(v_sin.hi, v_sin.lo), v_cos: DoubleDouble::from_exact_add(v_cos.hi, v_cos.lo), err: 0., } } pub(crate) fn f_fast_sinpi_dd(x: f64) -> DoubleDouble { let ix = x.to_bits(); let ax = ix & 0x7fff_ffff_ffff_ffff; if ax == 0 { return DoubleDouble::new(0., 0.); } let e: i32 = (ax >> 52) as i32; let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52); let sgn: i64 = (ix as i64) >> 63; let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn); let mut s: i32 = 1063i32.wrapping_sub(e); if s < 0 { s = -s - 1; if s > 10 { return DoubleDouble::new(0., f64::copysign(0.0, x)); } let iq: u64 = (m as u64).wrapping_shl(s as u32); if (iq & 2047) == 0 { return DoubleDouble::new(0., f64::copysign(0.0, x)); } } if ax <= 0x3fa2000000000000u64 { // |x| <= 0.03515625 const PI: DoubleDouble = DoubleDouble::new( f64::from_bits(0x3ca1a62633145c07), f64::from_bits(0x400921fb54442d18), ); if ax < 0x3c90000000000000 { // for x near zero, sinpi(x) = pi*x + O(x^3), thus worst cases are those // of the function pi*x, and if x is a worst case, then 2*x is another // one in the next binade. For this reason, worst cases are only included // for the binade [2^-1022, 2^-1021). For larger binades, // up to [2^-54,2^-53), worst cases should be deduced by multiplying // by some power of 2. if ax < 0x0350000000000000 { let t = x * f64::from_bits(0x4690000000000000); let z = DoubleDouble::quick_mult_f64(PI, t); let r = z.to_f64(); let rs = r * f64::from_bits(0x3950000000000000); let rt = rs * f64::from_bits(0x4690000000000000); return DoubleDouble::new( 0., dyad_fmla((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs), ); } let z = DoubleDouble::quick_mult_f64(PI, x); return z; } /* Poly generated by Sollya: d = [0, 0.03515625]; f_sin = sin(y*pi)/y; Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107, 107, D...|], d, relative, floating); See ./notes/sinpi_zero_fast_dd.sollya */ const C: [u64; 4] = [ 0xbfe32d2cce62bd85, 0x3fb50783487eb73d, 0xbf7e3074f120ad1f, 0x3f3e8d9011340e5a, ]; let x2 = DoubleDouble::from_exact_mult(x, x); const C_PI: DoubleDouble = DoubleDouble::from_bit_pair((0x3ca1a626331457a4, 0x400921fb54442d18)); let p = f_polyeval4( x2.hi, f64::from_bits(C[0]), f64::from_bits(C[1]), f64::from_bits(C[2]), f64::from_bits(C[3]), ); let mut r = DoubleDouble::mul_f64_add( x2, p, DoubleDouble::from_bit_pair((0xbc96dd7ae221e58c, 0x400466bc6775aae2)), ); r = DoubleDouble::mul_add( x2, r, DoubleDouble::from_bit_pair((0x3cb05511c8a6c478, 0xc014abbce625be53)), ); r = DoubleDouble::mul_add(r, x2, C_PI); r = DoubleDouble::quick_mult_f64(r, x); let k = DoubleDouble::from_exact_add(r.hi, r.lo); return k; } let si = e.wrapping_sub(1011); if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 { // x is integer or half-integer if (m0.wrapping_shl(si as u32)) == 0 { return DoubleDouble::new(0., f64::copysign(0.0, x)); // x is integer } let t = (m0.wrapping_shl((si - 1) as u32)) >> 63; // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2 return DoubleDouble::new( 0., if t == 0 { f64::copysign(1.0, x) } else { -f64::copysign(1.0, x) }, ); } let (y, k) = reduce_pi_64(x); // // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64). let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]); let cos_k = DoubleDouble::from_bit_pair( SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize], ); let r_sincos = sincospi_eval_extended(y); let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos); let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin); // sin_k_cos_y is always >> cos_k_sin_y let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi); rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo; DoubleDouble::from_exact_add(rr.hi, rr.lo) } /// Computes sin(PI*x) /// /// Max ULP 0.5 pub fn f_sinpi(x: f64) -> f64 { let ix = x.to_bits(); let ax = ix & 0x7fff_ffff_ffff_ffff; if ax == 0 { return x; } let e: i32 = (ax >> 52) as i32; let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52); let sgn: i64 = (ix as i64) >> 63; let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn); let mut s: i32 = 1063i32.wrapping_sub(e); if s < 0 { if e == 0x7ff { if (ix << 12) == 0 { return f64::NAN; } return x + x; // case x=NaN } s = -s - 1; if s > 10 { return f64::copysign(0.0, x); } let iq: u64 = (m as u64).wrapping_shl(s as u32); if (iq & 2047) == 0 { return f64::copysign(0.0, x); } } if ax <= 0x3fa2000000000000u64 { // |x| <= 0.03515625 const PI: DoubleDouble = DoubleDouble::new( f64::from_bits(0x3ca1a62633145c07), f64::from_bits(0x400921fb54442d18), ); if ax < 0x3c90000000000000 { // for x near zero, sinpi(x) = pi*x + O(x^3), thus worst cases are those // of the function pi*x, and if x is a worst case, then 2*x is another // one in the next binade. For this reason, worst cases are only included // for the binade [2^-1022, 2^-1021). For larger binades, // up to [2^-54,2^-53), worst cases should be deduced by multiplying // by some power of 2. if ax < 0x0350000000000000 { let t = x * f64::from_bits(0x4690000000000000); let z = DoubleDouble::quick_mult_f64(PI, t); let r = z.to_f64(); let rs = r * f64::from_bits(0x3950000000000000); let rt = rs * f64::from_bits(0x4690000000000000); return dyad_fmla((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs); } let z = DoubleDouble::quick_mult_f64(PI, x); return z.to_f64(); } /* Poly generated by Sollya: d = [0, 0.03515625]; f_sin = sin(y*pi)/y; Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107, D...|], d, relative, floating); See ./notes/sinpi_zero.sollya */ let x2 = x * x; let x3 = x2 * x; let x4 = x2 * x2; let eps = x * f_fmla( x2, f64::from_bits(0x3d00000000000000), // 2^-47 f64::from_bits(0x3bd0000000000000), // 2^-66 ); const C: [u64; 4] = [ 0xc014abbce625be51, 0x400466bc67754b46, 0xbfe32d2cc12a51f4, 0x3fb5060540058476, ]; const C_PI: DoubleDouble = DoubleDouble::from_bit_pair((0x3ca1a67088eb1a46, 0x400921fb54442d18)); let mut z = DoubleDouble::quick_mult_f64(C_PI, x); let zl0 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0])); let zl1 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2])); z.lo = f_fmla(x3, f_fmla(x4, zl1, zl0), z.lo); let lb = z.hi + (z.lo - eps); let ub = z.hi + (z.lo + eps); if lb == ub { return lb; } return as_sinpi_zero(x); } let si = e.wrapping_sub(1011); if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 { // x is integer or half-integer if (m0.wrapping_shl(si as u32)) == 0 { return f64::copysign(0.0, x); // x is integer } let t = (m0.wrapping_shl((si - 1) as u32)) >> 63; // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2 return if t == 0 { f64::copysign(1.0, x) } else { -f64::copysign(1.0, x) }; } let (y, k) = reduce_pi_64(x); // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64). let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]); let cos_k = DoubleDouble::from_bit_pair( SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize], ); let r_sincos = sincospi_eval(y); let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos); let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin); // sin_k_cos_y is always >> cos_k_sin_y let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi); rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo; let ub = rr.hi + (rr.lo + r_sincos.err); // (rr.lo + ERR); let lb = rr.hi + (rr.lo - r_sincos.err); // (rr.lo - ERR); if ub == lb { return rr.to_f64(); } sinpi_dd(y, sin_k, cos_k) } /// Computes cos(PI*x) /// /// Max found ULP 0.5 pub fn f_cospi(x: f64) -> f64 { let ix = x.to_bits(); let ax = ix & 0x7fff_ffff_ffff_ffff; if ax == 0 { return 1.0; } let e: i32 = (ax >> 52) as i32; // e is the unbiased exponent, we have 2^(e-1023) <= |x| < 2^(e-1022) let m: i64 = ((ix & 0x000fffffffffffff) | (1u64 << 52)) as i64; let mut s = 1063i32.wrapping_sub(e); // 2^(40-s) <= |x| < 2^(41-s) if s < 0 { // |x| >= 2^41 if e == 0x7ff { // NaN or Inf if ix.wrapping_shl(12) == 0 { return f64::NAN; } return x + x; // NaN } s = -s - 1; // now 2^(41+s) <= |x| < 2^(42+s) if s > 11 { return 1.0; } // |x| >= 2^53 let iq: u64 = (m as u64).wrapping_shl(s as u32).wrapping_add(1024); if (iq & 2047) == 0 { return 0.0; } } if ax <= 0x3f30000000000000u64 { // |x| <= 2^-12, |x| <= 0.000244140625 if ax <= 0x3e2ccf6429be6621u64 { return 1.0 - f64::from_bits(0x3c80000000000000); } let x2 = x * x; let x4 = x2 * x2; let eps = x2 * f64::from_bits(0x3cfa000000000000); /* Generated by Sollya: d = [0, 0.000244140625]; f_cos = cos(y*pi); Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating); See ./notes/cospi.sollya */ const C: [u64; 4] = [ 0xc013bd3cc9be45de, 0x40103c1f081b5ac4, 0xbff55d3c7ff79b60, 0x3fd24c7b6f7d0690, ]; let p0 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2])); let p1 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0])); let p = x2 * f_fmla(x4, p0, p1); let lb = (p - eps) + 1.; let ub = (p + eps) + 1.; if lb == ub { return lb; } return as_cospi_zero(x); } let si: i32 = e.wrapping_sub(1011); if si >= 0 && ((m as u64).wrapping_shl(si as u32) ^ 0x8000000000000000u64) == 0 { return 0.0; } let (y, k) = reduce_pi_64(x); // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64). let msin_k = DoubleDouble::from_bit_pair( SINPI_K_PI_OVER_64[((k as u64).wrapping_add(64) & 127) as usize], ); let cos_k = DoubleDouble::from_bit_pair( SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize], ); let r_sincos = sincospi_eval(y); let cos_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, cos_k); let cos_k_msin_y = DoubleDouble::quick_mult(r_sincos.v_sin, msin_k); // cos_k_cos_y is always >> cos_k_msin_y let mut rr = DoubleDouble::from_exact_add(cos_k_cos_y.hi, cos_k_msin_y.hi); rr.lo += cos_k_cos_y.lo + cos_k_msin_y.lo; let ub = rr.hi + (rr.lo + r_sincos.err); // (rr.lo + ERR); let lb = rr.hi + (rr.lo - r_sincos.err); // (rr.lo - ERR); if ub == lb { return rr.to_f64(); } sinpi_dd(y, cos_k, msin_k) } /// Computes sin(PI*x) and cos(PI*x) /// /// Max found ULP 0.5 pub fn f_sincospi(x: f64) -> (f64, f64) { let ix = x.to_bits(); let ax = ix & 0x7fff_ffff_ffff_ffff; if ax == 0 { return (x, 1.0); } let e: i32 = (ax >> 52) as i32; // e is the unbiased exponent, we have 2^(e-1023) <= |x| < 2^(e-1022) let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52); let m: i64 = ((ix & 0x000fffffffffffff) | (1u64 << 52)) as i64; let mut s = 1063i32.wrapping_sub(e); // 2^(40-s) <= |x| < 2^(41-s) if s < 0 { // |x| >= 2^41 if e == 0x7ff { // NaN or Inf if ix.wrapping_shl(12) == 0 { return (f64::NAN, f64::NAN); } return (x + x, x + x); // NaN } s = -s - 1; if s > 10 { static CF: [f64; 2] = [1., -1.]; let is_odd = is_odd_integer(f64::from_bits(ax)); let cos_x = CF[is_odd as usize]; return (f64::copysign(0.0, x), cos_x); } // |x| >= 2^53 let iq: u64 = (m as u64).wrapping_shl(s as u32); // sinpi = 0 when multiple of 2048 let sin_zero = (iq & 2047) == 0; // cospi = 0 when offset-by-half multiple of 2048 let cos_zero = ((m as u64).wrapping_shl(s as u32).wrapping_add(1024) & 2047) == 0; if sin_zero && cos_zero { // both zero (only possible if NaN or something degenerate) } else if sin_zero { static CF: [f64; 2] = [1., -1.]; let is_odd = is_odd_integer(f64::from_bits(ax)); let cos_x = CF[is_odd as usize]; return (0.0, cos_x); // sin = 0, cos = ±1 } else if cos_zero { // x = k / 2 * PI let si = e.wrapping_sub(1011); let t = (m0.wrapping_shl((si - 1) as u32)) >> 63; // making sin decision based on quadrant return if t == 0 { (f64::copysign(1.0, x), 0.0) } else { (-f64::copysign(1.0, x), 0.0) }; // sin = ±1, cos = 0 } } if ax <= 0x3f30000000000000u64 { // |x| <= 2^-12, |x| <= 0.000244140625 if ax <= 0x3c90000000000000u64 { const PI: DoubleDouble = DoubleDouble::new( f64::from_bits(0x3ca1a62633145c07), f64::from_bits(0x400921fb54442d18), ); let sin_x = if ax < 0x0350000000000000 { let t = x * f64::from_bits(0x4690000000000000); let z = DoubleDouble::quick_mult_f64(PI, t); let r = z.to_f64(); let rs = r * f64::from_bits(0x3950000000000000); let rt = rs * f64::from_bits(0x4690000000000000); dyad_fmla((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs) } else { let z = DoubleDouble::quick_mult_f64(PI, x); z.to_f64() }; return (sin_x, 1.0 - f64::from_bits(0x3c80000000000000)); } let x2 = x * x; let x4 = x2 * x2; let cos_eps = x2 * f64::from_bits(0x3cfa000000000000); /* Generated by Sollya: d = [0, 0.000244140625]; f_cos = cos(y*pi); Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating); See ./notes/cospi.sollya */ const COS_C: [u64; 4] = [ 0xc013bd3cc9be45de, 0x40103c1f081b5ac4, 0xbff55d3c7ff79b60, 0x3fd24c7b6f7d0690, ]; let p0 = f_fmla(x2, f64::from_bits(COS_C[3]), f64::from_bits(COS_C[2])); let p1 = f_fmla(x2, f64::from_bits(COS_C[1]), f64::from_bits(COS_C[0])); let p = x2 * f_fmla(x4, p0, p1); let cos_lb = (p - cos_eps) + 1.; let cos_ub = (p + cos_eps) + 1.; let cos_x = if cos_lb == cos_ub { cos_lb } else { as_cospi_zero(x) }; /* Poly generated by Sollya: d = [0, 0.03515625]; f_sin = sin(y*pi)/y; Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107, D...|], d, relative, floating); See ./notes/sinpi_zero.sollya */ const SIN_C: [u64; 4] = [ 0xc014abbce625be51, 0x400466bc67754b46, 0xbfe32d2cc12a51f4, 0x3fb5060540058476, ]; const C_PI: DoubleDouble = DoubleDouble::from_bit_pair((0x3ca1a67088eb1a46, 0x400921fb54442d18)); let mut z = DoubleDouble::quick_mult_f64(C_PI, x); let x3 = x2 * x; let zl0 = f_fmla(x2, f64::from_bits(SIN_C[1]), f64::from_bits(SIN_C[0])); let zl1 = f_fmla(x2, f64::from_bits(SIN_C[3]), f64::from_bits(SIN_C[2])); let sin_eps = x * f_fmla( x2, f64::from_bits(0x3d00000000000000), // 2^-47 f64::from_bits(0x3bd0000000000000), // 2^-66 ); z.lo = f_fmla(x3, f_fmla(x4, zl1, zl0), z.lo); let sin_lb = z.hi + (z.lo - sin_eps); let sin_ub = z.hi + (z.lo + sin_eps); let sin_x = if sin_lb == sin_ub { sin_lb } else { as_sinpi_zero(x) }; return (sin_x, cos_x); } let si = e.wrapping_sub(1011); if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 { // x is integer or half-integer if (m0.wrapping_shl(si as u32)) == 0 { static CF: [f64; 2] = [1., -1.]; let is_odd = is_odd_integer(f64::from_bits(ax)); let cos_x = CF[is_odd as usize]; return (f64::copysign(0.0, x), cos_x); // x is integer } // x is half-integer let t = (m0.wrapping_shl((si - 1) as u32)) >> 63; // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2 return if t == 0 { (f64::copysign(1.0, x), 0.0) } else { (-f64::copysign(1.0, x), 0.0) }; } let (y, k) = reduce_pi_64(x); // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64). let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]); let cos_k = DoubleDouble::from_bit_pair( SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize], ); let msin_k = -sin_k; let r_sincos = sincospi_eval(y); let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos); let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin); let cos_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, cos_k); let msin_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, msin_k); // sin_k_cos_y is always >> cos_k_sin_y let mut rr_sin = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi); rr_sin.lo += sin_k_cos_y.lo + cos_k_sin_y.lo; let sin_ub = rr_sin.hi + (rr_sin.lo + r_sincos.err); // (rr.lo + ERR); let sin_lb = rr_sin.hi + (rr_sin.lo - r_sincos.err); // (rr.lo - ERR); let mut rr_cos = DoubleDouble::from_exact_add(cos_k_cos_y.hi, msin_k_sin_y.hi); rr_cos.lo += cos_k_cos_y.lo + msin_k_sin_y.lo; let cos_ub = rr_cos.hi + (rr_cos.lo + r_sincos.err); // (rr.lo + ERR); let cos_lb = rr_cos.hi + (rr_cos.lo - r_sincos.err); // (rr.lo - ERR); if sin_ub == sin_lb && cos_lb == cos_ub { return (rr_sin.to_f64(), rr_cos.to_f64()); } sincospi_dd(y, sin_k, cos_k, cos_k, msin_k) } #[cfg(test)] mod tests { use super::*; #[test] fn test_sinpi() { assert_eq!(f_sinpi(262143.50006870925), -0.9999999767029883); assert_eq!(f_sinpi(7124076477593855.), 0.); assert_eq!(f_sinpi(-11235582092889474000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.), -0.); assert_eq!(f_sinpi(-2.7430620343968443e303), -0.0); assert_eq!(f_sinpi(0.00003195557007273919), 0.00010039138401316004); assert_eq!(f_sinpi(-0.038357843137253766), -0.12021328061499763); assert_eq!(f_sinpi(1.0156097449358867), -0.04901980680173724); assert_eq!(f_sinpi(74.8593852519989), 0.42752597787896457); assert_eq!(f_sinpi(0.500091552734375), 0.9999999586369661); assert_eq!(f_sinpi(0.5307886532952182), 0.9953257438106751); assert_eq!(f_sinpi(3.1415926535897936), -0.43030121700009316); assert_eq!(f_sinpi(-0.5305172747685276), -0.9954077178320563); assert_eq!(f_sinpi(-0.03723630312089732), -0.1167146713267927); assert_eq!( f_sinpi(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022946074000077123), 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007208721750737005 ); assert_eq!( f_sinpi(0.000000000000000000000000000000000000007413093439574428), 2.3288919890141717e-38 ); assert_eq!(f_sinpi(0.0031909299901270445), 0.0100244343161398578); assert_eq!(f_sinpi(0.11909245901270445), 0.36547215190661003); assert_eq!(f_sinpi(0.99909245901270445), 0.0028511202357662186); assert!(f_sinpi(f64::INFINITY).is_nan()); assert!(f_sinpi(f64::NEG_INFINITY).is_nan()); assert!(f_sinpi(f64::NAN).is_nan()); } #[test] fn test_sincospi() { let v0 = f_sincospi(1.0156097449358867); assert_eq!(v0.0, f_sinpi(1.0156097449358867)); assert_eq!(v0.1, f_cospi(1.0156097449358867)); let v1 = f_sincospi(4503599627370496.); assert_eq!(v1.0, f_sinpi(4503599627370496.)); assert_eq!(v1.1, f_cospi(4503599627370496.)); let v1 = f_sincospi(-108.); assert_eq!(v1.0, f_sinpi(-108.)); assert_eq!(v1.1, f_cospi(-108.)); let v1 = f_sincospi(3.); assert_eq!(v1.0, f_sinpi(3.)); assert_eq!(v1.1, f_cospi(3.)); let v1 = f_sincospi(13.5); assert_eq!(v1.0, f_sinpi(13.5)); assert_eq!(v1.1, f_cospi(13.5)); let v1 = f_sincospi(7124076477593855.); assert_eq!(v1.0, f_sinpi(7124076477593855.)); assert_eq!(v1.1, f_cospi(7124076477593855.)); let v1 = f_sincospi(2533419148247186.5); assert_eq!(v1.0, f_sinpi(2533419148247186.5)); assert_eq!(v1.1, f_cospi(2533419148247186.5)); let v1 = f_sincospi(2.2250653705240375E-308); assert_eq!(v1.0, f_sinpi(2.2250653705240375E-308)); assert_eq!(v1.1, f_cospi(2.2250653705240375E-308)); let v1 = f_sincospi(2533420818956351.); assert_eq!(v1.0, f_sinpi(2533420818956351.)); assert_eq!(v1.1, f_cospi(2533420818956351.)); let v1 = f_sincospi(2533822406803233.5); assert_eq!(v1.0, f_sinpi(2533822406803233.5)); assert_eq!(v1.1, f_cospi(2533822406803233.5)); let v1 = f_sincospi(-3040685725640478.5); assert_eq!(v1.0, f_sinpi(-3040685725640478.5)); assert_eq!(v1.1, f_cospi(-3040685725640478.5)); let v1 = f_sincospi(2533419148247186.5); assert_eq!(v1.0, f_sinpi(2533419148247186.5)); assert_eq!(v1.1, f_cospi(2533419148247186.5)); let v1 = f_sincospi(2533420819267583.5); assert_eq!(v1.0, f_sinpi(2533420819267583.5)); assert_eq!(v1.1, f_cospi(2533420819267583.5)); let v1 = f_sincospi(6979704728846336.); assert_eq!(v1.0, f_sinpi(6979704728846336.)); assert_eq!(v1.1, f_cospi(6979704728846336.)); let v1 = f_sincospi(7124076477593855.); assert_eq!(v1.0, f_sinpi(7124076477593855.)); assert_eq!(v1.1, f_cospi(7124076477593855.)); let v1 = f_sincospi(-0.00000000002728839192371484); assert_eq!(v1.0, f_sinpi(-0.00000000002728839192371484)); assert_eq!(v1.1, f_cospi(-0.00000000002728839192371484)); let v1 = f_sincospi(0.00002465398569495569); assert_eq!(v1.0, f_sinpi(0.00002465398569495569)); assert_eq!(v1.1, f_cospi(0.00002465398569495569)); } #[test] fn test_cospi() { assert_eq!(0.9999497540959953, f_cospi(0.0031909299901270445)); assert_eq!(0.9308216542079669, f_cospi(0.11909299901270445)); assert_eq!(-0.1536194873288318, f_cospi(0.54909299901270445)); assert!(f_cospi(f64::INFINITY).is_nan()); assert!(f_cospi(f64::NEG_INFINITY).is_nan()); assert!(f_cospi(f64::NAN).is_nan()); } }