1001 lines
35 KiB
Rust
1001 lines
35 KiB
Rust
/*
|
|
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
|
|
* //
|
|
* // Redistribution and use in source and binary forms, with or without modification,
|
|
* // are permitted provided that the following conditions are met:
|
|
* //
|
|
* // 1. Redistributions of source code must retain the above copyright notice, this
|
|
* // list of conditions and the following disclaimer.
|
|
* //
|
|
* // 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
* // this list of conditions and the following disclaimer in the documentation
|
|
* // and/or other materials provided with the distribution.
|
|
* //
|
|
* // 3. Neither the name of the copyright holder nor the names of its
|
|
* // contributors may be used to endorse or promote products derived from
|
|
* // this software without specific prior written permission.
|
|
* //
|
|
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
use crate::common::{dd_fmla, is_integerf};
|
|
use crate::double_double::DoubleDouble;
|
|
use crate::round_ties_even::RoundTiesEven;
|
|
use std::hint::black_box;
|
|
|
|
/// Slow path for the special inputs of `compound(x, y) = (1 + x)^y`.
///
/// `f_compoundf` routes here when `x <= -1`, or when either argument is
/// `±0`, `±Inf` or NaN. The rule letters in the comments refer to the
/// IEEE 754-2019 list quoted in `f_compoundf`.
///
/// Only *signaling* NaNs are propagated in the `x = ±0` and `y = ±0` cases:
/// `compound(±0, qNaN)` and `compound(qNaN, ±0)` are 1, as rules (a) and (d)
/// require. In every other case a NaN operand yields NaN.
///
/// Returns `0.0` if called with inputs that match none of the special cases.
#[cold]
#[inline(never)]
fn as_compoundf_special(x: f32, y: f32) -> f32 {
    // |Inf| once the sign bit has been shifted out.
    const ABS_INF: u32 = 0xffu32 << 24;
    // Most significant mantissa bit: set for quiet NaNs, clear for signaling ones.
    const QUIET_BIT: u32 = 0x0040_0000;

    let is_signaling_nan =
        |bits: u32| bits.wrapping_shl(1) > ABS_INF && (bits & QUIET_BIT) == 0;

    let nx = x.to_bits();
    let ny = y.to_bits();
    // Shifting left by one drops the sign, leaving an ordering on |x| and |y|.
    let ax: u32 = nx.wrapping_shl(1);
    let ay: u32 = ny.wrapping_shl(1);
    let y_is_negative = (ny >> 31) != 0;

    if ax == 0 {
        // rule (d): compound(±0, y) = 1 except for y = sNaN
        return if is_signaling_nan(ny) { x + y } else { 1.0 };
    }

    if ay == 0 {
        // compound(x, ±0)
        if is_signaling_nan(nx) {
            return x + y; // x = sNaN: the addition quiets it
        }
        return if x < -1.0 {
            f32::NAN // rule (g)
        } else {
            1.0 // rule (a); also reached for quiet NaN since the comparison is false
        };
    }

    let mone = (-1.0f32).to_bits();

    if ay >= ABS_INF {
        // y is Inf or NaN (x = 0 was handled above)
        if ax > ABS_INF {
            return x + y; // x = NaN
        }
        if ay > ABS_INF {
            return x + y; // y = NaN
        }
        // y = ±Inf
        if nx > mone {
            return f32::NAN; // x < -1 (including -Inf): rule (g)
        }
        if nx == mone {
            // x = -1: rule (c) for +Inf, rule (b) for -Inf
            return if y_is_negative { f32::INFINITY } else { 0.0 };
        }
        if x < 0.0 {
            // 0 < 1 + x < 1
            return if y_is_negative { f32::INFINITY } else { 0.0 };
        }
        if x > 0.0 {
            // 1 + x > 1
            return if y_is_negative { 0.0 } else { f32::INFINITY };
        }
        return 1.0;
    }

    if nx >= mone || nx >= 0xffu32 << 23 {
        // x is Inf, NaN or <= -1, with y finite and non-zero
        if ax == ABS_INF {
            if (nx >> 31) != 0 {
                return f32::NAN; // x = -Inf: rule (g)
            }
            // (1 + Inf)^y = +Inf for y > 0 (rule e), +0 for y < 0 (rule f)
            return if y_is_negative { 1.0 / x } else { x };
        }
        if ax > ABS_INF {
            return x + y; // x = NaN
        }
        if nx > mone {
            return f32::NAN; // x < -1: rule (g)
        }
        // x = -1: rule (b) for y < 0, rule (c) for y > 0
        return if y_is_negative { f32::INFINITY } else { 0.0 };
    }

    0.0
}
|
|
|
|
#[inline]
|
|
pub(crate) fn log2p1_polyeval_1(z: f64) -> f64 {
|
|
// we include P[0] = 0 so that P[i] corresponds to degree i
|
|
// this degree-8 polynomial generated by Sollya (cf p1.sollya)
|
|
// has relative error < 2^-50.98
|
|
const P: [u64; 8] = [
|
|
0x0000000000000000,
|
|
0x3ff71547652b82fe,
|
|
0xbfe71547652b8d11,
|
|
0x3fdec709dc3a5014,
|
|
0xbfd715475b144983,
|
|
0x3fd2776c3fda300e,
|
|
0xbfcec990162358ce,
|
|
0x3fca645337c29e27,
|
|
];
|
|
|
|
let z2 = z * z;
|
|
let mut c5 = dd_fmla(f64::from_bits(P[6]), z, f64::from_bits(P[5]));
|
|
let c3 = dd_fmla(f64::from_bits(P[4]), z, f64::from_bits(P[3]));
|
|
let mut c1 = dd_fmla(f64::from_bits(P[2]), z, f64::from_bits(P[1]));
|
|
let z4 = z2 * z2;
|
|
c5 = dd_fmla(f64::from_bits(P[7]), z2, c5);
|
|
c1 = dd_fmla(c3, z2, c1);
|
|
c1 = dd_fmla(c5, z4, c1);
|
|
z * c1
|
|
}
|
|
|
|
/// Reciprocal seeds for the `log2(1 + x)` argument reduction, stored as `f64`
/// bit patterns.
///
/// For `0 <= i < 46`, entry `i` approximates `1/t` for
/// `1/2 + (i + 13)/64 <= t < 1/2 + (i + 14)/64`.
/// Entries 18 and 19 are exactly 1.0.
#[rustfmt::skip]
pub(crate) static LOG2P1_COMPOUNDF_INV: [u64; 46] = [
    0x3ff6800000000000, 0x3ff6000000000000, 0x3ff5800000000000, 0x3ff5000000000000,
    0x3ff4c00000000000, 0x3ff4400000000000, 0x3ff4000000000000, 0x3ff3800000000000,
    0x3ff3400000000000, 0x3ff2c00000000000, 0x3ff2800000000000, 0x3ff2000000000000,
    0x3ff1c00000000000, 0x3ff1800000000000, 0x3ff1400000000000, 0x3ff1000000000000,
    0x3ff0c00000000000, 0x3ff0800000000000, 0x3ff0000000000000, 0x3ff0000000000000,
    0x3fef400000000000, 0x3feec00000000000, 0x3fee400000000000, 0x3fee000000000000,
    0x3fed800000000000, 0x3fed000000000000, 0x3feca00000000000, 0x3fec400000000000,
    0x3febe00000000000, 0x3feb800000000000, 0x3feb200000000000, 0x3feac00000000000,
    0x3fea800000000000, 0x3fea200000000000, 0x3fe9c00000000000, 0x3fe9800000000000,
    0x3fe9200000000000, 0x3fe8c00000000000, 0x3fe8800000000000, 0x3fe8400000000000,
    0x3fe8000000000000, 0x3fe7c00000000000, 0x3fe7600000000000, 0x3fe7200000000000,
    0x3fe6e00000000000, 0x3fe6a00000000000,
];
|
|
|
|
/// Double-double values of `-log2(LOG2P1_COMPOUNDF_INV[i])`, stored as `f64`
/// bit patterns.
///
/// Each tuple is `(lo, hi)`: field `.1` is the leading term and field `.0` is
/// the correction term. Stated bounds: the leading term alone has absolute
/// error < 2^-54.462, and the sum of both terms has absolute error < 2^-109.101.
#[rustfmt::skip]
pub(crate) static LOG2P1_COMPOUNDF_LOG2_INV: [(u64, u64); 46] = [
    (0x3c68f3673ffdd785, 0xbfdf7a8568cb06cf), (0x3c1c141e66faaaad, 0xbfdd6753e032ea0f),
    (0x3c76fae441c09d76, 0xbfdb47ebf73882a1), (0x3c72d352bea51e59, 0xbfd91bba891f1709),
    (0xbc69575b04fa6fbd, 0xbfd800a563161c54), (0x3c7817fd3b7d7e5d, 0xbfd5c01a39fbd688),
    (0x3c1b6d40900b2502, 0xbfd49a784bcd1b8b), (0x3c7f6e91ad16ecff, 0xbfd24407ab0e073a),
    (0x3c6a7b47d2c352d9, 0xbfd11307dad30b76), (0x3c5b85a54d7ee2fd, 0xbfcd49ee4c325970),
    (0x3c401ee1343fe7ca, 0xbfcacf5e2db4ec94), (0x3c6817fd3b7d7e5d, 0xbfc5c01a39fbd688),
    (0xbc4f51f2c075a74c, 0xbfc32ae9e278ae1a), (0x3c6a7610e40bd6ab, 0xbfc08c588cda79e4),
    (0xbc58ecb169b9465f, 0xbfbbc84240adabba), (0xbc5f3314e0985116, 0xbfb663f6fac91316),
    (0x3c530c22d15199b8, 0xbfb0eb389fa29f9b), (0xbc389b03784b5be1, 0xbfa6bad3758efd87),
    (0x0000000000000000, 0x0000000000000000), (0x0000000000000000, 0x0000000000000000),
    (0x3c3491f06c085bc2, 0x3fa184b8e4c56af8), (0x3c0155660710eb2a, 0x3fad6ebd1f1febfe),
    (0x3c2c141e66faaaad, 0x3fb4c560fe68af88), (0x3c59ced1447e30ad, 0x3fb7d60496cfbb4c),
    (0x3c592ce9636c90a0, 0x3fbe0b1ae8f2fd56), (0xbc5696e2866c718e, 0x3fc22dadc2ab3497),
    (0xbc61562d61af73f8, 0x3fc494f863b8df35), (0xbc60798d1aa21694, 0x3fc7046031c79f85),
    (0xbc6e95734abd2fcc, 0x3fc97c1cb13c7ec1), (0x3c2bc0af7b82e7d7, 0x3fcbfc67a7fff4cc),
    (0xbc6086fce864a1f6, 0x3fce857d3d361368), (0xbc53d56efe4338fe, 0x3fd08bce0d95fa38),
    (0x3c7c8d43e017579b, 0x3fd169c05363f158), (0xbc50132ae5e417cd, 0x3fd2baa0c34be1ec),
    (0xbc7c658d602e66b0, 0x3fd4106017c3eca3), (0x3c7e393a16b94b52, 0x3fd4f6fbb2cec598),
    (0x3c7ac9080333c605, 0x3fd6552b49986277), (0x3c68f89e2eb553b2, 0x3fd7b89f02cf2aad),
    (0x3c799aa6df8b7d83, 0x3fd8a8980abfbd32), (0x3c7bca36fd02def0, 0x3fd99b072a96c6b2),
    (0x3c5817fd3b7d7e5d, 0x3fda8ff971810a5e), (0xbc501d98c3531027, 0x3fdb877c57b1b070),
    (0x3c78a38b4175d665, 0x3fdcffae611ad12b), (0x3c438c8946414c6a, 0x3fddfdd89d586e2b),
    (0x3c76d261f1753e0b, 0x3fdefec61b011f85), (0xbc87398fe685f171, 0x3fe0014332be0033),
];
|
|
|
|
/* for |z| <= 2^-6, returns an approximation of 2^z
|
|
with absolute error < 2^-43.540 */
|
|
#[inline]
|
|
fn compoundf_expf_poly(z: f64) -> f64 {
|
|
/* Q is a degree-4 polynomial generated by Sollya (cf q1.sollya)
|
|
with absolute error < 2^-43.549 */
|
|
const Q: [u64; 5] = [
|
|
0x3ff0000000000000,
|
|
0x3fe62e42fef6d01a,
|
|
0x3fcebfbdff7feeba,
|
|
0x3fac6b167e579bee,
|
|
0x3f83b2b3428d06de,
|
|
];
|
|
let z2 = z * z;
|
|
let c3 = dd_fmla(f64::from_bits(Q[4]), z, f64::from_bits(Q[3]));
|
|
let c0 = dd_fmla(f64::from_bits(Q[1]), z, f64::from_bits(Q[0]));
|
|
let c2 = dd_fmla(c3, z, f64::from_bits(Q[2]));
|
|
dd_fmla(c2, z2, c0)
|
|
}
|
|
|
|
pub(crate) fn compoundf_log2p1_fast(x: f64) -> f64 {
|
|
/* for x > 0, 1+x is exact when 2^-29 <= x < 2^53
|
|
for x < 0, 1+x is exact when -1 < x <= 2^-30 */
|
|
|
|
// double u = (x >= 0x1p53) ? x : 1.0 + x;
|
|
let u = 1.0 + x;
|
|
/* For x < 0x1p53, x + 1 is exact thus u = x+1.
|
|
For x >= 2^53, we estimate log2(x) instead of log2(1+x),
|
|
since log2(1+x) = log2(x) + log2(1+1/x),
|
|
log2(x) >= 53 and |log2(1+1/x)| < 2^-52.471, the additional relative
|
|
error is bounded by 2^-52.471/53 < 2^-58.198 */
|
|
|
|
let mut v = u.to_bits();
|
|
let m: u64 = v & 0xfffffffffffffu64;
|
|
let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
|
|
// 2^e/sqrt(2) < u < 2^e*sqrt(2), with -29 <= e <= 128
|
|
v = v.wrapping_sub((e << 52) as u64);
|
|
let t = f64::from_bits(v);
|
|
// u = 2^e*t with 1/sqrt(2) < t < sqrt(2)
|
|
// thus log2(u) = e + log2(t)
|
|
v = (f64::from_bits(v) + 2.0).to_bits(); // add 2 so that v.f is always in the binade [2, 4)
|
|
let i = (v >> 45) as i32 - 0x2002d; // 0 <= i <= 45
|
|
let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
|
|
let z = dd_fmla(r, t, -1.0); // exact, -1/64 <= z <= 1/64
|
|
// we approximates log2(t) by -log2(r) + log2(r*t)
|
|
let p = log2p1_polyeval_1(z);
|
|
// p approximates log2(r*t) with rel. error < 2^-49.642, and |p| < 2^-5.459
|
|
e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p)
|
|
}
|
|
|
|
/// Reduction points for `2^r`, stored as `f64` bit patterns.
///
/// Entry `i` is `(i - 16) / 32`, covering [-1/2, 1/2] in steps of 1/32.
#[rustfmt::skip]
pub(crate) static COMPOUNDF_EXP2_T: [u64; 33] = [
    // -16/32 ..= -1/32
    0xbfe0000000000000, 0xbfde000000000000, 0xbfdc000000000000, 0xbfda000000000000,
    0xbfd8000000000000, 0xbfd6000000000000, 0xbfd4000000000000, 0xbfd2000000000000,
    0xbfd0000000000000, 0xbfcc000000000000, 0xbfc8000000000000, 0xbfc4000000000000,
    0xbfc0000000000000, 0xbfb8000000000000, 0xbfb0000000000000, 0xbfa0000000000000,
    // 0
    0x0000000000000000,
    // 1/32 ..= 16/32
    0x3fa0000000000000, 0x3fb0000000000000, 0x3fb8000000000000, 0x3fc0000000000000,
    0x3fc4000000000000, 0x3fc8000000000000, 0x3fcc000000000000, 0x3fd0000000000000,
    0x3fd2000000000000, 0x3fd4000000000000, 0x3fd6000000000000, 0x3fd8000000000000,
    0x3fda000000000000, 0x3fdc000000000000, 0x3fde000000000000, 0x3fe0000000000000,
];
|
|
|
|
/// Double-double values of `2^COMPOUNDF_EXP2_T[i]`, stored as `f64` bit
/// patterns.
///
/// Each tuple is `(lo, hi)`: field `.1` is the leading term `h` and field `.0`
/// the correction `l`. Stated bounds: `h` approximates `2^T[i]` with absolute
/// error < 2^-53.092 and `h + l` with absolute error < 2^-107.385.
#[rustfmt::skip]
pub(crate) static COMPOUNDF_EXP2_U: [(u64, u64); 33] = [
    (0xbc8bdd3413b26456, 0x3fe6a09e667f3bcd), (0xbc716e4786887a99, 0x3fe71f75e8ec5f74),
    (0xbc741577ee04992f, 0x3fe7a11473eb0187), (0xbc8d4c1dd41532d8, 0x3fe82589994cce13),
    (0x3c86e9f156864b27, 0x3fe8ace5422aa0db), (0xbc575fc781b57ebc, 0x3fe93737b0cdc5e5),
    (0x3c6c7c46b071f2be, 0x3fe9c49182a3f090), (0xbc8d2f6edb8d41e1, 0x3fea5503b23e255d),
    (0x3c87a1cd345dcc81, 0x3feae89f995ad3ad), (0xbc65584f7e54ac3b, 0x3feb7f76f2fb5e47),
    (0x3c711065895048dd, 0x3fec199bdd85529c), (0x3c6503cbd1e949db, 0x3fecb720dcef9069),
    (0x3c72ed02d75b3707, 0x3fed5818dcfba487), (0xbc81a5cd4f184b5c, 0x3fedfc97337b9b5f),
    (0xbc8e9c23179c2893, 0x3feea4afa2a490da), (0x3c89d3e12dd8a18b, 0x3fef50765b6e4540),
    (0x0000000000000000, 0x3ff0000000000000),
    (0x3c8d73e2a475b465, 0x3ff059b0d3158574), (0x3c98a62e4adc610b, 0x3ff0b5586cf9890f),
    (0xbc96c51039449b3a, 0x3ff11301d0125b51), (0xbc819041b9d78a76, 0x3ff172b83c7d517b),
    (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa), (0x3c99b07eb6c70573, 0x3ff2387a6e756238),
    (0x3c8612e8afad1255, 0x3ff29e9df51fdee1), (0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
    (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb), (0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
    (0x3c489b7a04ef80d0, 0x3ff44e086061892d), (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
    (0xbc807abe1db13cad, 0x3ff5342b569d4f82), (0x3c96324c054647ad, 0x3ff5ab07dd485429),
    (0xbc9383c17e40b497, 0x3ff6247eb03a5585), (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
];
|
|
|
|
/* return the correct rounding of (1+x)^y, otherwise -1.0
|
|
where t is an approximation of y*log2(1+x) with absolute error < 2^-40.680,
|
|
assuming 0x1.7154759a0df53p-24 <= |t| <= 150
|
|
exact is non-zero iff (1+x)^y is exact or midpoint */
|
|
fn exp2_fast(t: f64) -> f64 {
|
|
let k = t.round_ties_even_finite(); // 0 <= |k| <= 150
|
|
let mut r = t - k; // |r| <= 1/2, exact
|
|
let mut v: u64 = (3.015625 + r).to_bits(); // 2.5 <= v <= 3.5015625
|
|
// we add 2^-6 so that i is rounded to nearest
|
|
let i: i32 = (v >> 46) as i32 - 0x10010; // 0 <= i <= 32
|
|
r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
|
|
// now |r| <= 2^-6
|
|
// 2^t = 2^k * exp2_U[i][0] * 2^r
|
|
v = (f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1) * compoundf_expf_poly(r)).to_bits();
|
|
/* the absolute error on exp2_U[i][0] is bounded by 2^-53.092, with
|
|
exp2_U[i][0] < 2^0.5, and that on q1(r) is bounded by 2^-43.540,
|
|
with |q1(r)| < 1.011, thus |v| < 1.43, and the absolute error on v is
|
|
bounded by ulp(v) + 2^0.5 * 2^-43.540 + 2^-53.092 * 1.011 < 2^-43.035.
|
|
Now t approximates u := y*log2(1+x) with |t-u| < 2^-40.680 thus
|
|
2^u = 2^t * (1 + eps) with eps < 2^(2^-40.680)-1 < 2^-41.208.
|
|
The total absolute error is thus bounded by 2^-43.035 + 2^-41.208
|
|
< 2^-40.849. */
|
|
let mut err: u64 = 0x3d61d00000000000; // 2^-40.849 < 0x1.1dp-41
|
|
v = unsafe { v.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) }; // scale v by 2^k, k is already integer
|
|
|
|
// in case of potential underflow, we defer to the accurate path
|
|
if f64::from_bits(v) < f64::from_bits(0x38100000000008e2) {
|
|
return -1.0;
|
|
}
|
|
err = unsafe { err.wrapping_add((k.to_int_unchecked::<i64>() << 52) as u64) }; // scale the error by 2^k too
|
|
let lb = (f64::from_bits(v) - f64::from_bits(err)) as f32;
|
|
let rb = (f64::from_bits(v) + f64::from_bits(err)) as f32;
|
|
if lb != rb {
|
|
return -1.0;
|
|
} // rounding test failed
|
|
|
|
f64::from_bits(v)
|
|
}
|
|
|
|
/// Powers of two used to divide a double-double `h + l` by `2^e`, stored as
/// `f64` bit patterns.
///
/// With `2^e/sqrt(2) < h < 2^e*sqrt(2)` and `-29 <= e <= 128`, the entry at
/// index `e + 29` is `2^-e`: the table runs from `2^29` (index 0) down to
/// `2^-128` (index 157).
pub(crate) static LOG2P1_SCALE: [u64; 158] = {
    // Biased exponent of 2^29; every following entry has an exponent one lower
    // and an all-zero mantissa.
    const TOP_EXPONENT: u64 = 0x41c;

    let mut table = [0u64; 158];
    let mut i = 0;
    while i < table.len() {
        table[i] = (TOP_EXPONENT - i as u64) << 52;
        i += 1;
    }
    table
};
|
|
|
|
/* The polynomial below is evaluated by log2_poly2(), which puts in h+l an
approximation of log2(1+zh+zl)
for |zh| <= 1/64 + 2^-51.508, |zl| < 2^-58 and |zl| < ulp(zh).
We have |h|, |h+l| < 2^-5.459, |l| < 2^-56.162,
the relative error is bounded by 2^-91.196,
and |l| < 2^-50.523 |h| (see analyze_p2() in compoundf.sage).
*/

/// Degree-13 polynomial generated by Sollya which approximates `log2(1+z)`
/// for `|z| <= 1/64` with relative error < 2^-93.777 (cf. file p2.sollya).
/// Values are `f64` bit patterns.
///
/// Layout: degrees 1 to 5 are stored as (high, low) pairs at indices
/// `2d - 2` and `2d - 1`; degree 6 is the single value at index 10; degrees 7
/// to 13 are single values at index `4 + d`.
#[rustfmt::skip]
static LOG2P1_LOG2_POLY: [u64; 18] = [
    0x3ff71547652b82fe, 0x3c7777d0ffda0d80, // degree 1
    0xbfe71547652b82fe, 0xbc6777d0fd20b49c, // degree 2
    0x3fdec709dc3a03fd, 0x3c7d27f05171b74a, // degree 3
    0xbfd71547652b82fe, 0xbc57814e70b828b0, // degree 4
    0x3fd2776c50ef9bfe, 0x3c7e4f63e12bff83, // degree 5
    0xbfcec709dc3a03f4, // degree 6
    0x3fca61762a7adecc, // degree 7
    0xbfc71547652d8849, // degree 8
    0x3fc484b13d7e7029, // degree 9
    0xbfc2776c1b2a40fd, // degree 10
    0x3fc0c9a80f9b7c1c, // degree 11
    0xbfbecc6801121200, // degree 12
    0x3fbc6e4b91fd10e5, // degree 13
];
|
|
|
|
fn log2_poly2(z: DoubleDouble) -> DoubleDouble {
|
|
/* since we can't expect a relative accuracy better than 2^-93.777,
|
|
the lower part of the double-double approximation only needs to
|
|
have about 94-53 = 41 accurate bits. Since |p7*z^7/p1| < 2^-44,
|
|
we evaluate terms of degree 7 or more in double precision only. */
|
|
let mut h = f64::from_bits(LOG2P1_LOG2_POLY[4 + 13]); // degree 13
|
|
|
|
for i in 7..=12 {
|
|
h = dd_fmla(z.hi, z.hi, f64::from_bits(LOG2P1_LOG2_POLY[4 + i]));
|
|
}
|
|
|
|
let mut v = DoubleDouble::quick_mult_f64(z, h);
|
|
let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[10]));
|
|
v.hi = t.hi;
|
|
v.lo += t.lo;
|
|
|
|
v = DoubleDouble::quick_mult(v, z);
|
|
|
|
let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[8]));
|
|
v.hi = t.hi;
|
|
v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[9]);
|
|
|
|
v = DoubleDouble::quick_mult(v, z);
|
|
|
|
let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[6]));
|
|
v.hi = t.hi;
|
|
v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[7]);
|
|
|
|
v = DoubleDouble::quick_mult(v, z);
|
|
|
|
let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[4]));
|
|
v.hi = t.hi;
|
|
v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[5]);
|
|
|
|
v = DoubleDouble::quick_mult(v, z);
|
|
|
|
let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[2]));
|
|
v.hi = t.hi;
|
|
v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[3]);
|
|
|
|
v = DoubleDouble::quick_mult(v, z);
|
|
|
|
let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[0]));
|
|
v.hi = t.hi;
|
|
v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[1]);
|
|
|
|
v = DoubleDouble::quick_mult(v, z);
|
|
|
|
v
|
|
}
|
|
|
|
/* assuming -1 < x < 2^128, and x is representable in binary32,
|
|
put in h+l a double-double approximation of log2(1+x),
|
|
with relative error bounded by 2^-91.123, and |l| < 2^-48.574 |h|
|
|
(see analyze_log2p1_accurate() in compoundf.sage) */
|
|
pub(crate) fn compoundf_log2p1_accurate(x: f64) -> DoubleDouble {
|
|
let mut v_dd = if 1.0 >= x {
|
|
// then 1.0 >= |x| since x > -1
|
|
if (x as f32).abs() >= f32::from_bits(0x25000000) {
|
|
DoubleDouble::from_exact_add(1.0, x)
|
|
} else {
|
|
DoubleDouble::new(x, 1.0)
|
|
}
|
|
} else {
|
|
// fast_two_sum() is exact when |x| < 2^54 by Lemma 1 condition (ii) of [1]
|
|
DoubleDouble::from_exact_add(x, 1.0)
|
|
};
|
|
|
|
// now h + l = 1 + x + eps with |eps| <= 2^-105 |h| and |l| <= ulp(h)
|
|
let mut v = v_dd.hi.to_bits();
|
|
let m = v & 0xfffffffffffffu64;
|
|
let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
|
|
|
|
let scale = f64::from_bits(LOG2P1_SCALE[e.wrapping_add(29) as usize]);
|
|
v_dd.hi *= scale;
|
|
v_dd.lo *= scale;
|
|
|
|
// now |h| < sqrt(2) and |l| <= ulp(h) <= 2^-52
|
|
|
|
// now 1 + x ~ 2^e * (h + l) thus log2(1+x) ~ e + log2(h+l)
|
|
|
|
v = (2.0 + v_dd.hi).to_bits(); // add 2 so that v.f is always in the binade [2, 4)
|
|
let i: i32 = (v >> 45) as i32 - 0x2002d; // h is near 1/2+(i+13)/64
|
|
let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
|
|
let mut z_dd = DoubleDouble::new(r * v_dd.lo, dd_fmla(r, v_dd.hi, -1.0)); // exact, -1/64 <= zh <= 1/64
|
|
// since |r| <= 0x1.68p+0 and |l| <= 2^-52, |zl| <= 2^-51.508
|
|
// zh + zl = r*(h+l)-1
|
|
// log2(h+l) = -log2(r) + log2(r*(h+l)) = -log2(r) + log2(1+zh+zl)
|
|
z_dd = DoubleDouble::from_exact_add(z_dd.hi, z_dd.lo);
|
|
// now |zh| <= 1/64 + 2^-51.508 and |zl| < 2^-58
|
|
/* the relative error of fast_two_sum() is bounded by 2^-105,
|
|
this amplified the relative error on p2() as follows:
|
|
(1+2^-91.196)*(1+2^-105)-1 < 2^-91.195. */
|
|
|
|
// now |zh| <= 1/64 + 2^-51.508 and |zl| < 2^-58
|
|
/* the relative error of fast_two_sum() is bounded by 2^-105,
|
|
this amplified the relative error on p2() as follows:
|
|
(1+2^-91.196)*(1+2^-105)-1 < 2^-91.195. */
|
|
let log_p = log2_poly2(z_dd);
|
|
// ph + pl approximates log2(1+zh+zl) with relative error < 2^-93.471
|
|
|
|
/* since |log2inv[i][0]| < 1 and e is integer, the precondition of
|
|
fast_two_sum is fulfilled: either |e| >= 1, or e=0 and fast_two_sum
|
|
is exact */
|
|
let log2_inv = LOG2P1_COMPOUNDF_LOG2_INV[i as usize];
|
|
v_dd = DoubleDouble::from_exact_add(e as f64, f64::from_bits(log2_inv.1));
|
|
v_dd.lo += f64::from_bits(log2_inv.0);
|
|
let mut p = DoubleDouble::from_exact_add(v_dd.hi, log_p.hi);
|
|
p.lo += v_dd.lo + log_p.lo;
|
|
p
|
|
}
|
|
|
|
pub(crate) fn compoundf_exp2_poly2(z: DoubleDouble) -> DoubleDouble {
|
|
/* Q2 is a degree-8 polynomial generated by Sollya (cf q2.sollya)
|
|
with absolute error < 2^-85.218 */
|
|
|
|
static Q2: [u64; 12] = [
|
|
0x3ff0000000000000,
|
|
0x3fe62e42fefa39ef,
|
|
0x3c7abc9d45534d06,
|
|
0x3fcebfbdff82c58f,
|
|
0xbc65e4383cf9ddf7,
|
|
0x3fac6b08d704a0c0,
|
|
0xbc46cbc55586c8f1,
|
|
0x3f83b2ab6fba4e77,
|
|
0x3f55d87fe789aec5,
|
|
0x3f2430912f879daa,
|
|
0x3eeffcc774b2367a,
|
|
0x3eb62c017b9bdfe6,
|
|
];
|
|
let h2 = z.hi * z.hi;
|
|
let c7 = dd_fmla(f64::from_bits(Q2[11]), z.hi, f64::from_bits(Q2[10]));
|
|
let mut c5 = dd_fmla(f64::from_bits(Q2[9]), z.hi, f64::from_bits(Q2[8]));
|
|
c5 = dd_fmla(c7, h2, c5);
|
|
// since ulp(c5*h^5) <= 2^-86, we still compute c5*z as double
|
|
let z_vqh = c5 * z.hi;
|
|
let mut q = DoubleDouble::from_exact_add(f64::from_bits(Q2[7]), z_vqh);
|
|
// multiply by z
|
|
q = DoubleDouble::quick_mult(q, z);
|
|
// add coefficient of degree 3
|
|
let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[5]), q.hi);
|
|
q.hi = t.hi;
|
|
q.lo += t.lo + f64::from_bits(Q2[6]);
|
|
// multiply by z and add coefficient of degree 2
|
|
q = DoubleDouble::quick_mult(q, z);
|
|
let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[3]), q.hi);
|
|
q.hi = t.hi;
|
|
q.lo += t.lo + f64::from_bits(Q2[4]);
|
|
// multiply by h+l and add coefficient of degree 1
|
|
q = DoubleDouble::quick_mult(q, z);
|
|
let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[1]), q.hi);
|
|
q.hi = t.hi;
|
|
q.lo += t.lo + f64::from_bits(Q2[2]);
|
|
// multiply by h+l and add coefficient of degree 0
|
|
q = DoubleDouble::quick_mult(q, z);
|
|
let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[0]), q.hi);
|
|
q.hi = t.hi;
|
|
q.lo += t.lo;
|
|
q
|
|
}
|
|
|
|
/* return the correct rounding of (1+x)^y or -1 if the rounding test failed,
|
|
where t is an approximation of y*log2(1+x).
|
|
We assume |h+l| < 150, |l/h| < 2^-48.445 |h|,
|
|
and the relative error between h+l and y*log2(1+x) is < 2^-91.120.
|
|
x and y are the original inputs of compound. */
|
|
fn compoundf_exp2_accurate(x_dd: DoubleDouble, x: f32, y: f32) -> f32 {
|
|
if y == 1.0 {
|
|
let res = 1.0 + x;
|
|
return res;
|
|
}
|
|
let k = x_dd.hi.round_ties_even_finite(); // |k| <= 150
|
|
|
|
// check easy cases h+l is tiny thus 2^(h+l) rounds to 1, 1- or 1+
|
|
if k == 0. && x_dd.hi.abs() <= f64::from_bits(0x3e6715476af0d4c8) {
|
|
/* the relative error between h and y*log2(1+x) is bounded by
|
|
(1 + 2^-48.445) * (1 + 2^-91.120) - 1 < 2^-48.444.
|
|
2^h rounds to 1 to nearest for |h| <= H0 := 0x1.715476af0d4d9p-25.
|
|
The above threshold is such that h*(1+2^-48.444) < H0. */
|
|
return (1.0 + x_dd.hi * 0.5) as f32;
|
|
}
|
|
|
|
let r = x_dd.hi - k; // |r| <= 1/2, exact
|
|
// since r is an integer multiple of ulp(h), fast_two_sum() below is exact
|
|
let mut v_dd = DoubleDouble::from_exact_add(r, x_dd.lo);
|
|
let mut v = (3.015625 + v_dd.hi).to_bits(); // 2.5 <= v <= 3.5015625
|
|
// we add 2^-6 so that i is rounded to nearest
|
|
let i: i32 = ((v >> 46) as i32).wrapping_sub(0x10010); // 0 <= i <= 32
|
|
// h is near (i-16)/2^5
|
|
v_dd.hi -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
|
|
|
|
// now |h| <= 2^-6
|
|
// 2^(h+l) = 2^k * exp2_U[i] * 2^(h+l)
|
|
v_dd = DoubleDouble::from_exact_add(v_dd.hi, v_dd.lo);
|
|
let q = compoundf_exp2_poly2(v_dd);
|
|
|
|
/* we have 0.989 < qh < 1.011, |ql| < 2^-51.959, and
|
|
|qh + ql - 2^(h+l)| < 2^-85.210 */
|
|
let exp2u = DoubleDouble::from_bit_pair(COMPOUNDF_EXP2_U[i as usize]);
|
|
let mut q = DoubleDouble::quick_mult(exp2u, q);
|
|
q = DoubleDouble::from_exact_add(q.hi, q.lo);
|
|
/* Total error:
|
|
* at input we have a relative error between h+l and y*log2(1+x) bounded
|
|
by 2^-91.120: h + l = y*log2(1+x) * (1 + eps1) with |eps1| < 2^-91.120.
|
|
Since |h+l| <= 150, this yields an absolute error bounded
|
|
by 150*2^-91.120 < 2^-83.891:
|
|
h + l = y*log2(1+x) + eps2 with |eps2| <= 150*2^-91.120 < 2^-83.891.
|
|
* the absolute error in q2() is bounded by 2^-85.210
|
|
and is multiplied by exp2_U[i] < 1.415
|
|
* the absolute d_mul() error is bounded by 2^-102.199
|
|
* the fast_two_sum() error is bounded by 2^-105
|
|
All this yields an absolute error on qh+ql bounded by:
|
|
2^-83.891 + 2^-85.210*1.415 + 2^-102.199 + 2^-105 < 2^-83.242.
|
|
|
|
We distinguish the "small" case when at input |h+l| <= 2^-9.
|
|
In this case k=0, i=16, thus exp2_T[i]=0, exp2_U[i]=1,
|
|
and absolute error in q2() is bounded by 2^-102.646,
|
|
and remains unchanged since the d_mul() call does not change qh, ql.
|
|
*/
|
|
|
|
/* Rounding test: since |ql| < ulp(qh), and the error is less than ulp(qh),
|
|
the rounding test can fail only when the last 53-25 = 28 bits of qh
|
|
represent a signed number in [-1,1] (when it is -2 or 2, adding ql and
|
|
the error cannot cross a rounding boundary). */
|
|
let mut w = q.hi.to_bits();
|
|
if ((w.wrapping_add(1)) & 0xfffffffu64) <= 2 {
|
|
static ERR: [u64; 2] = [0x3abb100000000000, 0x3a2d800000000000];
|
|
let small: bool = k == 0. && i == 16 && x_dd.hi <= f64::from_bits(0x3f60000000000000);
|
|
let err = f64::from_bits(ERR[small as usize]);
|
|
|
|
w = (q.hi + (q.lo + err)).to_bits();
|
|
w = unsafe { w.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) };
|
|
}
|
|
|
|
/* multiply qh+ql by 2^k: since 0.989 < qh_in < 1.011 and
|
|
0.707 < exp2_U[i] < 1.415, we have 0.69 < qh+ql < 1.44 */
|
|
v = (q.hi + q.lo).to_bits();
|
|
/* For RNDN, if qh fits exactly in 25 bits, and ql is tiny, so that
|
|
qh + ql rounds to qh, then we might have a double-rounding issue. */
|
|
if (w.wrapping_shl(36)) == 0 && f64::from_bits(v) == q.hi && q.lo != 0. {
|
|
v = v.wrapping_add((if q.lo > 0. { 1i64 } else { -1i64 }) as u64); // simulate round to odd
|
|
}
|
|
v = unsafe { v.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) };
|
|
// there is no underflow/overflow in the scaling by 2^k since |k| <= 150
|
|
f64::from_bits(v) as f32
|
|
}
|
|
|
|
// at input, exact is non-zero iff (1+x)^y is exact
|
|
// x,y=0x1.0f6f1ap+1,0x1.c643bp+5: 49 identical bits after round bit
|
|
// x,y=0x1.ef272cp+15,-0x1.746ab2p+1: 55 identical bits after round bit
|
|
// x,y=0x1.07ffcp+0,-0x1.921a8ap+4: 47 identical bits after round bit
|
|
#[cold]
|
|
#[inline(never)]
|
|
fn compoundf_accurate(x: f32, y: f32) -> f32 {
|
|
let mut v = compoundf_log2p1_accurate(x as f64);
|
|
/* h + l is a double-double approximation of log(1+x),
|
|
with relative error bounded by 2^-91.123,
|
|
and |l| < 2^-48.574 |h| */
|
|
v = DoubleDouble::quick_mult_f64(v, y as f64);
|
|
/* h + l is a double-double approximation of y*log(1+x).
|
|
Since 2^-149 <= |h_in+l_in| < 128 and 2^-149 <= |y| < 2^128, we have
|
|
2^-298 <= |h+l| < 2^135, thus no underflow/overflow in double is possible.
|
|
The s_mul() error is bounded by ulp(l). Since |l_in| < 2^-48.574 |h_in|,
|
|
and the intermediate variable lo in s_mul() satisfies |lo| < ulp(h),
|
|
we have |l| < ulp(h) + |y l_in| <= ulp(h) + 2^-48.574 |y h_in|
|
|
< (2^-52 + 2^-48.574) |h| < 2^-48.445 |h|. The s_mul() error is thus
|
|
bounded by 2^-48.445*2^-52 = 2^-100.445 |h|. This yields a total relative
|
|
error bounded by (1+2^-91.123)*(1+2^-100.445)-1 < 2^-91.120. */
|
|
compoundf_exp2_accurate(v, x, y)
|
|
}
|
|
|
|
/// Computes compound function (1.0 + x)^y
|
|
///
|
|
/// Max ULP 0.5
|
|
#[inline]
|
|
pub fn f_compoundf(x: f32, y: f32) -> f32 {
|
|
/* Rules from IEEE 754-2019 for compound (x, n) with n integer:
|
|
(a) compound (x, 0) is 1 for x >= -1 or quiet NaN
|
|
(b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
|
|
(c) compound (-1, n) is +0 for n > 0
|
|
(d) compound (+/-0, n) is 1
|
|
(e) compound (+Inf, n) is +Inf for n > 0
|
|
(f) compound (+Inf, n) is +0 for n < 0
|
|
(g) compound (x, n) is qNaN and signals the invalid exception for x < -1
|
|
(h) compound (qNaN, n) is qNaN for n <> 0.
|
|
*/
|
|
let mone = (-1.0f32).to_bits();
|
|
let nx = x.to_bits();
|
|
let ny = y.to_bits();
|
|
if nx >= mone {
|
|
return as_compoundf_special(x, y);
|
|
} // x <= -1
|
|
// now x > -1
|
|
|
|
let ax: u32 = nx.wrapping_shl(1);
|
|
let ay: u32 = ny.wrapping_shl(1);
|
|
if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
|
|
return as_compoundf_special(x, y);
|
|
} // x=+-0 || x=+-inf/nan || y=+-0 || y=+-inf/nan
|
|
|
|
// evaluate (1+x)^y explicitly for integer y in [-16,16] range and |x|<2^64
|
|
if is_integerf(y) && ay <= 0x83000000u32 && ax <= 0xbefffffeu32 {
|
|
if ax <= 0x62000000u32 {
|
|
return 1.0 + y * x;
|
|
} // does it work for |x|<2^-29 and |y|<=16?
|
|
let mut s = x as f64 + 1.;
|
|
let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
|
|
|
|
// exponentiation by squaring: O(log(y)) complexity
|
|
let mut acc = if iter_count % 2 != 0 { s } else { 1. };
|
|
|
|
while {
|
|
iter_count >>= 1;
|
|
iter_count
|
|
} != 0
|
|
{
|
|
s = s * s;
|
|
if iter_count % 2 != 0 {
|
|
acc *= s;
|
|
}
|
|
}
|
|
|
|
let dz = if y.is_sign_negative() { 1. / acc } else { acc };
|
|
return dz as f32;
|
|
}
|
|
|
|
let xd = x as f64;
|
|
let yd = y as f64;
|
|
let tx = xd.to_bits();
|
|
let ty = yd.to_bits();
|
|
|
|
let l: f64 = compoundf_log2p1_fast(f64::from_bits(tx));
|
|
|
|
/* l approximates log2(1+x) with relative error < 2^-47.997,
|
|
and 2^-149 <= |l| < 128 */
|
|
|
|
let t: u64 = (l * f64::from_bits(ty)).to_bits();
|
|
/* since 2^-149 <= |l| < 128 and 2^-149 <= |y| < 2^128, we have
|
|
2^-298 <= |t| < 2^135, thus no underflow/overflow in double is possible.
|
|
The relative error is bounded by (1+2^-47.997)*(1+2^-52)-1 < 2^-47.909 */
|
|
|
|
// detect overflow/underflow
|
|
if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
|
|
// |t| >= 128
|
|
if t >= 0x3018bu64 << 46 {
|
|
// t <= -150
|
|
return black_box(f32::from_bits(0x00800000)) * black_box(f32::from_bits(0x00800000));
|
|
} else if (t >> 63) == 0 {
|
|
// t >= 128: overflow
|
|
return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
|
|
}
|
|
}
|
|
|
|
/* since |t| < 150, the absolute error on t is bounded by
|
|
150*2^-47.909 < 2^-40.680 */
|
|
|
|
// 2^t rounds to 1 to nearest when |t| <= 0x1.715476ba97f14p-25
|
|
if (t.wrapping_shl(1)) <= 0x3e6715476ba97f14u64 {
|
|
return if (t >> 63) != 0 {
|
|
black_box(1.0) - black_box(f32::from_bits(0x33000000))
|
|
} else {
|
|
black_box(1.0) + black_box(f32::from_bits(0x33000000))
|
|
};
|
|
}
|
|
|
|
let res = exp2_fast(f64::from_bits(t));
|
|
if res != -1.0 {
|
|
return res as f32;
|
|
}
|
|
compoundf_accurate(x, y)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A tiny base combined with a huge negative exponent.
    #[test]
    fn tiny_base_with_huge_negative_exponent() {
        let x = 0.000000000000000000000000000000000000011754944;
        let y = -170502050000000000000000000000000000000.;
        assert_eq!(f_compoundf(x, y), 1.);
    }

    /// Ordinary finite inputs: one fractional exponent, one small integer exponent.
    #[test]
    fn finite_inputs() {
        assert_eq!(f_compoundf(1.235, 1.432), 3.1634824);
        assert_eq!(f_compoundf(2., 3.0), 27.);
    }

    /// Inputs handled by the special-case routine.
    #[test]
    fn special_inputs() {
        assert!(f_compoundf(-2., 5.0).is_nan());
        assert_eq!(f_compoundf(1., f32::INFINITY), f32::INFINITY);
        assert_eq!(f_compoundf(1., f32::NEG_INFINITY), 0.0);
    }
}
|