705 lines
26 KiB
Rust
705 lines
26 KiB
Rust
/*
|
|
* // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
|
|
* //
|
|
* // Redistribution and use in source and binary forms, with or without modification,
|
|
* // are permitted provided that the following conditions are met:
|
|
* //
|
|
* // 1. Redistributions of source code must retain the above copyright notice, this
|
|
* // list of conditions and the following disclaimer.
|
|
* //
|
|
* // 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
* // this list of conditions and the following disclaimer in the documentation
|
|
* // and/or other materials provided with the distribution.
|
|
* //
|
|
* // 3. Neither the name of the copyright holder nor the names of its
|
|
* // contributors may be used to endorse or promote products derived from
|
|
* // this software without specific prior written permission.
|
|
* //
|
|
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
use crate::common::f_fmla;
|
|
use crate::double_double::DoubleDouble;
|
|
use crate::logs::{fast_log_d_to_dd, fast_log_dd};
|
|
use crate::polyeval::{f_polyeval4, f_polyeval5};
|
|
|
|
#[cold]
|
|
fn inverf_0p06_to_0p75(x: DoubleDouble) -> DoubleDouble {
|
|
// First step rational approximant is generated, but it's ill-conditioned, thus
|
|
// we're using taylor expansion to create Newton form at the point.
|
|
// Generated in Wolfram Mathematica:
|
|
// <<FunctionApproximations`
|
|
// ClearAll["Global`*"]
|
|
// f[x_]:=InverseErf[x]/x
|
|
// g[x_] =f[Sqrt[x]];
|
|
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
|
|
// num=Numerator[approx][[1]];
|
|
// den=Denominator[approx][[1]];
|
|
// poly=den;
|
|
// coeffs=CoefficientList[poly,z];
|
|
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
|
|
// x0=SetPrecision[0.5625,75];
|
|
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
|
|
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
|
|
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
|
|
const P: [(u64, u64); 10] = [
|
|
(0xbc3e06eda42202a0, 0x3f93c2fc5d00e0c8),
|
|
(0xbc6eb374406b33b4, 0xbfc76fcfd022e3ff),
|
|
(0xbc857822d7ffd282, 0x3fe6f8443546010a),
|
|
(0x3c68269c66dfb28a, 0xbff80996754ceb79),
|
|
(0x3c543dce8990a9f9, 0x3ffcf778d5ef0504),
|
|
(0xbc72fc55f73765f6, 0xbff433be821423d0),
|
|
(0xbc66d05fb37c8592, 0x3fdf15f19e9d8da4),
|
|
(0x3c56dfb85e83a2c5, 0xbfb770b6827e0829),
|
|
(0x3bff1472ecdfa403, 0x3f7a98a2980282bb),
|
|
(0x3baffb33d69d6276, 0xbf142a246fd2c07c),
|
|
];
|
|
let x2 = DoubleDouble::quick_mult(x, x);
|
|
let vz = DoubleDouble::full_add_f64(x2, -0.5625);
|
|
|
|
let vx2 = vz * vz;
|
|
let vx4 = vx2 * vx2;
|
|
let vx8 = vx4 * vx4;
|
|
|
|
let p0 = DoubleDouble::mul_add(
|
|
vz,
|
|
DoubleDouble::from_bit_pair(P[1]),
|
|
DoubleDouble::from_bit_pair(P[0]),
|
|
);
|
|
let p1 = DoubleDouble::mul_add(
|
|
vz,
|
|
DoubleDouble::from_bit_pair(P[3]),
|
|
DoubleDouble::from_bit_pair(P[2]),
|
|
);
|
|
let p2 = DoubleDouble::mul_add(
|
|
vz,
|
|
DoubleDouble::from_bit_pair(P[5]),
|
|
DoubleDouble::from_bit_pair(P[4]),
|
|
);
|
|
let p3 = DoubleDouble::mul_add(
|
|
vz,
|
|
DoubleDouble::from_bit_pair(P[7]),
|
|
DoubleDouble::from_bit_pair(P[6]),
|
|
);
|
|
let p4 = DoubleDouble::mul_add(
|
|
vz,
|
|
DoubleDouble::from_bit_pair(P[9]),
|
|
DoubleDouble::from_bit_pair(P[8]),
|
|
);
|
|
|
|
let q0 = DoubleDouble::mul_add(vx2, p1, p0);
|
|
let q1 = DoubleDouble::mul_add(vx2, p3, p2);
|
|
|
|
let r0 = DoubleDouble::mul_add(vx4, q1, q0);
|
|
let num = DoubleDouble::mul_add(vx8, p4, r0);
|
|
// Generated in Wolfram Mathematica:
|
|
// <<FunctionApproximations`
|
|
// ClearAll["Global`*"]
|
|
// f[x_]:=InverseErf[x]/x
|
|
// g[x_] =f[Sqrt[x]];
|
|
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
|
|
// num=Numerator[approx][[1]];
|
|
// den=Denominator[approx][[1]];
|
|
// coeffs=CoefficientList[poly,z];
|
|
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
|
|
// x0=SetPrecision[0.5625,75];
|
|
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
|
|
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
|
|
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
|
|
const Q: [(u64, u64); 10] = [
|
|
(0xbc36337f24e57cb9, 0x3f92388d5d757e3a),
|
|
(0xbc63dfae43d60e0b, 0xbfc6ca7da581358c),
|
|
(0xbc77656389bd0e62, 0x3fe7c82ce417b4e0),
|
|
(0xbc93679667bef2f0, 0xbffad58651fd1a51),
|
|
(0x3ca2c6cb9eb17fb4, 0x4001bdb67e93a242),
|
|
(0xbc9b58961ba253bc, 0xbffbdaeff6fbb81c),
|
|
(0x3c7861f549c6aa61, 0x3fe91b12cf47da3a),
|
|
(0xbc696dfd665b2f5e, 0xbfc7c5d0ffb7f1da),
|
|
(0x3c1552b0ec0ba7b3, 0x3f939ada247f7609),
|
|
(0xbbcaa226fb7b30a8, 0xbf41be65038ccfe6),
|
|
];
|
|
|
|
let p0 = DoubleDouble::mul_add(
|
|
vz,
|
|
DoubleDouble::from_bit_pair(Q[1]),
|
|
DoubleDouble::from_bit_pair(Q[0]),
|
|
);
|
|
let p1 = DoubleDouble::mul_add(
|
|
vz,
|
|
DoubleDouble::from_bit_pair(Q[3]),
|
|
DoubleDouble::from_bit_pair(Q[2]),
|
|
);
|
|
let p2 = DoubleDouble::mul_add(
|
|
vz,
|
|
DoubleDouble::from_bit_pair(Q[5]),
|
|
DoubleDouble::from_bit_pair(Q[4]),
|
|
);
|
|
let p3 = DoubleDouble::mul_add(
|
|
vz,
|
|
DoubleDouble::from_bit_pair(Q[7]),
|
|
DoubleDouble::from_bit_pair(Q[6]),
|
|
);
|
|
let p4 = DoubleDouble::mul_add(
|
|
vz,
|
|
DoubleDouble::from_bit_pair(Q[9]),
|
|
DoubleDouble::from_bit_pair(Q[8]),
|
|
);
|
|
|
|
let q0 = DoubleDouble::mul_add(vx2, p1, p0);
|
|
let q1 = DoubleDouble::mul_add(vx2, p3, p2);
|
|
|
|
let r0 = DoubleDouble::mul_add(vx4, q1, q0);
|
|
let den = DoubleDouble::mul_add(vx8, p4, r0);
|
|
|
|
let r = DoubleDouble::div(num, den);
|
|
DoubleDouble::quick_mult(r, x)
|
|
}
|
|
|
|
#[inline]
|
|
fn inverf_asympt_small(z: DoubleDouble, zeta_sqrt: DoubleDouble) -> DoubleDouble {
|
|
// Generated in Wolfram Mathematica:
|
|
// <<FunctionApproximations`
|
|
// ClearAll["Global`*"]
|
|
// f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
|
|
// {err0, approx,err1}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},10,10},WorkingPrecision->90]
|
|
// num=Numerator[approx];
|
|
// den=Denominator[approx];
|
|
// poly=num;
|
|
// coeffs=CoefficientList[poly,z];
|
|
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
|
|
const P: [(u64, u64); 11] = [
|
|
(0x3c936555853a8b2c, 0x3ff0001df06a2515),
|
|
(0x3cea488e802db3c3, 0x404406ba373221da),
|
|
(0xbce27d42419754e3, 0x407b0442e38a9597),
|
|
(0xbd224a407624cbdf, 0x409c9277e31ef446),
|
|
(0x3d4f16ce65d6fea0, 0x40aec3ec005b1d8a),
|
|
(0x3d105bc37bc61b58, 0x40b46be8f860f4d9),
|
|
(0x3d5ca133dcdecaa0, 0x40b3826e6a32dad7),
|
|
(0x3d1d52013ba8aa38, 0x40aae93a603cf3ea),
|
|
(0xbd07a75306df0fc3, 0x4098ab8357dc2e51),
|
|
(0x3d1bb6770bb7a27e, 0x407ebead00879010),
|
|
(0xbbfcbff4a9737936, 0x3f8936117ccbff83),
|
|
];
|
|
|
|
let z2 = DoubleDouble::quick_mult(z, z);
|
|
let z4 = DoubleDouble::quick_mult(z2, z2);
|
|
let z8 = DoubleDouble::quick_mult(z4, z4);
|
|
|
|
let q0 = DoubleDouble::mul_add(
|
|
DoubleDouble::from_bit_pair(P[1]),
|
|
z,
|
|
DoubleDouble::from_bit_pair(P[0]),
|
|
);
|
|
let q1 = DoubleDouble::mul_add(
|
|
DoubleDouble::from_bit_pair(P[3]),
|
|
z,
|
|
DoubleDouble::from_bit_pair(P[2]),
|
|
);
|
|
let q2 = DoubleDouble::mul_add(
|
|
DoubleDouble::from_bit_pair(P[5]),
|
|
z,
|
|
DoubleDouble::from_bit_pair(P[4]),
|
|
);
|
|
let q3 = DoubleDouble::mul_add(
|
|
DoubleDouble::from_bit_pair(P[7]),
|
|
z,
|
|
DoubleDouble::from_bit_pair(P[6]),
|
|
);
|
|
let q4 = DoubleDouble::mul_add(
|
|
DoubleDouble::from_bit_pair(P[9]),
|
|
z,
|
|
DoubleDouble::from_bit_pair(P[8]),
|
|
);
|
|
|
|
let r0 = DoubleDouble::mul_add(z2, q1, q0);
|
|
let r1 = DoubleDouble::mul_add(z2, q3, q2);
|
|
|
|
let s0 = DoubleDouble::mul_add(z4, r1, r0);
|
|
let s1 = DoubleDouble::mul_add(z2, DoubleDouble::from_bit_pair(P[10]), q4);
|
|
let num = DoubleDouble::mul_add(z8, s1, s0);
|
|
|
|
// See numerator generation above:
|
|
// poly=den;
|
|
// coeffs=CoefficientList[poly,z];
|
|
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
|
|
const Q: [(u64, u64); 11] = [
|
|
(0x0000000000000000, 0x3ff0000000000000),
|
|
(0xbc75b1109d4a3262, 0x40440782efaab17f),
|
|
(0x3d1f7775b207d84f, 0x407b2da74b0d39f2),
|
|
(0xbd3291fdbab49501, 0x409dac8d9e7c90b2),
|
|
(0xbd58d8fdd27707a9, 0x40b178dfeffa3192),
|
|
(0xbd57fc74ad705ce0, 0x40bad19b686f219f),
|
|
(0x3d4075510031f2cd, 0x40be70a598208cea),
|
|
(0xbd5442e109152efb, 0x40b9683ef36ae330),
|
|
(0x3d5398192933962e, 0x40b04b7c4c3ca8ee),
|
|
(0x3d2d04d03598e303, 0x409bd0080799fbf1),
|
|
(0x3d2a988eb552ef44, 0x40815a46f12bafe3),
|
|
];
|
|
|
|
let q0 = DoubleDouble::mul_add_f64(
|
|
DoubleDouble::from_bit_pair(Q[1]),
|
|
z,
|
|
f64::from_bits(0x3ff0000000000000),
|
|
);
|
|
let q1 = DoubleDouble::mul_add(
|
|
DoubleDouble::from_bit_pair(Q[3]),
|
|
z,
|
|
DoubleDouble::from_bit_pair(Q[2]),
|
|
);
|
|
let q2 = DoubleDouble::mul_add(
|
|
DoubleDouble::from_bit_pair(Q[5]),
|
|
z,
|
|
DoubleDouble::from_bit_pair(Q[4]),
|
|
);
|
|
let q3 = DoubleDouble::mul_add(
|
|
DoubleDouble::from_bit_pair(Q[7]),
|
|
z,
|
|
DoubleDouble::from_bit_pair(Q[6]),
|
|
);
|
|
let q4 = DoubleDouble::mul_add(
|
|
DoubleDouble::from_bit_pair(Q[9]),
|
|
z,
|
|
DoubleDouble::from_bit_pair(Q[8]),
|
|
);
|
|
|
|
let r0 = DoubleDouble::mul_add(z2, q1, q0);
|
|
let r1 = DoubleDouble::mul_add(z2, q3, q2);
|
|
|
|
let s0 = DoubleDouble::mul_add(z4, r1, r0);
|
|
let s1 = DoubleDouble::mul_add(z2, DoubleDouble::from_bit_pair(Q[10]), q4);
|
|
let den = DoubleDouble::mul_add(z8, s1, s0);
|
|
let r = DoubleDouble::div(num, den);
|
|
DoubleDouble::quick_mult(r, zeta_sqrt)
|
|
}
|
|
|
|
// branch for |x| > 0.9999 for extreme tail
|
|
#[cold]
|
|
fn inverf_asympt_long(z: DoubleDouble, zeta_sqrt: DoubleDouble) -> DoubleDouble {
|
|
// First step rational approximant is generated, but it's ill-conditioned, thus
|
|
// we're using taylor expansion to create Newton form at the point.
|
|
// Generated in Wolfram Mathematica:
|
|
// <<FunctionApproximations`
|
|
// ClearAll["Global`*"]
|
|
// f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
|
|
// {err0, approx}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},13,13},WorkingPrecision->90]
|
|
// num=Numerator[approx][[1]];
|
|
// den=Denominator[approx][[1]];
|
|
// poly=num;
|
|
// coeffs=CoefficientList[poly,z];
|
|
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
|
|
const P: [(u64, u64); 14] = [
|
|
(0x3c97612f9b24a614, 0x3ff0000ba84cc7a5),
|
|
(0xbcee8fe2da463412, 0x40515246546f5d88),
|
|
(0x3d2fa4a2b891b526, 0x40956b6837159b11),
|
|
(0x3d5d673ffad4f817, 0x40c5a1aa3be58652),
|
|
(0x3d8867a1e5506f88, 0x40e65ebb1e1e7c75),
|
|
(0xbd9bbc0764ed8f5b, 0x40fd2064a652e5c2),
|
|
(0xbda78e569c0d237f, 0x410a385c627c461c),
|
|
(0xbdab3123ebc465d7, 0x4110f05ca2b65fe5),
|
|
(0x3d960def35955192, 0x4110bb079af2fe08),
|
|
(0xbd97904816054836, 0x410911c24610c11c),
|
|
(0xbd937745e9192593, 0x40fc603244adca35),
|
|
(0xbd65fbc476d63050, 0x40e6399103188c21),
|
|
(0xbd61016ef381cce6, 0x40c6482b44995b89),
|
|
(0x3c326105c49e5a1a, 0xbfab44bd8b4e3138),
|
|
];
|
|
|
|
let z2 = z * z;
|
|
let z4 = z2 * z2;
|
|
let z8 = z4 * z4;
|
|
|
|
let g0 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(P[1]),
|
|
DoubleDouble::from_bit_pair(P[0]),
|
|
);
|
|
let g1 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(P[3]),
|
|
DoubleDouble::from_bit_pair(P[2]),
|
|
);
|
|
let g2 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(P[5]),
|
|
DoubleDouble::from_bit_pair(P[4]),
|
|
);
|
|
let g3 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(P[7]),
|
|
DoubleDouble::from_bit_pair(P[6]),
|
|
);
|
|
let g4 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(P[9]),
|
|
DoubleDouble::from_bit_pair(P[8]),
|
|
);
|
|
let g5 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(P[11]),
|
|
DoubleDouble::from_bit_pair(P[10]),
|
|
);
|
|
let g6 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(P[13]),
|
|
DoubleDouble::from_bit_pair(P[12]),
|
|
);
|
|
|
|
let h0 = DoubleDouble::mul_add(z2, g1, g0);
|
|
let h1 = DoubleDouble::mul_add(z2, g3, g2);
|
|
let h2 = DoubleDouble::mul_add(z2, g5, g4);
|
|
|
|
let q0 = DoubleDouble::mul_add(z4, h1, h0);
|
|
let q1 = DoubleDouble::mul_add(z4, g6, h2);
|
|
|
|
let num = DoubleDouble::mul_add(z8, q1, q0);
|
|
|
|
// See numerator generation above:
|
|
// poly=den;
|
|
// coeffs=CoefficientList[poly,z];
|
|
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
|
|
const Q: [(u64, u64); 14] = [
|
|
(0x0000000000000000, 0x3ff0000000000000),
|
|
(0xbcfc7b886ee61417, 0x405152838f711f3c),
|
|
(0xbd33f933c14e831a, 0x409576cb78cab36e),
|
|
(0x3d33fb09e2c4898a, 0x40c5e8a2c7602ced),
|
|
(0x3d7be430c664bf7e, 0x40e766fdc8c7638c),
|
|
(0x3dac662e74cdfc0e, 0x4100276b5f47b5f1),
|
|
(0x3da67d06e82a8495, 0x410f843887f8a24a),
|
|
(0x3dbbf2e22fc2550a, 0x4116d04271703e08),
|
|
(0xbdb2fb3aed100853, 0x4119aff4ed32b74b),
|
|
(0x3dba75e7b7171c3c, 0x4116b5eb8bf386bd),
|
|
(0x3dab2d8b8c1937eb, 0x410f71c38e84cb34),
|
|
(0xbda4e2e8a50b7370, 0x4100ca04b0f36b94),
|
|
(0xbd86ed6df34fdaf9, 0x40e9151ded4cf4b7),
|
|
(0x3d6938ea702c0328, 0x40c923ee1ab270c4),
|
|
];
|
|
|
|
let g0 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(Q[1]),
|
|
DoubleDouble::from_bit_pair(Q[0]),
|
|
);
|
|
let g1 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(Q[3]),
|
|
DoubleDouble::from_bit_pair(Q[2]),
|
|
);
|
|
let g2 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(Q[5]),
|
|
DoubleDouble::from_bit_pair(Q[4]),
|
|
);
|
|
let g3 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(Q[7]),
|
|
DoubleDouble::from_bit_pair(Q[6]),
|
|
);
|
|
let g4 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(Q[9]),
|
|
DoubleDouble::from_bit_pair(Q[8]),
|
|
);
|
|
let g5 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(Q[11]),
|
|
DoubleDouble::from_bit_pair(Q[10]),
|
|
);
|
|
let g6 = DoubleDouble::mul_add(
|
|
z,
|
|
DoubleDouble::from_bit_pair(Q[13]),
|
|
DoubleDouble::from_bit_pair(Q[12]),
|
|
);
|
|
|
|
let h0 = DoubleDouble::mul_add(z2, g1, g0);
|
|
let h1 = DoubleDouble::mul_add(z2, g3, g2);
|
|
let h2 = DoubleDouble::mul_add(z2, g5, g4);
|
|
|
|
let q0 = DoubleDouble::mul_add(z4, h1, h0);
|
|
let q1 = DoubleDouble::mul_add(z4, g6, h2);
|
|
|
|
let den = DoubleDouble::mul_add(z8, q1, q0);
|
|
let r = DoubleDouble::div(num, den);
|
|
|
|
DoubleDouble::quick_mult(r, zeta_sqrt)
|
|
}
|
|
|
|
#[inline]
|
|
fn erf_core(x: DoubleDouble) -> DoubleDouble {
|
|
// x is always positive, here, should be filtered out before the call
|
|
|
|
if x.hi <= 0.0095 {
|
|
// 0.0095
|
|
// for small |x| using taylor series first 3 terms
|
|
// Generated by SageMath:
|
|
// from mpmath import mp, erf
|
|
//
|
|
// mp.prec = 100
|
|
//
|
|
// def inverf_series(n_terms):
|
|
// from mpmath import taylor
|
|
// series_erf = taylor(mp.erfinv, 0, n_terms)
|
|
// return series_erf
|
|
//
|
|
// ser = inverf_series(10)
|
|
// for i in range(1, len(ser), 2):
|
|
// k = ser[i]
|
|
// print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
|
|
let z2 = DoubleDouble::quick_mult(x, x);
|
|
let p = f_fmla(
|
|
z2.hi,
|
|
f64::from_bits(0x3fb62847c47dda48),
|
|
f64::from_bits(0x3fc053c2c0ab91c5),
|
|
);
|
|
let mut r = DoubleDouble::mul_f64_add(
|
|
z2,
|
|
p,
|
|
DoubleDouble::from_bit_pair((0xbc33ea2ef8dde075, 0x3fcdb29fb2fee5e4)),
|
|
);
|
|
r = DoubleDouble::mul_add(
|
|
z2,
|
|
r,
|
|
DoubleDouble::from_bit_pair((0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b)),
|
|
);
|
|
// (rh + rl) * z = rh * z + rl*z
|
|
let v = DoubleDouble::quick_mult(r, x);
|
|
return v;
|
|
} else if x.hi <= 0.06 {
|
|
// 0.06
|
|
// for |x| < 0.06 using taylor series first 5 terms
|
|
// Generated by SageMath:
|
|
// from mpmath import mp, erf
|
|
//
|
|
// mp.prec = 100
|
|
//
|
|
// def inverf_series(n_terms):
|
|
// from mpmath import taylor
|
|
// series_erf = taylor(mp.erfinv, 0, n_terms)
|
|
// return series_erf
|
|
//
|
|
// ser = inverf_series(10)
|
|
// for i in range(1, len(ser), 2):
|
|
// k = ser[i]
|
|
// print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
|
|
let z2 = DoubleDouble::quick_mult(x, x);
|
|
let p = f_polyeval4(
|
|
z2.hi,
|
|
f64::from_bits(0x3fb62847c47dda48),
|
|
f64::from_bits(0x3fb0a13189c6ef7a),
|
|
f64::from_bits(0x3faa7c85c89bb08b),
|
|
f64::from_bits(0x3fa5eeb1d488e312),
|
|
);
|
|
let mut r = DoubleDouble::mul_f64_add(
|
|
z2,
|
|
p,
|
|
DoubleDouble::from_bit_pair((0x3c2cec68daff0d80, 0x3fc053c2c0ab91c5)),
|
|
);
|
|
r = DoubleDouble::mul_add(
|
|
z2,
|
|
r,
|
|
DoubleDouble::from_bit_pair((0xbc33ea2ef8dde075, 0x3fcdb29fb2fee5e4)),
|
|
);
|
|
r = DoubleDouble::mul_add(
|
|
z2,
|
|
r,
|
|
DoubleDouble::from_bit_pair((0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b)),
|
|
);
|
|
// (rh + rl) * z = rh * z + rl*z
|
|
let v = DoubleDouble::quick_mult(r, x);
|
|
return v;
|
|
}
|
|
|
|
if x.hi <= 0.75 {
|
|
// |x| < 0.75
|
|
|
|
// First step rational approximant is generated, but it's ill-conditioned, thus
|
|
// we're using taylor expansion to create Newton form at the point.
|
|
// Generated in Wolfram Mathematica:
|
|
// <<FunctionApproximations`
|
|
// ClearAll["Global`*"]
|
|
// f[x_]:=InverseErf[x]/x
|
|
// g[x_] =f[Sqrt[x]];
|
|
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
|
|
// num=Numerator[approx][[1]];
|
|
// den=Denominator[approx][[1]];
|
|
// poly=den;
|
|
// coeffs=CoefficientList[poly,z];
|
|
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
|
|
// x0=SetPrecision[0.5625,75];
|
|
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
|
|
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
|
|
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
|
|
const P: [(u64, u64); 5] = [
|
|
(0xbc3e06eda42202a0, 0x3f93c2fc5d00e0c8),
|
|
(0xbc6eb374406b33b4, 0xbfc76fcfd022e3ff),
|
|
(0xbc857822d7ffd282, 0x3fe6f8443546010a),
|
|
(0x3c68269c66dfb28a, 0xbff80996754ceb79),
|
|
(0x3c543dce8990a9f9, 0x3ffcf778d5ef0504),
|
|
];
|
|
let x2 = DoubleDouble::quick_mult(x, x);
|
|
let vz = DoubleDouble::full_add_f64(x2, -0.5625);
|
|
let ps_num = f_polyeval5(
|
|
vz.hi,
|
|
f64::from_bits(0xbff433be821423d0),
|
|
f64::from_bits(0x3fdf15f19e9d8da4),
|
|
f64::from_bits(0xbfb770b6827e0829),
|
|
f64::from_bits(0x3f7a98a2980282bb),
|
|
f64::from_bits(0xbf142a246fd2c07c),
|
|
);
|
|
let mut num = DoubleDouble::mul_f64_add(vz, ps_num, DoubleDouble::from_bit_pair(P[4]));
|
|
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[3]));
|
|
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[2]));
|
|
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[1]));
|
|
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[0]));
|
|
|
|
// Generated in Wolfram Mathematica:
|
|
// <<FunctionApproximations`
|
|
// ClearAll["Global`*"]
|
|
// f[x_]:=InverseErf[x]/x
|
|
// g[x_] =f[Sqrt[x]];
|
|
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
|
|
// num=Numerator[approx][[1]];
|
|
// den=Denominator[approx][[1]];
|
|
// coeffs=CoefficientList[poly,z];
|
|
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
|
|
// x0=SetPrecision[0.5625,75];
|
|
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
|
|
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
|
|
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
|
|
const Q: [(u64, u64); 5] = [
|
|
(0xbc36337f24e57cb9, 0x3f92388d5d757e3a),
|
|
(0xbc63dfae43d60e0b, 0xbfc6ca7da581358c),
|
|
(0xbc77656389bd0e62, 0x3fe7c82ce417b4e0),
|
|
(0xbc93679667bef2f0, 0xbffad58651fd1a51),
|
|
(0x3ca2c6cb9eb17fb4, 0x4001bdb67e93a242),
|
|
];
|
|
|
|
let ps_den = f_polyeval5(
|
|
vz.hi,
|
|
f64::from_bits(0xbffbdaeff6fbb81c),
|
|
f64::from_bits(0x3fe91b12cf47da3a),
|
|
f64::from_bits(0xbfc7c5d0ffb7f1da),
|
|
f64::from_bits(0x3f939ada247f7609),
|
|
f64::from_bits(0xbf41be65038ccfe6),
|
|
);
|
|
|
|
let mut den = DoubleDouble::mul_f64_add(vz, ps_den, DoubleDouble::from_bit_pair(Q[4]));
|
|
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[3]));
|
|
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[2]));
|
|
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[1]));
|
|
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[0]));
|
|
let r = DoubleDouble::div(num, den);
|
|
let k = DoubleDouble::quick_mult(r, x);
|
|
let err = f_fmla(
|
|
k.hi,
|
|
f64::from_bits(0x3c70000000000000), // 2^-56
|
|
f64::from_bits(0x3c40000000000000), // 2^-59
|
|
);
|
|
let ub = k.hi + (k.lo + err);
|
|
let lb = k.hi + (k.lo - err);
|
|
if ub == lb {
|
|
return k;
|
|
}
|
|
return inverf_0p06_to_0p75(x);
|
|
}
|
|
|
|
let q = DoubleDouble::full_add_f64(-x, 1.0);
|
|
|
|
let mut zeta = fast_log_dd(q);
|
|
zeta = DoubleDouble::from_exact_add(zeta.hi, zeta.lo);
|
|
zeta = -zeta;
|
|
let zeta_sqrt = zeta.fast_sqrt();
|
|
let rz = zeta_sqrt.recip();
|
|
|
|
if x.hi < 0.9999 {
|
|
inverf_asympt_small(rz, zeta_sqrt)
|
|
} else {
|
|
inverf_asympt_long(rz, zeta_sqrt)
|
|
}
|
|
}
|
|
|
|
#[cold]
|
|
fn inverfc_extra_small(x: f64) -> DoubleDouble {
|
|
// Reversed order for erfinv with direct identity without subtraction.
|
|
let q = x;
|
|
|
|
let mut zeta = fast_log_d_to_dd(q);
|
|
zeta = DoubleDouble::from_exact_add(zeta.hi, zeta.lo);
|
|
zeta = -zeta;
|
|
let zeta_sqrt = zeta.fast_sqrt();
|
|
let rz = zeta_sqrt.recip();
|
|
if x >= 0.0001 {
|
|
inverf_asympt_small(rz, zeta_sqrt)
|
|
} else {
|
|
inverf_asympt_long(rz, zeta_sqrt)
|
|
}
|
|
}
|
|
|
|
/// Complementary inverse error function
|
|
pub fn f_erfcinv(x: f64) -> f64 {
|
|
let ix = x.to_bits();
|
|
|
|
if ix >= 0x4000000000000000u64 || ix == 0 {
|
|
// |x| == NaN, x == inf, |x| == 0, x < 0
|
|
if ix.wrapping_shl(1) == 0 {
|
|
return f64::INFINITY;
|
|
}
|
|
if ix == 0x4000000000000000u64 {
|
|
return f64::NEG_INFINITY;
|
|
}
|
|
return f64::NAN; // x == NaN, x == Inf, x > 2
|
|
}
|
|
|
|
if x == 1. {
|
|
return 0.;
|
|
}
|
|
|
|
// we compute erfcinv through identity
|
|
// erfcinv(x) = -erfinv(1-x)
|
|
|
|
static SIGN: [f64; 2] = [1.0, -1.0];
|
|
|
|
if x < 0.1 {
|
|
return inverfc_extra_small(x).to_f64();
|
|
}
|
|
|
|
let dx = if x > 1. {
|
|
DoubleDouble::from_full_exact_sub(2., x)
|
|
} else {
|
|
DoubleDouble::new(0., x)
|
|
};
|
|
let sign = SIGN[(x > 1.) as usize];
|
|
|
|
let mut dx = DoubleDouble::full_add_f64(-dx, 1.);
|
|
dx = DoubleDouble::from_exact_add(dx.hi, dx.lo);
|
|
erf_core(dx).to_f64() * sign
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks reference values across the evaluation branches and the NaN
    /// results for out-of-domain inputs.
    #[test]
    fn test_inverfc() {
        // (input, expected) pairs compared for exact equality.
        let exact_cases: [(f64, f64); 7] = [
            (0.12, 1.0993909519492193),
            (1.0000000000027623e-13, 5.261512368864527),
            (1.0001200000182189, -0.00010634724760131264),
            (0.7001200000182189, 0.2723481758403576),
            (1.5231200000182189, -0.502985998867995),
            (1.99545434324323243, -2.0064739778442213),
            (1., 0.),
        ];
        for &(input, expected) in exact_cases.iter() {
            assert_eq!(f_erfcinv(input), expected);
        }

        // Inputs outside [0, 2] and non-finite inputs must yield NaN.
        let invalid_inputs: [f64; 5] = [2.05, -0.01, f64::NAN, f64::NEG_INFINITY, f64::INFINITY];
        for &input in invalid_inputs.iter() {
            assert!(f_erfcinv(input).is_nan());
        }
    }
}
|