Vendor dependencies for 0.3.0 release

This commit is contained in:
2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

1
vendor/pxfm/.cargo-checksum.json vendored Normal file

File diff suppressed because one or more lines are too long

25
vendor/pxfm/Cargo.lock generated vendored Normal file
View File

@@ -0,0 +1,25 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "pxfm"
version = "0.1.24"
dependencies = [
"num-traits",
]

56
vendor/pxfm/Cargo.toml vendored Normal file
View File

@@ -0,0 +1,56 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2024"
rust-version = "1.85"
name = "pxfm"
version = "0.1.24"
authors = ["Radzivon Bartoshyk"]
build = false
include = [
"/src/",
"/README.md",
"/LICENSE.md",
"/LICENSE-APACHE.md",
]
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Fast and accurate math"
documentation = "https://github.com/awxkee/pxfm"
readme = "README.md"
keywords = [
"libm",
"math",
]
categories = ["mathematics"]
license = "BSD-3-Clause OR Apache-2.0"
repository = "https://github.com/awxkee/pxfm"
[package.metadata.docs.rs]
all-features = true
rustdoc-args = [
"--cfg",
"docsrs",
]
[lib]
name = "pxfm"
path = "src/lib.rs"
[dependencies.num-traits]
version = "0.2"
[profile.dev.package.bessel]
opt-level = 3

201
vendor/pxfm/LICENSE-APACHE.md vendored Normal file
View File

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2024 Radzivon Bartoshyk
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

26
vendor/pxfm/LICENSE.md vendored Normal file
View File

@@ -0,0 +1,26 @@
Copyright (c) Radzivon Bartoshyk. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

5
vendor/pxfm/README.md vendored Normal file
View File

@@ -0,0 +1,5 @@
# Math routines
Mostly fast and accurate math.
Most of the methods have an error of less than 0.5 ULP.

349
vendor/pxfm/src/acos.rs vendored Normal file
View File

@@ -0,0 +1,349 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::acospi::PI_OVER_TWO_F128;
use crate::asin::asin_eval;
use crate::asin_eval_dyadic::asin_eval_dyadic;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::round::RoundFinite;
/// Computes acos(x) for a double-precision argument.
///
/// For |x| < 1 the result is first computed in double-double arithmetic and is
/// accepted only when adding and subtracting the error bound rounds to the same
/// `f64` (Ziv's accuracy test). Otherwise the computation is redone by
/// `acos_less_0p5_hard` (|x| < 0.5) or `acos_hard` (0.5 <= |x| < 1).
///
/// Special cases handled directly in this function:
/// - `x == 1.0` returns `0.0`;
/// - `x == -1.0` returns pi, evaluated as `f_fmla(-x_sign, PI.hi, PI.lo)`;
/// - every other input whose biased exponent is at least `E_BIAS`
///   (|x| > 1, infinities, NaN) returns NaN.
///
/// Max found ULP 0.5
pub fn f_acos(x: f64) -> f64 {
// Biased 11-bit exponent field of x.
let x_e = (x.to_bits() >> 52) & 0x7ff;
// Exponent bias of f64: 2^10 - 1 = 1023.
const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
// pi/2 as a double-double. As in the other `DoubleDouble::new` calls of this
// function, the low part is passed first and the high part second.
const PI_OVER_TWO: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3c91a62633145c07),
f64::from_bits(0x3ff921fb54442d18),
);
// |x|, obtained by clearing the sign bit.
let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
// |x| < 0.5.
if x_e < E_BIAS - 1 {
// |x| < 2^-55.
if x_e < E_BIAS - 55 {
// When |x| < 2^-55, acos(x) rounds to pi/2.
// NOTE(review): the tiny addend (|x| + 2^-160) presumably only exists so the
// result is flagged inexact / respects the rounding mode - confirm upstream.
return (x_abs + f64::from_bits(0x35f0000000000000)) + PI_OVER_TWO.hi;
}
let x_sq = DoubleDouble::from_exact_mult(x, x);
// Initial error bound |x| * 2^-51; `asin_eval` hands back the bound that is
// used by the accuracy test below.
let err = x_abs * f64::from_bits(0x3cc0000000000000);
// Polynomial approximation:
// p ~ asin(x)/x
let (p, err) = asin_eval(x_sq, err);
// asin(x) ~ x * p
let r0 = DoubleDouble::from_exact_mult(x, p.hi);
// acos(x) = pi/2 - asin(x)
// ~ pi/2 - x * p
// = pi/2 - x * (p.hi + p.lo)
let r_hi = f_fmla(-x, p.hi, PI_OVER_TWO.hi);
// Use Dekker's 2SUM algorithm to compute the lower part.
let mut r_lo = ((PI_OVER_TWO.hi - r_hi) - r0.hi) - r0.lo;
r_lo = f_fmla(-x, p.lo, r_lo + PI_OVER_TWO.lo);
// Ziv's accuracy test: the fast result is final only if both ends of the
// error interval round to the same double.
let r_upper = r_hi + (r_lo + err);
let r_lower = r_hi + (r_lo - err);
if r_upper == r_lower {
return r_upper;
}
return acos_less_0p5_hard(x, x_sq);
}
// |x| >= 0.5
let x_sign = if x.is_sign_negative() { -1.0 } else { 1.0 };
// pi as a double-double (low part first, high part second).
const PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3ca1a62633145c07),
f64::from_bits(0x400921fb54442d18),
);
// |x| >= 1 (this also catches infinities and NaN, whose exponent field is 0x7ff)
if x_e >= E_BIAS {
// x = +-1
if x_abs == 1.0 {
// x = 1, acos(x) = 0,
// x = -1, acos(x) = pi
return if x == 1.0 {
0.0
} else {
f_fmla(-x_sign, PI.hi, PI.lo)
};
}
// |x| > 1 or x is NaN, return NaN.
return f64::NAN;
}
// When |x| >= 0.5, we perform range reduction as follow:
//
// When 0.5 <= x < 1, let:
// y = acos(x)
// We will use the double angle formula:
// cos(2y) = 1 - 2 sin^2(y)
// and the complement angle identity:
// x = cos(y) = 1 - 2 sin^2 (y/2)
// So:
// sin(y/2) = sqrt( (1 - x)/2 )
// And hence:
// y/2 = asin( sqrt( (1 - x)/2 ) )
// Equivalently:
// acos(x) = y = 2 * asin( sqrt( (1 - x)/2 ) )
// Let u = (1 - x)/2, then:
// acos(x) = 2 * asin( sqrt(u) )
// Moreover, since 0.5 <= x < 1:
// 0 < u <= 1/4, and 0 < sqrt(u) <= 0.5,
// And hence we can reuse the same polynomial approximation of asin(x) when
// |x| <= 0.5:
// acos(x) ~ 2 * sqrt(u) * P(u).
//
// When -1 < x <= -0.5, we reduce to the previous case using the formula:
// acos(x) = pi - acos(-x)
// = pi - 2 * asin ( sqrt( (1 + x)/2 ) )
// ~ pi - 2 * sqrt(u) * P(u),
// where u = (1 - |x|)/2.
// u = (1 - |x|)/2
let u = f_fmla(x_abs, -0.5, 0.5);
// v_hi + v_lo ~ sqrt(u).
// Let:
// h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
// Then:
// sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
// ~ v_hi + h / (2 * v_hi)
// So we can use:
// v_lo = h / (2 * v_hi).
let v_hi = u.sqrt();
let h;
// With a hardware FMA the residual u - v_hi^2 is obtained in one fused operation.
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
h = f_fmla(v_hi, -v_hi, u);
}
// Without it, v_hi^2 is first expanded into a double-double product.
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let v_hi_sq = DoubleDouble::from_exact_mult(v_hi, v_hi);
h = (u - v_hi_sq.hi) - v_hi_sq.lo;
}
// Scale v_lo and v_hi by 2 from the formula:
// vh = v_hi * 2
// vl = 2*v_lo = h / v_hi.
let vh = v_hi * 2.0;
let vl = h / v_hi;
// Polynomial approximation:
// p ~ asin(sqrt(u))/sqrt(u)
// Initial error bound vh * 2^-51, refined by `asin_eval`.
let err = vh * f64::from_bits(0x3cc0000000000000);
let (p, err) = asin_eval(DoubleDouble::new(0.0, u), err);
// Perform computations in double-double arithmetic:
// acos(x) ~ (vh + vl) * p, when 0.5 <= x < 1,
// ~ pi - (vh + vl) * p, when -1 < x <= -0.5.
let r0 = DoubleDouble::quick_mult(DoubleDouble::new(vl, vh), p);
let r_hi;
let r_lo;
if x.is_sign_positive() {
r_hi = r0.hi;
r_lo = r0.lo;
} else {
// pi - r0, keeping the rounding error of the high-part subtraction in r.lo.
let r = DoubleDouble::from_exact_add(PI.hi, -r0.hi);
r_hi = r.hi;
r_lo = (PI.lo - r0.lo) + r.lo;
}
// Ziv's accuracy test, same as in the |x| < 0.5 branch.
let r_upper = r_hi + (r_lo + err);
let r_lower = r_hi + (r_lo - err);
if r_upper == r_lower {
return r_upper;
}
acos_hard(x, u, v_hi, h, vh, vl)
}
/// Slow path of `f_acos` for 0.5 <= |x| < 1, taken when Ziv's accuracy test
/// fails; the result is recomputed with `DyadicFloat128` values.
///
/// The arguments are the intermediates already computed by `f_acos`:
/// * `x` - the original argument (only its sign is inspected here);
/// * `u` - (1 - |x|) / 2;
/// * `v_hi` - sqrt(u) rounded to `f64`;
/// * `h` - the residual u - v_hi^2;
/// * `vh` - 2 * v_hi;
/// * `vl` - h / v_hi.
///
/// Returns the 128-bit result converted with `fast_as_f64`.
#[cold]
#[inline(never)]
fn acos_hard(x: f64, u: f64, v_hi: f64, h: f64, vh: f64, vl: f64) -> f64 {
// Ziv's accuracy test failed, we redo the computations in Float128.
// Recalculate mod 1/64.
// idx = round(u * 64); it is passed to `asin_eval_dyadic` together with
// the remainder u - idx/64 computed further below.
let idx = (u * f64::from_bits(0x4050000000000000)).round_finite() as usize;
// After the first step of Newton-Raphson approximating v = sqrt(u), we have
// that:
// sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
// v_lo = h / (2 * v_hi)
// With error:
// sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
// = -h^2 / (2*v * (sqrt(u) + v)^2).
// Since:
// (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
// we can add another correction term to (v_hi + v_lo) that is:
// v_ll = -h^2 / (2*v_hi * 4u)
// = -v_lo * (h / 4u)
// = -vl * (h / 8u),
// making the errors:
// sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
// well beyond 128-bit precision needed.
// Get the rounding error of vl = 2 * v_lo ~ h / vh
// Get full product of vh * vl
let vl_lo;
// With a hardware FMA the remainder h - v_hi * vl comes from one fused operation.
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
vl_lo = f_fmla(-v_hi, vl, h) / v_hi;
}
// Otherwise v_hi * vl is expanded into a double-double product first.
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let vh_vl = DoubleDouble::from_exact_mult(v_hi, vl);
vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
}
// t = -h / (4u); vll = vl * t + vl_lo combines the correction term described
// above with the rounding error of vl.
let t = h * (-0.25) / u;
let vll = f_fmla(vl, t, vl_lo);
// m_v = vh + vl + vll, accumulated as DyadicFloat128 values.
let m_v_p = DyadicFloat128::new_from_f64(vl) + DyadicFloat128::new_from_f64(vll);
let mut m_v = DyadicFloat128::new_from_f64(vh) + m_v_p;
// For negative x the product below has to be subtracted from pi, so the
// factor is given a negative sign up front.
m_v.sign = if x.is_sign_negative() {
DyadicSign::Neg
} else {
DyadicSign::Pos
};
// Perform computations in Float128:
// acos(x) = (v_hi + v_lo + vll) * P(u) , when 0.5 <= x < 1,
// = pi - (v_hi + v_lo + vll) * P(u) , when -1 < x <= -0.5.
// y = u - idx/64 (0xbf90000000000000 is -2^-6).
let y_f128 =
DyadicFloat128::new_from_f64(f_fmla(idx as f64, f64::from_bits(0xbf90000000000000), u));
let p_f128 = asin_eval_dyadic(y_f128, idx);
let mut r_f128 = m_v * p_f128;
if x.is_sign_negative() {
// pi; same mantissa as `PI_OVER_TWO_F128` with the exponent raised by one.
const PI_F128: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -126,
mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
};
r_f128 = PI_F128 + r_f128;
}
r_f128.fast_as_f64()
}
/// Slow path of `f_acos` for |x| < 0.5, taken when Ziv's accuracy test fails.
///
/// Recomputes acos(x) as pi/2 - x * p with `DyadicFloat128` values, where `p`
/// is the value returned by `asin_eval_dyadic`, and converts the result back
/// with `fast_as_f64`.
///
/// * `x` - the original argument;
/// * `x_sq` - x * x as a double-double, as produced by the caller with
///   `DoubleDouble::from_exact_mult(x, x)`.
#[cold]
#[inline(never)]
fn acos_less_0p5_hard(x: f64, x_sq: DoubleDouble) -> f64 {
// Ziv's accuracy test failed, perform 128-bit calculation.
// Recalculate mod 1/64.
// idx = round(x^2 * 64), using the high part of x^2.
let idx = (x_sq.hi * f64::from_bits(0x4050000000000000)).round_finite() as usize;
// Get x^2 - idx/64 exactly. When FMA is available, double-double
// multiplication will be correct for all rounding modes. Otherwise, we use
// Float128 directly.
let mut x_f128 = DyadicFloat128::new_from_f64(x);
let u: DyadicFloat128;
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
// u = x^2 - idx/64
// (0xbf90000000000000 is -2^-6; the low part of x^2 is added afterwards.)
let u_hi = DyadicFloat128::new_from_f64(f_fmla(
idx as f64,
f64::from_bits(0xbf90000000000000),
x_sq.hi,
));
u = u_hi.quick_add(&DyadicFloat128::new_from_f64(x_sq.lo));
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
// Square x in 128-bit arithmetic, then add -idx/64.
let x_sq_f128 = x_f128.quick_mul(&x_f128);
u = x_sq_f128.quick_add(&DyadicFloat128::new_from_f64(
idx as f64 * f64::from_bits(0xbf90000000000000),
));
}
let p_f128 = asin_eval_dyadic(u, idx);
// Flip the sign of x_f128 to perform subtraction.
x_f128.sign = x_f128.sign.negate();
// r = pi/2 + (-x) * p
let r = PI_OVER_TWO_F128.quick_add(&x_f128.quick_mul(&p_f128));
r.fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_acos` against exact expected doubles: one input on the
    /// |x| >= 0.5 path and two negative inputs on the |x| < 0.5 path.
    #[test]
    fn f_acos_test() {
        let expectations = [
            (0.7, 0.7953988301841436),
            (-0.1, 1.6709637479564565),
            (-0.4, 1.9823131728623846),
        ];
        for (input, expected) in expectations {
            assert_eq!(f_acos(input), expected);
        }
    }
}

191
vendor/pxfm/src/acosf.rs vendored Normal file
View File

@@ -0,0 +1,191 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use std::hint::black_box;
/// Evaluates a 12-coefficient polynomial in `z`.
///
/// `c` holds the coefficients as raw `f64` bit patterns, lowest degree first.
/// Neighbouring coefficients are first combined into degree-1 pieces, which
/// are then merged with `z^2` and finally with `z^4`. Assuming
/// `f_fmla(a, b, c)` computes `a * b + c`, the result is
/// `c[0] + c[1] * z + ... + c[11] * z^11`.
#[inline]
pub(crate) fn poly12(z: f64, c: [u64; 12]) -> f64 {
    let coeff = c.map(f64::from_bits);
    // Degree-1 piece `coeff[i] + z * coeff[i + 1]`.
    let linear = |i: usize| f_fmla(z, coeff[i + 1], coeff[i]);

    let z_sq = z * z;
    let z_quad = z_sq * z_sq;

    // Three degree-3 groups covering coefficients 0..=3, 4..=7 and 8..=11.
    let low = f_fmla(linear(2), z_sq, linear(0));
    let mid = f_fmla(linear(6), z_sq, linear(4));
    let high = f_fmla(z_sq, linear(10), linear(8));

    f_fmla(z_quad, f_fmla(z_quad, high, mid), low)
}
/// Handles the inputs `f_acosf` routes here, i.e. everything with |x| >= 1
/// plus NaN.
///
/// * `1.0` yields `0.0`.
/// * `-1.0` yields `PI_HEAD + PI_TAIL` (pi in `f32` plus a tiny negative term).
/// * NaN inputs are propagated as `x + x`.
/// * Anything else yields NaN.
#[cold]
fn as_special(x: f32) -> f32 {
    const PI_HEAD: f32 = f64::from_bits(0x400921fb60000000) as f32;
    const PI_TAIL: f32 = -f64::from_bits(0x3e70000000000000) as f32;
    // Bit patterns of +1.0 and -1.0.
    const POS_ONE_BITS: u32 = 0x7fu32 << 23;
    const NEG_ONE_BITS: u32 = 0x17fu32 << 23;
    // Infinity with the sign bit shifted out; anything above it is a NaN.
    const INF_NO_SIGN: u32 = 0xffu32 << 24;

    let bits = x.to_bits();
    match bits {
        POS_ONE_BITS => 0.0,
        NEG_ONE_BITS => PI_HEAD + PI_TAIL,
        _ if bits.wrapping_shl(1) > INF_NO_SIGN => x + x,
        _ => f32::NAN,
    }
}
/// Computes acos(x) for a single-precision argument.
///
/// The work is done in `f64` and rounded to `f32` at the end:
/// - |x| >= 1, infinities and NaN are delegated to `as_special`;
/// - |x| < 2^-63 returns pi/2 rounded to `f32`;
/// - |x| < 0.880141 first tries a fast polynomial, whose result is returned
///   only when it passes Ziv's accuracy test;
/// - otherwise the accurate path is used: a `poly12` polynomial in x^2 for
///   |x| < 0.5, or a `poly12` polynomial in 1 - |x| scaled by sqrt(1 - |x|)
///   for 0.5 <= |x| < 1.
///
/// Max found ULP 0.49999982
#[inline]
pub fn f_acosf(x: f32) -> f32 {
    // pi/2 rounded to f64.
    const PI2: f64 = f64::from_bits(0x3ff921fb54442d18);
    // Offset used when |x| >= 0.5, indexed by the sign bit: 0 for x > 0, pi for x < 0.
    const O: [f64; 2] = [0., f64::from_bits(0x400921fb54442d18)];
    let xs = x as f64;
    let t = x.to_bits();
    // Bit pattern with the sign shifted out, so comparisons on `ax` compare |x|.
    let ax = t.wrapping_shl(1);
    if ax >= 0x7f << 24 {
        // |x| >= 1, infinity or NaN.
        return as_special(x);
    }
    if ax < 0x7ec2a1dcu32 {
        // |x| < 0.880141
        const B: [u64; 16] = [
            0x3fefffffffd9ccb8,
            0x3fc5555c94838007,
            0x3fb32ded4b7c20fa,
            0x3fa8566df703309e,
            0xbf9980c959bec9a3,
            0x3fe56fbb04998344,
            0xc01403d8e4c49f52,
            0x403b06c3e9f311ea,
            0xc059ea97c4e2c21f,
            0x407200b8261cc61b,
            0xc082274c2799a5c7,
            0x408a558a59cc19d3,
            0xc08aca4b6a529ff0,
            0x408228744703f813,
            0xc06d7dbb0b322228,
            0x4045c2018c0c0105,
        ];
        /* avoid spurious underflow */
        if ax < 0x40000000u32 {
            // |x| < 2^-63
            return PI2 as f32;
        }
        let z = xs;
        let z2 = z * z;
        // Degree-1 pieces in z^2, merged below with z^4, z^8 and z^16.
        let w0 = f_fmla(z2, f64::from_bits(B[1]), f64::from_bits(B[0]));
        let w1 = f_fmla(z2, f64::from_bits(B[3]), f64::from_bits(B[2]));
        let w2 = f_fmla(z2, f64::from_bits(B[5]), f64::from_bits(B[4]));
        let w3 = f_fmla(z2, f64::from_bits(B[7]), f64::from_bits(B[6]));
        let w4 = f_fmla(z2, f64::from_bits(B[9]), f64::from_bits(B[8]));
        let w5 = f_fmla(z2, f64::from_bits(B[11]), f64::from_bits(B[10]));
        let w6 = f_fmla(z2, f64::from_bits(B[13]), f64::from_bits(B[12]));
        let w7 = f_fmla(z2, f64::from_bits(B[15]), f64::from_bits(B[14]));
        let z4 = z2 * z2;
        let z8 = z4 * z4;
        let z16 = z8 * z8;
        let r = z
            * ((f_fmla(z4, w1, w0) + z8 * f_fmla(z4, w3, w2))
                + z16 * (f_fmla(z4, w5, w4) + z8 * f_fmla(z4, w7, w6)));
        // Ziv's accuracy test. The two constants are pi/2 shifted up and down by
        // the error bound of the fast polynomial; the fast result is final when
        // both bounds round to the same `f32`. The comparison has to be made
        // after rounding to `f32`: as `f64` values the two bounds always differ,
        // which would make this fast path unreachable.
        let ub = (f64::from_bits(0x3ff921fb54574191) - r) as f32;
        let lb = (f64::from_bits(0x3ff921fb543118a0) - r) as f32;
        if ub == lb {
            return ub;
        }
    }
    // accurate path
    let r = if ax < (0x7eu32 << 24) {
        // |x| < 0.5: acos(x) ~ pi/2 - x - x^3 * P(x^2)
        const C: [u64; 12] = [
            0x3fc555555555529c,
            0x3fb333333337e0dd,
            0x3fa6db6db3b4465e,
            0x3f9f1c72e13ac306,
            0x3f96e89cebe06bc4,
            0x3f91c6dcf5289094,
            0x3f8c6dbbcc7c6315,
            0x3f88f8dc2615e996,
            0x3f7a5833b7bf15e8,
            0x3f943f44ace1665c,
            0xbf90fb17df881c73,
            0x3fa07520c026b2d6,
        ];
        // Two inputs with hard-coded results. Each result is built at run time
        // from a head and a 2^-25 tail; `black_box` keeps the sum from being
        // constant-folded.
        if t == 0x328885a3u32 {
            return black_box(f64::from_bits(0x3ff921fb60000000) as f32)
                + black_box(f64::from_bits(0x3e60000000000000) as f32);
        }
        if t == 0x39826222u32 {
            return black_box(f64::from_bits(0x3ff920f6a0000000) as f32)
                + black_box(f64::from_bits(0x3e60000000000000) as f32);
        }
        let x2 = xs * xs;
        f_fmla(-(xs * x2), poly12(x2, C), PI2 - xs)
    } else {
        // 0.5 <= |x| < 1: acos(x) ~ offset + sign(x) * sqrt(1 - |x|) * P(1 - |x|)
        const C: [u64; 12] = [
            0x3ff6a09e667f3bcb,
            0x3fbe2b7dddff2db9,
            0x3f9b27247ab42dbc,
            0x3f802995cc4e0744,
            0x3f65ffb0276ec8ea,
            0x3f5033885a928dec,
            0x3f3911f2be23f8c7,
            0x3f24c3c55d2437fd,
            0x3f0af477e1d7b461,
            0x3f0abd6bdff67dcb,
            0xbef1717e86d0fa28,
            0x3ef6ff526de46023,
        ];
        let bx = xs.abs();
        let z = 1.0 - bx;
        let s = f64::copysign(z.sqrt(), xs);
        // The sign bit of x selects the offset: 0 for x > 0, pi for x < 0.
        f_fmla(s, poly12(z, C), O[t.wrapping_shr(31) as usize])
    };
    r as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_acosf` at +-0.5 and on an out-of-domain argument.
    #[test]
    fn test_acosf() {
        let at_minus_half = f_acosf(-0.5);
        let at_half = f_acosf(0.5);
        let out_of_domain = f_acosf(7.);

        assert_eq!(at_minus_half, 2.0943952);
        assert_eq!(at_half, std::f32::consts::FRAC_PI_3);
        assert!(out_of_domain.is_nan());
    }
}

369
vendor/pxfm/src/acospi.rs vendored Normal file
View File

@@ -0,0 +1,369 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::asin::asin_eval;
use crate::asin_eval_dyadic::asin_eval_dyadic;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::round::RoundFinite;
pub(crate) const INV_PI_DD: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc76b01ec5417056),
f64::from_bits(0x3fd45f306dc9c883),
);
// 1/PI with 128-bit precision generated by SageMath with:
// def format_hex(value):
// l = hex(value)[2:]
// n = 8
// x = [l[i:i + n] for i in range(0, len(l), n)]
// return "0x" + "'".join(x) + "_u128"
// r = 1/pi
// (s, m, e) = RealField(128)(r).sign_mantissa_exponent();
// print(format_hex(m));
//
// The numbers below are consistent with the value being mantissa * 2^exponent:
// 0xa2f9...ddc1 * 2^-129 ~ 0.31830988... = 1/pi.
pub(crate) const INV_PI_F128: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -129,
mantissa: 0xa2f9836e_4e441529_fc2757d1_f534ddc1_u128,
};
/// pi/2 with a 128-bit mantissa, used by the high-precision fallback path.
///
/// The numbers are consistent with the value being mantissa * 2^exponent:
/// 0xc90f...1cd1 * 2^-127 ~ 1.5707963... = pi/2.
pub(crate) const PI_OVER_TWO_F128: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
};
/// Computes acos(x)/PI
///
/// Max ULP 0.5
///
/// Special cases handled explicitly below: `x == 1.0` gives `0.0`,
/// `x == -1.0` gives `1.0`, and every other input with |x| >= 1 (including
/// infinities and NaN) gives NaN.
///
/// For the remaining inputs a double-double evaluation is tried first. Its
/// result is accepted only when adding and subtracting the error bound round
/// to the same `f64` (Ziv's test); otherwise the value is recomputed with
/// `DyadicFloat128`.
pub fn f_acospi(x: f64) -> f64 {
// Biased exponent field of x (the sign bit is masked away).
let x_e = (x.to_bits() >> 52) & 0x7ff;
// f64 exponent bias: 2^10 - 1 = 1023.
const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
// pi/2 as a double-double (low part first, high part second).
const PI_OVER_TWO: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3c91a62633145c07),
f64::from_bits(0x3ff921fb54442d18),
);
let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
// |x| < 0.5.
if x_e < E_BIAS - 1 {
// |x| < 2^-55.
if x_e < E_BIAS - 55 {
// When |x| < 2^-55, acos(x)/pi = 0.5 - x/pi - ... is within half an ulp
// of 0.5. The result is formed as 0.5 - 2^-55 * x with a single FMA
// (presumably so that directed rounding modes still see the tiny
// offset -- confirm against f_fmla).
return f_fmla(f64::from_bits(0xbc80000000000000), x, 0.5);
}
let x_sq = DoubleDouble::from_exact_mult(x, x);
// Initial error scale: |x| * 2^-51.
let err = x_abs * f64::from_bits(0x3cc0000000000000);
// Polynomial approximation:
// p ~ asin(x)/x
let (p, err) = asin_eval(x_sq, err);
// asin(x) ~ x * p
let r0 = DoubleDouble::from_exact_mult(x, p.hi);
// acos(x) = pi/2 - asin(x)
// ~ pi/2 - x * p
// = pi/2 - x * (p.hi + p.lo)
let mut r_hi = f_fmla(-x, p.hi, PI_OVER_TWO.hi);
// Use Dekker's 2SUM algorithm to compute the lower part.
let mut r_lo = ((PI_OVER_TWO.hi - r_hi) - r0.hi) - r0.lo;
r_lo = f_fmla(-x, p.lo, r_lo + PI_OVER_TWO.lo);
// acos(x)/pi: multiply the double-double acos(x) by 1/pi.
// Note that `p` is shadowed here and now holds the scaled result.
let p = DoubleDouble::mult(DoubleDouble::new(r_lo, r_hi), INV_PI_DD);
r_hi = p.hi;
r_lo = p.lo;
// Ziv's test: accept the fast result only if the error bound cannot
// change the rounded value.
let r_upper = r_hi + (r_lo + err);
let r_lower = r_hi + (r_lo - err);
if r_upper == r_lower {
return r_upper;
}
// Ziv's accuracy test failed, perform 128-bit calculation.
// Recalculate mod 1/64.
let idx = (x_sq.hi * f64::from_bits(0x4050000000000000)).round_finite() as usize;
// Get x^2 - idx/64 exactly. When FMA is available, double-double
// multiplication will be correct for all rounding modes. Otherwise, we use
// Float128 directly.
let mut x_f128 = DyadicFloat128::new_from_f64(x);
let u: DyadicFloat128;
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
// u = x^2 - idx/64
let u_hi = DyadicFloat128::new_from_f64(f_fmla(
idx as f64,
f64::from_bits(0xbf90000000000000),
x_sq.hi,
));
u = u_hi.quick_add(&DyadicFloat128::new_from_f64(x_sq.lo));
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let x_sq_f128 = x_f128.quick_mul(&x_f128);
u = x_sq_f128.quick_add(&DyadicFloat128::new_from_f64(
idx as f64 * f64::from_bits(0xbf90000000000000),
));
}
let p_f128 = asin_eval_dyadic(u, idx);
// Flip the sign of x_f128 to perform subtraction.
x_f128.sign = x_f128.sign.negate();
// acos(x) = pi/2 - x * P(u), then scale by 1/pi.
let mut r = PI_OVER_TWO_F128.quick_add(&x_f128.quick_mul(&p_f128));
r = r.quick_mul(&INV_PI_F128);
return r.fast_as_f64();
}
// |x| >= 0.5
// pi as a double-double (low part first, high part second).
const PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3ca1a62633145c07),
f64::from_bits(0x400921fb54442d18),
);
// |x| >= 1
if x_e >= E_BIAS {
// x = +-1: acos(x)/pi is exactly 0 or 1.
if x_abs == 1.0 {
// x = 1, acos(x)/pi = 0,
// x = -1, acos(x)/pi = 1
return if x == 1.0 { 0.0 } else { 1.0 };
}
// |x| > 1 (or NaN/infinity), return NaN.
return f64::NAN;
}
// When |x| >= 0.5, we perform range reduction as follow:
//
// When 0.5 <= x < 1, let:
// y = acos(x)
// We will use the double angle formula:
// cos(2y) = 1 - 2 sin^2(y)
// and the complement angle identity:
// x = cos(y) = 1 - 2 sin^2 (y/2)
// So:
// sin(y/2) = sqrt( (1 - x)/2 )
// And hence:
// y/2 = asin( sqrt( (1 - x)/2 ) )
// Equivalently:
// acos(x) = y = 2 * asin( sqrt( (1 - x)/2 ) )
// Let u = (1 - x)/2, then:
// acos(x) = 2 * asin( sqrt(u) )
// Moreover, since 0.5 <= x < 1:
// 0 < u <= 1/4, and 0 < sqrt(u) <= 0.5,
// And hence we can reuse the same polynomial approximation of asin(x) when
// |x| <= 0.5:
// acos(x) ~ 2 * sqrt(u) * P(u).
//
// When -1 < x <= -0.5, we reduce to the previous case using the formula:
// acos(x) = pi - acos(-x)
// = pi - 2 * asin ( sqrt( (1 + x)/2 ) )
// ~ pi - 2 * sqrt(u) * P(u),
// where u = (1 - |x|)/2.
// u = (1 - |x|)/2
let u = f_fmla(x_abs, -0.5, 0.5);
// v_hi + v_lo ~ sqrt(u).
// Let:
// h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
// Then:
// sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
// ~ v_hi + h / (2 * v_hi)
// So we can use:
// v_lo = h / (2 * v_hi).
let v_hi = u.sqrt();
let h;
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
h = f_fmla(v_hi, -v_hi, u);
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let v_hi_sq = DoubleDouble::from_exact_mult(v_hi, v_hi);
h = (u - v_hi_sq.hi) - v_hi_sq.lo;
}
// Scale v_lo and v_hi by 2 from the formula:
// vh = v_hi * 2
// vl = 2*v_lo = h / v_hi.
let vh = v_hi * 2.0;
let vl = h / v_hi;
// Polynomial approximation:
// p ~ asin(sqrt(u))/sqrt(u)
// Initial error scale: vh * 2^-51.
let err = vh * f64::from_bits(0x3cc0000000000000);
let (p, err) = asin_eval(DoubleDouble::new(0.0, u), err);
// Perform computations in double-double arithmetic:
// acos(|x|) ~ 2 * (v_hi + v_lo) * p = (vh + vl) * p
let r0 = DoubleDouble::quick_mult(DoubleDouble::new(vl, vh), p);
let mut r_hi;
let mut r_lo;
if x.is_sign_positive() {
// acos(x) = acos(|x|)
r_hi = r0.hi;
r_lo = r0.lo;
} else {
// acos(x) = pi - acos(|x|)
let r = DoubleDouble::from_exact_add(PI.hi, -r0.hi);
r_hi = r.hi;
r_lo = (PI.lo - r0.lo) + r.lo;
}
// Scale by 1/pi; `p` is shadowed and now holds acos(x)/pi.
let p = DoubleDouble::mult(DoubleDouble::new(r_lo, r_hi), INV_PI_DD);
r_hi = p.hi;
r_lo = p.lo;
// Ziv's test on the double-double result.
let r_upper = r_hi + (r_lo + err);
let r_lower = r_hi + (r_lo - err);
if r_upper == r_lower {
return r_upper;
}
// Ziv's accuracy test failed, we redo the computations in Float128.
// Recalculate mod 1/64.
let idx = (u * f64::from_bits(0x4050000000000000)).round_finite() as usize;
// After the first step of Newton-Raphson approximating v = sqrt(u), we have
// that:
// sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
// v_lo = h / (2 * v_hi)
// With error:
// sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
// = -h^2 / (2*v * (sqrt(u) + v)^2).
// Since:
// (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
// we can add another correction term to (v_hi + v_lo) that is:
// v_ll = -h^2 / (2*v_hi * 4u)
// = -v_lo * (h / 4u)
// = -vl * (h / 8u),
// making the errors:
// sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
// well beyond 128-bit precision needed.
// Get the rounding error of vl = 2 * v_lo ~ h / vh
// Get full product of vh * vl
let vl_lo;
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
vl_lo = f_fmla(-v_hi, vl, h) / v_hi;
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let vh_vl = DoubleDouble::from_exact_mult(v_hi, vl);
vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
}
// vll = 2*v_ll = -vl * (h / (4u)), plus the rounding error of vl.
let t = h * (-0.25) / u;
let vll = f_fmla(vl, t, vl_lo);
// m_v = +-(vh + vl + vll): positive for x > 0, negated for x < 0 so that
// it can be added to pi below.
let m_v_p = DyadicFloat128::new_from_f64(vl) + DyadicFloat128::new_from_f64(vll);
let mut m_v = DyadicFloat128::new_from_f64(vh) + m_v_p;
m_v.sign = if x.is_sign_negative() {
DyadicSign::Neg
} else {
DyadicSign::Pos
};
// Perform computations in Float128:
// acos(x) = (v_hi + v_lo + vll) * P(u) , when 0.5 <= x < 1,
// = pi - (v_hi + v_lo + vll) * P(u) , when -1 < x <= -0.5.
// y_f128 = u - idx/64.
let y_f128 =
DyadicFloat128::new_from_f64(f_fmla(idx as f64, f64::from_bits(0xbf90000000000000), u));
let p_f128 = asin_eval_dyadic(y_f128, idx);
let mut r_f128 = m_v * p_f128;
if x.is_sign_negative() {
// pi: same mantissa as pi/2 with the exponent raised by one.
const PI_F128: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -126,
mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
};
r_f128 = PI_F128 + r_f128;
}
// Scale acos(x) by 1/pi and round to f64.
r_f128 = r_f128.quick_mul(&INV_PI_F128);
r_f128.fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks one interior point plus the explicitly handled special cases:
    /// the exact endpoints, zero, and inputs outside [-1, 1].
    #[test]
    fn acospi_test() {
        assert_eq!(f_acospi(0.5), 0.3333333333333333);
        assert!(f_acospi(1.5).is_nan());
        // Exact results at the ends and the middle of the domain.
        assert_eq!(f_acospi(1.0), 0.0);
        assert_eq!(f_acospi(-1.0), 1.0);
        assert_eq!(f_acospi(0.0), 0.5);
        // Out-of-domain and non-finite inputs all yield NaN.
        assert!(f_acospi(-1.5).is_nan());
        assert!(f_acospi(f64::INFINITY).is_nan());
        assert!(f_acospi(f64::NAN).is_nan());
    }
}

98
vendor/pxfm/src/acospif.rs vendored Normal file
View File

@@ -0,0 +1,98 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::asinpif::ASINCOSF_PI_TABLE;
use crate::common::{dd_fmla, f_fmla};
/// Computes acos(x)/PI
///
/// Max ULP 0.5
///
/// Special cases handled explicitly below: `1.0` gives `0.0`, `-1.0` gives
/// `1.0`, NaN inputs are propagated as `x + x`, and any other |x| > 1 gives NaN.
/// All other inputs are evaluated in `f64` with a table-selected polynomial
/// and rounded to `f32` at the end.
#[inline]
pub fn f_acospif(x: f32) -> f32 {
let ax = x.abs();
let az = ax as f64;
let z = x as f64;
let t: u32 = x.to_bits();
// Biased exponent field of x.
let e: i32 = ((t >> 23) & 0xff) as i32;
// |x| >= 1, infinity or NaN.
if e >= 127 {
if x == 1.0 {
return 0.0;
}
if x == -1.0 {
return 1.0;
}
// All-ones exponent with a non-zero mantissa.
if e == 0xff && (t.wrapping_shl(9)) != 0 {
return x + x;
} // nan
return f32::NAN;
}
// Table index: the significand (with its implicit leading 1) shifted right
// by 146 - e, which works out to floor(16 * |x|). Here e <= 126, so s >= 20;
// for s >= 32 (very small |x|) the index stays 0.
let s: i32 = 146i32.wrapping_sub(e);
let mut i = 0i32;
if s < 32 {
i = (((t & 0x007fffff) | 1 << 23) >> s) as i32;
}
// Eight polynomial coefficients (stored as f64 bit patterns) for this interval.
let c = ASINCOSF_PI_TABLE[i as usize & 15];
let z2 = z * z;
let z4 = z2 * z2;
if i == 0 {
// |x| < 1/16: result is 0.5 - x * c0, with c0 a degree-7 polynomial in
// x^2 evaluated by pairing adjacent coefficients.
let mut c0 = f_fmla(z2, f64::from_bits(c[1]), f64::from_bits(c[0]));
let c2 = f_fmla(z2, f64::from_bits(c[3]), f64::from_bits(c[2]));
let mut c4 = f_fmla(z2, f64::from_bits(c[5]), f64::from_bits(c[4]));
let c6 = f_fmla(z2, f64::from_bits(c[7]), f64::from_bits(c[6]));
c0 += c2 * z4;
c4 += c6 * z4;
/* For |x| <= 0x1.0fd288p-127, c0 += c4*(z4*z4) would raise a spurious
underflow exception, we use an FMA instead, where c4 * z4 does not
underflow. */
c0 = dd_fmla(c4 * z4, z4, c0);
f_fmla(-z, c0, 0.5) as f32
} else {
// |x| >= 1/16: c0 is a degree-7 polynomial in |x| (note z2 = |x|^2) that
// multiplies sqrt(1 - |x|).
let f = (1. - az).sqrt();
let mut c0 = f_fmla(az, f64::from_bits(c[1]), f64::from_bits(c[0]));
let c2 = f_fmla(az, f64::from_bits(c[3]), f64::from_bits(c[2]));
let mut c4 = f_fmla(az, f64::from_bits(c[5]), f64::from_bits(c[4]));
let c6 = f_fmla(az, f64::from_bits(c[7]), f64::from_bits(c[6]));
c0 += c2 * z2;
c4 += c6 * z2;
c0 += c4 * z4;
// Indexed by the sign bit: positive x yields c0 * f, negative x yields
// 1 - c0 * f (the square root takes the sign of x).
static SIGN: [f64; 2] = [0., 1.];
let r = SIGN[(t >> 31) as usize] + c0 * f64::copysign(f, x as f64);
r as f32
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks interior points plus the explicitly handled special cases.
    #[test]
    fn test_acospif() {
        assert_eq!(f_acospif(0.0), 0.5);
        assert_eq!(f_acospif(0.5), 0.33333334);
        assert_eq!(f_acospif(1.0), 0.0);
        // The opposite endpoint is exact as well.
        assert_eq!(f_acospif(-1.0), 1.0);
        // Out-of-domain and non-finite inputs all yield NaN.
        assert!(f_acospif(2.0).is_nan());
        assert!(f_acospif(-2.0).is_nan());
        assert!(f_acospif(f32::INFINITY).is_nan());
        assert!(f_acospif(f32::NAN).is_nan());
    }
}

490
vendor/pxfm/src/asin.rs vendored Normal file
View File

@@ -0,0 +1,490 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::asin_eval_dyadic::asin_eval_dyadic;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::round::RoundFinite;
/// Polynomial coefficients used by `asin_eval`, stored as raw `f64` bit patterns.
///
/// Row `k` is selected with `k = round(32 * u)` and approximates
/// asin(sqrt(u))/sqrt(u) in powers of `y = u - k/32`.
///
/// Layout of a row, as consumed by `asin_eval`:
/// - `[0]`, `[1]`: high and low parts of the constant term,
/// - `[2]`, `[3]`: high and low parts of the coefficient of `y`,
/// - `[4]` ..= `[11]`: coefficients of `y^2` through `y^9` in double precision.
static ASIN_COEFFS: [[u64; 12]; 9] = [
[
0x3ff0000000000000,
0x0000000000000000,
0x3fc5555555555555,
0x3c65555555555555,
0x3fb3333333333333,
0x3fa6db6db6db6db7,
0x3f9f1c71c71c71c7,
0x3f96e8ba2e8ba2e9,
0x3f91c4ec4ec4ec4f,
0x3f8c99999999999a,
0x3f87a87878787878,
0x3f83fde50d79435e,
],
[
0x3ff015a397cf0f1c,
0xbc8eebd6ccfe3ee3,
0x3fc5f3581be7b08b,
0xbc65df80d0e7237d,
0x3fb4519ddf1ae530,
0x3fa8eb4b6eeb1696,
0x3fa17bc85420fec8,
0x3f9a8e39b5dcad81,
0x3f953f8df127539b,
0x3f91a485a0b0130a,
0x3f8e20e6e4930020,
0x3f8a466a7030f4c9,
],
[
0x3ff02be9ce0b87cd,
0x3c7e5d09da2e0f04,
0x3fc69ab5325bc359,
0xbc692f480cfede2d,
0x3fb58a4c3097aab1,
0x3fab3db36068dd80,
0x3fa3b94821846250,
0x3f9eedc823765d21,
0x3f998e35d756be6b,
0x3f95ea4f1b32731a,
0x3f9355115764148e,
0x3f916a5853847c91,
],
[
0x3ff042dc6a65ffbf,
0xbc8c7ea28dce95d1,
0x3fc74c4bd7412f9d,
0x3c5447024c0a3c87,
0x3fb6e09c6d2b72b9,
0x3faddd9dcdae5315,
0x3fa656f1f64058b8,
0x3fa21a42e4437101,
0x3f9eed0350b7edb2,
0x3f9b6bc877e58c52,
0x3f9903a0872eb2a4,
0x3f974da839ddd6d8,
],
[
0x3ff05a8621feb16b,
0xbc7e5b33b1407c5f,
0x3fc809186c2e57dd,
0xbc33dcb4d6069407,
0x3fb8587d99442dc5,
0x3fb06c23d1e75be3,
0x3fa969024051c67d,
0x3fa54e4f934aacfd,
0x3fa2d60a732dbc9c,
0x3fa149f0c046eac7,
0x3fa053a56dba1fba,
0x3f9f7face3343992,
],
[
0x3ff072f2b6f1e601,
0xbc92dcbb05419970,
0x3fc8d2397127aeba,
0x3c6ead0c497955fb,
0x3fb9f68df88da518,
0x3fb21ee26a5900d7,
0x3fad08e7081b53a9,
0x3fa938dd661713f7,
0x3fa71b9f299b72e6,
0x3fa5fbc7d2450527,
0x3fa58573247ec325,
0x3fa585a174a6a4ce,
],
[
0x3ff08c2f1d638e4c,
0x3c7b47c159534a3d,
0x3fc9a8f592078624,
0xbc6ea339145b65cd,
0x3fbbc04165b57aab,
0x3fb410df5f58441d,
0x3fb0ab6bdf5f8f70,
0x3fae0b92eea1fce1,
0x3fac9094e443a971,
0x3fac34651d64bc74,
0x3facaa008d1af080,
0x3fadc165bc0c4fc5,
],
[
0x3ff0a649a73e61f2,
0x3c874ac0d817e9c7,
0x3fca8ec30dc93890,
0xbc48ab1c0eef300c,
0x3fbdbc11ea95061b,
0x3fb64e371d661328,
0x3fb33e0023b3d895,
0x3fb2042269c243ce,
0x3fb1cce74bda2230,
0x3fb244d425572ce9,
0x3fb34d475c7f1e3e,
0x3fb4d4e653082ad3,
],
[
0x3ff0c152382d7366,
0xbc9ee6913347c2a6,
0x3fcb8550d62bfb6d,
0xbc6d10aec3f116d5,
0x3fbff1bde0fa3ca0,
0x3fb8e5f3ab69f6a4,
0x3fb656be8b6527ce,
0x3fb5c39755dc041a,
0x3fb661e6ebd40599,
0x3fb7ea3dddee2a4f,
0x3fba4f439abb4869,
0x3fbd9181c0fda658,
],
];
/// Evaluates the piecewise polynomial approximation of asin(sqrt(u))/sqrt(u)
/// in double-double precision.
///
/// `u` is the squared argument as a double-double; the callers visible in this
/// file pass either x^2 (for |x| < 0.5) or (1 - |x|)/2, so that
/// `round(32 * u)` stays within the 9 rows of `ASIN_COEFFS`.
/// `err` is the caller's initial error scale.
///
/// Returns the polynomial value together with the updated error bound
/// `err * y^2 + 2^-102`, where `y = u - round(32 * u)/32`.
#[inline]
pub(crate) fn asin_eval(u: DoubleDouble, err: f64) -> (DoubleDouble, f64) {
// k = round(u * 32).
let k = (u.hi * f64::from_bits(0x4040000000000000)).round_finite();
let idx = k as u64;
// y = u - k/32.
let y_hi = f_fmla(k, f64::from_bits(0xbfa0000000000000), u.hi); // Exact
let y = DoubleDouble::from_exact_add(y_hi, u.lo);
let y2 = y.hi * y.hi;
// Add double-double errors in addition to the relative errors from y2.
let err = f_fmla(err, y2, f64::from_bits(0x3990000000000000));
let coeffs = ASIN_COEFFS[idx as usize];
// Linear term y * (coeffs[2] + coeffs[3]) in double-double.
let c0 = DoubleDouble::quick_mult(
y,
DoubleDouble::new(f64::from_bits(coeffs[3]), f64::from_bits(coeffs[2])),
);
// Higher-order coefficients are paired up (Estrin-like scheme) in plain
// double precision.
let c1 = f_fmla(y.hi, f64::from_bits(coeffs[5]), f64::from_bits(coeffs[4]));
let c2 = f_fmla(y.hi, f64::from_bits(coeffs[7]), f64::from_bits(coeffs[6]));
let c3 = f_fmla(y.hi, f64::from_bits(coeffs[9]), f64::from_bits(coeffs[8]));
let c4 = f_fmla(y.hi, f64::from_bits(coeffs[11]), f64::from_bits(coeffs[10]));
let y4 = y2 * y2;
let d0 = f_fmla(y2, c2, c1);
let d1 = f_fmla(y2, c4, c3);
// Constant term plus the high part of the linear term, summed exactly.
let mut r = DoubleDouble::from_exact_add(f64::from_bits(coeffs[0]), c0.hi);
let e1 = f_fmla(y4, d1, d0);
// Fold the y^2 * (...) tail and all low-order parts into r.lo.
r.lo = f_fmla(y2, e1, f64::from_bits(coeffs[1]) + c0.lo + r.lo);
(r, err)
}
/// Computes asin(x)
///
/// Max found ULP 0.5
///
/// Special cases handled explicitly below: zeros are returned unchanged,
/// `x == +-1` gives `+-pi/2`, NaN inputs are returned as-is and every other
/// |x| > 1 gives NaN.
///
/// For the remaining inputs a double-double evaluation is tried first. Its
/// result is accepted only when adding and subtracting the error bound round
/// to the same `f64` (Ziv's test); otherwise the value is recomputed with
/// `DyadicFloat128`.
pub fn f_asin(x: f64) -> f64 {
    // Biased exponent field of x (the sign bit is masked away).
    let x_e = (x.to_bits() >> 52) & 0x7ff;
    // f64 exponent bias: 2^10 - 1 = 1023.
    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
    let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
    // |x| < 0.5.
    if x_e < E_BIAS - 1 {
        // |x| < 2^-26.
        if x_e < E_BIAS - 26 {
            // When |x| < 2^-26, the relative error of the approximation
            // asin(x) ~ x is:
            //   |asin(x) - x| / |asin(x)| < |x^3| / (6|x|)
            //                             = x^2 / 6
            //                             < 2^-54
            //                             < epsilon(1)/2.
            // So asin(x) rounds to x, up to a rounding-direction dependent
            // nudge that the FMA sequence below provides.
            if x.abs() == 0. {
                return x;
            }
            // Get sign(x) * min_normal.
            let eps = f64::copysign(f64::MIN_POSITIVE, x);
            // Subnormal inputs are first shifted into the normal range.
            let normalize_const = if x_e == 0 { eps } else { 0.0 };
            // (x + c) * 2^54 + eps, then scaled back by 2^-54 and un-shifted:
            // the net effect is x plus a tiny term with the sign of x.
            let scaled_normal =
                f_fmla(x + normalize_const, f64::from_bits(0x4350000000000000), eps);
            return f_fmla(
                scaled_normal,
                f64::from_bits(0x3c90000000000000),
                -normalize_const,
            );
        }
        let x_sq = DoubleDouble::from_exact_mult(x, x);
        // Initial error scale: |x| * 2^-51.
        let err = x_abs * f64::from_bits(0x3cc0000000000000);
        // Polynomial approximation:
        //   p ~ asin(x)/x
        let (p, err) = asin_eval(x_sq, err);
        // asin(x) ~ x * p
        let r0 = DoubleDouble::from_exact_mult(x, p.hi);
        let r_lo = f_fmla(x, p.lo, r0.lo);
        // Ziv's test: accept the fast result only if the error bound cannot
        // change the rounded value.
        let r_upper = r0.hi + (r_lo + err);
        let r_lower = r0.hi + (r_lo - err);
        if r_upper == r_lower {
            return r_upper;
        }
        // Ziv's accuracy test failed, perform 128-bit calculation.
        // Recalculate mod 1/64.
        let idx = (x_sq.hi * f64::from_bits(0x4050000000000000)).round_finite() as usize;
        // Get x^2 - idx/64 exactly. When FMA is available, double-double
        // multiplication will be correct for all rounding modes. Otherwise, we use
        // Float128 directly.
        let x_f128 = DyadicFloat128::new_from_f64(x);
        let u: DyadicFloat128;
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        ))]
        {
            // u = x^2 - idx/64
            let u_hi = DyadicFloat128::new_from_f64(f_fmla(
                idx as f64,
                f64::from_bits(0xbf90000000000000),
                x_sq.hi,
            ));
            u = u_hi.quick_add(&DyadicFloat128::new_from_f64(x_sq.lo));
        }
        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        )))]
        {
            let x_sq_f128 = x_f128.quick_mul(&x_f128);
            u = x_sq_f128.quick_add(&DyadicFloat128::new_from_f64(
                idx as f64 * (f64::from_bits(0xbf90000000000000)),
            ));
        }
        let p_f128 = asin_eval_dyadic(u, idx);
        let r = x_f128.quick_mul(&p_f128);
        return r.fast_as_f64();
    }
    // pi/2 as a double-double (low part first, high part second).
    const PI_OVER_TWO: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3c91a62633145c07),
        f64::from_bits(0x3ff921fb54442d18),
    );
    let x_sign = if x.is_sign_negative() { -1.0 } else { 1.0 };
    // |x| >= 1
    if x_e >= E_BIAS {
        // x = +-1, asin(x) = +- pi/2
        if x_abs == 1.0 {
            // return +- pi/2
            return f_fmla(x_sign, PI_OVER_TWO.hi, x_sign * PI_OVER_TWO.lo);
        }
        // NaN inputs are propagated unchanged.
        if x.is_nan() {
            return x;
        }
        // |x| > 1, return NaN.
        return f64::NAN;
    }
    // u = (1 - |x|)/2
    let u = f_fmla(x_abs, -0.5, 0.5);
    // v_hi + v_lo ~ sqrt(u).
    // Let:
    //   h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
    // Then:
    //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
    //           ~ v_hi + h / (2 * v_hi)
    // So we can use:
    //   v_lo = h / (2 * v_hi).
    // Then,
    //   asin(x) ~ pi/2 - 2*(v_hi + v_lo) * P(u)
    let v_hi = u.sqrt();
    let h;
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        h = f_fmla(v_hi, -v_hi, u);
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let v_hi_sq = DoubleDouble::from_exact_mult(v_hi, v_hi);
        h = (u - v_hi_sq.hi) - v_hi_sq.lo;
    }
    // Scale v_lo and v_hi by 2 from the formula:
    //   vh = v_hi * 2
    //   vl = 2*v_lo = h / v_hi.
    let vh = v_hi * 2.0;
    let vl = h / v_hi;
    // Polynomial approximation:
    //   p ~ asin(sqrt(u))/sqrt(u)
    // Initial error scale: vh * 2^-51.
    let err = vh * f64::from_bits(0x3cc0000000000000);
    let (p, err) = asin_eval(DoubleDouble::new(0.0, u), err);
    // Perform computations in double-double arithmetic:
    //   asin(|x|) = pi/2 - (vh + vl) * p
    let r0 = DoubleDouble::quick_mult(DoubleDouble::new(vl, vh), p);
    let r = DoubleDouble::from_exact_add(PI_OVER_TWO.hi, -r0.hi);
    let r_lo = PI_OVER_TWO.lo - r0.lo + r.lo;
    // Ziv's test on sign(x) * (r.hi + r_lo) +- err.
    let (r_upper, r_lower);
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        r_upper = f_fmla(r.hi, x_sign, f_fmla(r_lo, x_sign, err));
        r_lower = f_fmla(r.hi, x_sign, f_fmla(r_lo, x_sign, -err));
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let r_lo = r_lo * x_sign;
        let r_hi = r.hi * x_sign;
        r_upper = r_hi + (r_lo + err);
        // Both bounds must use the sign-adjusted high part. Using the unsigned
        // `r.hi` here made the test fail for every negative x and forced the
        // 128-bit path.
        r_lower = r_hi + (r_lo - err);
    }
    if r_upper == r_lower {
        return r_upper;
    }
    // Ziv's accuracy test failed, we redo the computations in Float128.
    // Recalculate mod 1/64.
    let idx = (u * f64::from_bits(0x4050000000000000)).round_finite() as usize;
    // After the first step of Newton-Raphson approximating v = sqrt(u), we have
    // that:
    //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
    //   v_lo = h / (2 * v_hi)
    // With error:
    //   sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
    //                           = -h^2 / (2*v * (sqrt(u) + v)^2).
    // Since:
    //   (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
    // we can add another correction term to (v_hi + v_lo) that is:
    //   v_ll = -h^2 / (2*v_hi * 4u)
    //        = -v_lo * (h / 4u)
    //        = -vl * (h / 8u),
    // making the errors:
    //   sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
    // well beyond 128-bit precision needed.
    // Get the rounding error of vl = 2 * v_lo ~ h / vh
    // Get full product of vh * vl
    let vl_lo;
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        vl_lo = f_fmla(-v_hi, vl, h) / v_hi;
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let vh_vl = DoubleDouble::from_exact_mult(v_hi, vl);
        vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
    }
    // vll = 2*v_ll = -vl * (h / (4u)).
    let t = h * (-0.25) / u;
    let vll = f_fmla(vl, t, vl_lo);
    // m_v = -(vh + vl + vll).
    let mv0 = DyadicFloat128::new_from_f64(vl) + DyadicFloat128::new_from_f64(vll);
    let mut m_v = DyadicFloat128::new_from_f64(vh) + mv0;
    m_v.sign = DyadicSign::Neg;
    // Perform computations in Float128:
    //   asin(|x|) = pi/2 - (vh + vl + vll) * P(u).
    // y_f128 = u - idx/64.
    let y_f128 =
        DyadicFloat128::new_from_f64(f_fmla(idx as f64, f64::from_bits(0xbf90000000000000), u));
    const PI_OVER_TWO_F128: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -127,
        mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
    };
    let p_f128 = asin_eval_dyadic(y_f128, idx);
    let r0_f128 = m_v.quick_mul(&p_f128);
    let mut r_f128 = PI_OVER_TWO_F128.quick_add(&r0_f128);
    // asin is odd: restore the sign of x.
    if x.is_sign_negative() {
        r_f128.sign = DyadicSign::Neg;
    }
    r_f128.fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks interior points on both sides of |x| = 0.5 plus the explicitly
    /// handled special cases.
    #[test]
    fn f_asin_test() {
        assert_eq!(f_asin(-0.4), -0.41151684606748806);
        assert_eq!(f_asin(-0.8), -0.9272952180016123);
        assert_eq!(f_asin(0.3), 0.3046926540153975);
        assert_eq!(f_asin(0.6), 0.6435011087932844);
        // Zero is returned unchanged.
        assert_eq!(f_asin(0.0), 0.0);
        // Exact endpoints of the domain.
        assert_eq!(f_asin(1.0), core::f64::consts::FRAC_PI_2);
        assert_eq!(f_asin(-1.0), -core::f64::consts::FRAC_PI_2);
        // Out-of-domain and NaN inputs yield NaN.
        assert!(f_asin(2.0).is_nan());
        assert!(f_asin(-2.0).is_nan());
        assert!(f_asin(f64::NAN).is_nan());
    }
}

1478
vendor/pxfm/src/asin_eval_dyadic.rs vendored Normal file

File diff suppressed because it is too large Load Diff

165
vendor/pxfm/src/asinf.rs vendored Normal file
View File

@@ -0,0 +1,165 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::acosf::poly12;
use crate::common::{dd_fmlaf, f_fmla};
/// Cold path used by `f_asinf` for inputs it cannot handle (|x| > 1 or NaN).
///
/// Every input produces a NaN: a NaN argument is propagated through `x + x`,
/// anything else returns `f32::NAN`.
#[cold]
fn as_special(x: f32) -> f32 {
    // With the sign bit shifted out, an infinity is exactly 0xff00_0000;
    // anything larger has an all-ones exponent and a non-zero mantissa.
    const INF_WITHOUT_SIGN: u32 = 0xffu32 << 24;
    let magnitude_bits = x.to_bits().wrapping_shl(1);
    if magnitude_bits > INF_WITHOUT_SIGN {
        // nan
        x + x
    } else {
        f32::NAN
    }
}
/// Computes asin
///
/// Max found ULP 0.49999928
///
/// Inputs with |x| > 1 and NaN are delegated to `as_special` (always NaN).
/// For |x| below roughly 0.88 a fast polynomial is tried first and accepted
/// when its value and its value minus the error bound round to the same
/// `f32`; otherwise one of two more accurate evaluations is used, depending on
/// whether |x| < 0.5.
#[inline]
pub fn f_asinf(x: f32) -> f32 {
    // Leading double of pi/2.
    const PI2: f64 = f64::from_bits(0x3ff921fb54442d18);
    let xs = x as f64;
    let t = x.to_bits();
    // Shift the sign bit out so that `ax` orders inputs by magnitude.
    let ax = t.wrapping_shl(1);
    if ax > 0x7f << 24 {
        // |x| > 1 or NaN
        return as_special(x);
    }
    if ax < 0x7ec29000u32 {
        // |x| < 0.880005 (0x1.c29p-1)
        if ax < 115 << 24 {
            // |x| < 0.000244141: asin(x) ~ x * (1 + 2^-25)
            return dd_fmlaf(x, f64::from_bits(0x3e60000000000000) as f32, x);
        }
        const B: [u64; 16] = [
            0x3ff0000000000005,
            0x3fc55557aeca105d,
            0x3fb3314ec3db7d12,
            0x3fa775738a5a6f92,
            0x3f75d5f7ce1c8538,
            0x3fd605c6d58740f0,
            0xc005728b732d73c6,
            0x402f152170f151eb,
            0xc04f962ea3ca992e,
            0x40671971e17375a0,
            0xc07860512b4ba230,
            0x40826a3b8d4bdb14,
            0xc0836f2ea5698b51,
            0x407b3d722aebfa2e,
            0xc066cf89703b1289,
            0x4041518af6a65e2d,
        ];
        let z = xs;
        let z2 = z * z;
        let w0 = f_fmla(z2, f64::from_bits(B[1]), f64::from_bits(B[0]));
        let w1 = f_fmla(z2, f64::from_bits(B[3]), f64::from_bits(B[2]));
        let w2 = f_fmla(z2, f64::from_bits(B[5]), f64::from_bits(B[4]));
        let w3 = f_fmla(z2, f64::from_bits(B[7]), f64::from_bits(B[6]));
        let w4 = f_fmla(z2, f64::from_bits(B[9]), f64::from_bits(B[8]));
        let w5 = f_fmla(z2, f64::from_bits(B[11]), f64::from_bits(B[10]));
        let w6 = f_fmla(z2, f64::from_bits(B[13]), f64::from_bits(B[12]));
        let w7 = f_fmla(z2, f64::from_bits(B[15]), f64::from_bits(B[14]));
        let z4 = z2 * z2;
        let z8 = z4 * z4;
        let z16 = z8 * z8;
        let fast = z
            * ((f_fmla(z4, w1, w0) + z8 * f_fmla(z4, w3, w2))
                + z16 * (f_fmla(z4, w5, w4) + z8 * f_fmla(z4, w7, w6)));
        // Ziv's accuracy test. The two bounds must be compared after rounding
        // to f32: as f64 values they always differ (the error term is about
        // 2^-30 relative), which made this fast path unreachable.
        let ub = fast as f32;
        let lb = (fast - z * f64::from_bits(0x3e0efa8eb0000000)) as f32;
        if ub == lb {
            return ub;
        }
    }
    let r = if ax < (0x7eu32 << 24) {
        // |x| < 0.5: asin(x) ~ x + x^3 * P(x^2)
        const C: [u64; 12] = [
            0x3fc555555555529c,
            0x3fb333333337e0dd,
            0x3fa6db6db3b4465e,
            0x3f9f1c72e13ac306,
            0x3f96e89cebe06bc4,
            0x3f91c6dcf5289094,
            0x3f8c6dbbcc7c6315,
            0x3f88f8dc2615e996,
            0x3f7a5833b7bf15e8,
            0x3f943f44ace1665c,
            0xbf90fb17df881c73,
            0x3fa07520c026b2d6,
        ];
        let z = xs;
        let z2 = z * z;
        let c0 = poly12(z2, C);
        z + (z * z2) * c0
    } else {
        // Two inputs are special-cased with hard-coded results.
        if ax == 0x7e55688au32 {
            return f32::copysign(f64::from_bits(0x3fe75b8a20000000) as f32, x)
                + f32::copysign(f64::from_bits(0x3e50000000000000) as f32, x);
        }
        if ax == 0x7e107434u32 {
            return f32::copysign(f64::from_bits(0x3fe1f4b640000000) as f32, x)
                + f32::copysign(f64::from_bits(0x3e50000000000000) as f32, x);
        }
        // 0.5 <= |x| <= 1: asin(|x|) ~ pi/2 - sqrt(1 - |x|) * P(1 - |x|)
        let bx = xs.abs();
        let z = 1.0 - bx;
        let s = z.sqrt();
        const C: [u64; 12] = [
            0x3ff6a09e667f3bcb,
            0x3fbe2b7dddff2db9,
            0x3f9b27247ab42dbc,
            0x3f802995cc4e0744,
            0x3f65ffb0276ec8ea,
            0x3f5033885a928dec,
            0x3f3911f2be23f8c7,
            0x3f24c3c55d2437fd,
            0x3f0af477e1d7b461,
            0x3f0abd6bdff67dcb,
            0xbef1717e86d0fa28,
            0x3ef6ff526de46023,
        ];
        // asin is odd: restore the sign of x.
        f64::copysign(PI2 - s * poly12(z, C), xs)
    };
    r as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks interior points plus the domain endpoints and NaN handling.
    #[test]
    fn test_asinf() {
        assert_eq!(f_asinf(-0.5), -std::f32::consts::FRAC_PI_6);
        assert_eq!(f_asinf(0.5), std::f32::consts::FRAC_PI_6);
        assert!(f_asinf(7.).is_nan());
        // Exact endpoints of the domain.
        assert_eq!(f_asinf(1.0), std::f32::consts::FRAC_PI_2);
        assert_eq!(f_asinf(-1.0), -std::f32::consts::FRAC_PI_2);
        // Negative out-of-domain and NaN inputs yield NaN as well.
        assert!(f_asinf(-7.).is_nan());
        assert!(f_asinf(f32::NAN).is_nan());
    }
}

370
vendor/pxfm/src/asinpi.rs vendored Normal file
View File

@@ -0,0 +1,370 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::acospi::INV_PI_DD;
use crate::asin::asin_eval;
use crate::asin_eval_dyadic::asin_eval_dyadic;
use crate::common::{dd_fmla, dyad_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::round::RoundFinite;
/// Computes asin(x)/PI
///
/// Special cases handled explicitly by the code below:
/// - `±0` is returned unchanged,
/// - `x = ±1` returns `±0.5`,
/// - NaN input is returned as is,
/// - any other input with `|x| >= 1` returns NaN.
///
/// Max found ULP 0.5
pub fn f_asinpi(x: f64) -> f64 {
    let x_e = (x.to_bits() >> 52) & 0x7ff;
    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

    let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);

    // |x| < 0.5.
    if x_e < E_BIAS - 1 {
        // |x| < 2^-26.
        if x_e < E_BIAS - 26 {
            // For such small inputs a short series in x is enough:
            //   asin(x)/pi ~ x/pi + C3 * x^3.
            // Zero is returned unchanged so that the sign of zero is preserved.
            if x.abs() == 0. {
                return x;
            }

            if x_e < E_BIAS - 56 {
                // One mantissa pattern is special-cased: the result is assembled from
                // the hard-coded pair (h, l), rescaled by a power of two derived from
                // the exponent and sign of x.
                if (x_abs.to_bits().wrapping_shl(12)) == 0x59af9a1194efe000u64 {
                    let e = (x.to_bits() >> 52) & 0x7ff;
                    let h = f64::from_bits(0x3c7b824198b94a89);
                    let l = f64::from_bits(0x391fffffffffffff);
                    let mut t = (if x > 0. { 1.0f64 } else { -1.0f64 }).to_bits();
                    t = t.wrapping_sub(0x3c9u64.wrapping_sub(e).wrapping_shl(52));
                    return f_fmla(l, f64::from_bits(t), h * f64::from_bits(t));
                }

                // x * (1/pi) in double-double: the low part is computed on x scaled
                // up by 2^106 and scaled back by 2^-106 in the final fused step.
                let h = x * INV_PI_DD.hi;
                let sx = x * f64::from_bits(0x4690000000000000); /* scale x */
                let mut l = dd_fmla(sx, INV_PI_DD.hi, -h * f64::from_bits(0x4690000000000000));
                l = dd_fmla(sx, INV_PI_DD.lo, l);
                /* scale back */
                let res = dyad_fmla(l, f64::from_bits(0x3950000000000000), h);
                return res;
            }

            /* We use the Sollya polynomial 0x1.45f306dc9c882a53f84eafa3ea4p-2 * x
            + 0x1.b2995e7b7b606p-5 * x^3, with relative error bounded by 2^-106.965
            on [2^-53, 2^-26] */
            const C1H: f64 = f64::from_bits(0x3fd45f306dc9c883);
            const C1L: f64 = f64::from_bits(0xbc76b01ec5417057);
            const C3: f64 = f64::from_bits(0x3fab2995e7b7b606);
            let h = C1H;
            let l = dd_fmla(C3, x * x, C1L);
            /* multiply h+l by x */
            let hh = h * x;
            let mut ll = dd_fmla(h, x, -hh);
            /* hh+ll = h*x */
            ll = dd_fmla(l, x, ll);
            return hh + ll;
        }

        let x_sq = DoubleDouble::from_exact_mult(x, x);
        let err = x_abs * f64::from_bits(0x3cc0000000000000);
        // Polynomial approximation:
        //   p ~ asin(x)/x
        let (p, err) = asin_eval(x_sq, err);
        // asin(x) ~ x * (ASIN_COEFFS[idx][0] + p)
        let mut r0 = DoubleDouble::from_exact_mult(x, p.hi);
        let mut r_lo = f_fmla(x, p.lo, r0.lo);

        // asinpi(x) = asin(x) * (1/pi), still in double-double.
        r0 = DoubleDouble::mult(DoubleDouble::new(r_lo, r0.hi), INV_PI_DD);
        r_lo = r0.lo;

        // Ziv's rounding test: accept the fast result only when adding and
        // subtracting the error bound round to the same double.
        let r_upper = r0.hi + (r_lo + err);
        let r_lower = r0.hi + (r_lo - err);

        if r_upper == r_lower {
            return r_upper;
        }

        // Ziv's accuracy test failed, perform 128-bit calculation.

        // Recalculate mod 1/64.
        let idx = (x_sq.hi * f64::from_bits(0x4050000000000000)).round_finite() as usize;

        // Get x^2 - idx/64 exactly.  When FMA is available, double-double
        // multiplication will be correct for all rounding modes.  Otherwise, we use
        // Float128 directly.
        let x_f128 = DyadicFloat128::new_from_f64(x);

        let u: DyadicFloat128;
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        ))]
        {
            // u = x^2 - idx/64
            let u_hi = DyadicFloat128::new_from_f64(f_fmla(
                idx as f64,
                f64::from_bits(0xbf90000000000000),
                x_sq.hi,
            ));
            u = u_hi.quick_add(&DyadicFloat128::new_from_f64(x_sq.lo));
        }

        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        )))]
        {
            let x_sq_f128 = x_f128.quick_mul(&x_f128);
            u = x_sq_f128.quick_add(&DyadicFloat128::new_from_f64(
                idx as f64 * (f64::from_bits(0xbf90000000000000)),
            ));
        }

        let p_f128 = asin_eval_dyadic(u, idx);
        let mut r = x_f128.quick_mul(&p_f128);
        r = r.quick_mul(&crate::acospi::INV_PI_F128);
        return r.fast_as_f64();
    }

    const PI_OVER_TWO: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3c91a62633145c07),
        f64::from_bits(0x3ff921fb54442d18),
    );

    let x_sign = if x.is_sign_negative() { -1.0 } else { 1.0 };

    // |x| >= 1
    if x_e >= E_BIAS {
        // x = +-1, asin(x) = +- pi/2, hence asin(x)/pi = +- 1/2
        if x_abs == 1.0 {
            // return +- 0.5
            return x * 0.5; // asinpi_specific
        }
        // NaN input propagates unchanged.
        if x.is_nan() {
            return x;
        }
        // |x| > 1, return NaN.
        return f64::NAN;
    }

    // u = (1 - |x|)/2
    let u = f_fmla(x_abs, -0.5, 0.5);
    // v_hi + v_lo ~ sqrt(u).
    // Let:
    //   h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
    // Then:
    //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
    //            ~ v_hi + h / (2 * v_hi)
    // So we can use:
    //   v_lo = h / (2 * v_hi).
    // Then,
    //   asin(x) ~ pi/2 - 2*(v_hi + v_lo) * P(u)
    let v_hi = u.sqrt();

    let h;
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        h = f_fmla(v_hi, -v_hi, u);
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let v_hi_sq = DoubleDouble::from_exact_mult(v_hi, v_hi);
        h = (u - v_hi_sq.hi) - v_hi_sq.lo;
    }

    // Scale v_lo and v_hi by 2 from the formula:
    //   vh = v_hi * 2
    //   vl = 2*v_lo = h / v_hi.
    let vh = v_hi * 2.0;
    let vl = h / v_hi;

    // Polynomial approximation:
    //   p ~ asin(sqrt(u))/sqrt(u)
    let err = vh * f64::from_bits(0x3cc0000000000000);

    let (p, err) = asin_eval(DoubleDouble::new(0.0, u), err);

    // Perform computations in double-double arithmetic:
    //   asin(x) = pi/2 - (v_hi + v_lo) * (ASIN_COEFFS[idx][0] + p)
    let r0 = DoubleDouble::quick_mult(DoubleDouble::new(vl, vh), p);

    let mut r = DoubleDouble::from_exact_add(PI_OVER_TWO.hi, -r0.hi);

    let mut r_lo = PI_OVER_TWO.lo - r0.lo + r.lo;

    // asinpi(|x|) = asin(|x|) * (1/pi)
    let p = DoubleDouble::mult(DoubleDouble::new(r_lo, r.hi), INV_PI_DD);
    r_lo = p.lo;
    r.hi = p.hi;

    let (r_upper, r_lower);

    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        r_upper = f_fmla(r.hi, x_sign, f_fmla(r_lo, x_sign, err));
        r_lower = f_fmla(r.hi, x_sign, f_fmla(r_lo, x_sign, -err));
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let r_lo = r_lo * x_sign;
        let r_hi = r.hi * x_sign;
        r_upper = r_hi + (r_lo + err);
        // Both bounds must start from the sign-adjusted high part; using the
        // unsigned `r.hi` here would make the test fail for every negative x
        // and force the slow 128-bit path.
        r_lower = r_hi + (r_lo - err);
    }

    if r_upper == r_lower {
        return r_upper;
    }

    // Ziv's accuracy test failed, we redo the computations in Float128.
    // Recalculate mod 1/64.
    let idx = (u * f64::from_bits(0x4050000000000000)).round_finite() as usize;

    // After the first step of Newton-Raphson approximating v = sqrt(u), we have
    // that:
    //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
    //   v_lo = h / (2 * v_hi)
    // With error:
    //   sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
    //                           = -h^2 / (2*v * (sqrt(u) + v)^2).
    // Since:
    //   (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
    // we can add another correction term to (v_hi + v_lo) that is:
    //   v_ll = -h^2 / (2*v_hi * 4u)
    //        = -v_lo * (h / 4u)
    //        = -vl * (h / 8u),
    // making the errors:
    //   sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
    // well beyond 128-bit precision needed.

    // Get the rounding error of vl = 2 * v_lo ~ h / vh
    // Get full product of vh * vl
    let vl_lo;
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        vl_lo = f_fmla(-v_hi, vl, h) / v_hi;
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let vh_vl = DoubleDouble::from_exact_mult(v_hi, vl);
        vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
    }
    // vll = 2*v_ll = -vl * (h / (4u)).
    let t = h * (-0.25) / u;
    let vll = f_fmla(vl, t, vl_lo);
    // m_v = -(v_hi + v_lo + v_ll).
    let mv0 = DyadicFloat128::new_from_f64(vl) + DyadicFloat128::new_from_f64(vll);
    let mut m_v = DyadicFloat128::new_from_f64(vh) + mv0;
    m_v.sign = DyadicSign::Neg;

    // Perform computations in Float128:
    //   asin(x) = pi/2 - (v_hi + v_lo + vll) * P(u).
    let y_f128 =
        DyadicFloat128::new_from_f64(f_fmla(idx as f64, f64::from_bits(0xbf90000000000000), u));

    const PI_OVER_TWO_F128: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -127,
        mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
    };

    let p_f128 = asin_eval_dyadic(y_f128, idx);
    let r0_f128 = m_v * p_f128;
    let mut r_f128 = PI_OVER_TWO_F128 + r0_f128;

    // The value above was computed for |x|; restore the sign of x.
    if x.is_sign_negative() {
        r_f128.sign = DyadicSign::Neg;
    }

    // Final scaling by 1/pi in 128-bit precision.
    r_f128 = r_f128.quick_mul(&crate::acospi::INV_PI_F128);

    r_f128.fast_as_f64()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn f_asinpi_test() {
assert_eq!(
f_asinpi(-0.00000000032681723993732703),
-0.00000000010402915844735117
);
assert_eq!(f_asinpi(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000017801371778309684), 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005666352624669099);
assert_eq!(f_asinpi(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000026752519513526076), 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008515591441480124);
assert_eq!(f_asinpi(-0.4), -0.13098988043445461);
assert_eq!(f_asinpi(-0.8), -0.2951672353008666);
assert_eq!(f_asinpi(0.4332432142124432), 0.14263088583055605);
assert_eq!(f_asinpi(0.8543543534343434), 0.326047108714517);
assert_eq!(f_asinpi(0.00323146509843243), 0.0010286090778797426);
}
}

262
vendor/pxfm/src/asinpif.rs vendored Normal file
View File

@@ -0,0 +1,262 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
/// Polynomial coefficient table used by `f_asinpif`, stored as raw `f64` bit
/// patterns (every entry is decoded with `f64::from_bits` before use).
///
/// `f_asinpif` picks a row with an index built from the exponent and the
/// leading mantissa bits of `x`, masked to `0..=15`:
/// - row 0 is used when that index is 0 (`|x| < 2^-4`), as a polynomial in
///   `x^2` whose value is multiplied by `x`;
/// - every other row is used as a degree-7 polynomial in `|x|` that multiplies
///   `sqrt(1 - |x|)`, the product being subtracted from `0.5`.
///
/// NOTE(review): the name suggests the table is shared with the acos/pi
/// counterpart — confirm against its other users.
pub(crate) static ASINCOSF_PI_TABLE: [[u64; 8]; 16] = [
// Row 0: small-argument series in x^2 (leading coefficient is about 1/pi).
[
0x3fd45f306dc9c882,
0x3fab2995e7b7dc2f,
0x3f98723a1cf50c7e,
0x3f8d1a4591d16a29,
0x3f83ce3aa68ddaee,
0x3f7d3182ab0cc1bf,
0x3f762b379a8b88e3,
0x3f76811411fcfec2,
],
// Rows 1..=15: per-interval polynomials in |x| applied to sqrt(1 - |x|).
[
0x3fdffffffffd3cda,
0xbfb17cc1b3355fdd,
0x3f9d067a1e8d5a99,
0xbf908e16fb09314a,
0x3f85eed43d42dcb2,
0xbf7f58baca7acc71,
0x3f75dab64e2dcf15,
0xbf659270e30797ac,
],
[
0x3fdfffffff7c4617,
0xbfb17cc149ded3a2,
0x3f9d0654d4cb2c1a,
0xbf908c3ba713d33a,
0x3f85d2053481079c,
0xbf7e485ebc545e7e,
0x3f7303baca167ddd,
0xbf5dee8d16d06b38,
],
[
0x3fdffffffa749848,
0xbfb17cbe71559350,
0x3f9d05a312269adf,
0xbf90862b3ee617d7,
0x3f85920708db2a73,
0xbf7cb0463b3862c3,
0x3f702b82478f95d7,
0xbf552a7b8579e729,
],
[
0x3fdfffffe1f92bb5,
0xbfb17cb3e74c64e3,
0x3f9d03af67311cbf,
0xbf9079441cbfc7a0,
0x3f852b4287805a61,
0xbf7ac3286d604a98,
0x3f6b2f1210d9701b,
0xbf4e740ddc25afd6,
],
[
0x3fdfffff92beb6e2,
0xbfb17c986fe9518b,
0x3f9cff98167c9a5e,
0xbf90638b591eae52,
0x3f84a0803828959e,
0xbf78adeca229f11d,
0x3f66b9a7ba05dfce,
0xbf4640521a43b2d0,
],
[
0x3fdffffeccee5bfc,
0xbfb17c5f1753f5ea,
0x3f9cf874e4fe258f,
0xbf9043e6cf77b256,
0x3f83f7db42227d92,
0xbf7691a6fa2a2882,
0x3f62f6543162bc61,
0xbf407d5da05822b6,
],
[
0x3fdffffd2f64431d,
0xbfb17bf8208c10c1,
0x3f9ced7487cdb124,
0xbf901a0d30932905,
0x3f83388f99b254da,
0xbf74844e245c65bd,
0x3f5fa777150197c6,
0xbf38c1ecf16a05c8,
],
[
0x3fdffffa36d1712e,
0xbfb17b523971bd4e,
0x3f9cddee26de2dee,
0xbf8fccb00abaaabc,
0x3f8269afc3622342,
0xbf72933152686752,
0x3f5a76d4956cc9a3,
0xbf32ce7d6dc651ce,
],
[
0x3fdffff5402ab3a1,
0xbfb17a5ba85da77a,
0x3f9cc96894e05c02,
0xbf8f532143cb832e,
0x3f819180b660ff09,
0xbf70c57417a78b3c,
0x3f562e26cbd7bb1e,
0xbf2ce28d33fe1df3,
],
[
0x3fdfffed8d639751,
0xbfb1790349f3ae76,
0x3f9caf9a4fd1b398,
0xbf8ec986b111342e,
0x3f80b53c3ad4baa4,
0xbf6e3c2282eeace4,
0x3f52a55369f55bbe,
0xbf2667fe48c396e8,
],
[
0x3fdfffe24b714161,
0xbfb177394fbcb719,
0x3f9c90652d920ebd,
0xbf8e3239197bddf1,
0x3f7fb2188525b025,
0xbf6b3aadd451afc7,
0x3f4f74020f31fdab,
0xbf218b0cb246768d,
],
[
0x3fdfffd298bec9e2,
0xbfb174efbfd34648,
0x3f9c6bcfe48ea92b,
0xbf8d8f9f2a16157c,
0x3f7e0044f56c8864,
0xbf6883e2347fe76c,
0x3f4a9f0e3c1b7af5,
0xbf1bb5acc0e60825,
],
[
0x3fdfffbd8b784c4d,
0xbfb1721abdd3722e,
0x3f9c41fee756d4b0,
0xbf8ce40bccf8065f,
0x3f7c59b684b70ef9,
0xbf66133d027996b3,
0x3f469cad01106397,
0xbf160f8e45494156,
],
[
0x3fdfffa23749cf88,
0xbfb16eb0a8285c06,
0x3f9c132d762e1b0d,
0xbf8c31a959398f4e,
0x3f7ac1c5b46bc8a0,
0xbf63e34f1abe51dc,
0x3f4346738737c0b9,
0xbf11b227a3f5c750,
],
[
0x3fdfff7fb25bb407,
0xbfb16aaa14d75640,
0x3f9bdfa75fca5ff2,
0xbf8b7a6e260d079c,
0x3f793ab06911033c,
0xbf61ee5560967fd5,
0x3f407d31060838bf,
0xbf0c96f33a283115,
],
];
/// Computes asin(x)/PI
///
/// Special cases: `x = ±1` gives `±0.5`, a NaN input propagates as NaN, and
/// every other input with `|x| > 1` gives NaN.
///
/// Max ULP 0.5
#[inline]
pub fn f_asinpif(x: f32) -> f32 {
    let bits = x.to_bits();
    let biased_exp: i32 = ((bits >> 23) & 0xff) as i32;

    if biased_exp >= 127 {
        // |x| >= 1 or nan
        if x.abs() == 1.0 {
            // |x| = 1
            return f32::copysign(0.5, x);
        }
        let is_nan = biased_exp == 0xff && bits.wrapping_shl(9) != 0;
        // x = nan propagates, |x| > 1 is a domain error.
        return if is_nan { x + x } else { f32::NAN };
    }

    // Table row: the leading bits of |x| seen as a fixed-point number.
    // shift < 32 corresponds to |x| >= 2^-12; smaller inputs always use row 0.
    let shift: i32 = 146i32.wrapping_sub(biased_exp);
    let row = if shift < 32 {
        (((bits & 0x007fffff) | 1 << 23) >> shift) as i32
    } else {
        0i32
    };

    let [k0, k1, k2, k3, k4, k5, k6, k7] =
        ASINCOSF_PI_TABLE[row as usize & 15].map(f64::from_bits);

    let z = x as f64;
    let z2 = z * z;
    let z4 = z2 * z2;

    if row != 0 {
        // |x| >= 2^-4: asin(x)/pi = sign(x) * (0.5 - sqrt(1 - |x|) * P(|x|))
        let az = x.abs() as f64;
        let root = (1. - az).sqrt();
        let low = f_fmla(f_fmla(az, k3, k2), z2, f_fmla(az, k1, k0));
        let high = f_fmla(f_fmla(az, k7, k6), z2, f_fmla(az, k5, k4));
        let poly = low + high * z4;
        let signed = f_fmla(-poly, f64::copysign(root, z), f64::copysign(0.5, z));
        return signed as f32;
    }

    // |x| < 2^-4: asin(x)/pi = x * P(x^2)
    let low = f_fmla(f_fmla(z2, k3, k2), z4, f_fmla(z2, k1, k0));
    let high = f_fmla(f_fmla(z2, k7, k6), z4, f_fmla(z2, k5, k4));
    let poly = low + high * (z4 * z4);
    (z * poly) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks zero, an interior point and an out-of-domain input.
    #[test]
    fn test_asinpif() {
        for (input, expected) in [(0.0f32, 0.0f32), (0.5, 0.16666667)] {
            assert_eq!(f_asinpif(input), expected);
        }
        let out_of_domain = f_asinpif(1.5);
        assert!(out_of_domain.is_nan());
    }
}

292
vendor/pxfm/src/bessel/alpha0.rs vendored Normal file
View File

@@ -0,0 +1,292 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::f_polyeval9;
//
/// 128-bit evaluation of the order-0 asymptotic phase series: an 18-term
/// polynomial in `reciprocal^2`, multiplied by `reciprocal`.
///
/// See [bessel_0_asympt_alpha] for the info
pub(crate) fn bessel_0_asympt_alpha_hard(reciprocal: DyadicFloat128) -> DyadicFloat128 {
    // Series coefficients, lowest order first.
    static COEFFS: [DyadicFloat128; 18] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -130,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -131,
            mantissa: 0x85555555_55555555_55555555_55555555_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -130,
            mantissa: 0xd6999999_99999999_99999999_9999999a_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -127,
            mantissa: 0xd1ac2492_49249249_24924924_92492492_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -123,
            mantissa: 0xbbcd0fc7_1c71c71c_71c71c71_c71c71c7_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -118,
            mantissa: 0x85e8fe45_8ba2e8ba_2e8ba2e8_ba2e8ba3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0x8b5a8f33_63c4ec4e_c4ec4ec4_ec4ec4ec_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -108,
            mantissa: 0xc7661d79_9d59b555_55555555_55555555_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -102,
            mantissa: 0xbbced715_c2897a28_78787878_78787878_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -96,
            mantissa: 0xe14b19b4_aae3f7fe_be1af286_bca1af28_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -89,
            mantissa: 0xa7af7341_db2192db_975e0c30_c30c30c3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -82,
            mantissa: 0x97a8f676_b349f6fc_5cefd338_590b2164_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -75,
            mantissa: 0xa3d299fb_6f304d73_86e15f12_0fd70a3d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -68,
            mantissa: 0xd050b737_cbc044ef_e8807e3c_87f43da1_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -60,
            mantissa: 0x9a02379b_daa7e492_854f42de_6d3dffe6_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -52,
            mantissa: 0x83011a39_380e467d_de6b70ec_b92ce0cc_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -45,
            mantissa: 0xfe16521f_c79e5d9a_a5bed653_e3844e9a_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -36,
            mantissa: 0x8b54b13d_3fb3e1c4_15dbb880_0bb32218_u128,
        },
    ];

    let recip_sq = reciprocal * reciprocal;
    // Horner scheme, from the highest-order coefficient down to COEFFS[0].
    let series = COEFFS[..17]
        .iter()
        .rev()
        .fold(COEFFS[17], |acc, &coeff| recip_sq * acc + coeff);
    series * reciprocal
}
/**
Double-double evaluation of the order-0 asymptotic phase series: a 12-term
polynomial in `recip^2`, multiplied by `recip`.

Note on the generation script below: the series is in negative powers, but Sage
expresses it in positive powers, so `x=1/x` has to be applied before any real evaluation.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(0, x, 50)
Q = Qn_asymptotic(0, x, 50)

R_series = (-Q/P)

# alpha is atan(R_series) so we're doing Taylor series atan expansion on R_series

arctan_series_Z = sum([QQ(-1)**k * x**(QQ(2)*k+1) / RealField(700)(RealField(700)(2)*k+1) for k in range(25)])
alpha_series = arctan_series_Z(R_series)

# see the series
print(alpha_series)
```
**/
#[inline]
pub(crate) fn bessel_0_asympt_alpha(recip: DoubleDouble) -> DoubleDouble {
    // (lo, hi) bit patterns of the coefficients, lowest order first.
    const COEFFS: [(u64, u64); 12] = [
        (0x0000000000000000, 0x3fc0000000000000),
        (0x3c55555555555555, 0xbfb0aaaaaaaaaaab),
        (0x3c5999999999999a, 0x3fcad33333333333),
        (0xbc92492492492492, 0xbffa358492492492),
        (0xbcbc71c71c71c71c, 0x403779a1f8e38e39),
        (0xbd0745d1745d1746, 0xc080bd1fc8b1745d),
        (0xbd7d89d89d89d89e, 0x40d16b51e66c789e),
        (0x3dc5555555555555, 0xc128ecc3af33ab37),
        (0x3e2143c3c3c3c3c4, 0x418779dae2b8512f),
        (0x3df41e50d79435e5, 0xc1ec296336955c7f),
        (0x3ef6dcbaf0618618, 0x4254f5ee683b6432),
        (0x3f503a3102cc7a6f, 0xc2c2f51eced6693f),
    ];

    // Doing (1/x)*(1/x) instead (1/(x*x)) to avoid spurious overflow/underflow
    let recip_sq = DoubleDouble::quick_mult(recip, recip);

    // Horner scheme: start from the two highest-order coefficients...
    let mut acc = DoubleDouble::mul_add(
        recip_sq,
        DoubleDouble::from_bit_pair(COEFFS[11]),
        DoubleDouble::from_bit_pair(COEFFS[10]),
    );
    // ...then fold in COEFFS[9] down to COEFFS[1] as full double-double values.
    for &pair in COEFFS[1..10].iter().rev() {
        acc = DoubleDouble::mul_add(recip_sq, acc, DoubleDouble::from_bit_pair(pair));
    }
    // COEFFS[0] has a zero low word, so only its high word is added.
    acc = DoubleDouble::mul_add_f64(recip_sq, acc, f64::from_bits(COEFFS[0].1));

    let scaled = DoubleDouble::quick_mult(acc, recip);
    DoubleDouble::from_exact_add(scaled.hi, scaled.lo)
}
/**
Faster, lower-precision evaluation of the order-0 asymptotic phase series: the
nine highest-order coefficients are evaluated in plain `f64`, the three
lowest-order ones in double-double, and the result is multiplied by `recip`.

Note on the generation script below: the series is in negative powers, but Sage
expresses it in positive powers, so `x=1/x` has to be applied before any real evaluation.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(0, x, 50)
Q = Qn_asymptotic(0, x, 50)

R_series = (-Q/P)

# alpha is atan(R_series) so we're doing Taylor series atan expansion on R_series

arctan_series_Z = sum([QQ(-1)**k * x**(QQ(2)*k+1) / RealField(700)(RealField(700)(2)*k+1) for k in range(25)])
alpha_series = arctan_series_Z(R_series)

# see the series
print(alpha_series)
```
**/
#[inline]
pub(crate) fn bessel_0_asympt_alpha_fast(recip: DoubleDouble) -> DoubleDouble {
    // Coefficient bit patterns, lowest order first.
    const COEFF_BITS: [u64; 12] = [
        0x3fc0000000000000,
        0xbfb0aaaaaaaaaaab,
        0x3fcad33333333333,
        0xbffa358492492492,
        0x403779a1f8e38e39,
        0xc080bd1fc8b1745d,
        0x40d16b51e66c789e,
        0xc128ecc3af33ab37,
        0x418779dae2b8512f,
        0xc1ec296336955c7f,
        0x4254f5ee683b6432,
        0xc2c2f51eced6693f,
    ];
    let [c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11] = COEFF_BITS.map(f64::from_bits);

    // Doing (1/x)*(1/x) instead (1/(x*x)) to avoid spurious overflow/underflow
    let recip_sq = DoubleDouble::quick_mult(recip, recip);

    // High-order tail in plain f64, using only the high word of recip^2.
    let tail = f_polyeval9(recip_sq.hi, c3, c4, c5, c6, c7, c8, c9, c10, c11);

    // Three lowest-order Horner steps in double-double.
    let step2 = DoubleDouble::mul_f64_add_f64(recip_sq, tail, c2);
    let step1 = DoubleDouble::mul_add_f64(recip_sq, step2, c1);
    let step0 = DoubleDouble::mul_add_f64(recip_sq, step1, c0);
    DoubleDouble::quick_mult(step0, recip)
}

296
vendor/pxfm/src/bessel/alpha1.rs vendored Normal file
View File

@@ -0,0 +1,296 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::f_polyeval9;
/**
Faster, lower-precision evaluation of the order-1 asymptotic phase series: the
nine highest-order coefficients are evaluated in plain `f64`, the three
lowest-order ones in double-double, and the result is multiplied by `recip`.

Note on the generation script below: the series is in negative powers, but Sage
expresses it in positive powers, so `x=1/x` has to be applied before any real evaluation.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(1, x, 50)
Q = Qn_asymptotic(1, x, 50)

R_series = (-Q/P)

# alpha is atan(R_series) so we're doing Taylor series atan expansion on R_series

arctan_series_Z = sum([QQ(-1)**k * x**(QQ(2)*k+1) / RealField(700)(RealField(700)(2)*k+1) for k in range(25)])
alpha_series = arctan_series_Z(R_series)

# see the series
print(alpha_series)
```

See notes/bessel_asympt.ipynb for generation
**/
#[inline]
pub(crate) fn bessel_1_asympt_alpha_fast(recip: DoubleDouble) -> DoubleDouble {
    // Coefficient bit patterns, lowest order first.
    const COEFF_BITS: [u64; 12] = [
        0xbfd8000000000000,
        0x3fc5000000000000,
        0xbfd7bccccccccccd,
        0x4002f486db6db6db,
        0xc03e9fbf40000000,
        0x4084997b55945d17,
        0xc0d4a914195269d9,
        0x412cd1b53816aec1,
        0xc18aa4095d419351,
        0x41ef809305f11b9d,
        0xc2572e6809ed618b,
        0x42c4c5b6057839f9,
    ];
    let [c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11] = COEFF_BITS.map(f64::from_bits);

    // Doing (1/x)*(1/x) instead (1/(x*x)) to avoid spurious overflow/underflow
    let recip_sq = DoubleDouble::quick_mult(recip, recip);

    // High-order tail in plain f64, using only the high word of recip^2.
    let tail = f_polyeval9(recip_sq.hi, c3, c4, c5, c6, c7, c8, c9, c10, c11);

    // Three lowest-order Horner steps in double-double.
    let step2 = DoubleDouble::mul_f64_add_f64(recip_sq, tail, c2);
    let step1 = DoubleDouble::mul_add_f64(recip_sq, step2, c1);
    let step0 = DoubleDouble::mul_add_f64(recip_sq, step1, c0);
    DoubleDouble::quick_mult(step0, recip)
}
/**
Double-double evaluation of the order-1 asymptotic phase series: a 12-term
polynomial in `recip^2`, multiplied by `recip`.

Every coefficient whose low word is non-zero is added as a full double-double
value; only coefficients with a zero low word may take the cheaper
`mul_add_f64` step.

Note on the generation script below: the series is in negative powers, but Sage
expresses it in positive powers, so `x=1/x` has to be applied before any real evaluation.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()

def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )

def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )

P = Pn_asymptotic(1, x, 50)
Q = Qn_asymptotic(1, x, 50)

R_series = (-Q/P)

# alpha is atan(R_series) so we're doing Taylor series atan expansion on R_series

arctan_series_Z = sum([QQ(-1)**k * x**(QQ(2)*k+1) / RealField(700)(RealField(700)(2)*k+1) for k in range(25)])
alpha_series = arctan_series_Z(R_series)

# see the series
print(alpha_series)
```

See notes/bessel_asympt.ipynb for generation
**/
#[inline]
pub(crate) fn bessel_1_asympt_alpha(recip: DoubleDouble) -> DoubleDouble {
    // (lo, hi) bit patterns of the coefficients, lowest order first.
    const C: [(u64, u64); 12] = [
        (0x0000000000000000, 0xbfd8000000000000),
        (0x0000000000000000, 0x3fc5000000000000),
        (0x3c6999999999999a, 0xbfd7bccccccccccd),
        (0x3cab6db6db6db6db, 0x4002f486db6db6db),
        (0x0000000000000000, 0xc03e9fbf40000000),
        (0x3d21745d1745d174, 0x4084997b55945d17),
        (0x3d789d89d89d89d9, 0xc0d4a914195269d9),
        (0xbdb999999999999a, 0x412cd1b53816aec1),
        (0xbdfe5a5a5a5a5a5a, 0xc18aa4095d419351),
        (0x3e7e0ca50d79435e, 0x41ef809305f11b9d),
        (0xbedff8b720000000, 0xc2572e6809ed618b),
        (0xbf64e5d8ae68b7a7, 0x42c4c5b6057839f9),
    ];

    // Doing (1/x)*(1/x) instead (1/(x*x)) to avoid spurious overflow/underflow
    let x2 = DoubleDouble::quick_mult(recip, recip);

    // Horner scheme from the highest-order coefficient downwards.
    let mut p = DoubleDouble::mul_add(
        x2,
        DoubleDouble::from_bit_pair(C[11]),
        DoubleDouble::from_bit_pair(C[10]),
    );

    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[9]));
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[8]));
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[7]));
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[6]));
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[5]));
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[4]));
    // C[3] has a non-zero low word, so it must be added as a full
    // double-double; adding only `C[3].1` would silently drop that low word.
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
    p = DoubleDouble::mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
    // C[1] and C[0] have zero low words: the high word alone is the full value.
    p = DoubleDouble::mul_add_f64(x2, p, f64::from_bits(C[1].1));
    p = DoubleDouble::mul_add_f64(x2, p, f64::from_bits(C[0].1));

    let z = DoubleDouble::quick_mult(p, recip);

    DoubleDouble::from_exact_add(z.hi, z.lo)
}
//
/// 128-bit evaluation of the order-1 asymptotic phase series: an 18-term
/// polynomial in `reciprocal^2`, multiplied by `reciprocal`.
///
/// See [bessel_1_asympt_alpha] for the info
pub(crate) fn bessel_1_asympt_alpha_hard(reciprocal: DyadicFloat128) -> DyadicFloat128 {
    // Series coefficients, lowest order first.
    static COEFFS: [DyadicFloat128; 18] = [
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -129,
            mantissa: 0xc0000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -130,
            mantissa: 0xa8000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -129,
            mantissa: 0xbde66666_66666666_66666666_66666666_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -126,
            mantissa: 0x97a436db_6db6db6d_b6db6db6_db6db6db_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -123,
            mantissa: 0xf4fdfa00_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -118,
            mantissa: 0xa4cbdaac_a2e8ba2e_8ba2e8ba_2e8ba2e9_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -113,
            mantissa: 0xa548a0ca_934ec4ec_4ec4ec4e_c4ec4ec5_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -108,
            mantissa: 0xe68da9c0_b5760666_66666666_66666666_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -102,
            mantissa: 0xd5204aea_0c9a8879_69696969_69696969_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -96,
            mantissa: 0xfc04982f_88dce9e0_ca50d794_35e50d79_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -89,
            mantissa: 0xb973404f_6b0c58ff_c5b90000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -82,
            mantissa: 0xa62db02b_c1cfc563_44ea32e9_0b21642d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -75,
            mantissa: 0xb220e7ff_443c1584_7e85f4e0_55eb851f_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -68,
            mantissa: 0xe10a255c_ca5e68cc_00c2d6c0_acdc8000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -60,
            mantissa: 0xa573790c_5186f23b_5db502ea_d9fa5432_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -52,
            mantissa: 0x8c0ffedc_407a7015_453df84e_9c3f1d39_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -44,
            mantissa: 0x874226ed_c298a17a_d8c49a4e_dc9281a5_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -36,
            mantissa: 0x93cab36c_9ab9495c_310fa9cd_4b065359_u128,
        },
    ];

    let recip_sq = reciprocal * reciprocal;
    // Horner scheme, from the highest-order coefficient down to COEFFS[0].
    let series = COEFFS[..17]
        .iter()
        .rev()
        .fold(COEFFS[17], |acc, &coeff| recip_sq * acc + coeff);
    series * reciprocal
}

157
vendor/pxfm/src/bessel/bessel_exp.rs vendored Normal file
View File

@@ -0,0 +1,157 @@
/*
* // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{dd_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::exponents::{EXP_REDUCE_T0, EXP_REDUCE_T1};
use crate::round::RoundFinite;
/// Evaluates the short polynomial approximation of `exp(z)` for a reduced
/// argument `z`, returning the result as a [`DoubleDouble`].
///
/// The degree 1..=4 coefficients are combined in plain `f64` through nested
/// `dd_fmla` calls; that value is multiplied by `z` with
/// `DoubleDouble::from_exact_mult` and the constant term is added last.
#[inline(always)]
fn exp_poly(z: f64) -> DoubleDouble {
    /* The following is a degree-4 polynomial generated by Sollya for exp(x)
    over [-2^-12.905,2^-12.905]
    with absolute error < 2^-74.34 (see sollya/Q_1.sollya). */
    const C0: f64 = f64::from_bits(0x3ff0000000000000);
    const C1: f64 = f64::from_bits(0x3ff0000000000000);
    const C2: f64 = f64::from_bits(0x3fe0000000000000);
    const C3: f64 = f64::from_bits(0x3fc5555555997996);
    const C4: f64 = f64::from_bits(0x3fa5555555849d8d);
    // Nested evaluation of C1 + z*(C2 + z*(C3 + z*C4)), assuming
    // `dd_fmla(a, b, c)` computes a*b + c.
    let inner = dd_fmla(dd_fmla(dd_fmla(C4, z, C3), z, C2), z, C1);
    let z_times_inner = DoubleDouble::from_exact_mult(z, inner);
    DoubleDouble::f64_add(C0, z_times_inner)
}
/// Fast-path exponential used by the scaled Bessel routines.
///
/// The argument is reduced as `r = k * ln(2)/2^12 + z` with integer `k`
/// (the constants decode to `2^12/ln(2)` and a two-part split of
/// `ln(2)/2^12`). The result is assembled from two 64-entry table lookups
/// indexed by the low 12 bits of `k`, the polynomial [`exp_poly`] applied to
/// `z`, and a power of two built from the remaining bits of `k`.
///
/// NOTE(review): `r` is presumably expected to be finite and within the
/// range where `2^(k >> 12)` is a normal `f64` — confirm against callers.
#[inline]
pub(crate) fn i0_exp(r: f64) -> DoubleDouble {
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);
    const LN2_SPLIT: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3d0718432a1b0e26),
        f64::from_bits(0x3f262e42ff000000),
    );
    let k = (r * INVLOG2).round_finite();
    // Reduced argument: subtract the leading part of k*ln(2)/2^12 first,
    // then apply the small correction term.
    let reduced = f_fmla(LN2_SPLIT.lo, k, f_fmla(-LN2_SPLIT.hi, k, r));
    let ki = unsafe {
        k.to_int_unchecked::<i64>() // k is already integer, this is just a conversion
    };
    // Split k into: biased exponent | 6-bit index into T0 | 6-bit index into T1.
    let biased_exponent = (ki >> 12) + 0x3ff;
    let idx_t0 = ((ki >> 6) & 0x3f) as usize;
    let idx_t1 = (ki & 0x3f) as usize;
    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[idx_t0]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[idx_t1]);
    let table_product = DoubleDouble::quick_mult(t1, t0);
    let mantissa = DoubleDouble::quick_mult(table_product, exp_poly(reduced));
    // Place the biased exponent directly into the exponent field of an f64.
    let pow2 = f64::from_bits((biased_exponent as u64).wrapping_shl(52));
    DoubleDouble::quick_mult_f64(mantissa, pow2)
}
/// Double-double polynomial approximation of `exp(z)` for a reduced
/// double-double argument `z`; used by [`i0_exp_accurate`].
///
/// Degree-6 polynomial; all steps but the last run in double-double
/// arithmetic, and the final step adds the constant term `1.0` as an `f64`.
#[inline(always)]
fn exp_poly_dd(z: DoubleDouble) -> DoubleDouble {
    // Generated by Sollya:
    // d = [-2^-12.905,2^-12.905];
    // f = exp(x);
    // w = 1;
    // p = remez(f, 6, d, w);
    // pf = fpminimax(f, [|0,1,2,3,4,5,6|], [|1, 107...|], d, absolute, floating, 0, p);
    // err_p = -log2(dirtyinfnorm(pf*w-f, d));
    // display = decimal;
    const Q_1: [(u64, u64); 7] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x3a20e40000000000, 0x3ff0000000000000),
        (0x3a04820000000000, 0x3fe0000000000000),
        (0xbc756423c5338a66, 0x3fc5555555555556),
        (0xbc5560f74db5556c, 0x3fa5555555555556),
        (0x3c3648eca89bc6ac, 0x3f8111111144fbee),
        (0xbbd53d924ae90c8c, 0x3f56c16c16ffeecc),
    ];
    // Highest two coefficients seed the recurrence.
    let seed = DoubleDouble::quick_mul_add(
        z,
        DoubleDouble::from_bit_pair(Q_1[6]),
        DoubleDouble::from_bit_pair(Q_1[5]),
    );
    // Remaining double-double steps, coefficients 4 down to 1.
    let acc = Q_1[1..5].iter().rev().fold(seed, |p, &coeff| {
        DoubleDouble::quick_mul_add(z, p, DoubleDouble::from_bit_pair(coeff))
    });
    DoubleDouble::quick_mul_add_f64(z, acc, f64::from_bits(0x3ff0000000000000))
}
/// Higher-accuracy counterpart of [`i0_exp`], used when the fast path cannot
/// decide the rounding.
///
/// Uses the same table-driven scheme as the fast path, but the reduced
/// argument is kept as a double-double (with an additional low-order term of
/// the `ln(2)/2^12` split) and the polynomial is [`exp_poly_dd`].
///
/// NOTE(review): `r` is presumably expected to be finite — confirm against
/// callers.
#[cold]
pub(crate) fn i0_exp_accurate(r: f64) -> DoubleDouble {
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);
    const LN2_SPLIT: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3d0718432a1b0e26),
        f64::from_bits(0x3f262e42ff000000),
    );
    // Extra low-order term extending the split above.
    const LN2_TAIL: f64 = f64::from_bits(0x3999ff0342542fc3);
    let k = (r * INVLOG2).round_finite();
    let head = f_fmla(-LN2_SPLIT.hi, k, r);
    let correction = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN2_TAIL, LN2_SPLIT.lo), k);
    let reduced = DoubleDouble::full_add_f64(correction, head);
    let ki = unsafe {
        k.to_int_unchecked::<i64>() // k is already integer, this is just a conversion
    };
    // Split k into: biased exponent | 6-bit index into T0 | 6-bit index into T1.
    let biased_exponent = (ki >> 12) + 0x3ff;
    let idx_t0 = ((ki >> 6) & 0x3f) as usize;
    let idx_t1 = (ki & 0x3f) as usize;
    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[idx_t0]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[idx_t1]);
    let table_product = DoubleDouble::quick_mult(t1, t0);
    let mantissa = DoubleDouble::quick_mult(table_product, exp_poly_dd(reduced));
    // Place the biased exponent directly into the exponent field of an f64.
    let pow2 = f64::from_bits((biased_exponent as u64).wrapping_shl(52));
    DoubleDouble::quick_mult_f64(mantissa, pow2)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The fast and the accurate exponential must both round to the same
    /// `f64` value for the argument 0.5.
    #[test]
    fn test_i0() {
        const EXPECTED: f64 = 1.6487212707001282;
        let fast = i0_exp(0.5).to_f64();
        assert_eq!(fast, EXPECTED);
        let accurate = i0_exp_accurate(0.5).to_f64();
        assert_eq!(accurate, EXPECTED);
    }
}

260
vendor/pxfm/src/bessel/beta0.rs vendored Normal file
View File

@@ -0,0 +1,260 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::f_polyeval9;
/**
Beta series
Generated by SageMath:
```python
#generate b series
def binomial_like(n, m):
prod = QQ(1)
z = QQ(4)*(n**2)
for k in range(1,m + 1):
prod *= (z - (2*k - 1)**2)
return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))
R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()
def Pn_asymptotic(n, y, terms=10):
# now y = 1/x
return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )
def Qn_asymptotic(n, y, terms=10):
return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )
P = Pn_asymptotic(0, x, 50)
Q = Qn_asymptotic(0, x, 50)
def sqrt_series(s):
val = S.valuation()
lc = S[val] # Leading coefficient
b = lc.sqrt() * x**(val // 2)
for _ in range(5):
b = (b + S / b) / 2
b = b
return b
S = (P**2 + Q**2).truncate(50)
b_series = sqrt_series(S).truncate(30)
#see the series
print(b_series)
```
**/
#[inline]
pub(crate) fn bessel_0_asympt_beta(recip: DoubleDouble) -> DoubleDouble {
    // Evaluates the order-0 beta series as a degree-9 polynomial in recip^2.
    // Only the two highest coefficients carry a non-zero first word; the
    // other eight are used through their second word as a plain f64.
    const C: [(u64, u64); 10] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x0000000000000000, 0xbfb0000000000000),
        (0x0000000000000000, 0x3fba800000000000),
        (0x0000000000000000, 0xbfe15f0000000000),
        (0x0000000000000000, 0x4017651180000000),
        (0x0000000000000000, 0xc05ab8c13b800000),
        (0x0000000000000000, 0x40a730492f262000),
        (0x0000000000000000, 0xc0fc73a7acd696f0),
        (0xbdf3a00000000000, 0x41577458dd9fce68),
        (0xbe4ba6b000000000, 0xc1b903ab9b27e18f),
    ];
    // Square the reciprocal, (1/x)*(1/x), rather than forming 1/(x*x), to
    // avoid spurious overflow/underflow.
    let r2 = DoubleDouble::quick_mult(recip, recip);
    // The two highest coefficients are combined in full double-double form.
    let seed = DoubleDouble::mul_add(
        r2,
        DoubleDouble::from_bit_pair(C[9]),
        DoubleDouble::from_bit_pair(C[8]),
    );
    // Coefficients 7 down to 0.
    C[..8].iter().rev().fold(seed, |acc, &(_, bits)| {
        DoubleDouble::mul_add_f64(r2, acc, f64::from_bits(bits))
    })
}
/**
Beta series
Generated by SageMath:
```python
#generate b series
def binomial_like(n, m):
prod = QQ(1)
z = QQ(4)*(n**2)
for k in range(1,m + 1):
prod *= (z - (2*k - 1)**2)
return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))
R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()
def Pn_asymptotic(n, y, terms=10):
# now y = 1/x
return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )
def Qn_asymptotic(n, y, terms=10):
return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )
P = Pn_asymptotic(0, x, 50)
Q = Qn_asymptotic(0, x, 50)
def sqrt_series(s):
val = S.valuation()
lc = S[val] # Leading coefficient
b = lc.sqrt() * x**(val // 2)
for _ in range(5):
b = (b + S / b) / 2
b = b
return b
S = (P**2 + Q**2).truncate(50)
b_series = sqrt_series(S).truncate(30)
#see the series
print(b_series)
```
**/
#[inline]
pub(crate) fn bessel_0_asympt_beta_fast(recip: DoubleDouble) -> DoubleDouble {
    // Cheaper variant of the order-0 beta series: coefficients 1..=9 are
    // evaluated in plain f64 on the high word of recip^2, and only the last
    // step (multiply by recip^2, add coefficient 0) involves a double-double.
    const C: [u64; 10] = [
        0x3ff0000000000000,
        0xbfb0000000000000,
        0x3fba800000000000,
        0xbfe15f0000000000,
        0x4017651180000000,
        0xc05ab8c13b800000,
        0x40a730492f262000,
        0xc0fc73a7acd696f0,
        0x41577458dd9fce68,
        0xc1b903ab9b27e18f,
    ];
    let [c0, c1, c2, c3, c4, c5, c6, c7, c8, c9] = C.map(f64::from_bits);
    // Square the reciprocal, (1/x)*(1/x), rather than forming 1/(x*x), to
    // avoid spurious overflow/underflow.
    let r2 = DoubleDouble::quick_mult(recip, recip);
    let tail = f_polyeval9(r2.hi, c1, c2, c3, c4, c5, c6, c7, c8, c9);
    DoubleDouble::mul_f64_add_f64(r2, tail, c0)
}
/// see [bessel_0_asympt_beta] for more info
///
/// Variant working in [`DyadicFloat128`]: a degree-11 polynomial in
/// `recip^2`, evaluated by Horner's scheme.
pub(crate) fn bessel_0_asympt_beta_hard(recip: DyadicFloat128) -> DyadicFloat128 {
    static C: [DyadicFloat128; 12] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -131,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -131,
            mantissa: 0xd4000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -128,
            mantissa: 0x8af80000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -125,
            mantissa: 0xbb288c00_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -121,
            mantissa: 0xd5c609dc_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -116,
            mantissa: 0xb9824979_31000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -111,
            mantissa: 0xe39d3d66_b4b78000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -105,
            mantissa: 0xbba2c6ec_fe733d8c_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -99,
            mantissa: 0xc81d5cd9_3f0c79ba_6b000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -92,
            mantissa: 0x86118ddf_c1ffc100_0ee1b000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -86,
            mantissa: 0xdc7ccfa9_930b874d_52df3464_00000000_u128,
        },
    ];
    let r2 = recip * recip;
    // Horner's scheme from the highest coefficient down to C[0].
    C[..11]
        .iter()
        .rev()
        .fold(C[11], |acc, &coeff| r2 * acc + coeff)
}

264
vendor/pxfm/src/bessel/beta1.rs vendored Normal file
View File

@@ -0,0 +1,264 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::f_polyeval9;
/**
Note expansion generation below: this is negative series expressed in Sage as positive,
so before any real evaluation `x=1/x` should be applied
Generated by SageMath:
```python
def binomial_like(n, m):
prod = QQ(1)
z = QQ(4)*(n**2)
for k in range(1,m + 1):
prod *= (z - (2*k - 1)**2)
return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))
R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()
def Pn_asymptotic(n, y, terms=10):
# now y = 1/x
return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )
def Qn_asymptotic(n, y, terms=10):
return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )
P = Pn_asymptotic(1, x, 50)
Q = Qn_asymptotic(1, x, 50)
def sqrt_series(s):
val = S.valuation()
lc = S[val] # Leading coefficient
b = lc.sqrt() * x**(val // 2)
for _ in range(5):
b = (b + S / b) / 2
b = b
return b
S = (P**2 + Q**2).truncate(50)
b_series = sqrt_series(S).truncate(30)
# see the beta series
print(b_series)
```
See notes/bessel_asympt.ipynb for generation
**/
#[inline]
pub(crate) fn bessel_1_asympt_beta_fast(recip: DoubleDouble) -> DoubleDouble {
    // Cheaper variant of the order-1 beta series: coefficients 1..=9 are
    // evaluated in plain f64 on the high word of recip^2, and only the last
    // step (multiply by recip^2, add coefficient 0) involves a double-double.
    const C: [u64; 10] = [
        0x3ff0000000000000,
        0x3fc8000000000000,
        0xbfc8c00000000000,
        0x3fe9c50000000000,
        0xc01ef5b680000000,
        0x40609860dd400000,
        0xc0abae9b7a06e000,
        0x41008711d41c1428,
        0xc15ab70164c8be6e,
        0x41bc1055e24f297f,
    ];
    let [c0, c1, c2, c3, c4, c5, c6, c7, c8, c9] = C.map(f64::from_bits);
    // Square the reciprocal, (1/x)*(1/x), rather than forming 1/(x*x), to
    // avoid spurious overflow/underflow.
    let r2 = DoubleDouble::quick_mult(recip, recip);
    let tail = f_polyeval9(r2.hi, c1, c2, c3, c4, c5, c6, c7, c8, c9);
    DoubleDouble::mul_f64_add_f64(r2, tail, c0)
}
/**
Note expansion generation below: this is negative series expressed in Sage as positive,
so before any real evaluation `x=1/x` should be applied
Generated by SageMath:
```python
def binomial_like(n, m):
prod = QQ(1)
z = QQ(4)*(n**2)
for k in range(1,m + 1):
prod *= (z - (2*k - 1)**2)
return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))
R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()
def Pn_asymptotic(n, y, terms=10):
# now y = 1/x
return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )
def Qn_asymptotic(n, y, terms=10):
return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )
P = Pn_asymptotic(1, x, 50)
Q = Qn_asymptotic(1, x, 50)
def sqrt_series(s):
val = S.valuation()
lc = S[val] # Leading coefficient
b = lc.sqrt() * x**(val // 2)
for _ in range(5):
b = (b + S / b) / 2
b = b
return b
S = (P**2 + Q**2).truncate(50)
b_series = sqrt_series(S).truncate(30)
# see the beta series
print(b_series)
```
See notes/bessel_asympt.ipynb for generation
**/
#[inline]
pub(crate) fn bessel_1_asympt_beta(recip: DoubleDouble) -> DoubleDouble {
    // Evaluates the order-1 beta series as a degree-9 polynomial in recip^2.
    // Only the two highest coefficients carry a non-zero first word; the
    // other eight are used through their second word as a plain f64.
    const C: [(u64, u64); 10] = [
        (0x0000000000000000, 0x3ff0000000000000), // 1
        (0x0000000000000000, 0x3fc8000000000000), // 2
        (0x0000000000000000, 0xbfc8c00000000000), // 3
        (0x0000000000000000, 0x3fe9c50000000000), // 4
        (0x0000000000000000, 0xc01ef5b680000000), // 5
        (0x0000000000000000, 0x40609860dd400000), // 6
        (0x0000000000000000, 0xc0abae9b7a06e000), // 7
        (0x0000000000000000, 0x41008711d41c1428), // 8
        (0xbdf7a00000000000, 0xc15ab70164c8be6e),
        (0xbe40e1f000000000, 0x41bc1055e24f297f),
    ];
    // Square the reciprocal, (1/x)*(1/x), rather than forming 1/(x*x), to
    // avoid spurious overflow/underflow.
    let r2 = DoubleDouble::quick_mult(recip, recip);
    // The two highest coefficients are combined in full double-double form.
    let seed = DoubleDouble::mul_add(
        r2,
        DoubleDouble::from_bit_pair(C[9]),
        DoubleDouble::from_bit_pair(C[8]),
    );
    // Coefficients 8 down to 1 in the table's own numbering (indices 7..=0).
    C[..8].iter().rev().fold(seed, |acc, &(_, bits)| {
        DoubleDouble::mul_add_f64(r2, acc, f64::from_bits(bits))
    })
}
/// see [bessel_1_asympt_beta] for more info
///
/// Variant working in [`DyadicFloat128`]: a degree-11 polynomial in
/// `recip^2`, evaluated by Horner's scheme.
pub(crate) fn bessel_1_asympt_beta_hard(recip: DyadicFloat128) -> DyadicFloat128 {
    static C: [DyadicFloat128; 12] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -130,
            mantissa: 0xc0000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -130,
            mantissa: 0xc6000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -128,
            mantissa: 0xce280000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -125,
            mantissa: 0xf7adb400_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -120,
            mantissa: 0x84c306ea_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -116,
            mantissa: 0xdd74dbd0_37000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -110,
            mantissa: 0x84388ea0_e0a14000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -105,
            mantissa: 0xd5b80b26_45f372f4_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -99,
            mantissa: 0xe082af12_794bf6f1_e1000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -92,
            mantissa: 0x94a06149_f30146bc_fe8ed000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -86,
            mantissa: 0xf212edfc_42a62526_4fac2b0c_00000000_u128,
        },
    ];
    let r2 = recip * recip;
    // Horner's scheme from the highest coefficient down to C[0].
    C[..11]
        .iter()
        .rev()
        .fold(C[11], |acc, &coeff| r2 * acc + coeff)
}

1118
vendor/pxfm/src/bessel/i0.rs vendored Normal file

File diff suppressed because it is too large Load Diff

834
vendor/pxfm/src/bessel/i0e.rs vendored Normal file
View File

@@ -0,0 +1,834 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::bessel_exp::i0_exp_accurate;
use crate::bessel::i0::{
bessel_rsqrt_hard, eval_small_hard_3p6_to_7p5, i0_0_to_3p6_dd, i0_0_to_3p6_hard,
i0_3p6_to_7p5_dd,
};
use crate::bessel::i0_exp;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
/// Modified exponentially scaled Bessel of the first kind of order 0
///
/// Computes exp(-|x|)*I0(x)
///
/// The function is even in `x`, so every branch works on `|x|`:
/// * `|x| <= 2.2204460492503131e-24` returns `1`;
/// * `|x| <= f64::EPSILON` returns `1 - |x|`;
/// * `|x| == inf` returns `0`, and NaN propagates;
/// * otherwise `|x|` is dispatched to the interval-specific kernels
///   (`<= 3.6`, `<= 7.5`, `<= 9.5`, and the asymptotic one above that).
pub fn f_i0e(x: f64) -> f64 {
    // |x| as raw bits (sign bit cleared).
    let xb = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    // Bits shifted left by one: drops the sign so the comparisons below act on |x|.
    let ux = x.to_bits().wrapping_shl(1);
    if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 {
        // |x| <= f64::EPSILON, |x| == inf, x == NaN
        if ux <= 0x760af31dc4611874u64 {
            // |x| <= 2.2204460492503131e-24f64
            return 1.;
        }
        if ux <= 0x7960000000000000u64 {
            // |x| <= f64::EPSILON
            // Power series of I0(x)Exp[-|x|] ~ 1 - |x| + O(x^2).
            // The absolute value matters: the scaled function is even, so a
            // negative tiny `x` must not produce a result above 1.
            return 1. - f64::from_bits(xb);
        }
        if x.is_infinite() {
            return 0.;
        }
        return x + f64::NAN; // x = NaN
    }
    if xb <= 0x4023000000000000u64 {
        // |x| <= 9.5
        if xb <= 0x400ccccccccccccdu64 {
            // |x| <= 3.6
            return i0e_0_to_3p6_exec(f64::from_bits(xb));
        } else if xb <= 0x401e000000000000u64 {
            // |x| <= 7.5
            return i0e3p6_to_7p5(f64::from_bits(xb));
        }
        return i0e_7p5_to_9p5(f64::from_bits(xb));
    }
    i0e_asympt(f64::from_bits(xb))
}
/**
Computes the exponentially scaled I0, `exp(-x)*I0(x)`, for `x` in [3.6; 7.5].

The caller passes `|x|`, which also covers [-7.5; -3.6]. A double-double
fast path is tried first; if its error interval does not round to a single
`f64`, the accurate kernels are used instead.
**/
#[inline]
fn i0e3p6_to_7p5(x: f64) -> f64 {
    // Fast path: I0(x) and exp(-x) both as double-double, then multiplied.
    let fast = DoubleDouble::quick_mult(i0_3p6_to_7p5_dd(x), i0_exp(-x));
    let err = f_fmla(
        fast.hi,
        f64::from_bits(0x3c56a09e667f3bcd), // 2^-57.5
        f64::from_bits(0x3c00000000000000), // 2^-63
    );
    // Rounding test: both ends of the error interval must give the same f64.
    let upper = fast.hi + (fast.lo + err);
    let lower = fast.hi + (fast.lo - err);
    if upper != lower {
        // Undecided: recompute with the accurate kernels.
        let slow = DoubleDouble::quick_mult(eval_small_hard_3p6_to_7p5(x), i0_exp_accurate(-x));
        return slow.to_f64();
    }
    upper
}
/// Computes the exponentially scaled I0, `exp(-x)*I0(x)`, for the `x <= 3.6`
/// branch of [`f_i0e`] (the caller passes `|x|`).
///
/// A double-double fast path is tried first; if its error interval does not
/// round to a single `f64`, the accurate kernels are used instead.
#[inline]
fn i0e_0_to_3p6_exec(x: f64) -> f64 {
    // Fast path: I0(x) and exp(-x) both as double-double, then multiplied.
    let fast = DoubleDouble::quick_mult(i0_0_to_3p6_dd(x), i0_exp(-x));
    let err = f_fmla(
        fast.hi,
        f64::from_bits(0x3c40000000000000), // 2^-59
        f64::from_bits(0x3be0000000000000), // 2^-65
    );
    // Rounding test: both ends of the error interval must give the same f64.
    let upper = fast.hi + (fast.lo + err);
    let lower = fast.hi + (fast.lo - err);
    if upper != lower {
        // Undecided: recompute with the accurate kernels.
        let slow = DoubleDouble::quick_mult(i0_0_to_3p6_hard(x), i0_exp_accurate(-x));
        return slow.to_f64();
    }
    upper
}
/**
Mid-interval [7.5;9.5] generated by Wolfram:
I0(x)=R(1/x)/sqrt(x)*Exp(x)
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{1/9.5,1/7.5},11,11},WorkingPrecision->120]
num=Numerator[approx][[1]];
den=Denominator[approx][[1]];
poly=den;
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0e_7p5_to_9p5(x: f64) -> f64 {
let dx = x;
let recip = DoubleDouble::from_quick_recip(x);
const P: [(u64, u64); 12] = [
(0x3c778e3de1f76f48, 0x3fd988450531281b),
(0xbcb572f6149f389e, 0xc01a786676fb4d3a),
(0x3cf2f373365347ed, 0x405c0e8405fdb642),
(0x3d276a94c8f1e627, 0xc0885e4718dfb761),
(0x3d569f8a993434e2, 0x40b756d52d5fa90c),
(0xbd6f953f7dd1a223, 0xc0c8818365c47790),
(0xbd74247967fbf7b2, 0x40e8cf89daf87353),
(0x3db449add7abb056, 0x41145d3c2d96d159),
(0xbdc5cc822b71f891, 0xc123694c58fd039b),
(0x3da2047ac1a6fba8, 0x415462e630bf3e7e),
(0xbdc2f2c06eda6a95, 0xc14c6984ebdd6792),
(0xbdf51fa85dafeca5, 0x4166a437c202d27b),
];
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3cde08e4cbf324d1, 0xc030b67bd69af0ca),
(0x3cec5e4ee7e77024, 0x4071b54c0f58409c),
(0xbd340e1131739e2f, 0xc09f140a738b14b3),
(0x3d607673189d6644, 0x40cdb44bd822add2),
(0xbd7793a4f1dd74d1, 0xc0e03fe2689b802d),
(0xbd8415501228a87e, 0x410009beafea72cc),
(0x3dcecdac2702661f, 0x4128c2073da9a447),
(0xbdd8386404f3dec5, 0xc1389ec7d7186bf4),
(0xbe06eb53a3e86436, 0x4168b7a2dc85ed0b),
(0x3e098e2cefaf8299, 0xc1604f8cf34af02c),
(0x3e1a5e496b547001, 0x41776b1e0153d1e9),
];
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
let z = DoubleDouble::div(p_num, p_den);
let r_sqrt = DoubleDouble::from_rsqrt_fast(dx);
let r = z * r_sqrt;
let err = f_fmla(
r.hi,
f64::from_bits(0x3bc0000000000000),
f64::from_bits(0x392bdb8cdadbe111),
);
let up = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if up != lb {
return i0e_7p5_to_9p5_hard(x);
}
r.to_f64()
}
/**
Mid-interval [7.5;9.5] generated by Wolfram Mathematica:
I0(x)=R(1/x)/sqrt(x)*Exp(x)
Polynomial generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/9.5,1/7.5},13,13},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn i0e_7p5_to_9p5_hard(x: f64) -> f64 {
    // Accurate fallback for the [7.5; 9.5] interval: the degree-13 rational
    // approximation P(1/x)/Q(1/x) is evaluated in DyadicFloat128 with
    // Horner's scheme and multiplied by the reciprocal square root.
    static P: [DyadicFloat128; 14] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -129,
            mantissa: 0xcc422a04_45cde144_75a3800b_45c38460_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -124,
            mantissa: 0xada66144_fcccc1a3_036f76b2_cabd6281_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -120,
            mantissa: 0xeabdda02_fa201d98_10e58d1f_7eb62bd7_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -116,
            mantissa: 0xbbfd3297_6f88a7df_5924587b_d5bdcdb8_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0xfca29453_efe393bf_1651627b_7d543875_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -110,
            mantissa: 0xee7c7220_bbbd248e_bb6adac6_f9a5ce95_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -107,
            mantissa: 0xc07455dd_830ba705_414408c6_06732a5a_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -105,
            mantissa: 0xe2247793_b50cd0f0_80e8981d_933f75da_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -103,
            mantissa: 0xe14a9831_82582a0b_dd27e8b6_4ed9aac2_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -101,
            mantissa: 0xa3b2ae2f_5b64f37e_c1538435_34f02faf_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -100,
            mantissa: 0xbab73503_5b7e38d9_bbe4a84b_9007c6e8_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -99,
            mantissa: 0xa68911fc_5d87bbe7_0d4fe854_5c681ac5_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -99,
            mantissa: 0x9e997222_55ef4045_fa9f311d_57d082a2_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -99,
            mantissa: 0xbe93656a_b0a4f32d_3ebbfdeb_b1cbb839_u128,
        },
    ];
    static Q: [DyadicFloat128; 14] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -123,
            mantissa: 0xdaa34a7e_861dddff_a0642080_cd83dd65_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -118,
            mantissa: 0x93f05740_f4758772_bb9992f9_91e72795_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -115,
            mantissa: 0xeddcb810_054c9aab_fa7e4214_d59d18b0_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0xa0180fcd_831ff6c0_ac2b8f02_37f3cfd1_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -108,
            mantissa: 0x97d25106_3b66907e_90b4f786_26daa0bb_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -106,
            mantissa: 0xf595ce38_aac16c11_001b874a_99603b45_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -103,
            mantissa: 0x912b3715_4aca68f6_5821c2ed_43d77111_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -101,
            mantissa: 0x90f97141_b896e2b6_38b87354_8945a43c_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -100,
            mantissa: 0xd3e5f967_89097d6b_3a3060fe_852ff580_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -99,
            mantissa: 0xf0d6de35_939da009_9ced21fd_48af7281_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -98,
            mantissa: 0xd2a0b183_6ac613b2_6745ce1d_8ed1c323_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -98,
            mantissa: 0xbb9c089a_b7e939a2_732b3fb5_2e66cd77_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -98,
            mantissa: 0xcb2107c3_736bef81_609718c0_ba82cd8e_u128,
        },
    ];
    let recip = DyadicFloat128::accurate_reciprocal(x);
    // Horner's scheme in 1/x, from the highest coefficient down to index 0.
    let horner = |coeffs: &[DyadicFloat128; 14]| -> DyadicFloat128 {
        coeffs[..13]
            .iter()
            .rev()
            .fold(coeffs[13], |acc, &c| recip * acc + c)
    };
    let numerator = horner(&P);
    let denominator = horner(&Q);
    let ratio = numerator * denominator.reciprocal();
    let inv_sqrt = bessel_rsqrt_hard(x, recip);
    (ratio * inv_sqrt).fast_as_f64()
}
/**
I0(x)=R(1/x)*Exp(x)/sqrt(x)
Generated in Wolfram:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err,approx, err1}=MiniMaxApproximation[g[z],{z,{1/709.3,1/9.5},11,11},WorkingPrecision->120]
poly=Numerator[approx];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]]
```
**/
#[inline]
fn i0e_asympt(x: f64) -> f64 {
let dx = x;
let recip = DoubleDouble::from_quick_recip(x);
const P: [(u64, u64); 12] = [
(0xbc7ca19c5d824c54, 0x3fd9884533d43651),
(0x3cca32eb734e010e, 0xc03b7ca35caf42eb),
(0x3d03af8238d6f25e, 0x408b92cfcaa7070f),
(0xbd7a8ff7fdebed70, 0xc0d0a3be432cce93),
(0xbdababdb579bb076, 0x410a77dc51f1804d),
(0x3dc5e4e3c972832a, 0xc13cb0be2f74839c),
(0x3e01076f9b102e38, 0x41653b064cc61661),
(0xbe2157e700d445f4, 0xc184e1b076927460),
(0xbdfa4577156dde56, 0x41999e9653f9dc5f),
(0xbe47aa238a739ffe, 0xc1a130f6ded40c00),
(0xbe331b560b6fbf4a, 0x419317f11e674cae),
(0xbe0765596077d1e3, 0xc16024404db59d3f),
];
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcf687703e843d07, 0xc051418f1c4dd0b9),
(0x3d468ab92cb87a0b, 0x40a15891d823e48d),
(0x3d8bfc17e5183376, 0xc0e4fce40dd82796),
(0xbdccbbcc2ecf8d4c, 0x4120beef869c61ec),
(0xbdf42170b4d5d150, 0xc1523ad18834a7ed),
(0xbe0eaa32f405afd4, 0x417b24ec57a8f480),
(0x3e3ec900705e7252, 0xc19af2a91d23d62e),
(0x3e3e220e274fa46b, 0x41b0cb905cc99ff5),
(0xbe46c6c61dee11f6, 0xc1b7452e50518520),
(0x3e3ed0fc063187bf, 0x41ac1772d1749896),
(0xbe11c578f04f4be1, 0xc180feb5b2ca47cb),
];
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
let z = DoubleDouble::div(p_num, p_den);
let r_sqrt = DoubleDouble::from_rsqrt_fast(dx);
let r = z * r_sqrt;
let err = f_fmla(
r.hi,
f64::from_bits(0x3c40000000000000), // 2^-59
f64::from_bits(0x3be0000000000000), // 2^-65
);
let up = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if up != lb {
return i0e_asympt_hard(x);
}
lb
}
/**
Slow, extended-precision path of the exponentially scaled I0 on the asymptotic range.

The rational function R(1/x) approximates Sqrt[x] Exp[-x] I0(x), hence
I0(x)=R(1/x)*Exp(x)/sqrt(x)
This function does not apply the Exp(x) factor: it returns R(1/x) multiplied by the
value of `bessel_rsqrt_hard` (presumably 1/sqrt(x) — helper not shown here).

Generated in Wolfram:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err,approx, err1}=MiniMaxApproximation[g[z],{z,{1/709.3,1/9.5},15,15},WorkingPrecision->120]
poly=Numerator[approx];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn i0e_asympt_hard(x: f64) -> f64 {
// Numerator coefficients, lowest power of 1/x first (P[i] multiplies recip^i in the
// Horner loop below). 16 entries => degree 15, matching the {15,15} fit above.
static P: [DyadicFloat128; 16] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -129,
mantissa: 0xcc42299e_a1b28468_7e5aad4a_70b749c4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -122,
mantissa: 0xabb4209d_ca11bdaa_186bef7f_390e6b77_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -116,
mantissa: 0x8a2725e2_4749db25_625ad1f2_12a2a16c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -111,
mantissa: 0x8b4c2cd4_9e5d0c8b_c9be4d3e_781bb676_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -107,
mantissa: 0xc33fad7c_40599f7d_713e3081_6b5ad791_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -103,
mantissa: 0xc81da271_b623ba88_0be032b5_827d92fa_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -99,
mantissa: 0x99ec4975_b6aa7cae_7692a287_ed8ae47c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -96,
mantissa: 0xb3aa4745_fc2dd441_2dbd3e3c_d4539687_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -93,
mantissa: 0x9f14edc2_6882afca_29d2a067_dc459729_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -91,
mantissa: 0xd35c4d01_78d8cec6_fc8ae0ee_834da837_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -89,
mantissa: 0xcdc529c7_6e082342_faad3073_07a9b61f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -87,
mantissa: 0x8ccac88f_2598c8a6_423b1f42_63591cb9_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -87,
mantissa: 0xfc044cfb_a20f0885_93d58660_17819ed5_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -86,
mantissa: 0x813a700c_74d23f52_f81b179d_7ff0da9f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -88,
mantissa: 0xe6c43da4_297216bf_bdd987cb_636906cf_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -91,
mantissa: 0xa4998323_649c3cf2_64477869_3d2c6afd_u128,
},
];
// Denominator coefficients, same ordering as P.
// NOTE(review): Q[0] (mantissa 2^127, exponent -127) presumably encodes exactly 1.0 — confirm
// against the DyadicFloat128 representation.
static Q: [DyadicFloat128; 16] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -121,
mantissa: 0xd772d5fd_a7077638_6e007274_d83b013c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0xad914ef0_451ced2e_515657ef_fc7eeb53_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -110,
mantissa: 0xaf41180c_dffe96e5_f192fa40_0b1bff1e_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -106,
mantissa: 0xf60dc728_241f71fd_5b93e653_ccbedace_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -102,
mantissa: 0xfcaefef9_39cf96e7_3cb75a98_da5e9761_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -98,
mantissa: 0xc2d2c837_5789587a_13ef38c6_a24c3413_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -95,
mantissa: 0xe41725c3_51d14486_a650044e_e8588f7b_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -92,
mantissa: 0xcabeed9b_5e2e888d_81d32b11_d289a624_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -89,
mantissa: 0x8764876d_11ad6607_8a8d5382_3ffe82d9_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -87,
mantissa: 0x84c9f9e5_6a5f5034_ad2c8512_16cb1ba1_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -86,
mantissa: 0xb7c1d143_a15d8aab_03a7fa3e_b7d07a36_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -85,
mantissa: 0xa78f8257_4658040f_7a1ad39c_91ea9483_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -85,
mantissa: 0xb231e0ca_b729a404_44c38f52_be208507_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -86,
mantissa: 0xae317981_42349081_8bc68b28_f69b8e49_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -89,
mantissa: 0xb451abd3_5cd79c6d_7e578164_32f16da1_u128,
},
];
// The polynomial variable; presumably 1/x in DyadicFloat128 precision (helper not shown here).
let recip = DyadicFloat128::accurate_reciprocal(x);
// Horner evaluation of the numerator, from the highest coefficient down to P[0].
let mut p_num = P[15];
for i in (0..15).rev() {
p_num = recip * p_num + P[i];
}
// Horner evaluation of the denominator, same scheme.
let mut p_den = Q[15];
for i in (0..15).rev() {
p_den = recip * p_den + Q[i];
}
// Rational value P/Q, formed as a multiplication by the reciprocal of the denominator.
let z = p_num * p_den.reciprocal();
// NOTE(review): bessel_rsqrt_hard presumably yields 1/sqrt(x) in extended precision — confirm.
let r_sqrt = bessel_rsqrt_hard(x, recip);
// Scale the rational value and convert the extended-precision product to f64.
(z * r_sqrt).fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks of `f_i0e` against reference values, plus one direct call
    /// of the extended-precision asymptotic path.
    #[test]
    fn test_i0e() {
        // (input, expected exp(-|x|) * I0(x))
        let cases: [(f64, f64); 14] = [
            (0.00000000000000000000000000052342, 1.0),
            (f64::EPSILON, 0.9999999999999998),
            (9.500000000005492, 0.13125126081422883),
            (f64::INFINITY, 0.),
            (f64::NEG_INFINITY, 0.),
            (7.500000000788034, 0.14831583006929877),
            (715., 0.014922205745802662),
            (12., 0.11642622121344044),
            (16., 0.10054412736125203),
            (18.432, 0.09357372647647),
            (26.432, 0.07797212360059241),
            (0.2, 0.8269385516343293),
            (7.5, 0.14831583007739552),
            (-1.5, 0.36743360905415834),
        ];
        for (input, expected) in cases {
            assert_eq!(f_i0e(input), expected);
        }
        assert!(f_i0e(f64::NAN).is_nan());
        assert_eq!(i0e_asympt_hard(18.432), 0.09357372647647);
    }
}

335
vendor/pxfm/src/bessel/i0ef.rs vendored Normal file
View File

@@ -0,0 +1,335 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::polyeval::{
f_estrin_polyeval5, f_estrin_polyeval7, f_estrin_polyeval8, f_polyeval6, f_polyeval10,
};
/// Modified exponentially scaled Bessel of the first kind of order 0
///
/// Computes exp(-|x|)*I0(x)
///
/// The function is even in `x`; every branch below works on `|x|`.
/// Special cases: `±0` returns `1`, `±inf` returns `0`, `NaN` returns `NaN`.
///
/// Max ULP 0.5
pub fn f_i0ef(x: f32) -> f32 {
    // Shifting out the sign bit lets zero, infinity and NaN be detected on |x|.
    let ux = x.to_bits().wrapping_shl(1);
    if ux >= 0xffu32 << 24 || ux == 0 {
        // |x| == 0, |x| == inf, x == NaN
        if ux == 0 {
            // |x| == 0
            return 1.;
        }
        if x.is_infinite() {
            return 0.;
        }
        return x + f32::NAN; // x == NaN
    }

    let xb = x.to_bits() & 0x7fff_ffff;
    let ax = f32::from_bits(xb); // |x|

    if xb > 0x40f00000u32 {
        // |x| > 7.5
        return i0ef_asympt(ax);
    }
    if xb <= 0x34000000u32 {
        // |x| <= f32::EPSILON
        // taylor series for I0(x) * exp(-|x|) ~ 1 - |x| + O(x^2)
        // The magnitude must be used here: subtracting the signed `x` would
        // return 1 + |x| for negative inputs and break the even symmetry.
        return 1. - ax;
    }

    // exp(-|x|); only needed once the tiny-argument shortcut has been ruled out.
    let v_exp = core_expf(-ax);
    if xb < 0x3f800000u32 {
        // |x| < 1
        i0f_small(ax, v_exp)
    } else if xb <= 0x40600000u32 {
        // |x| <= 3.5
        i0ef_1_to_3p5(ax, v_exp)
    } else if xb <= 0x40c00000u32 {
        // |x| <= 6
        i0f_3p5_to_6(ax, v_exp)
    } else {
        // 6 < |x| <= 7.5
        i0f_6_to_7p5(ax, v_exp)
    }
}
/**
Exponentially scaled I0 for small arguments.

With z = (x/2)^2 the unscaled value is evaluated as
I0(x) = 1 + z * P(z)
where P is a degree-6 polynomial, and the result is multiplied by `v_exp`
(`f_i0ef` passes exp(-|x|) here) before rounding to f32.

Used by `f_i0ef` for |x| < 1.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000001,1},6,0},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
pub(crate) fn i0f_small(x: f32, v_exp: f64) -> f32 {
    // Bit patterns of the polynomial coefficients, lowest power first.
    const COEFFS: [u64; 7] = [
        0x3ff000000000013a,
        0x3fcffffffffc20b6,
        0x3f9c71c71e6cd6a2,
        0x3f5c71c65b0af15f,
        0x3f1234796fceb081,
        0x3ec0280faf31678c,
        0x3e664fd494223545,
    ];
    let [c0, c1, c2, c3, c4, c5, c6] = COEFFS.map(f64::from_bits);

    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * 0.25;
    let poly = f_estrin_polyeval7(z, c0, c1, c2, c3, c4, c5, c6);
    (f_fmla(poly, z, 1.) * v_exp) as f32
}
/**
Exponentially scaled I0 on the interval [1;3.5].

With z = (x/2)^2 the unscaled value is evaluated as the rational approximation
I0 = 1 + z * Pn(z)/Qm(z)
and the result is multiplied by `v_exp` (`f_i0ef` passes exp(-|x|) here)
before rounding to f32.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{1,3.5},5,4},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0ef_1_to_3p5(x: f32, v_exp: f64) -> f32 {
    // Bit patterns of the numerator / denominator coefficients, lowest power first.
    const NUM: [u64; 6] = [
        0x3feffffffffffb69,
        0x3fc9ed7bd9dc97a7,
        0x3f915c14693c842e,
        0x3f45c6dc6a719e42,
        0x3eeacb79eba725f7,
        0x3e7b51e2acfc4355,
    ];
    const DEN: [u64; 5] = [
        0x3ff0000000000000,
        0xbfa84a10988f28eb,
        0x3f50f5599197a4be,
        0xbeea420cf9b13b1b,
        0x3e735d0c1eb6ed7d,
    ];
    let [n0, n1, n2, n3, n4, n5] = NUM.map(f64::from_bits);
    let [d0, d1, d2, d3, d4] = DEN.map(f64::from_bits);

    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * 0.25;
    let numerator = f_polyeval6(z, n0, n1, n2, n3, n4, n5);
    let denominator = f_estrin_polyeval5(z, d0, d1, d2, d3, d4);
    (f_fmla(numerator / denominator, z, 1.) * v_exp) as f32
}
// Exponentially scaled I0 on the upper end of the small-argument range;
// `f_i0ef` dispatches here for 6 < |x| <= 7.5.
// With z = (x/2)^2: I0 = 1 + z * Pn(z)/Qm(z), then multiplied by `v_exp`.
// Generation script reproduced as found (note that it states the interval {6,7}):
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=(BesselI[0,x]-1)/(x/2)^2
// g[z_]:=f[2 Sqrt[z]]
// {err, approx}=MiniMaxApproximation[g[z],{z,{6,7},7,6},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
#[inline]
fn i0f_6_to_7p5(x: f32, v_exp: f64) -> f32 {
    // Bit patterns of the numerator / denominator coefficients, lowest power first.
    const NUM: [u64; 8] = [
        0x3fefffffffffff7d,
        0x3fcb373b00569ccf,
        0x3f939069c3363b81,
        0x3f4c2095c90c66b3,
        0x3ef6713f648413db,
        0x3e947efa2f9936b4,
        0x3e2486a182f49420,
        0x3da213034a33de33,
    ];
    const DEN: [u64; 7] = [
        0x3ff0000000000000,
        0xbfa32313fea59d9e,
        0x3f460594c2ec6706,
        0xbedf725fb714690f,
        0x3e6d9cb39b19555c,
        0xbdf1900e3abcb7a6,
        0x3d64a21a2ea78ef6,
    ];
    let [n0, n1, n2, n3, n4, n5, n6, n7] = NUM.map(f64::from_bits);
    let [d0, d1, d2, d3, d4, d5, d6] = DEN.map(f64::from_bits);

    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * 0.25;
    let numerator = f_estrin_polyeval8(z, n0, n1, n2, n3, n4, n5, n6, n7);
    let denominator = f_estrin_polyeval7(z, d0, d1, d2, d3, d4, d5, d6);
    (f_fmla(numerator / denominator, z, 1.) * v_exp) as f32
}
// Exponentially scaled I0 on the interval [3.5;6].
// With z = (x/2)^2: I0 = 1 + z * Pn(z)/Qm(z), then multiplied by `v_exp`
// (`f_i0ef` passes exp(-|x|) here).
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=(BesselI[0,x]-1)/(x/2)^2
// g[z_]:=f[2 Sqrt[z]]
// {err, approx}=MiniMaxApproximation[g[z],{z,{3.5,6},5,5},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
#[inline]
fn i0f_3p5_to_6(x: f32, v_exp: f64) -> f32 {
    // Bit patterns of the numerator / denominator coefficients, lowest power first.
    const NUM: [u64; 6] = [
        0x3feffffffffd9550,
        0x3fc97e18ee033fb4,
        0x3f90b3199079bce1,
        0x3f442c300a425372,
        0x3ee7831030ae18ca,
        0x3e76387d67354932,
    ];
    const DEN: [u64; 6] = [
        0x3ff0000000000000,
        0xbfaa079c484e406a,
        0x3f5452098f1556fb,
        0xbef33efb4a8128ac,
        0x3e865996e19448ca,
        0xbe09acbb64533c3e,
    ];
    let [n0, n1, n2, n3, n4, n5] = NUM.map(f64::from_bits);
    let [d0, d1, d2, d3, d4, d5] = DEN.map(f64::from_bits);

    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * 0.25;
    let numerator = f_polyeval6(z, n0, n1, n2, n3, n4, n5);
    let denominator = f_polyeval6(z, d0, d1, d2, d3, d4, d5);
    (f_fmla(numerator / denominator, z, 1.) * v_exp) as f32
}
/**
Asymptotic expansion for the exponentially scaled I0.

A degree-9/degree-9 rational function in 1/x approximates
sqrt(x) * exp(-x) * I0(x) = Pn(1/x)/Qm(1/x)
hence:
I0(x)exp(-x) = Pn(1/x)/Qm(1/x)/sqrt(x)
The 1/sqrt(x) factor is taken from `j1f_rsqrt` (presumably the reciprocal
square root — helper not shown here).

Used by `f_i0ef` for |x| > 7.5.

Generated by Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{2^-33,1/7.5},9,9},WorkingPrecision->70]
num=Numerator[approx][[1]];
den=Denominator[approx][[1]];
poly=num;
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=den;
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0ef_asympt(x: f32) -> f32 {
    // Bit patterns of the numerator / denominator coefficients, lowest power first.
    const NUM: [u64; 10] = [
        0x3fd9884533d4364f,
        0xc02ed6c9269921a7,
        0x4070ee77ffed64a5,
        0xc0a4ffd558b06889,
        0x40cf2633e2840f6f,
        0xc0ea813a9ba42b84,
        0x40f569bf5d63eb8c,
        0xc0b3138874cdd180,
        0xc0fa3152ed485937,
        0x40ddaccbed454f47,
    ];
    const DEN: [u64; 10] = [
        0x3ff0000000000000,
        0xc0436352c350b88c,
        0x40855eaa17b05edd,
        0xc0baa46f155bd266,
        0x40e3e9fd90a2e695,
        0xc1012dc621dfc1e8,
        0x410cafeea713e8ce,
        0xc0e0a3ee0077d7f7,
        0xc110bcced6a39e9e,
        0x40f9a1e4a91be4d6,
    ];
    let [n0, n1, n2, n3, n4, n5, n6, n7, n8, n9] = NUM.map(f64::from_bits);
    let [d0, d1, d2, d3, d4, d5, d6, d7, d8, d9] = DEN.map(f64::from_bits);

    let xd = f64::from(x);
    let inv_x = 1. / xd;
    let numerator = f_polyeval10(inv_x, n0, n1, n2, n3, n4, n5, n6, n7, n8, n9);
    let denominator = f_polyeval10(inv_x, d0, d1, d2, d3, d4, d5, d6, d7, d8, d9);
    let ratio = numerator / denominator;
    (ratio * j1f_rsqrt(xd)) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks of `f_i0ef`: special values, both signs, and every range.
    #[test]
    fn test_i0f() {
        assert!(f_i0ef(f32::NAN).is_nan());
        // (input, expected exp(-|x|) * I0(x))
        let cases: [(f32, f32); 13] = [
            (f32::NEG_INFINITY, 0.),
            (f32::INFINITY, 0.),
            (1., 0.4657596),
            (5., 0.1835408),
            (16., 0.100544125),
            (32., 0.070804186),
            (92.0, 0.04164947),
            (0., 1.0),
            (28., 0.075736605),
            (-28., 0.075736605),
            (-32., 0.070804186),
            (-92.0, 0.04164947),
            (-0., 1.0),
        ];
        for (input, expected) in cases {
            assert_eq!(f_i0ef(input), expected);
        }
    }
}

354
vendor/pxfm/src/bessel/i0f.rs vendored Normal file
View File

@@ -0,0 +1,354 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::polyeval::{
f_estrin_polyeval5, f_estrin_polyeval7, f_estrin_polyeval8, f_estrin_polyeval9, f_polyeval6,
};
/// Modified Bessel of the first kind of order 0
///
/// The function is even in `x`; the range-specific kernels receive `|x|`.
/// Special cases: `±0` returns `1`, `±inf` returns `+inf`, `NaN` returns `NaN`,
/// and any `|x|` at or above the overflow threshold returns `+inf`.
///
/// Max ULP 0.5
pub fn f_i0f(x: f32) -> f32 {
    // Bit pattern of |x|.
    let xb = x.to_bits() & 0x7fff_ffff;
    if xb == 0 {
        // |x| == 0
        return 1.;
    }
    if xb >= 0x7f80_0000u32 {
        // |x| == inf or x == NaN
        if x.is_infinite() {
            return f32::INFINITY;
        }
        return x + f32::NAN; // x == NaN
    }
    if xb >= 0x42b7cd32u32 {
        // |x| >= 91.90077: the result no longer fits in f32
        return f32::INFINITY;
    }

    let ax = f32::from_bits(xb);
    if xb >= 0x40f00000u32 {
        // |x| >= 7.5
        return i0f_asympt(ax);
    }
    if xb > 0x40c00000u32 {
        // 6 < |x| < 7.5
        return i0f_6_to_7p5(ax);
    }
    if xb > 0x40600000u32 {
        // 3.5 < |x| <= 6
        return i0f_3p5_to_6(ax);
    }
    if xb >= 0x3f800000u32 {
        // 1 <= |x| <= 3.5
        return i0f_1_to_3p5(ax);
    }
    if xb <= 0x34000000u32 {
        // |x| <= f32::EPSILON
        // taylor series for I0(x) ~ 1 + x^2/4 + O(x^4)
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        ))]
        {
            // Targets selected by the cfg above: single-precision fused multiply-add.
            use crate::common::f_fmlaf;
            return f_fmlaf(x, x * 0.25, 1.);
        }
        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        )))]
        {
            // Other targets: evaluate in f64 and round once.
            let xd = x as f64;
            return f_fmla(xd, xd * 0.25, 1.) as f32;
        }
    }
    // f32::EPSILON < |x| < 1
    i0f_small(ax) as f32
}
/**
I0 for small arguments, returned in f64.

With z = (x/2)^2 the value is evaluated as
I0(x) = 1 + z * P(z)
where P is a degree-6 polynomial.

Used by `f_i0f` for |x| < 1.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000001,1},6,0},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
pub(crate) fn i0f_small(x: f32) -> f64 {
    // Bit patterns of the polynomial coefficients, lowest power first.
    const COEFFS: [u64; 7] = [
        0x3ff000000000013a,
        0x3fcffffffffc20b6,
        0x3f9c71c71e6cd6a2,
        0x3f5c71c65b0af15f,
        0x3f1234796fceb081,
        0x3ec0280faf31678c,
        0x3e664fd494223545,
    ];
    let [c0, c1, c2, c3, c4, c5, c6] = COEFFS.map(f64::from_bits);

    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * 0.25;
    let poly = f_estrin_polyeval7(z, c0, c1, c2, c3, c4, c5, c6);
    f_fmla(poly, z, 1.)
}
/**
I0 on the interval [1;3.5].

With z = (x/2)^2 the value is evaluated as the rational approximation
I0 = 1 + z * Pn(z)/Qm(z)

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{1,3.5},5,4},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0f_1_to_3p5(x: f32) -> f32 {
    // Bit patterns of the numerator / denominator coefficients, lowest power first.
    const NUM: [u64; 6] = [
        0x3feffffffffffb69,
        0x3fc9ed7bd9dc97a7,
        0x3f915c14693c842e,
        0x3f45c6dc6a719e42,
        0x3eeacb79eba725f7,
        0x3e7b51e2acfc4355,
    ];
    const DEN: [u64; 5] = [
        0x3ff0000000000000,
        0xbfa84a10988f28eb,
        0x3f50f5599197a4be,
        0xbeea420cf9b13b1b,
        0x3e735d0c1eb6ed7d,
    ];
    let [n0, n1, n2, n3, n4, n5] = NUM.map(f64::from_bits);
    let [d0, d1, d2, d3, d4] = DEN.map(f64::from_bits);

    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * 0.25;
    let numerator = f_polyeval6(z, n0, n1, n2, n3, n4, n5);
    let denominator = f_estrin_polyeval5(z, d0, d1, d2, d3, d4);
    f_fmla(numerator / denominator, z, 1.) as f32
}
// I0 on the upper end of the small-argument range;
// `f_i0f` dispatches here for 6 < |x| < 7.5.
// With z = (x/2)^2: I0 = 1 + z * Pn(z)/Qm(z).
// Generation script reproduced as found (note that it states the interval {6,7}):
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=(BesselI[0,x]-1)/(x/2)^2
// g[z_]:=f[2 Sqrt[z]]
// {err, approx}=MiniMaxApproximation[g[z],{z,{6,7},7,6},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
#[inline]
fn i0f_6_to_7p5(x: f32) -> f32 {
    // Bit patterns of the numerator / denominator coefficients, lowest power first.
    const NUM: [u64; 8] = [
        0x3fefffffffffff7d,
        0x3fcb373b00569ccf,
        0x3f939069c3363b81,
        0x3f4c2095c90c66b3,
        0x3ef6713f648413db,
        0x3e947efa2f9936b4,
        0x3e2486a182f49420,
        0x3da213034a33de33,
    ];
    const DEN: [u64; 7] = [
        0x3ff0000000000000,
        0xbfa32313fea59d9e,
        0x3f460594c2ec6706,
        0xbedf725fb714690f,
        0x3e6d9cb39b19555c,
        0xbdf1900e3abcb7a6,
        0x3d64a21a2ea78ef6,
    ];
    let [n0, n1, n2, n3, n4, n5, n6, n7] = NUM.map(f64::from_bits);
    let [d0, d1, d2, d3, d4, d5, d6] = DEN.map(f64::from_bits);

    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * 0.25;
    let numerator = f_estrin_polyeval8(z, n0, n1, n2, n3, n4, n5, n6, n7);
    let denominator = f_estrin_polyeval7(z, d0, d1, d2, d3, d4, d5, d6);
    f_fmla(numerator / denominator, z, 1.) as f32
}
// I0 on the interval [3.5;6].
// With z = (x/2)^2: I0 = 1 + z * Pn(z)/Qm(z).
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=(BesselI[0,x]-1)/(x/2)^2
// g[z_]:=f[2 Sqrt[z]]
// {err, approx}=MiniMaxApproximation[g[z],{z,{3.5,6},5,5},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
#[inline]
fn i0f_3p5_to_6(x: f32) -> f32 {
    // Bit patterns of the numerator / denominator coefficients, lowest power first.
    const NUM: [u64; 6] = [
        0x3feffffffffd9550,
        0x3fc97e18ee033fb4,
        0x3f90b3199079bce1,
        0x3f442c300a425372,
        0x3ee7831030ae18ca,
        0x3e76387d67354932,
    ];
    const DEN: [u64; 6] = [
        0x3ff0000000000000,
        0xbfaa079c484e406a,
        0x3f5452098f1556fb,
        0xbef33efb4a8128ac,
        0x3e865996e19448ca,
        0xbe09acbb64533c3e,
    ];
    let [n0, n1, n2, n3, n4, n5] = NUM.map(f64::from_bits);
    let [d0, d1, d2, d3, d4, d5] = DEN.map(f64::from_bits);

    let xd = f64::from(x);
    // z = (x/2)^2
    let z = xd * xd * 0.25;
    let numerator = f_polyeval6(z, n0, n1, n2, n3, n4, n5);
    let denominator = f_polyeval6(z, d0, d1, d2, d3, d4, d5);
    f_fmla(numerator / denominator, z, 1.) as f32
}
/**
Asymptotic expansion for I0.

A degree-8/degree-8 rational function in 1/x approximates
sqrt(x) * exp(-x) * I0(x) = Pn(1/x)/Qm(1/x)
hence:
I0(x) = Pn(1/x)/Qm(1/x)*exp(x)/sqrt(x)
The exp(x) factor comes from `core_expf` and the 1/sqrt(x) factor from
`j1f_rsqrt` (presumably the reciprocal square root — helper not shown here).

Used by `f_i0f` for 7.5 <= |x| below the overflow threshold.

Generated by Mathematica (only the numerator listing is shown in the script):
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{1/92.3,1/7.5},8,8},WorkingPrecision->70]
num=Numerator[approx][[1]];
den=Denominator[approx][[1]];
poly=num;
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0f_asympt(x: f32) -> f32 {
    // Bit patterns of the numerator / denominator coefficients, lowest power first.
    const NUM: [u64; 9] = [
        0x3fd9884533d44829,
        0xc02c940f40595581,
        0x406d41c495c2f762,
        0xc0a10ab76dda4520,
        0x40c825b1c2a48d07,
        0xc0e481d606d0b748,
        0x40f34759deefbd40,
        0xc0ef4b7fb49fa116,
        0x40c409a6f882ba00,
    ];
    const DEN: [u64; 9] = [
        0x3ff0000000000000,
        0xc041f8a9131ad229,
        0x408278e56af035bb,
        0xc0b5a34a108f3e35,
        0x40dee6f278ee24f5,
        0xc0fa95093b0c4f9f,
        0x4109982b87f75651,
        0xc10618cc3c91e2db,
        0x40e30895aec6fc4f,
    ];
    let [n0, n1, n2, n3, n4, n5, n6, n7, n8] = NUM.map(f64::from_bits);
    let [d0, d1, d2, d3, d4, d5, d6, d7, d8] = DEN.map(f64::from_bits);

    let xd = f64::from(x);
    let inv_x = 1. / xd;
    let numerator = f_estrin_polyeval9(inv_x, n0, n1, n2, n3, n4, n5, n6, n7, n8);
    let denominator = f_estrin_polyeval9(inv_x, d0, d1, d2, d3, d4, d5, d6, d7, d8);
    let ratio = numerator / denominator;
    let exp_x = core_expf(x);
    let inv_sqrt = j1f_rsqrt(xd);
    // Multiplication order is kept as (ratio * inv_sqrt) * exp_x.
    (ratio * inv_sqrt * exp_x) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks of `f_i0f`: special values, overflow, and both signs.
    #[test]
    fn test_i0f() {
        assert!(f_i0f(f32::NAN).is_nan());
        // (input, expected I0(x))
        let cases: [(f32, f32); 12] = [
            (f32::NEG_INFINITY, f32::INFINITY),
            (f32::INFINITY, f32::INFINITY),
            (1., 1.2660658),
            (5., 27.239872),
            (16., 893446.25),
            (32., 5590908000000.0),
            (92.0, f32::INFINITY),
            (0., 1.0),
            (28., 109534600000.0),
            (-28., 109534600000.0),
            (-16., 893446.25),
            (-32., 5590908000000.0),
        ];
        for (input, expected) in cases {
            assert_eq!(f_i0f(input), expected);
        }
    }
}

656
vendor/pxfm/src/bessel/i1.rs vendored Normal file
View File

@@ -0,0 +1,656 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::exponents::rational128_exp;
use crate::polyeval::{f_estrin_polyeval5, f_polyeval6};
/// Modified Bessel of the first kind of order 1
///
/// Max found ULP 0.5
///
/// Dispatch by magnitude of `x`:
/// * `|x| <= 2.2204460492503131e-24`: returns `x / 2`;
/// * `|x| <= f64::EPSILON`: returns `x/2 + x^3/16`;
/// * `|x| < 7.75`: rational approximation (`i1_0_to_7p75`);
/// * `|x| >= 713.9876098185423` or infinite: signed infinity;
/// * otherwise: asymptotic expansion (`i1_asympt`);
/// * NaN: NaN.
pub fn f_i1(x: f64) -> f64 {
    // Shifting the sign bit out lets |x| be classified by integer compares.
    let shifted_bits = x.to_bits().wrapping_shl(1);

    if shifted_bits <= 0x7960000000000000u64 {
        // |x| <= f64::EPSILON
        if shifted_bits <= 0x760af31dc4611874u64 {
            // Power series of I1(x) ~ x/2 + O(x^3)
            // |x| <= 2.2204460492503131e-24
            return x * 0.5;
        }
        // Power series of I1(x) ~ x/2 + x^3/16 + O(x^4)
        const A0: f64 = 1. / 2.;
        const A1: f64 = 1. / 16.;
        let series = f_fmla(x, x * A1, A0);
        return series * x;
    }

    if shifted_bits >= 0x7ffu64 << 53 {
        // |x| == inf or x == NaN
        if x.is_infinite() {
            // I1(+inf) = +inf, I1(-inf) = -inf, i.e. the input itself.
            return x;
        }
        return x + f64::NAN; // x == NaN
    }

    let abs_bits = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    if abs_bits >= 0x40864fe69ff9fec8u64 {
        // |x| >= 713.9876098185423: the result is returned as signed infinity.
        return f64::copysign(f64::INFINITY, x);
    }

    // The kernels work on |x|; the sign is re-applied afterwards.
    let sign_scale: f64 = if x.is_sign_negative() { -1. } else { 1. };
    let abs_x = f64::from_bits(abs_bits);

    if abs_bits < 0x401f000000000000u64 {
        // |x| < 7.75
        return f64::copysign(i1_0_to_7p75(abs_x).to_f64(), sign_scale);
    }
    i1_asympt(abs_x, sign_scale)
}
/**
Computes I1(x) on the small-argument interval as

I1(x) = x/2 * (1 + 1/2 * (x/2)^2 + (x/2)^4 * R((x/2)^2))

where R is a rational function. The lowest-order numerator and denominator
coefficients are evaluated in double-double, the remaining ones in plain f64.
If the rounding test at the end is inconclusive, `i1_0_to_7p5_hard` is used.

Polynomial generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
g[z_]:=f[2 Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.000000001,7.5},9,9},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
pub(crate) fn i1_0_to_7p75(x: f64) -> DoubleDouble {
    // Numerator coefficients 0..=4 as bit pairs for `DoubleDouble::from_bit_pair`.
    const P: [(u64, u64); 5] = [
        (0x3c55555555555555, 0x3fb5555555555555),
        (0x3c1253e1df138479, 0x3f7304597c4fbd4c),
        (0x3bcec398b7059ee9, 0x3f287b5b01f6b9c0),
        (0xbb7354e7c92c4f77, 0x3ed21de117470d10),
        (0xbb1d35ac0d7923cc, 0x3e717f3714dddc04),
    ];
    // Denominator coefficients 0..=3; Q[0] (= 1.0) is applied as a plain f64 below.
    const Q: [(u64, u64); 4] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0xbc3e59afb81ac7ea, 0xbf9c4848e0661d70),
        (0x3bd62fa3dbc1a19c, 0x3f38a9eafcd7e674),
        (0x3b6f4688b9eab7d0, 0xbecbfdec51454533),
    ];

    let half_x = x * 0.5; // this is exact
    // z = (x/2)^2 as a double-double.
    let z = DoubleDouble::from_exact_mult(half_x, half_x);

    // Numerator: high-order tail in f64, then Horner steps in double-double.
    let num_tail = f_estrin_polyeval5(
        z.hi,
        f64::from_bits(0x3e063684ca1944a4),
        f64::from_bits(0x3d92ac4a0e48a9bb),
        f64::from_bits(0x3d1425988b0b0aea),
        f64::from_bits(0x3c899839e74ddefc),
        f64::from_bits(0x3bed8747bcdd1e61),
    );
    let mut num = DoubleDouble::mul_f64_add(z, num_tail, DoubleDouble::from_bit_pair(P[4]));
    for i in (0..4).rev() {
        num = DoubleDouble::mul_add(z, num, DoubleDouble::from_bit_pair(P[i]));
    }

    // Denominator: same scheme, finishing with the constant term 1.0.
    let den_tail = f_polyeval6(
        z.hi,
        f64::from_bits(0x3e56e7cde9266a32),
        f64::from_bits(0xbddc316dff4a672f),
        f64::from_bits(0x3d5a43aaee30ebb5),
        f64::from_bits(0xbcd1fb023f4f1fa0),
        f64::from_bits(0x3c4089ede324209f),
        f64::from_bits(0xbb9f64f47ba69604),
    );
    let mut den = DoubleDouble::mul_f64_add(z, den_tail, DoubleDouble::from_bit_pair(Q[3]));
    for i in (1..3).rev() {
        den = DoubleDouble::mul_add(z, den, DoubleDouble::from_bit_pair(Q[i]));
    }
    den = DoubleDouble::mul_add_f64(z, den, f64::from_bits(0x3ff0000000000000));

    let ratio = DoubleDouble::div(num, den);

    // series = 1 + z/2 + z^2 * R(z)
    let z_squared = DoubleDouble::quick_mult(z, z);
    let leading = DoubleDouble::mul_f64_add_f64(z, 0.5, 1.);
    let series = DoubleDouble::mul_add(ratio, z_squared, leading);

    // Final scaling by x/2.
    let half_x_dd = DoubleDouble::from_exact_mult(x, 0.5);
    let r = DoubleDouble::quick_mult(series, half_x_dd);

    // Rounding test: accept `r` only if perturbing the low part by the error
    // bound in either direction rounds to the same f64.
    let err = f_fmla(
        r.hi,
        f64::from_bits(0x3c40000000000000), // 2^-59
        f64::from_bits(0x3be0000000000000), // 2^-65
    );
    let upper = r.hi + (r.lo + err);
    let lower = r.hi + (r.lo - err);
    if upper != lower {
        return i1_0_to_7p5_hard(x);
    }
    r
}
// Polynomial generated by Wolfram Mathematica:
// I1(x) = x/2 * (1 + 1 * (x/2)^2 + (x/2)^4 * P((x/2)^2))
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
// g[z_]:=f[2 Sqrt[z]]
// {err,approx}=MiniMaxApproximation[g[z],{z,{0.000000001,7.5},9,9},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
#[cold]
#[inline(never)]
pub(crate) fn i1_0_to_7p5_hard(x: f64) -> DoubleDouble {
const ONE_OVER_4: f64 = 1. / 4.;
let eval_x = DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_mult(x, x), ONE_OVER_4);
const P: [(u64, u64); 10] = [
(0x3c55555555555555, 0x3fb5555555555555),
(0x3c1253e1df138479, 0x3f7304597c4fbd4c),
(0x3bcec398b7059ee9, 0x3f287b5b01f6b9c0),
(0xbb7354e7c92c4f77, 0x3ed21de117470d10),
(0xbb1d35ac0d7923cc, 0x3e717f3714dddc04),
(0xba928dee24678e32, 0x3e063684ca1944a4),
(0xba36aa59912fcbed, 0x3d92ac4a0e48a9bb),
(0x39bad76f18b5ad37, 0x3d1425988b0b0aea),
(0xb923a6bab6928df4, 0x3c899839e74ddefc),
(0x3864356cdfa7b321, 0x3bed8747bcdd1e61),
];
let mut p_num = DoubleDouble::mul_add(
eval_x,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[7]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[6]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[5]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[4]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[3]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[2]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[1]));
p_num = DoubleDouble::mul_add(eval_x, p_num, DoubleDouble::from_bit_pair(P[0]));
const Q: [(u64, u64); 10] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc3e59afb81ac7ea, 0xbf9c4848e0661d70),
(0x3bd62fa3dbc1a19c, 0x3f38a9eafcd7e674),
(0x3b6f4688b9eab7d0, 0xbecbfdec51454533),
(0x3af0fb4a17103ef8, 0x3e56e7cde9266a32),
(0xba71755779c6d4bd, 0xbddc316dff4a672f),
(0x39cf8ed8d449e2c6, 0x3d5a43aaee30ebb5),
(0x39704e900a373874, 0xbcd1fb023f4f1fa0),
(0xb8e33e87e4bffbb1, 0x3c4089ede324209f),
(0x380fb09b3fd49d5c, 0xbb9f64f47ba69604),
];
let mut p_den = DoubleDouble::mul_add(
eval_x,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[7]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[6]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[5]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[4]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[3]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[2]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[1]));
p_den = DoubleDouble::mul_add(eval_x, p_den, DoubleDouble::from_bit_pair(Q[0]));
let p = DoubleDouble::div(p_num, p_den);
let eval_sqr = DoubleDouble::quick_mult(eval_x, eval_x);
let mut z = DoubleDouble::mul_f64_add_f64(eval_x, 0.5, 1.);
z = DoubleDouble::mul_add(p, eval_sqr, z);
let x_over_05 = DoubleDouble::from_exact_mult(x, 0.5);
DoubleDouble::quick_mult(z, x_over_05)
}
/**
Asymptotic expansion for I1.
Computes:
sqrt(x) * exp(-x) * I1(x) = Pn(1/x)/Qn(1/x)
hence:
I1(x) = Pn(1/x)/Qm(1/x)*exp(x)/sqrt(x)
Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/713.98,1/7.75},11,11},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1_asympt(x: f64, sign_scale: f64) -> f64 {
let dx = x;
let recip = DoubleDouble::from_quick_recip(x);
const P: [(u64, u64); 12] = [
(0xbc73a823f28a2f5e, 0x3fd9884533d43651),
(0x3cc0d5bb78e674b3, 0xc0354325c8029263),
(0x3d20e1155aaaa283, 0x4080c09b027c46a4),
(0xbd5b90dcf81b99c1, 0xc0bfc1311090c839),
(0xbd98f2fda9e8fa1b, 0x40f3bb81bb190ae2),
(0xbdcec960752b60da, 0xc1207c0bbbc31cd9),
(0x3dd3c9a299c9c41f, 0x414253e25c4584af),
(0xbde82e7b9a3e1acc, 0xc159a656aece377c),
(0x3e0d3d30d701a8ab, 0x416398df24c74ef2),
(0xbdf57b85ab7006e2, 0xc151fd119be1702b),
(0x3dd760928f4515fd, 0xc1508327e42639bc),
(0x3dc09e71bc648589, 0x4143e4933afa621c),
];
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcb334d5a476d9ad, 0xc04a75f94c1a0c1a),
(0xbd324d58ed98bfae, 0x4094b00e60301c42),
(0x3d7c8725666c4360, 0xc0d36b9d28d45928),
(0x3d7f8457c2945822, 0x4107d6c398a174ed),
(0x3dbc655ea216594b, 0xc1339393e6776e38),
(0xbdebb5dffef78272, 0x415537198d23f6a1),
(0xbdb577f8abad883e, 0xc16c6c399dcd6949),
(0x3e14261c5362f109, 0x4173c02446576949),
(0x3dc382ededad42c5, 0xc1547dff5770f4ec),
(0xbe05c0f74d4c7956, 0xc165c88046952562),
(0xbdbf9145927aa2c7, 0x414395e46bc45d5b),
];
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
let z = DoubleDouble::div(p_num, p_den);
let e = i0_exp(dx * 0.5);
let r_sqrt = DoubleDouble::from_rsqrt_fast(dx);
let r = DoubleDouble::quick_mult(z * r_sqrt * e, e);
let err = f_fmla(
r.hi,
f64::from_bits(0x3c40000000000000), // 2^-59
f64::from_bits(0x3ba0000000000000), // 2^-69
);
let up = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if up == lb {
return f64::copysign(r.to_f64(), sign_scale);
}
i1_asympt_hard(x, sign_scale)
}
/**
Asymptotic expansion for I1 (slow path in 128-bit dyadic floats).

Approximates
sqrt(x) * exp(-x) * I1(x) = P(1/x)/Q(1/x)
with sixteen coefficients in both P and Q, hence:
I1(x) = P(1/x)/Q(1/x)*exp(x)/sqrt(x)

`x` is the magnitude of the argument; the result is multiplied by `sign_scale`.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/713.98,1/7.75},15,15},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn i1_asympt_hard(x: f64, sign_scale: f64) -> f64 {
    // Numerator coefficients, index = power of 1/x.
    static P: [DyadicFloat128; 16] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -129,
            mantissa: 0xcc42299e_a1b28468_bea7da47_28f13acc_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -124,
            mantissa: 0xda979406_3df6e66f_cf31c3f5_f194b48c_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -120,
            mantissa: 0xd60b7b96_c958929b_cabe1d8c_3d874767_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0xd27aad9a_8fb38d56_46ab4510_8479306e_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -108,
            mantissa: 0xe0167305_c451bd1f_d2f17b68_5c62e2ff_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -103,
            mantissa: 0x8f6d238f_c80d8e4a_08c130f6_24e1c925_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -100,
            mantissa: 0xfe32280f_2ea99024_d9924472_92d7ac8f_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -96,
            mantissa: 0xa48815ac_d265609f_da4ace94_811390b2_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -93,
            mantissa: 0x9ededfe5_833b4cc1_731efd5c_f8729c6c_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -91,
            mantissa: 0xe5b43203_2784ae6a_f7458556_0a8308ea_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -89,
            mantissa: 0xf5df279a_3fb4ef60_8d10adee_7dd2f47b_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -87,
            mantissa: 0xbdb59963_7a757ed1_87280e0e_7f93ca2b_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -86,
            mantissa: 0xc87fdea5_53177ca8_c91de5fb_3f8f78d3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -85,
            mantissa: 0x846d16a7_4663ef5d_ad42d599_5bc726b8_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -86,
            mantissa: 0xb3ed2055_74262d95_389f33e4_2ac3774a_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -88,
            mantissa: 0xa511aa32_c18c34e4_3d029a90_a71b7a55_u128,
        },
    ];
    // Denominator coefficients, index = power of 1/x.
    static Q: [DyadicFloat128; 16] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -122,
            mantissa: 0x877b771a_ad8f5fd3_5aacf5f9_f04ee9de_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -118,
            mantissa: 0x89475ecd_9c84361e_800c8a3a_c8af23bf_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0x837d1196_cf2723f1_23b54da8_225efe05_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -106,
            mantissa: 0x8ae3aecb_15355751_a9ee12e5_a4dd9dde_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -102,
            mantissa: 0xb0886afa_bc13f996_ab45d252_75c8f587_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -98,
            mantissa: 0x9b37d7cd_b114b86b_7d14a389_26599aa1_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -95,
            mantissa: 0xc716bf54_09d5dd9f_bc16679b_93aaeca4_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -92,
            mantissa: 0xbe0cd82e_c8af8371_ab028ed9_c7902dd2_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -89,
            mantissa: 0x875f8d91_8ef5d434_a39d00f9_2aed3d2a_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -87,
            mantissa: 0x8e030781_5aa4ce7f_70156b82_8b216b7c_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -86,
            mantissa: 0xd4dd2687_92646fbd_5ea2d422_da64fc0b_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -85,
            mantissa: 0xd6d72ab3_64b4a827_0499af0f_13a51a80_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -84,
            mantissa: 0x828f4e8b_728747a9_2cebe54a_810e2681_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -85,
            mantissa: 0x91570096_36a3fcfb_6b936d44_68dda1be_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -89,
            mantissa: 0xf082ad00_86024ed4_dd31613b_ec41e3f8_u128,
        },
    ];

    let recip = DyadicFloat128::accurate_reciprocal(x);

    // Horner evaluation in 1/x, starting from the highest-order coefficient.
    let horner = |coeffs: &[DyadicFloat128; 16]| {
        coeffs[..15]
            .iter()
            .rev()
            .fold(coeffs[15], |acc, &c| recip * acc + c)
    };
    let numerator = horner(&P);
    let denominator = horner(&Q);

    let ratio = numerator * denominator.reciprocal();
    let inv_sqrt = bessel_rsqrt_hard(x, recip);
    let exp_x = rational128_exp(x);
    (ratio * inv_sqrt * exp_x).fast_as_f64() * sign_scale
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks of `f_i1` against fixed reference values.
    #[test]
    fn test_fi1() {
        // Non-finite inputs: NaN propagates, infinities keep their sign.
        assert!(f_i1(f64::NAN).is_nan());
        assert_eq!(f_i1(f64::INFINITY), f64::INFINITY);
        assert_eq!(f_i1(f64::NEG_INFINITY), f64::NEG_INFINITY);

        // (input, expected) pairs covering the tiny, small and asymptotic ranges.
        const CASES: [(f64, f64); 9] = [
            (
                0.0000000000000000000000000000000006423424234121,
                3.2117121170605e-34,
            ),
            (7.750000000757874, 315.8524811496668),
            (7.482812501363189, 245.58002285881892),
            (-7.750000000757874, -315.8524811496668),
            (-7.482812501363189, -245.58002285881892),
            (0.01, 0.005000062500260418),
            (-0.01, -0.005000062500260418),
            (-9.01, -1040.752038018038),
            (9.01, 1040.752038018038),
        ];
        for (input, expected) in CASES {
            assert_eq!(f_i1(input), expected);
        }
    }
}

460
vendor/pxfm/src/bessel/i1e.rs vendored Normal file
View File

@@ -0,0 +1,460 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::bessel::i1::i1_0_to_7p75;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
/// Modified exponentially scaled Bessel of the first kind of order 1
///
/// Computes exp(-|x|)*I1(x)
///
/// The function is odd: `f_i1e(-x) == -f_i1e(x)`.
///
/// Dispatch by magnitude of `x`:
/// * `|x| <= 2.2204460492503131e-24`: returns `x / 2`;
/// * `|x| <= f64::EPSILON`: returns `x/2 - x*|x|/2`;
/// * `|x| < 7.75`: `i1_0_to_7p75(|x|) * exp(-|x|)` with the sign of `x`;
/// * larger finite `|x|`: asymptotic expansion (`i1e_asympt`);
/// * infinite `x`: `0`;
/// * NaN: NaN.
pub fn f_i1e(x: f64) -> f64 {
    // Bit pattern of |x|; also used below to rebuild |x| as a float.
    let xb = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    // Shifting the sign bit out lets |x| be classified by integer compares.
    let ux = x.to_bits().wrapping_shl(1);
    if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 {
        // |x| <= f64::EPSILON, |x| == inf, x == NaN
        if ux <= 0x760af31dc4611874u64 {
            // |x| <= 2.2204460492503131e-24
            return x * 0.5;
        }
        if ux <= 0x7960000000000000u64 {
            // |x| <= f64::EPSILON
            // Power series of I1(x)*exp(-|x|) ~ x/2 - x*|x|/2 + O(x^3).
            // The correction term must use |x|: with `-x` in its place the
            // second-order term gets the wrong sign for negative inputs and
            // the result is no longer odd in x.
            return f_fmla(x, -f64::from_bits(xb) * 0.5, x * 0.5);
        }
        if x.is_infinite() {
            return 0.;
        }
        return x + f64::NAN; // x == NaN
    }

    // The kernels work on |x|; the sign is re-applied afterwards.
    static SIGN: [f64; 2] = [1., -1.];
    let sign_scale = SIGN[x.is_sign_negative() as usize];

    if xb < 0x401f000000000000u64 {
        // |x| < 7.75
        let v_exp = i0_exp(-f64::from_bits(xb));
        let vi1 = i1_0_to_7p75(f64::from_bits(xb));
        let r = DoubleDouble::quick_mult(vi1, v_exp);
        return f64::copysign(r.to_f64(), sign_scale);
    }

    i1e_asympt(f64::from_bits(xb), sign_scale)
}
/**
Asymptotic expansion for I1.
Computes:
sqrt(x) * exp(-x) * I1(x) = Pn(1/x)/Qn(1/x)
hence:
I1(x)exp(-|x|) = Pn(1/x)/Qm(1/x)/sqrt(x)
Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/713.98,1/7.75},11,11},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1e_asympt(x: f64, sign_scale: f64) -> f64 {
let dx = x;
let recip = DoubleDouble::from_quick_recip(x);
const P: [(u64, u64); 12] = [
(0xbc73a823f28a2f5e, 0x3fd9884533d43651),
(0x3cc0d5bb78e674b3, 0xc0354325c8029263),
(0x3d20e1155aaaa283, 0x4080c09b027c46a4),
(0xbd5b90dcf81b99c1, 0xc0bfc1311090c839),
(0xbd98f2fda9e8fa1b, 0x40f3bb81bb190ae2),
(0xbdcec960752b60da, 0xc1207c0bbbc31cd9),
(0x3dd3c9a299c9c41f, 0x414253e25c4584af),
(0xbde82e7b9a3e1acc, 0xc159a656aece377c),
(0x3e0d3d30d701a8ab, 0x416398df24c74ef2),
(0xbdf57b85ab7006e2, 0xc151fd119be1702b),
(0x3dd760928f4515fd, 0xc1508327e42639bc),
(0x3dc09e71bc648589, 0x4143e4933afa621c),
];
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcb334d5a476d9ad, 0xc04a75f94c1a0c1a),
(0xbd324d58ed98bfae, 0x4094b00e60301c42),
(0x3d7c8725666c4360, 0xc0d36b9d28d45928),
(0x3d7f8457c2945822, 0x4107d6c398a174ed),
(0x3dbc655ea216594b, 0xc1339393e6776e38),
(0xbdebb5dffef78272, 0x415537198d23f6a1),
(0xbdb577f8abad883e, 0xc16c6c399dcd6949),
(0x3e14261c5362f109, 0x4173c02446576949),
(0x3dc382ededad42c5, 0xc1547dff5770f4ec),
(0xbe05c0f74d4c7956, 0xc165c88046952562),
(0xbdbf9145927aa2c7, 0x414395e46bc45d5b),
];
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
let z = DoubleDouble::div(p_num, p_den);
let r_sqrt = DoubleDouble::from_rsqrt_fast(dx);
let r = z * r_sqrt;
let err = f_fmla(
r.hi,
f64::from_bits(0x3c40000000000000), // 2^-59
f64::from_bits(0x3ba0000000000000), // 2^-69
);
let up = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if up == lb {
return f64::copysign(r.to_f64(), sign_scale);
}
i1e_asympt_hard(x, sign_scale)
}
/**
Asymptotic expansion for exponentially scaled I1 (slow path in 128-bit dyadic floats).

Approximates
sqrt(x) * exp(-x) * I1(x) = P(1/x)/Q(1/x)
with sixteen coefficients in both P and Q, hence:
I1(x)exp(-|x|) = P(1/x)/Q(1/x)/sqrt(x)

`x` is the magnitude of the argument; the result is multiplied by `sign_scale`.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/713.98,1/7.75},15,15},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn i1e_asympt_hard(x: f64, sign_scale: f64) -> f64 {
    // Numerator coefficients, index = power of 1/x.
    static P: [DyadicFloat128; 16] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -129,
            mantissa: 0xcc42299e_a1b28468_bea7da47_28f13acc_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -124,
            mantissa: 0xda979406_3df6e66f_cf31c3f5_f194b48c_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -120,
            mantissa: 0xd60b7b96_c958929b_cabe1d8c_3d874767_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0xd27aad9a_8fb38d56_46ab4510_8479306e_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -108,
            mantissa: 0xe0167305_c451bd1f_d2f17b68_5c62e2ff_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -103,
            mantissa: 0x8f6d238f_c80d8e4a_08c130f6_24e1c925_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -100,
            mantissa: 0xfe32280f_2ea99024_d9924472_92d7ac8f_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -96,
            mantissa: 0xa48815ac_d265609f_da4ace94_811390b2_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -93,
            mantissa: 0x9ededfe5_833b4cc1_731efd5c_f8729c6c_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -91,
            mantissa: 0xe5b43203_2784ae6a_f7458556_0a8308ea_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -89,
            mantissa: 0xf5df279a_3fb4ef60_8d10adee_7dd2f47b_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -87,
            mantissa: 0xbdb59963_7a757ed1_87280e0e_7f93ca2b_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -86,
            mantissa: 0xc87fdea5_53177ca8_c91de5fb_3f8f78d3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -85,
            mantissa: 0x846d16a7_4663ef5d_ad42d599_5bc726b8_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -86,
            mantissa: 0xb3ed2055_74262d95_389f33e4_2ac3774a_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -88,
            mantissa: 0xa511aa32_c18c34e4_3d029a90_a71b7a55_u128,
        },
    ];
    // Denominator coefficients, index = power of 1/x.
    static Q: [DyadicFloat128; 16] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -122,
            mantissa: 0x877b771a_ad8f5fd3_5aacf5f9_f04ee9de_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -118,
            mantissa: 0x89475ecd_9c84361e_800c8a3a_c8af23bf_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0x837d1196_cf2723f1_23b54da8_225efe05_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -106,
            mantissa: 0x8ae3aecb_15355751_a9ee12e5_a4dd9dde_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -102,
            mantissa: 0xb0886afa_bc13f996_ab45d252_75c8f587_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -98,
            mantissa: 0x9b37d7cd_b114b86b_7d14a389_26599aa1_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -95,
            mantissa: 0xc716bf54_09d5dd9f_bc16679b_93aaeca4_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -92,
            mantissa: 0xbe0cd82e_c8af8371_ab028ed9_c7902dd2_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -89,
            mantissa: 0x875f8d91_8ef5d434_a39d00f9_2aed3d2a_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -87,
            mantissa: 0x8e030781_5aa4ce7f_70156b82_8b216b7c_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -86,
            mantissa: 0xd4dd2687_92646fbd_5ea2d422_da64fc0b_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -85,
            mantissa: 0xd6d72ab3_64b4a827_0499af0f_13a51a80_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -84,
            mantissa: 0x828f4e8b_728747a9_2cebe54a_810e2681_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -85,
            mantissa: 0x91570096_36a3fcfb_6b936d44_68dda1be_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -89,
            mantissa: 0xf082ad00_86024ed4_dd31613b_ec41e3f8_u128,
        },
    ];

    let recip = DyadicFloat128::accurate_reciprocal(x);

    // Horner evaluation in 1/x, starting from the highest-order coefficient.
    let horner = |coeffs: &[DyadicFloat128; 16]| {
        coeffs[..15]
            .iter()
            .rev()
            .fold(coeffs[15], |acc, &c| recip * acc + c)
    };
    let numerator = horner(&P);
    let denominator = horner(&Q);

    let ratio = numerator * denominator.reciprocal();
    let inv_sqrt = bessel_rsqrt_hard(x, recip);
    (ratio * inv_sqrt).fast_as_f64() * sign_scale
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks of `f_i1e` (and the slow asymptotic path) against fixed reference values.
    #[test]
    fn test_fi1e() {
        // NaN propagates.
        assert!(f_i1e(f64::NAN).is_nan());

        // (input, expected) pairs covering the tiny, small and asymptotic ranges
        // as well as both infinities.
        const CASES: [(f64, f64); 12] = [
            (f64::EPSILON, 1.1102230246251563e-16),
            (7.750000000757874, 0.13605110007443239),
            (7.482812501363189, 0.13818116726273896),
            (-7.750000000757874, -0.13605110007443239),
            (-7.482812501363189, -0.13818116726273896),
            (f64::INFINITY, 0.),
            (f64::NEG_INFINITY, 0.),
            (0.01, 0.004950311047118276),
            (-0.01, -0.004950311047118276),
            (-9.01, -0.12716101566063667),
            (9.01, 0.12716101566063667),
            (763., 0.014435579051182581),
        ];
        for (input, expected) in CASES {
            assert_eq!(f_i1e(input), expected);
        }

        // The slow path must agree with the public entry point.
        assert_eq!(i1e_asympt_hard(9.01, 1.), 0.12716101566063667);
    }
}

224
vendor/pxfm/src/bessel/i1ef.rs vendored Normal file
View File

@@ -0,0 +1,224 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::polyeval::{f_estrin_polyeval7, f_estrin_polyeval9, f_polyeval10};
/// Modified exponentially scaled Bessel of the first kind of order 1
///
/// Computes exp(-|x|)*I1(x)
///
/// The function is odd in `x`: the magnitude is computed from `|x|` and the sign of `x`
/// is applied to the result.
///
/// Special values handled here: `±0` returns `0`, `+inf` returns `0`, `-inf` returns `-0`
/// and NaN propagates.
///
/// Max ULP 0.5
pub fn f_i1ef(x: f32) -> f32 {
    // Shifting the sign bit out lets one comparison catch zero, infinity and NaN.
    let ux = x.to_bits().wrapping_shl(1);
    if ux >= 0xffu32 << 24 || ux == 0 {
        // |x| == 0, |x| == inf, x == NaN
        if ux == 0 {
            // |x| == 0
            return 0.;
        }
        if x.is_infinite() {
            return if x.is_sign_positive() { 0. } else { -0. };
        }
        return x + f32::NAN; // x == NaN
    }

    let xb = x.to_bits() & 0x7fff_ffff;

    if xb <= 0x34000000u32 {
        // |x| <= f32::EPSILON
        // Series: exp(-|x|) * I1(x) ~ x/2 * (1 - |x|) + O(x^3).
        // The second-order term must use |x|, not x: for negative x it is
        // x/2 + x^2/2. Using x there breaks the odd symmetry and costs several ULP
        // close to -f32::EPSILON.
        let abs_x = f32::from_bits(xb);
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        ))]
        {
            use crate::common::f_fmlaf;
            let half_x = 0.5 * x;
            // half_x - |x| * half_x, fused in f32.
            return f_fmlaf(-abs_x, half_x, half_x);
        }
        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        )))]
        {
            // Without hardware FMA evaluate in f64 and round once at the end.
            let half_x = 0.5 * (x as f64);
            return f_fmla(-(abs_x as f64), half_x, half_x) as f32;
        }
    }

    static SIGN: [f64; 2] = [1., -1.];
    let sign_scale = SIGN[x.is_sign_negative() as usize];

    if xb <= 0x40f80000u32 {
        // |x| <= 7.75
        // exp(-|x|) is only needed on this path, so it is computed here rather than
        // before the tiny-argument shortcut above.
        let exp_neg_abs = core_expf(-f32::from_bits(xb));
        return i1ef_small(f32::from_bits(xb), sign_scale, exp_neg_abs) as f32;
    }

    // |x| > 7.75: asymptotic expansion, which already contains the exp(-|x|) scaling.
    i1ef_asympt(f32::from_bits(xb), sign_scale)
}
/**
Small-argument kernel of the exponentially scaled I1; `f_i1ef` calls it with `|x|`
for `f32::EPSILON < |x| <= 7.75`.

Evaluates
I1(x) = x/2 * (1 + 1/2 * (x/2)^2 + (x/2)^4 * P((x/2)^2))
with P a rational (6,6) approximation, then multiplies by `sign_scale` and by the
caller-supplied `core_expf` factor.

NOTE(review): the script below fits z = (x/2)^2 on [1e-9, 7.75], while the caller passes
|x| up to 7.75 (z up to about 15) - confirm the coefficients cover the whole range.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,7.75},6,6},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1ef_small(x: f32, sign_scale: f64, core_expf: f64) -> f64 {
    // h = x/2, z = h^2, z_sq = h^4
    let h = (x as f64) * 0.5;
    let z = h * h;
    let z_sq = z * z;

    // Numerator of P(z).
    let numerator = f_estrin_polyeval7(
        z,
        f64::from_bits(0x3fb5555555555555),
        f64::from_bits(0x3f706cdccca396c4),
        f64::from_bits(0x3f23f9e12bdbba92),
        f64::from_bits(0x3ec8e39208e926b2),
        f64::from_bits(0x3e62e53b433c42ff),
        f64::from_bits(0x3def7cb16d10fb46),
        f64::from_bits(0x3d6747cd73d9d783),
    );
    // Denominator of P(z).
    let denominator = f_estrin_polyeval7(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfa2075f77b54885),
        f64::from_bits(0x3f438c6d797c29f5),
        f64::from_bits(0xbeda57e2a258c6da),
        f64::from_bits(0x3e677e777c569432),
        f64::from_bits(0xbdea9212a96babc1),
        f64::from_bits(0x3d5e183186d5d782),
    );
    let ratio = numerator / denominator;

    // 1 + z/2, then add z^2 * P(z).
    let leading = f_fmla(0.5, z, 1.);
    let bracket = f_fmla(z_sq, ratio, leading);

    let unsigned = bracket * h;
    let signed = unsigned * sign_scale;
    signed * core_expf
}
/**
Large-argument kernel of the exponentially scaled I1; `f_i1ef` calls it with `|x|`
for `|x| > 7.75`.

The rational fit approximates
sqrt(x) * exp(-x) * I1(x) = Pn(1/x)/Qm(1/x)
so that
I1(x)*exp(-x) = Pn(1/x)/Qm(1/x)/sqrt(x)
The numerator has 10 coefficients and the denominator 9; the result is multiplied by
`j1f_rsqrt(x)` (the 1/sqrt(x) factor of the formula) and by `sign_scale`.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{2^-33,1/7.75},9,8},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1ef_asympt(x: f32, sign_scale: f64) -> f32 {
    let wide = x as f64;
    // The polynomials are in the variable 1/x.
    let inv = 1. / wide;

    let numerator = f_polyeval10(
        inv,
        f64::from_bits(0x3fd9884533d43652),
        f64::from_bits(0xc030686a3694d13c),
        f64::from_bits(0x407344697f45c2ee),
        f64::from_bits(0xc0aa037ee36a8967),
        f64::from_bits(0x40d5b2eab8cf5b17),
        f64::from_bits(0xc0f65addf81dbee8),
        f64::from_bits(0x410afc22ec1f9b8b),
        f64::from_bits(0xc110821dd0fc12b4),
        f64::from_bits(0x40feb3452c93aada),
        f64::from_bits(0xc0c6d04e8c5d02f3),
    );
    let denominator = f_estrin_polyeval9(
        inv,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xc04460707a9ceed4),
        f64::from_bits(0x4087ac89fcf51e9b),
        f64::from_bits(0xc0bf830689f31b42),
        f64::from_bits(0x40e9c281c367fab2),
        f64::from_bits(0xc109b59ade76eb8c),
        f64::from_bits(0x411d553a9f5673c5),
        f64::from_bits(0xc11f9dbe0665523b),
        f64::from_bits(0x4103b62a329b60d7),
    );

    let ratio = numerator / denominator;
    let inv_sqrt = j1f_rsqrt(wide);
    let scaled = ratio * inv_sqrt;
    // Round to f32 only once, after the sign has been applied.
    (scaled * sign_scale) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Special values and a few regression points for `f_i1ef`.
    #[test]
    fn test_i1ef() {
        assert!(f_i1ef(f32::NAN).is_nan());

        // (input, expected) pairs compared with `==`.
        const CASES: [(f32, f32); 9] = [
            (f32::INFINITY, 0.0),
            (f32::NEG_INFINITY, 0.0),
            (0., 0.),
            (1., 0.20791042),
            (-1., -0.20791042),
            (9., 0.12722498),
            (-9., -0.12722498),
            (0.000000000543453, 2.717265e-10),
            (-0.000000000543453, -2.717265e-10),
        ];
        for (input, expected) in CASES {
            assert_eq!(f_i1ef(input), expected);
        }
    }
}

210
vendor/pxfm/src/bessel/i1f.rs vendored Normal file
View File

@@ -0,0 +1,210 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::polyeval::{f_estrin_polyeval7, f_estrin_polyeval9, f_polyeval10};
/// Modified Bessel of the first kind of order 1
///
/// Returns `0` for `±0`, `±inf` for `±inf` and for finite arguments whose result
/// overflows `f32` (`|x| > 91.906261`), and NaN for NaN.
///
/// Max ULP 0.5
pub fn f_i1f(x: f32) -> f32 {
    let raw = x.to_bits();
    // Drop the sign bit so zero / inf / NaN can be recognised from the magnitude alone.
    let magnitude_shl = raw.wrapping_shl(1);

    if magnitude_shl == 0 {
        // |x| == 0
        return 0.;
    }
    if magnitude_shl >= 0xffu32 << 24 {
        // |x| == inf or x == NaN
        if x.is_infinite() {
            return if x.is_sign_positive() {
                f32::INFINITY
            } else {
                f32::NEG_INFINITY
            };
        }
        return x + f32::NAN; // x == NaN
    }

    let abs_bits = raw & 0x7fff_ffff;
    if abs_bits > 0x42b7d001 {
        // |x| > 91.906261: the result does not fit in f32
        return if x.is_sign_negative() {
            f32::NEG_INFINITY
        } else {
            f32::INFINITY
        };
    }

    if abs_bits <= 0x34000000u32 {
        // |x| <= f32::EPSILON
        // taylor series for I1(x) ~ x/2 + O(x^3)
        return x * 0.5;
    }

    // The kernels work on |x|; the sign is re-applied through this factor.
    let sign_scale: f64 = if x.is_sign_negative() { -1. } else { 1. };
    let abs_x = f32::from_bits(abs_bits);

    if abs_bits <= 0x40f80000u32 {
        // |x| <= 7.75
        i1f_small(abs_x, sign_scale) as f32
    } else {
        i1f_asympt(abs_x, sign_scale)
    }
}
/**
Small-argument kernel for I1; `f_i1f` calls it with `|x|` for
`f32::EPSILON < |x| <= 7.75`.

Evaluates
I1(x) = x/2 * (1 + 1/2 * (x/2)^2 + (x/2)^4 * P((x/2)^2))
with P a rational (6,6) approximation, then multiplies by `sign_scale`.

NOTE(review): the script below fits z = (x/2)^2 on [1e-9, 7.75], while the caller passes
|x| up to 7.75 (z up to about 15) - confirm the coefficients cover the whole range.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,7.75},6,6},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1f_small(x: f32, sign_scale: f64) -> f64 {
    // h = x/2, z = h^2, z_sq = h^4
    let h = (x as f64) * 0.5;
    let z = h * h;
    let z_sq = z * z;

    // Numerator of P(z).
    let numerator = f_estrin_polyeval7(
        z,
        f64::from_bits(0x3fb5555555555555),
        f64::from_bits(0x3f706cdccca396c4),
        f64::from_bits(0x3f23f9e12bdbba92),
        f64::from_bits(0x3ec8e39208e926b2),
        f64::from_bits(0x3e62e53b433c42ff),
        f64::from_bits(0x3def7cb16d10fb46),
        f64::from_bits(0x3d6747cd73d9d783),
    );
    // Denominator of P(z).
    let denominator = f_estrin_polyeval7(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfa2075f77b54885),
        f64::from_bits(0x3f438c6d797c29f5),
        f64::from_bits(0xbeda57e2a258c6da),
        f64::from_bits(0x3e677e777c569432),
        f64::from_bits(0xbdea9212a96babc1),
        f64::from_bits(0x3d5e183186d5d782),
    );
    let ratio = numerator / denominator;

    // 1 + z/2, then add z^2 * P(z).
    let leading = f_fmla(0.5, z, 1.);
    let bracket = f_fmla(z_sq, ratio, leading);

    let unsigned = bracket * h;
    unsigned * sign_scale
}
/**
Large-argument kernel for I1; `f_i1f` calls it with `|x|` for `7.75 < |x| <= 91.906261`.

The rational fit approximates
sqrt(x) * exp(-x) * I1(x) = Pn(1/x)/Qm(1/x)
so that
I1(x) = Pn(1/x)/Qm(1/x)*exp(x)/sqrt(x)
The numerator has 10 coefficients and the denominator 9; the ratio is multiplied by
`j1f_rsqrt(x)`, `core_expf(x)` and `sign_scale` before rounding to `f32`.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[1,x]
g[z_]:=f[1/z]
{err, approx,err1}=MiniMaxApproximation[g[z],{z,{1/91.9,1/7.75},9,8},WorkingPrecision->60]
poly=Numerator[approx];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1f_asympt(x: f32, sign_scale: f64) -> f32 {
    let wide = x as f64;
    // The polynomials are in the variable 1/x.
    let inv = 1. / wide;

    let numerator = f_polyeval10(
        inv,
        f64::from_bits(0x3fd9884533d43711),
        f64::from_bits(0xc0309c047537243a),
        f64::from_bits(0x4073bdb14a29bf68),
        f64::from_bits(0xc0aaf9eca14d15af),
        f64::from_bits(0x40d6c629318a9e42),
        f64::from_bits(0xc0f7bee33088a4b0),
        f64::from_bits(0x410d018cef093ee2),
        f64::from_bits(0xc111f32b325d3fe4),
        f64::from_bits(0x4100dddad80e0b42),
        f64::from_bits(0xc0c96006c91a00e2),
    );
    let denominator = f_estrin_polyeval9(
        inv,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xc044a11d10bae889),
        f64::from_bits(0x408843069497d993),
        f64::from_bits(0xc0c058710de4b9b9),
        f64::from_bits(0x40eb0d97f71420ae),
        f64::from_bits(0xc10b55d181ef9ea1),
        f64::from_bits(0x411f9413e1932a48),
        f64::from_bits(0xc1213bff5bc7d2d6),
        f64::from_bits(0x4105c53e92d9b9c0),
    );

    let ratio = numerator / denominator;
    let growth = core_expf(x);
    let inv_sqrt = j1f_rsqrt(wide);
    let unsigned = ratio * inv_sqrt * growth;
    // Round to f32 only once, after the sign has been applied.
    (unsigned * sign_scale) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Special values and a few regression points for `f_i1f`.
    #[test]
    fn test_i1f() {
        assert!(f_i1f(f32::NAN).is_nan());
        for unbounded in [f32::INFINITY, f32::NEG_INFINITY] {
            assert!(f_i1f(unbounded).is_infinite());
        }

        // (input, expected) pairs compared with `==`.
        const CASES: [(f32, f32); 5] = [
            (0., 0.),
            (1., 0.5651591),
            (-1., -0.5651591),
            (9., 1030.9147),
            (-9., -1030.9147),
        ];
        for (input, expected) in CASES {
            assert_eq!(f_i1f(input), expected);
        }
    }
}

587
vendor/pxfm/src/bessel/i2.rs vendored Normal file
View File

@@ -0,0 +1,587 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::exponents::rational128_exp;
/// Modified Bessel function of the first kind of order 2
///
/// The function is even: only `|x|` reaches the kernels. Returns `0` for `±0`,
/// `+inf` for `±inf` and for `|x| >= 713.9897136326099`, and NaN for NaN.
pub fn f_i2(x: f64) -> f64 {
    let raw = x.to_bits();
    // Drop the sign bit so zero / inf / NaN can be recognised from the magnitude alone.
    let magnitude_shl = raw.wrapping_shl(1);

    if magnitude_shl == 0 {
        // |x| == 0
        return 0.;
    }
    if magnitude_shl >= 0x7ffu64 << 53 {
        // |x| == inf or x == NaN
        return if x.is_infinite() {
            f64::INFINITY
        } else {
            x + f64::NAN
        };
    }

    let abs_bits = raw & 0x7fff_ffff_ffff_ffffu64;

    if abs_bits <= 0x3cb0000000000000u64 {
        // |x| <= f64::EPSILON
        // Power series of I2(x) ~ x^2/8 + O(x^4)
        const ONE_EIGHTH: f64 = 1. / 8.;
        return x * x * ONE_EIGHTH;
    }

    let abs_x = f64::from_bits(abs_bits);
    if abs_bits < 0x401f000000000000u64 {
        // |x| < 7.75
        return i2_small(abs_x);
    }
    if abs_bits >= 0x40864feaeefb23b8 {
        // x >= 713.9897136326099
        return f64::INFINITY;
    }
    i2_asympt(abs_x)
}
/**
Computes
I2(x) = x^2 * R(x^2)
Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselI[2,x]/x^2
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.000000000001,7.75},11,11},WorkingPrecision->75]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i2_small(x: f64) -> f64 {
const P: [(u64, u64); 12] = [
(0x0000000000000000, 0x3fc0000000000000),
(0x3c247833fda9de9a, 0x3f8387c6e72a1b5f),
(0xbbccaf0be91261a6, 0x3f30ba88efff56fa),
(0x3b57c911bfebe1d7, 0x3ecc62e53d061300),
(0x3af3b963f26a3d05, 0x3e5bb090327a14e1),
(0x3a898bff9d40e030, 0x3de0d29c3d37e5b5),
(0xb9f2f63c80d377db, 0x3d5a9e365f1bf6e0),
(0xb965e6d78e1c2b65, 0x3ccbf7ef0929b813),
(0xb8da83d7d40e7310, 0x3c33737520046f4d),
(0xb83f811d5aa3f36e, 0x3b91506558dab318),
(0xb78e601bf5c998c3, 0x3ae2013b3e858bd1),
(0xb6c8185c51734ed8, 0x3a20cc277a5051ba),
];
let x_sqr = DoubleDouble::from_exact_mult(x, x);
let x2 = x_sqr * x_sqr;
let x4 = x2 * x2;
let x8 = x4 * x4;
let e0 = DoubleDouble::mul_add_f64(
x_sqr,
DoubleDouble::from_bit_pair(P[1]),
f64::from_bits(0x3fc0000000000000),
);
let e1 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc0ba42af56ed76b, 0xbf7cd8e6e2b39f60),
(0x3b90697aa005e598, 0x3efa0260394e1a3d),
(0xbb0c7ccde1f63c82, 0xbe6f1766ec64e492),
(0x3a63f18409bc336f, 0x3ddb80b6b5abad98),
(0xb9e0cd49f22132fe, 0xbd42ff9b55d553da),
(0xb934bfe64905d309, 0x3ca50814fa258ebc),
(0x38a1e35c2d6860f4, 0xbc02c4f2faca2195),
(0xb7ff39e268277e4e, 0x3b5aa545a2c1f16d),
(0xb71053f58545760c, 0xbaacde4c133d42d1),
(0xb68d0c2ccab0ae5b, 0x39f5a965b92b06bc),
(0xb5dc35bda16bee7b, 0xb931375b1c9cfbc7),
];
let e0 = DoubleDouble::mul_add_f64(
x_sqr,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
x_sqr,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
let p = DoubleDouble::div(p_num, p_den);
DoubleDouble::quick_mult(p, x_sqr).to_f64()
}
/**
Asymptotic expansion for I2 (fast path); `f_i2` calls it with `|x|` for
`7.75 <= |x| < 713.9897136326099`.
I2(x)=R(1/x)*Exp(x)/sqrt(x)

R is evaluated in double-double arithmetic as a ratio of two 12-coefficient polynomials
in 1/x. The result is accepted only if a rounding test succeeds; otherwise the computation
is redone by `i2_asympt_hard`.

Generated in Wolfram:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[2,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/714.0,1/7.5},11,11},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i2_asympt(x: f64) -> f64 {
let dx = x;
// The polynomials are in the variable 1/x.
let recip = DoubleDouble::from_quick_recip(x);
// Numerator coefficients as double-double bit pairs, lowest degree first.
const P: [(u64, u64); 12] = [
(0x3c718bb28ebc5f4e, 0x3fd9884533d43650),
(0x3c96e15a87b6e1d1, 0xc0350acc9e5cb0f9),
(0xbd20b212a79e08f5, 0x40809251af67598a),
(0xbd563b7397df3a54, 0xc0bfc09ede682c8b),
(0xbd5eb872cb057d91, 0x40f44253a9e1e1ab),
(0x3d7614735e566fc5, 0xc121cbcd96dc8765),
(0xbddc4f8df2010026, 0x4145a592e8ec74ad),
(0x3dea227617b678a7, 0xc161df96fb6a9df9),
(0x3e17c9690d906194, 0x41732c71397757f8),
(0x3e0638226ce0b938, 0xc178893fde0e6ed7),
(0xbe09d8dc4e7930ce, 0x417066abe24b31df),
(0xbde152007ee29e54, 0xc150531da3f31b16),
];
// Powers of 1/x: x2 = (1/x)^2, x4 = (1/x)^4, x8 = (1/x)^8.
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
// Estrin-style evaluation of the numerator: pairs first, then combine with x2, x4, x8.
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
// Denominator coefficients as double-double bit pairs, lowest degree first.
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcd0d33e9e73b503, 0xc0496f5a09751d50),
(0x3d2f9c44a069dc4b, 0x40934427187ac370),
(0xbd69e2e5a3618381, 0xc0d19983f74fdf52),
(0x3d88c69a62ae8b44, 0x410524fcaa71e85a),
(0xbdc0345b806dd0bf, 0xc13120daf531b66b),
(0xbdd35875712fff6f, 0x4152943a4f9f1c7f),
(0xbdf8dd50e92553fd, 0xc169b83aeede08ea),
(0x3e0800ecaa77f79e, 0x41746c61554a08ce),
(0x3dd74fbc32c5f696, 0xc16ba2febd1932a3),
(0x3dc23eb2c943b539, 0x413574ae68b6b378),
(0xbd95d86c5c94cd65, 0xc104adac99eaa90c),
];
// Same scheme for the denominator; Q[0] is exactly 1.0, so it is added as a plain f64.
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
// z = R(1/x)
let z = DoubleDouble::div(p_num, p_den);
// The exponential is taken at x/2 and multiplied in twice below
// (presumably so the intermediate stays in range for x close to 714 - TODO confirm).
let mut e = i0_exp(dx * 0.5);
// Rebuild the pair from its parts via an exact add (presumably a renormalisation).
e = DoubleDouble::from_exact_add(e.hi, e.lo);
let r_sqrt = DoubleDouble::from_rsqrt_fast(dx);
// r = (z * r_sqrt * e) * e
let r = DoubleDouble::quick_mult(z * r_sqrt * e, e);
// Error bound used by the rounding test: |r.hi| * 2^-59 + 2^-69.
let err = f_fmla(
r.hi,
f64::from_bits(0x3c40000000000000), // 2^-59
f64::from_bits(0x3ba0000000000000), // 2^-69
);
// If the result rounds to the same f64 with the error added and subtracted,
// the fast result is accepted.
let up = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if up == lb {
return r.to_f64();
}
// Rounding is ambiguous at this precision: recompute on the 128-bit slow path.
i2_asympt_hard(x)
}
/**
Asymptotic expansion for I2 (slow, high-precision path).
I2(x)=R(1/x)*Exp(x)/sqrt(x)

Called by `i2_asympt` when its rounding test fails. R is a ratio of two 16-coefficient
polynomials in 1/x, evaluated with Horner's scheme in `DyadicFloat128`, then multiplied by
`bessel_rsqrt_hard(x, recip)` and `rational128_exp(x)` and converted to `f64`.

Generated in Wolfram:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[2,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/714.0,1/7.5},15,15},WorkingPrecision->120]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
// Kept out of line and marked cold: this is the rarely taken fallback.
#[cold]
#[inline(never)]
fn i2_asympt_hard(x: f64) -> f64 {
// Numerator coefficients, lowest degree first.
static P: [DyadicFloat128; 16] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -129,
mantissa: 0xcc42299e_a1b28468_3bb16645_ba1dc793_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -123,
mantissa: 0xe202abf7_de10e93f_2a2e6a0f_af69c788_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -118,
mantissa: 0xf70296c3_ad33bde6_866cfd01_0e846cfc_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -113,
mantissa: 0xa83df971_736c4e6c_1a35479b_ad6d9172_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0x9baa2015_9c5ca461_0aff0b62_54a70fdb_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -106,
mantissa: 0xc70af95d_f95d14ad_1094ea1b_e46b2d2f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -103,
mantissa: 0xa838fb48_e79fb706_642da604_6a73b4f8_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -101,
mantissa: 0x8fe29f37_02b1e876_39e88664_1c8b3b5d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -100,
mantissa: 0xc8e9a474_0a03f93a_16d2e7a9_627eba4e_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -95,
mantissa: 0x8807d1f6_6d646a08_8c7e8900_12d6a5ed_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -93,
mantissa: 0xe5c25026_97518024_36878256_fd81c08f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -91,
mantissa: 0xeaa075f0_f5151bed_95ec612f_ab9834a7_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -89,
mantissa: 0x9b267222_82d5c666_348d7d1d_0fedfba4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -88,
mantissa: 0x81b45c4c_3e828396_1d5bdede_869c3b84_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -89,
mantissa: 0xf4495d43_4bc8dba6_42bdb5d6_c8ba2c9c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -90,
mantissa: 0xc9b29546_0c226270_bb428035_587b6d6a_u128,
},
];
// Denominator coefficients, lowest degree first.
static Q: [DyadicFloat128; 16] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -121,
mantissa: 0x89e18bae_ca9629a1_26927ba2_fbdd66ab_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -116,
mantissa: 0x92a90fc2_e905f634_4946e8a0_dd8e3874_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -112,
mantissa: 0xc1742696_d29e3846_3e183737_29db8b68_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -108,
mantissa: 0xabf61cc0_236a0e90_2572113d_fa339591_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -105,
mantissa: 0xcff0fe90_dac1b08e_9a5740ae_b2984fc1_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -102,
mantissa: 0x9ff36729_e407c538_cfcea3a7_63f39043_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -101,
mantissa: 0xc86ff6a3_9b803a31_d385e9ea_83f9d751_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -98,
mantissa: 0xb4a125b1_6cab70f3_0f314558_708843df_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -94,
mantissa: 0x9670fd33_f83bcaa7_85cf2d82_c0bf8cd5_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -92,
mantissa: 0xd70b4ea5_32fedb9d_78a3c047_05e650f4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -90,
mantissa: 0xb9c7904c_3f97b633_c2c0ad9b_ad573ede_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -89,
mantissa: 0xc2023c21_5155e9fe_6fb17bb2_c865becd_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -89,
mantissa: 0xd9400a5e_27c58803_22948cf3_6154ac49_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -90,
mantissa: 0x87aa272d_6a9700b4_449a9db8_1a93b0ee_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -93,
mantissa: 0xd1a86655_5b259611_dfc7affc_6ffb0e20_u128,
},
];
// The polynomials are in the variable 1/x.
let recip = DyadicFloat128::accurate_reciprocal(x);
// Horner evaluation of the numerator, from the highest coefficient down to P[0].
let mut p_num = P[15];
for i in (0..15).rev() {
p_num = recip * p_num + P[i];
}
// Horner evaluation of the denominator, from the highest coefficient down to Q[0].
let mut p_den = Q[15];
for i in (0..15).rev() {
p_den = recip * p_den + Q[i];
}
// z = R(1/x), formed by multiplying with the reciprocal of the denominator.
let z = p_num * p_den.reciprocal();
let r_sqrt = bessel_rsqrt_hard(x, recip);
let f_exp = rational128_exp(x);
// Combine the three factors and convert to f64 once.
(z * r_sqrt * f_exp).fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Special values and regression points for `f_i2`; the function is even in `x`.
    #[test]
    fn test_i2() {
        // (input, expected) pairs compared with `==`.
        const CASES: [(f64, f64); 10] = [
            (7.750000000757874, 257.0034362785801),
            (7.482812501363189, 198.26765887136534),
            (-7.750000000757874, 257.0034362785801),
            (-7.482812501363189, 198.26765887136534),
            (f64::INFINITY, f64::INFINITY),
            (f64::NEG_INFINITY, f64::INFINITY),
            (0.01, 1.2500104166992188e-5),
            (-0.01, 1.2500104166992188e-5),
            (-9.01, 872.9250699638584),
            (9.01, 872.9250699638584),
        ];
        for (input, expected) in CASES {
            assert_eq!(f_i2(input), expected);
        }
        // NaN must propagate.
        assert!(f_i2(f64::NAN).is_nan());
    }
}

194
vendor/pxfm/src/bessel/i2f.rs vendored Normal file
View File

@@ -0,0 +1,194 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::exponents::core_expf;
use crate::polyeval::{f_estrin_polyeval8, f_estrin_polyeval9};
/// Modified Bessel of the first kind of order 2
///
/// The function is even: only `|x|` reaches the kernels. Returns `0` for `±0`,
/// `+inf` for `±inf` and for `|x| >= 91.92277`, and NaN for NaN.
///
/// ULP 0.5
pub fn f_i2f(x: f32) -> f32 {
    let raw = x.to_bits();
    // Drop the sign bit so zero / inf / NaN can be recognised from the magnitude alone.
    let magnitude_shl = raw.wrapping_shl(1);

    if magnitude_shl == 0 {
        // |x| == 0
        return 0.;
    }
    if magnitude_shl >= 0xffu32 << 24 {
        // |x| == inf or x == NaN
        return if x.is_infinite() {
            f32::INFINITY
        } else {
            x + f32::NAN
        };
    }

    let abs_bits = raw & 0x7fff_ffff;
    if abs_bits >= 0x42b7d875u32 {
        // |x| >= 91.92277 it's infinity
        return f32::INFINITY;
    }

    if abs_bits <= 0x34000000u32 {
        // |x| <= f32::EPSILON: x^2/8, evaluated in f64 and rounded once
        const ONE_EIGHTH: f64 = 1. / 8.;
        let wide = x as f64;
        return (wide * wide * ONE_EIGHTH) as f32;
    }

    let abs_x = f32::from_bits(abs_bits);
    if abs_bits <= 0x40f80000u32 {
        // |x| <= 7.75
        i2f_small(abs_x)
    } else {
        i2f_asympt(abs_x)
    }
}
/**
Rational approximation used for small arguments.

Computes
I2(x) = x^2 * R(x^2)
where R is a ratio of a 9-coefficient numerator and an 8-coefficient denominator in x^2.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselI[2,x]/x^2
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.000000000001,7.75},8,7},WorkingPrecision->75]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i2f_small(x: f32) -> f32 {
    // Numerator coefficients (f64 bit patterns), lowest degree first.
    const NUM: [u64; 9] = [
        0x3fc0000000000000,
        0x3f831469a38d72c7,
        0x3f2f453dd3dd98f4,
        0x3ec8af52ee8fce9b,
        0x3e5589f2cb4e0ec9,
        0x3dd60fa268a4206d,
        0x3d4ab3091ee18d6b,
        0x3cb1efec43b15186,
        0x3c050992c6e9e63f,
    ];
    // Denominator coefficients (f64 bit patterns), lowest degree first.
    const DEN: [u64; 8] = [
        0x3ff0000000000000,
        0xbf82075d8e3f1476,
        0x3f03ef86564a284b,
        0xbe7c498fab4a57d8,
        0x3dec162ca0f68486,
        0xbd53bb6398461540,
        0x3cb265215261e64a,
        0xbc01cf52cc350e81,
    ];
    let xd = x as f64;
    let z = xd * xd;
    let numerator = f_estrin_polyeval9(
        z,
        f64::from_bits(NUM[0]),
        f64::from_bits(NUM[1]),
        f64::from_bits(NUM[2]),
        f64::from_bits(NUM[3]),
        f64::from_bits(NUM[4]),
        f64::from_bits(NUM[5]),
        f64::from_bits(NUM[6]),
        f64::from_bits(NUM[7]),
        f64::from_bits(NUM[8]),
    );
    let denominator = f_estrin_polyeval8(
        z,
        f64::from_bits(DEN[0]),
        f64::from_bits(DEN[1]),
        f64::from_bits(DEN[2]),
        f64::from_bits(DEN[3]),
        f64::from_bits(DEN[4]),
        f64::from_bits(DEN[5]),
        f64::from_bits(DEN[6]),
        f64::from_bits(DEN[7]),
    );
    let ratio = numerator / denominator;
    (ratio * z) as f32
}
/**
Asymptotic expansion for I2, used for large arguments.

Computes:
sqrt(x) * exp(-x) * I2(x) = Pn(1/x)/Qn(1/x)
hence:
I2(x) = Pn(1/x)/Qn(1/x)*exp(x)/sqrt(x)

Generated by Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[2,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{1/92.3,1/7.5},8,8},WorkingPrecision->70]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i2f_asympt(x: f32) -> f32 {
    // Numerator coefficients (f64 bit patterns), lowest degree first.
    const NUM: [u64; 9] = [
        0x3fd9884533d45f46,
        0xc02b979526807e1e,
        0x406b1dd3e795bbed,
        0xc09e43629031ec91,
        0x40c48c03a39aec1d,
        0xc0e0f022ccb8807a,
        0x40f0302eeb22a776,
        0xc0f02b01549d38b8,
        0x40dad4e70f2bc264,
    ];
    // Denominator coefficients (f64 bit patterns), lowest degree first.
    const DEN: [u64; 9] = [
        0x3ff0000000000000,
        0xc0405a71a88b191c,
        0x407e19f7d247d098,
        0xc0aeaac6e0ca17fe,
        0x40d2301702f40a98,
        0xc0e7e6c6c01841b3,
        0x40ed67317e9e46cc,
        0xc0d13786aa1ef416,
        0xc0a6c9cfe579ae22,
    ];
    let xd = x as f64;
    // Both polynomials are evaluated in 1/x.
    let inv_x = 1. / xd;
    let numerator = f_estrin_polyeval9(
        inv_x,
        f64::from_bits(NUM[0]),
        f64::from_bits(NUM[1]),
        f64::from_bits(NUM[2]),
        f64::from_bits(NUM[3]),
        f64::from_bits(NUM[4]),
        f64::from_bits(NUM[5]),
        f64::from_bits(NUM[6]),
        f64::from_bits(NUM[7]),
        f64::from_bits(NUM[8]),
    );
    let denominator = f_estrin_polyeval9(
        inv_x,
        f64::from_bits(DEN[0]),
        f64::from_bits(DEN[1]),
        f64::from_bits(DEN[2]),
        f64::from_bits(DEN[3]),
        f64::from_bits(DEN[4]),
        f64::from_bits(DEN[5]),
        f64::from_bits(DEN[6]),
        f64::from_bits(DEN[7]),
        f64::from_bits(DEN[8]),
    );
    let ratio = numerator / denominator;
    let exp_x = core_expf(x);
    let inv_sqrt_x = j1f_rsqrt(xd);
    // Multiply in the order (ratio * 1/sqrt(x)) * exp(x).
    let scaled = ratio * inv_sqrt_x;
    (scaled * exp_x) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the special values and that `f_i2f` returns the same value for `x` and `-x`.
    #[test]
    fn test_i2f() {
        assert_eq!(f_i2f(0.), 0.);
        for inf in [f32::INFINITY, f32::NEG_INFINITY] {
            assert_eq!(f_i2f(inf), f32::INFINITY);
        }
        let cases: [(f32, f32); 2] = [(1., 0.13574767), (9.432, 1314.6553)];
        for (arg, expected) in cases {
            assert_eq!(f_i2f(arg), expected);
            assert_eq!(f_i2f(-arg), expected);
        }
    }
}

626
vendor/pxfm/src/bessel/j0.rs vendored Normal file
View File

@@ -0,0 +1,626 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::alpha0::{
bessel_0_asympt_alpha, bessel_0_asympt_alpha_fast, bessel_0_asympt_alpha_hard,
};
use crate::bessel::beta0::{
bessel_0_asympt_beta, bessel_0_asympt_beta_fast, bessel_0_asympt_beta_hard,
};
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::j0_coeffs_remez::J0_COEFFS_REMEZ;
use crate::bessel::j0_coeffs_taylor::J0_COEFFS_TAYLOR;
use crate::bessel::j0f_coeffs::{J0_ZEROS, J0_ZEROS_VALUE};
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::{f_polyeval9, f_polyeval10, f_polyeval12, f_polyeval19};
use crate::sin_helper::{cos_dd_small, cos_dd_small_fast, cos_f128_small};
use crate::sincos_reduce::{AngleReduced, rem2pi_any, rem2pi_f128};
/// Bessel of the first kind of order 0
///
/// Dispatch on `|x|`:
/// * `|x| <= 0.00000000000000000000532` returns `1`;
/// * `|x| <= f64::EPSILON` uses `1 - (x/2)^2`;
/// * infinities return `0`, NaN propagates;
/// * `|x| <= 1.1` uses the Maclaurin series, `|x| <= 74.8` the expansions around
///   zeros/extrema, and larger arguments the asymptotic form.
pub fn f_j0(x: f64) -> f64 {
    // Shifting out the sign bit lets one unsigned compare test |x|.
    let ux = x.to_bits().wrapping_shl(1);
    if ux <= 0x77723ef88126da90u64 {
        // |x| <= 0.00000000000000000000532
        return 1.;
    }
    if ux <= 0x7960000000000000u64 {
        // |x| <= f64::EPSILON
        // J0(x) ~ 1-x^2/4+O[x]^4
        let half_x = 0.5 * x; // exact.
        return f_fmla(half_x, -half_x, 1.);
    }
    if ux >= 0x7ffu64 << 53 {
        // |x| == inf, |x| == NaN
        return if x.is_infinite() {
            0.
        } else {
            x + f64::NAN // x == NaN
        };
    }
    let abs_bits = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    let ax = f64::from_bits(abs_bits);
    if abs_bits <= 0x3ff199999999999au64 {
        // |x| <= 1.1
        j0_maclaurin_series_fast(ax)
    } else if abs_bits <= 0x4052b33333333333u64 {
        // |x| <= 74.8
        j0_small_argument_fast(ax)
    } else {
        j0_asympt_fast(ax)
    }
}
/**
Fast path of the Maclaurin series of J0 in x^2.

The ten highest coefficients are evaluated in plain `f64`; the last two steps are carried
out in double-double. If rounding is not decided within the error bound, the evaluation
is redone by `j0_maclaurin_series`.

The `f64` coefficients below are the high words of the table produced by this script.
Generated by SageMath:
```python
mp.prec = 180
def print_expansion_at_0():
    print(f"const J0_MACLAURIN_SERIES: [(u64, u64); 12] = [")
    from mpmath import mp, j0, taylor
    poly = taylor(lambda val: j0(val), 0, 24)
    real_i = 0
    for i in range(0, 24, 2):
        print_double_double("", DD(poly[i]))
        real_i = real_i + 1
    print("];")
    print(poly)
print_expansion_at_0()
```
**/
#[inline]
pub(crate) fn j0_maclaurin_series_fast(x: f64) -> f64 {
    const COEFFS: [u64; 12] = [
        0x3ff0000000000000,
        0xbfd0000000000000,
        0x3f90000000000000,
        0xbf3c71c71c71c71c,
        0x3edc71c71c71c71c,
        0xbe723456789abcdf,
        0x3e002e85c0898b71,
        0xbd8522a43f65486a,
        0x3d0522a43f65486a,
        0xbc80b313289be0b9,
        0x3bf5601885e63e5d,
        0xbb669ca9cf3b7f54,
    ];
    let coeff = |i: usize| f64::from_bits(COEFFS[i]);
    let x_sq = DoubleDouble::from_exact_mult(x, x);
    // Coefficients 2..=11 in f64, using only the high word of x^2.
    let tail = f_polyeval10(
        x_sq.hi,
        coeff(2),
        coeff(3),
        coeff(4),
        coeff(5),
        coeff(6),
        coeff(7),
        coeff(8),
        coeff(9),
        coeff(10),
        coeff(11),
    );
    // Last two steps in double-double.
    let inner = DoubleDouble::mul_f64_add_f64(x_sq, tail, coeff(1));
    let sum = DoubleDouble::mul_add_f64(x_sq, inner, coeff(0));
    // squaring error (2^-56) + poly error 2^-75
    let bound = f_fmla(
        x_sq.hi,
        f64::from_bits(0x3c70000000000000), // 2^-56
        f64::from_bits(0x3b40000000000000), // 2^-75
    );
    let upper = sum.hi + (sum.lo + bound);
    let lower = sum.hi + (sum.lo - bound);
    if upper != lower {
        // Rounding is ambiguous at this precision.
        return j0_maclaurin_series(x);
    }
    sum.to_f64()
}
/**
Double-double evaluation of the Maclaurin series of J0 in x^2.

Used when the fast path cannot decide rounding; escalates to
`j0_maclaurin_series_hard` if the double-double result is still ambiguous.

Generated by SageMath:
```python
mp.prec = 180
def print_expansion_at_0():
    print(f"const J0_MACLAURIN_SERIES: [(u64, u64); 12] = [")
    from mpmath import mp, j0, taylor
    poly = taylor(lambda val: j0(val), 0, 24)
    real_i = 0
    for i in range(0, 24, 2):
        print_double_double("", DD(poly[i]))
        real_i = real_i + 1
    print("];")
    print(poly)
print_expansion_at_0()
```
**/
#[cold]
pub(crate) fn j0_maclaurin_series(x: f64) -> f64 {
    // Bit pairs handed to `DoubleDouble::from_bit_pair`.
    const COEFFS: [(u64, u64); 12] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x0000000000000000, 0xbfd0000000000000),
        (0x0000000000000000, 0x3f90000000000000),
        (0xbbdc71c71c71c71c, 0xbf3c71c71c71c71c),
        (0x3b7c71c71c71c71c, 0x3edc71c71c71c71c),
        (0xbab23456789abcdf, 0xbe723456789abcdf),
        (0xba8b6edec0692e65, 0x3e002e85c0898b71),
        (0x3a2604db055bd075, 0xbd8522a43f65486a),
        (0xb9a604db055bd075, 0x3d0522a43f65486a),
        (0x3928824198c6f6e1, 0xbc80b313289be0b9),
        (0xb869b0b430eb27b8, 0x3bf5601885e63e5d),
        (0x380ee6b4638f3a25, 0xbb669ca9cf3b7f54),
    ];
    const ERR: f64 = f64::from_bits(0x39d0000000000000); // 2^-98
    let term = |i: usize| DoubleDouble::from_bit_pair(COEFFS[i]);
    let x_sq = DoubleDouble::from_exact_mult(x, x);
    let poly = f_polyeval12(
        x_sq,
        term(0),
        term(1),
        term(2),
        term(3),
        term(4),
        term(5),
        term(6),
        term(7),
        term(8),
        term(9),
        term(10),
        term(11),
    );
    let sum = DoubleDouble::from_exact_add(poly.hi, poly.lo);
    let upper = sum.hi + (sum.lo + ERR);
    let lower = sum.hi + (sum.lo - ERR);
    if upper != lower {
        // Still ambiguous: fall back to the 128-bit dyadic evaluation.
        return j0_maclaurin_series_hard(x);
    }
    sum.to_f64()
}
/**
Last-resort evaluation of the Maclaurin series of J0 in `DyadicFloat128` arithmetic.

The series in x^2 is evaluated by Horner's scheme starting from the highest coefficient.

Generated by SageMath:
```python
mp.prec = 180
def print_expansion_at_0():
    print(f"const P: [DyadicFloat128; 12] = [")
    from mpmath import mp, j0, taylor
    poly = taylor(lambda val: j0(val), 0, 24)
    # print(poly)
    real_i = 0
    for i in range(0, 24, 2):
        print_dyadic(DD(poly[i]))
        real_i = real_i + 1
    print("];")
    print(poly)
print_expansion_at_0()
```
**/
#[cold]
#[inline(never)]
pub(crate) fn j0_maclaurin_series_hard(x: f64) -> f64 {
    static P: [DyadicFloat128; 12] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -129,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -133,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -139,
            mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -145,
            mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -151,
            mantissa: 0x91a2b3c4_d5e6f809_1a2b3c4d_5e6f8092_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -158,
            mantissa: 0x81742e04_4c5b8724_8909fcb6_8cd4e410_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -166,
            mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -174,
            mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -182,
            mantissa: 0x85989944_df05c4ef_b7cce721_23e1b391_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -191,
            mantissa: 0xab00c42f_31f2e799_3d2f3c53_6120e5d8_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -200,
            mantissa: 0xb4e54e79_dbfa9c23_29738e18_bb602809_u128,
        },
    ];
    let xd = DyadicFloat128::new_from_f64(x);
    let x_sq = xd * xd;
    // Horner: start with P[11], then fold in P[10], P[9], ..., P[0].
    let sum = P[..11]
        .iter()
        .rev()
        .fold(P[11], |acc, &coeff| x_sq * acc + coeff);
    sum.fast_as_f64()
}
/// This method on small range searches for nearest zero or extremum.
/// Then picks stored series expansion at the point and evaluates the poly at the point.
///
/// The table index is estimated from `x * INV_STEP`; the two neighbouring entries of
/// `J0_ZEROS` are compared and the closer one is used as the expansion point. If the
/// closest entry is index 0, the Maclaurin series is used instead. When the fast
/// double-double evaluation cannot decide rounding, `j0_small_argument_dd` is called.
#[inline]
pub(crate) fn j0_small_argument_fast(x: f64) -> f64 {
// let avg_step = 74.6145 / 47.0;
// let inv_step = 1.0 / avg_step;
const INV_STEP: f64 = 0.6299043751549631;
let fx = x * INV_STEP;
const J0_ZEROS_COUNT: f64 = (J0_ZEROS.len() - 1) as f64;
// NOTE(review): the unchecked casts below require `fx` to be non-negative; `f_j0`
// passes |x| here, other callers are not visible in this file — confirm.
// The `min` clamps the index to the last entry of `J0_ZEROS`.
let idx0 = unsafe { fx.min(J0_ZEROS_COUNT).to_int_unchecked::<usize>() };
let idx1 = unsafe { fx.ceil().min(J0_ZEROS_COUNT).to_int_unchecked::<usize>() };
let found_zero0 = DoubleDouble::from_bit_pair(J0_ZEROS[idx0]);
let found_zero1 = DoubleDouble::from_bit_pair(J0_ZEROS[idx1]);
let dist0 = (found_zero0.hi - x).abs();
let dist1 = (found_zero1.hi - x).abs();
// Pick the closer of the two candidate expansion points (ties go to idx1).
let (found_zero, idx, dist) = if dist0 < dist1 {
(found_zero0, idx0, dist0)
} else {
(found_zero1, idx1, dist1)
};
if idx == 0 {
return j0_maclaurin_series_fast(x);
}
// Within 1e-3 of the expansion point the Taylor table is used, otherwise the Remez one.
let is_too_close_too_zero = dist.abs() < 1e-3;
let c = if is_too_close_too_zero {
&J0_COEFFS_TAYLOR[idx - 1]
} else {
&J0_COEFFS_REMEZ[idx - 1]
};
// r = x - expansion point, kept in double-double.
let r = DoubleDouble::full_add_f64(-found_zero, x.abs());
// We hit exact zero, value, better to return it directly
if dist == 0. {
return f64::from_bits(J0_ZEROS_VALUE[idx]);
}
// Coefficients 5..=23 are evaluated in f64 using only the second word of each pair.
let p = f_polyeval19(
r.hi,
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
f64::from_bits(c[9].1),
f64::from_bits(c[10].1),
f64::from_bits(c[11].1),
f64::from_bits(c[12].1),
f64::from_bits(c[13].1),
f64::from_bits(c[14].1),
f64::from_bits(c[15].1),
f64::from_bits(c[16].1),
f64::from_bits(c[17].1),
f64::from_bits(c[18].1),
f64::from_bits(c[19].1),
f64::from_bits(c[20].1),
f64::from_bits(c[21].1),
f64::from_bits(c[22].1),
f64::from_bits(c[23].1),
);
// The remaining five steps (coefficients 4..=0) are done in double-double.
let mut z = DoubleDouble::mul_f64_add(r, p, DoubleDouble::from_bit_pair(c[4]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[3]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[2]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[1]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[0]));
// Error bound: |z.hi| * 2^-56 + 2^-64.
let err = f_fmla(
z.hi,
f64::from_bits(0x3c70000000000000), // 2^-56
f64::from_bits(0x3bf0000000000000), // 2^-64
);
// If both ends of the error interval round to the same f64, the result is decided.
let ub = z.hi + (z.lo + err);
let lb = z.hi + (z.lo - err);
if ub == lb {
return z.to_f64();
}
j0_small_argument_dd(r, c)
}
#[cold]
fn j0_small_argument_dd(r: DoubleDouble, c0: &[(u64, u64); 24]) -> f64 {
let c = &c0[15..];
let p0 = f_polyeval9(
r.to_f64(),
f64::from_bits(c[0].1),
f64::from_bits(c[1].1),
f64::from_bits(c[2].1),
f64::from_bits(c[3].1),
f64::from_bits(c[4].1),
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
);
let c = c0;
let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0]));
let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo);
let err = f_fmla(
p.hi,
f64::from_bits(0x3c10000000000000), // 2^-62
f64::from_bits(0x3a90000000000000), // 2^-86
);
let ub = p.hi + (p.lo + err);
let lb = p.hi + (p.lo - err);
if ub != lb {
return j0_small_argument_hard(r, c);
}
p.to_f64()
}
/// Final fallback for the expansion around a zero/extremum of J0.
///
/// Evaluates all 24 coefficients of `c` by Horner's scheme in double-double,
/// renormalizing the accumulator with `from_exact_add` after every step.
#[cold]
#[inline(never)]
fn j0_small_argument_hard(r: DoubleDouble, c: &[(u64, u64); 24]) -> f64 {
    let sum = c[..23]
        .iter()
        .rev()
        .fold(DoubleDouble::from_bit_pair(c[23]), |acc, &pair| {
            let step = DoubleDouble::mul_add(r, acc, DoubleDouble::from_bit_pair(pair));
            DoubleDouble::from_exact_add(step.hi, step.lo)
        });
    sum.to_f64()
}
/*
Fast path of the asymptotic form.
Evaluates:
J0 = sqrt(2/(PI*x)) * beta(x) * cos(x - PI/4 - alpha(x))
If rounding cannot be decided within the error bound, `j0_asympt` is called with the
already computed reciprocal, reciprocal square root and reduced angle.
*/
#[inline]
pub(crate) fn j0_asympt_fast(x: f64) -> f64 {
    const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc8cbc0d30ebfd15),
        f64::from_bits(0x3fe9884533d43651),
    );
    const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc81a62633145c07),
        f64::from_bits(0xbfe921fb54442d18),
    );
    let x = x.abs();
    // NOTE(review): this literal has 15 hex digits (0x07fd_0000_0000_0000), so the
    // comparison is true for every argument above that tiny value and the safe-division
    // branch is effectively always taken. Possibly 0x7fd0000000000000 was intended;
    // left unchanged here — confirm against upstream before altering.
    let use_safe_div = x.to_bits() > 0x7fd000000000000u64;
    let recip = if use_safe_div {
        DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_safe_div(4.0, x), 0.25)
    } else {
        DoubleDouble::from_recip(x)
    };
    let alpha = bessel_0_asympt_alpha_fast(recip);
    let beta = bessel_0_asympt_beta_fast(recip);
    let angle = rem2pi_any(x).angle;
    // Without full subtraction cancellation happens sometimes
    let shift = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
    let phase = DoubleDouble::full_dd_add(angle, shift);
    let cos_phase = cos_dd_small_fast(phase);
    let amplitude = DoubleDouble::quick_mult(beta, cos_phase);
    let r_sqrt = DoubleDouble::from_rsqrt_fast(x);
    let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
    let product = DoubleDouble::quick_mult(scale, amplitude);
    let bound = f_fmla(
        product.hi,
        f64::from_bits(0x3be0000000000000), // 2^-65
        f64::from_bits(0x3a60000000000000), // 2^-89
    );
    let upper = product.hi + (product.lo + bound);
    let lower = product.hi + (product.lo - bound);
    if upper != lower {
        return j0_asympt(x, recip, r_sqrt, angle);
    }
    product.to_f64()
}
/*
Second stage of the asymptotic form, reusing `recip`, `r_sqrt` and `angle` from the caller.
Evaluates:
J0 = sqrt(2/(PI*x)) * beta(x) * cos(x - PI/4 - alpha(x))
`x` itself is only needed for the `j0_asympt_hard` fallback.
*/
pub(crate) fn j0_asympt(
    x: f64,
    recip: DoubleDouble,
    r_sqrt: DoubleDouble,
    angle: DoubleDouble,
) -> f64 {
    const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc8cbc0d30ebfd15),
        f64::from_bits(0x3fe9884533d43651),
    );
    const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc81a62633145c07),
        f64::from_bits(0xbfe921fb54442d18),
    );
    let alpha = bessel_0_asympt_alpha(recip);
    let beta = bessel_0_asympt_beta(recip);
    // Without full subtraction cancellation happens sometimes
    let shift = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
    let phase = DoubleDouble::full_dd_add(angle, shift);
    let cos_phase = cos_dd_small(phase);
    let amplitude = DoubleDouble::quick_mult(beta, cos_phase);
    let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
    let product = DoubleDouble::quick_mult(scale, amplitude);
    let sum = DoubleDouble::from_exact_add(product.hi, product.lo);
    let bound = f_fmla(
        sum.hi,
        f64::from_bits(0x3bd0000000000000), // 2^-66
        f64::from_bits(0x39e0000000000000), // 2^-97
    );
    let upper = sum.hi + (sum.lo + bound);
    let lower = sum.hi + (sum.lo - bound);
    if upper != lower {
        return j0_asympt_hard(x);
    }
    sum.to_f64()
}
/*
Last-resort evaluation of the asymptotic form in `DyadicFloat128` arithmetic.
Evaluates:
J0 = sqrt(2/(PI*x)) * beta(x) * cos(x - PI/4 - alpha(x))
*/
#[cold]
#[inline(never)]
pub(crate) fn j0_asympt_hard(x: f64) -> f64 {
    const SQRT_2_OVER_PI: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -128,
        mantissa: 0xcc42299e_a1b28468_7e59e280_5d5c7180_u128,
    };
    const MPI_OVER_4: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Neg,
        exponent: -128,
        mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
    };
    let x_dyadic = DyadicFloat128::new_from_f64(x);
    let recip = DyadicFloat128::accurate_reciprocal(x);
    let alpha = bessel_0_asympt_alpha_hard(recip);
    let beta = bessel_0_asympt_beta_hard(recip);
    let angle = rem2pi_f128(x_dyadic);
    // phase = reduced angle + (-PI/4 - alpha)
    let shift = MPI_OVER_4 - alpha;
    let phase = angle + shift;
    let cos_phase = cos_f128_small(phase);
    let amplitude = beta * cos_phase;
    let r_sqrt = bessel_rsqrt_hard(x, recip);
    let scale = SQRT_2_OVER_PI * r_sqrt;
    let product = scale * amplitude;
    product.fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins exact results of `f_j0` for tiny, moderate, huge and special arguments.
    #[test]
    fn test_j0() {
        let cases: [(f64, f64); 13] = [
            (f64::EPSILON, 1.0),
            (-0.000000000000000000000532, 1.0),
            (0.0000000000000000000532, 1.0),
            (-2.000976555054876, 0.22332760641907712),
            (-2.3369499004222215E+304, -3.3630754230844632e-155),
            (
                f64::from_bits(0xd71a31ffe2ff7e9f),
                f64::from_bits(0xb2e58532f95056ff),
            ),
            (6.1795701510782757E+307, 6.075192922402001e-155),
            (6.1795701510782757E+301, 4.118334155030934e-152),
            (6.1795701510782757E+157, 9.5371668900364e-80),
            (79., -0.08501719554953485),
            (99.746819858680596470279979, -8.419106281522749e-17),
            (f64::INFINITY, 0.),
            (f64::NEG_INFINITY, 0.),
        ];
        for (arg, expected) in cases {
            assert_eq!(f_j0(arg), expected);
        }
        // Without FMA 2.703816901253004e-16
        #[cfg(any(
            all(target_arch = "x86_64", target_feature = "fma"),
            target_arch = "aarch64"
        ))]
        assert_eq!(f_j0(93.463718781944774171190), 2.7038169012530046e-16);
        assert!(f_j0(f64::NAN).is_nan());
    }
}

1347
vendor/pxfm/src/bessel/j0_coeffs_remez.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1292
vendor/pxfm/src/bessel/j0_coeffs_taylor.rs vendored Normal file

File diff suppressed because it is too large Load Diff

420
vendor/pxfm/src/bessel/j0f.rs vendored Normal file
View File

@@ -0,0 +1,420 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f_coeffs::{J0_ZEROS, J0_ZEROS_VALUE, J0F_COEFFS};
use crate::bessel::trigo_bessel::cos_small;
use crate::double_double::DoubleDouble;
use crate::polyeval::{f_polyeval9, f_polyeval10, f_polyeval12, f_polyeval14};
use crate::sincos_reduce::rem2pif_any;
/// Bessel of the first kind of order 0
///
/// Max ulp 0.5
///
/// Dispatch on `|x|`: zero returns `1`, infinities return `0`, NaN propagates,
/// `|x| <= f32::EPSILON` uses `1 - x^2/4`, `|x| <= 0.25` the Maclaurin series,
/// `|x| <= 74.8` the expansions around zeros/extrema, and larger arguments the
/// asymptotic form. A few inputs are answered from hard-coded bit patterns.
pub fn f_j0f(x: f32) -> f32 {
    // Shifting out the sign bit lets one unsigned compare test |x|.
    let ux = x.to_bits().wrapping_shl(1);
    if ux == 0 {
        // |x| == 0
        return 1.;
    }
    if ux >= 0xffu32 << 24 {
        // |x| == inf, |x| == NaN
        return if x.is_infinite() {
            0.
        } else {
            x + f32::NAN // x == NaN
        };
    }
    if ux <= 0x6800_0000u32 {
        // |x| <= f32::EPSILON
        // taylor series for J0(x) ~ 1 - x^2/4 + O(x^4)
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        ))]
        {
            use crate::common::f_fmlaf;
            return f_fmlaf(x, -x * 0.25, 1.);
        }
        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        )))]
        {
            use crate::common::f_fmla;
            let dx = x as f64;
            return f_fmla(dx, -dx * 0.25, 1.) as f32;
        }
    }
    let abs_bits = x.to_bits() & 0x7fff_ffff;
    if abs_bits <= 0x3e800000u32 {
        // |x| <= 0.25
        return j0f_maclaurin_series(x);
    }
    if abs_bits <= 0x4295999au32 {
        // |x| <= 74.8
        if abs_bits == 0x401a42e8u32 {
            // Hard-coded result for this one input.
            return f32::from_bits(0xbb3b2f69u32);
        }
        return small_argument_path(x);
    }
    match abs_bits {
        // Exceptions
        0x65ce46e4 => f32::from_bits(0x1eed85c4),
        0x7e3dcda0 => f32::from_bits(0x92b81111),
        0x76d84625 => f32::from_bits(0x95d7a68b),
        0x6bf68a7b => f32::from_bits(0x1dc70a09),
        0x7842c820 => f32::from_bits(0x17ebf13e),
        0x4ba332e9 => f32::from_bits(0x27250206),
        _ => j0f_asympt(f32::from_bits(abs_bits)),
    }
}
/**
Maclaurin series of J0 in x^2 with nine coefficients, evaluated in `f64`.

Generated by SageMath:
```python
# Maclaurin series for j0
def print_expansion_at_0_f():
    print(f"pub(crate) const J0_MACLAURIN_SERIES: [u64; 9] = [")
    from mpmath import mp, j0, taylor
    mp.prec = 60
    poly = taylor(lambda val: j0(val), 0, 18)
    z = 0
    for i in range(0, 18, 2):
        print(f"{double_to_hex(poly[i])},")
    print("];")
    print(f"poly {poly}")
print_expansion_at_0_f()
```
**/
#[inline]
fn j0f_maclaurin_series(x: f32) -> f32 {
    // f64 bit patterns of the series coefficients, lowest degree first.
    const COEFFS: [u64; 9] = [
        0x3ff0000000000000,
        0xbfd0000000000000,
        0x3f90000000000000,
        0xbf3c71c71c71c71c,
        0x3edc71c71c71c71c,
        0xbe723456789abcdf,
        0x3e002e85c0898b71,
        0xbd8522a43f65486a,
        0x3d0522a43f65486a,
    ];
    let coeff = |i: usize| f64::from_bits(COEFFS[i]);
    let xd = x as f64;
    let x_sq = xd * xd;
    let sum = f_polyeval9(
        x_sq,
        coeff(0),
        coeff(1),
        coeff(2),
        coeff(3),
        coeff(4),
        coeff(5),
        coeff(6),
        coeff(7),
        coeff(8),
    );
    sum as f32
}
/// This method on small range searches for nearest zero or extremum.
/// Then picks stored series expansion at the point and evaluates the poly at the point.
///
/// The table index is estimated from `|x| * INV_STEP`; the two neighbouring entries of
/// `J0_ZEROS` are compared and the closer one becomes the expansion point. If that is
/// entry 0, the Maclaurin series is used instead.
#[inline]
fn small_argument_path(x: f32) -> f32 {
    // let avg_step = 74.6145 / 47.0;
    // let inv_step = 1.0 / avg_step;
    const INV_STEP: f64 = 0.6299043751549631;
    const LAST_INDEX: f64 = (J0_ZEROS.len() - 1) as f64;
    let ax = f32::from_bits(x.to_bits() & 0x7fff_ffff) as f64;
    let scaled = ax * INV_STEP;
    // SAFETY: `ax` is an absolute value, so `scaled` is never negative, and `min`
    // (which returns the other operand when one is NaN) caps it at `LAST_INDEX`;
    // the value is therefore finite and fits in `usize`.
    let lower_idx = unsafe { scaled.min(LAST_INDEX).to_int_unchecked::<usize>() };
    // SAFETY: same argument as above, applied to the rounded-up index.
    let upper_idx = unsafe { scaled.ceil().min(LAST_INDEX).to_int_unchecked::<usize>() };
    let lower_point = DoubleDouble::from_bit_pair(J0_ZEROS[lower_idx]);
    let upper_point = DoubleDouble::from_bit_pair(J0_ZEROS[upper_idx]);
    let lower_dist = (lower_point.hi - ax).abs();
    let upper_dist = (upper_point.hi - ax).abs();
    // Closer candidate wins; ties go to the upper one.
    let (point, idx, dist) = if lower_dist < upper_dist {
        (lower_point, lower_idx, lower_dist)
    } else {
        (upper_point, upper_idx, upper_dist)
    };
    if idx == 0 {
        return j0f_maclaurin_series(x);
    }
    // We hit exact zero, value, better to return it directly
    if dist == 0. {
        return f64::from_bits(J0_ZEROS_VALUE[idx]) as f32;
    }
    let c = &J0F_COEFFS[idx - 1];
    let coeff = |i: usize| f64::from_bits(c[i]);
    // Offset from the expansion point, subtracting its high and then its low word.
    let r = (ax - point.hi) - point.lo;
    let sum = f_polyeval14(
        r,
        coeff(0),
        coeff(1),
        coeff(2),
        coeff(3),
        coeff(4),
        coeff(5),
        coeff(6),
        coeff(7),
        coeff(8),
        coeff(9),
        coeff(10),
        coeff(11),
        coeff(12),
        coeff(13),
    );
    sum as f32
}
/// Reciprocal square root computed as `(1 / x) * sqrt(x)`.
///
/// For `x == 0` this form yields NaN (`inf * 0`).
#[inline]
pub(crate) fn j1f_rsqrt(x: f64) -> f64 {
    let inv = x.recip();
    inv * x.sqrt()
}
/*
Asymptotic form for large arguments, evaluated in f64.
Evaluates:
J0 = sqrt(2/(PI*x)) * beta(x) * cos(x - PI/4 - alpha(x))
*/
#[inline]
fn j0f_asympt(x: f32) -> f32 {
    const SQRT_2_OVER_PI: f64 = f64::from_bits(0x3fe9884533d43651);
    const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);
    let xd = x as f64;
    let alpha = j0f_asympt_alpha(xd);
    let beta = j0f_asympt_beta(xd);
    let angle = rem2pif_any(x);
    // phase = reduced angle + (-PI/4 - alpha)
    let shift = MPI_OVER_4 - alpha;
    let phase = angle + shift;
    let amplitude = beta * cos_small(phase);
    let scale = SQRT_2_OVER_PI * j1f_rsqrt(xd);
    (scale * amplitude) as f32
}
/**
Alpha (phase correction) series of the asymptotic form.

Evaluates a 12-coefficient polynomial in `(1/x)^2` and multiplies it by `1/x`.

Note expansion generation below: this is negative series expressed in Sage as positive,
so before any real evaluation `x=1/x` should be applied.

Generated by SageMath:
```python
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))
R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()
def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )
def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )
P = Pn_asymptotic(0, x, 50)
Q = Qn_asymptotic(0, x, 50)
R_series = (-Q/P)
# alpha is atan(R_series) so we're doing Taylor series atan expansion on R_series
arctan_series_Z = sum([QQ(-1)**k * x**(QQ(2)*k+1) / RealField(700)(RealField(700)(2)*k+1) for k in range(25)])
alpha_series = arctan_series_Z(R_series)
# see the series
print(alpha_series)
```
**/
#[inline]
pub(crate) fn j0f_asympt_alpha(x: f64) -> f64 {
    // f64 bit patterns of the series coefficients, lowest degree first.
    const COEFFS: [u64; 12] = [
        0x3fc0000000000000,
        0xbfb0aaaaaaaaaaab,
        0x3fcad33333333333,
        0xbffa358492492492,
        0x403779a1f8e38e39,
        0xc080bd1fc8b1745d,
        0x40d16b51e66c789e,
        0xc128ecc3af33ab37,
        0x418779dae2b8512f,
        0xc1ec296336955c7f,
        0x4254f5ee683b6432,
        0xc2c2f51eced6693f,
    ];
    let coeff = |i: usize| f64::from_bits(COEFFS[i]);
    let inv_x = 1. / x;
    let inv_x_sq = inv_x * inv_x;
    let poly = f_polyeval12(
        inv_x_sq,
        coeff(0),
        coeff(1),
        coeff(2),
        coeff(3),
        coeff(4),
        coeff(5),
        coeff(6),
        coeff(7),
        coeff(8),
        coeff(9),
        coeff(10),
        coeff(11),
    );
    poly * inv_x
}
/**
Beta (amplitude) series of the asymptotic form.

Evaluates a 10-coefficient polynomial in `(1/x)^2`.

Generated by SageMath:
```python
#generate b series
def binomial_like(n, m):
    prod = QQ(1)
    z = QQ(4)*(n**2)
    for k in range(1,m + 1):
        prod *= (z - (2*k - 1)**2)
    return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))
R = LaurentSeriesRing(RealField(300), 'x', default_prec=300)
x = R.gen()
def Pn_asymptotic(n, y, terms=10):
    # now y = 1/x
    return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )
def Qn_asymptotic(n, y, terms=10):
    return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )
P = Pn_asymptotic(0, x, 50)
Q = Qn_asymptotic(0, x, 50)
def sqrt_series(s):
    val = S.valuation()
    lc = S[val]  # Leading coefficient
    b = lc.sqrt() * x**(val // 2)
    for _ in range(5):
        b = (b + S / b) / 2
        b = b
    return b
S = (P**2 + Q**2).truncate(50)
b_series = sqrt_series(S).truncate(30)
#see the series
print(b_series)
```
**/
#[inline]
pub(crate) fn j0f_asympt_beta(x: f64) -> f64 {
    // f64 bit patterns of the series coefficients, lowest degree first.
    const COEFFS: [u64; 10] = [
        0x3ff0000000000000,
        0xbfb0000000000000,
        0x3fba800000000000,
        0xbfe15f0000000000,
        0x4017651180000000,
        0xc05ab8c13b800000,
        0x40a730492f262000,
        0xc0fc73a7acd696f0,
        0x41577458dd9fce68,
        0xc1b903ab9b27e18f,
    ];
    let coeff = |i: usize| f64::from_bits(COEFFS[i]);
    let inv_x = 1. / x;
    let inv_x_sq = inv_x * inv_x;
    f_polyeval10(
        inv_x_sq,
        coeff(0),
        coeff(1),
        coeff(2),
        coeff(3),
        coeff(4),
        coeff(5),
        coeff(6),
        coeff(7),
        coeff(8),
        coeff(9),
    )
}
#[cfg(test)]
mod tests {
    use crate::f_j0f;

    /// Pins exact results of `f_j0f` for mirrored arguments and the special values.
    #[test]
    fn test_j0f() {
        println!("0x{:8x}", f32::EPSILON.to_bits().wrapping_shl(1));
        let cases: [(f32, f32); 3] = [
            (3123., 0.012329336),
            (0.1, 0.99750155),
            (15.1, -0.03456193),
        ];
        for (arg, expected) in cases {
            assert_eq!(f_j0f(-arg), expected);
            assert_eq!(f_j0f(arg), expected);
        }
        for inf in [f32::INFINITY, f32::NEG_INFINITY] {
            assert_eq!(f_j0f(inf), 0.);
        }
        assert!(f_j0f(f32::NAN).is_nan());
    }
}

1050
vendor/pxfm/src/bessel/j0f_coeffs.rs vendored Normal file

File diff suppressed because it is too large Load Diff

697
vendor/pxfm/src/bessel/j1.rs vendored Normal file
View File

@@ -0,0 +1,697 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![allow(clippy::excessive_precision)]
use crate::bessel::alpha1::{
bessel_1_asympt_alpha, bessel_1_asympt_alpha_fast, bessel_1_asympt_alpha_hard,
};
use crate::bessel::beta1::{
bessel_1_asympt_beta, bessel_1_asympt_beta_fast, bessel_1_asympt_beta_hard,
};
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::j1_coeffs::{J1_COEFFS, J1_ZEROS, J1_ZEROS_VALUE};
use crate::bessel::j1_coeffs_taylor::J1_COEFFS_TAYLOR;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::{f_polyeval8, f_polyeval9, f_polyeval12, f_polyeval19};
use crate::sin_helper::{sin_dd_small, sin_dd_small_fast, sin_f128_small};
use crate::sincos_reduce::{AngleReduced, rem2pi_any, rem2pi_f128};
/// Bessel function of the first kind of order 1, `J1(x)`, in double precision.
///
/// Special values: an infinite argument returns `0.` and a NaN argument returns NaN.
///
/// Note about accuracy:
/// - Close to zero the Bessel function takes tiny values, so testing against MPFR must be done
///   in exactly the same precision, since any nearest representable number has ULP > 0.5.
///   For example `J1(0.000000000000000000000000000000000000023509886)` in single precision
///   has 0.7 ULP for any number with extended precision that would be represented in f32.
///   The same applies to `J1(4.4501477170144018E-309)` in double precision and to some other
///   subnormal numbers.
pub fn f_j1(x: f64) -> f64 {
    // Bits with the sign shifted out: orders like |x| and puts inf/NaN at the very top.
    let ux = x.to_bits().wrapping_shl(1);
    if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 {
        // |x| <= f64::EPSILON, |x| == inf, x == NaN
        if ux <= 0x72338c9356bb0314u64 {
            // |x| <= 0.000000000000000000000000000000001241
            // J1(x) ~ x/2 + O(x^3)
            return x * 0.5;
        }
        if ux <= 0x7960000000000000u64 {
            // |x| <= f64::EPSILON
            // J1(x) ~ x/2 - x^3/16 + O(x^5) = x * (1/2 - (x/4)^2)
            //
            // The previous code squared x/8, i.e. it evaluated x/2 - x^3/64, which does not
            // match the series. For |x| <= f64::EPSILON the correction is far below half an
            // ulp of 0.5, so the rounded result is the same either way; the factor is fixed
            // so that the code agrees with the documented expansion.
            let quarter_x = x * 0.25; // exact. x / 4
            return f_fmla(quarter_x, -quarter_x, 0.5) * x;
        }
        if x.is_infinite() {
            return 0.;
        }
        return x + f64::NAN; // x == NaN
    }

    let ax: u64 = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    if ax < 0x4052a6784230fcf8u64 {
        // |x| < 74.60109
        if ax < 0x3feccccccccccccd {
            // |x| < 0.9
            return j1_maclaurin_series_fast(x);
        }
        return j1_small_argument_fast(x);
    }

    // |x| >= 74.60109
    j1_asympt_fast(x)
}
/*
Evaluates:
J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
discarding 1*PI/2 using identities gives:
J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is:
J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))

This is the first stage of the asymptotic path: it uses the `*_fast` helpers in
double-double arithmetic and returns its result only when the rounding test at the end
passes; otherwise it hands over to `j1_asympt`, reusing the reciprocal, the reciprocal
square root and the reduced angle computed here.
*/
#[inline]
fn j1_asympt_fast(x: f64) -> f64 {
// Keep the signed input: the fallback derives the sign from it again.
let origin_x = x;
static SGN: [f64; 2] = [1., -1.];
// The expansion is evaluated on |x|; this factor puts the sign of the input back.
let sign_scale = SGN[x.is_sign_negative() as usize];
let x = x.abs();
// sqrt(2/PI) as a double-double; the second word (~0.7979) is the leading part.
const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc8cbc0d30ebfd15),
f64::from_bits(0x3fe9884533d43651),
);
// -PI/4 as a double-double; the second word (~-0.7854) is the leading part.
const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc81a62633145c07),
f64::from_bits(0xbfe921fb54442d18),
);
// 1/x as a double-double. The first branch computes 4/x and then scales by 0.25.
// NOTE(review): the threshold literal below appears to have 15 hex digits (i.e.
// 0x07fd_0000_0000_0000), which makes the condition true for every |x| >= 74.60109 that
// `f_j1` routes here and leaves the `from_recip` branch unused. If 0x7fd0000000000000
// (2^1022) was intended, confirm upstream before changing it: doing so changes which
// reciprocal routine is used for ordinary arguments.
let recip = if x.to_bits() > 0x7fd000000000000u64 {
DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_safe_div(4.0, x), 0.25)
} else {
DoubleDouble::from_recip(x)
};
// alpha(x) and beta(x) of the formula above, both evaluated from 1/x.
let alpha = bessel_1_asympt_alpha_fast(recip);
let beta = bessel_1_asympt_beta_fast(recip);
// x mod 2*PI
let AngleReduced { angle } = rem2pi_any(x);
// Without full subtraction cancellation happens sometimes
let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
// (x mod 2*PI) - PI/4 - alpha(x)
let r0 = DoubleDouble::full_dd_add(angle, x0pi34);
let m_sin = sin_dd_small_fast(r0);
// beta(x) * sin(...)
let z0 = DoubleDouble::quick_mult(beta, m_sin);
// sqrt(2/PI) * 1/sqrt(x)
let r_sqrt = DoubleDouble::from_rsqrt_fast(x);
let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
let p = DoubleDouble::quick_mult(scale, z0);
// Error bound used for the rounding test: p.hi * 2^-65 + 2^-89.
let err = f_fmla(
p.hi,
f64::from_bits(0x3be0000000000000), // 2^-65
f64::from_bits(0x3a60000000000000), // 2^-89
);
// If both ends of the error interval round to the same double, the result is accepted.
let ub = p.hi + (p.lo + err);
let lb = p.hi + (p.lo - err);
if ub == lb {
return p.to_f64() * sign_scale;
}
// Rounding could not be decided at this precision: retry with the more accurate stage.
j1_asympt(origin_x, recip, r_sqrt, angle)
}
/*
Evaluates:
J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
discarding 1*PI/2 using identities gives:
J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is:
J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))
*/
fn j1_asympt(x: f64, recip: DoubleDouble, r_sqrt: DoubleDouble, angle: DoubleDouble) -> f64 {
    // Second stage of the asymptotic evaluation. `recip`, `r_sqrt` and `angle` are the
    // quantities already computed by the caller; `x` is the original signed argument.
    const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc8cbc0d30ebfd15),
        f64::from_bits(0x3fe9884533d43651),
    );
    const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0xbc81a62633145c07),
        f64::from_bits(0xbfe921fb54442d18),
    );

    let phase_corr = bessel_1_asympt_alpha(recip);
    let amplitude = bessel_1_asympt_beta(recip);

    // Without full subtraction cancellation happens sometimes
    let shift = DoubleDouble::full_dd_sub(MPI_OVER_4, phase_corr);
    let sine_arg = DoubleDouble::full_dd_add(angle, shift);
    let sine = sin_dd_small(sine_arg);

    let weighted = DoubleDouble::quick_mult(amplitude, sine);
    let prefactor = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
    let product = DoubleDouble::quick_mult(prefactor, weighted);
    let p = DoubleDouble::from_exact_add(product.hi, product.lo);

    // Rounding test with the bound p.hi * 2^-67 + 2^-99.
    let err = f_fmla(
        p.hi,
        f64::from_bits(0x3bc0000000000000), // 2^-67
        f64::from_bits(0x39c0000000000000), // 2^-99
    );
    let upper = p.hi + (p.lo + err);
    let lower = p.hi + (p.lo - err);
    if upper != lower {
        // Still undecided: fall back to the 128-bit evaluation.
        return j1_asympt_hard(x);
    }

    // The product was formed for |x|; restore the sign of the input.
    let sign_scale = if x.is_sign_negative() { -1. } else { 1. };
    p.to_f64() * sign_scale
}
/**
Generated in Sollya:
```text
pretty = proc(u) {
return ~(floor(u*1000)/1000);
};
bessel_j1 = library("./cmake-build-release/libbessel_sollya.dylib");
f = bessel_j1(x)/x;
d = [0, 0.921];
w = 1;
pf = fpminimax(f, [|0,2,4,6,8,10,12,14,16,18,20,22,24|], [|107, 107, 107, 107, 107, D...|], d, absolute, floating);
w = 1;
or_f = bessel_j1(x);
pf1 = pf * x;
err_p = -log2(dirtyinfnorm(pf1*w-or_f, d));
print ("relative error:", pretty(err_p));
for i from 0 to degree(pf) by 2 do {
print("'", coeff(pf, i), "',");
};
```
See ./notes/bessel_sollya/bessel_j1_at_zero_fast.sollya
**/
#[inline]
pub(crate) fn j1_maclaurin_series_fast(x: f64) -> f64 {
    // Constant term of the polynomial for J1(x)/x, kept as a double-double
    // (leading word 0.5).
    const LEAD: DoubleDouble = DoubleDouble::from_bit_pair((0x3b30e9e087200000, 0x3fe0000000000000));

    // x^2 as an exact double-double product.
    let sq = DoubleDouble::from_exact_mult(x, x);
    // Remaining terms, evaluated in plain f64 on the leading word of x^2.
    let tail = f_polyeval12(
        sq.hi,
        f64::from_bits(0xbfb0000000000000),
        f64::from_bits(0x3f65555555555555),
        f64::from_bits(0xbf0c71c71c71c45e),
        f64::from_bits(0x3ea6c16c16b82b02),
        f64::from_bits(0xbe3845c87ec0cbef),
        f64::from_bits(0x3dc27e0313e8534c),
        f64::from_bits(0xbd4443dd2d0305d0),
        f64::from_bits(0xbd0985a435fe9aa1),
        f64::from_bits(0x3d10c82d92c46d30),
        f64::from_bits(0xbd0aa3684321f219),
        f64::from_bits(0x3cf8351f29ac345a),
        f64::from_bits(0xbcd333fe6cd52c9f),
    );
    // (x^2 * tail + LEAD) * x
    let series = DoubleDouble::mul_f64_add(sq, tail, LEAD);
    let z = DoubleDouble::quick_mult_f64(series, x);

    // squaring error (2^-56) + poly error 2^-75
    let err = f_fmla(
        sq.hi,
        f64::from_bits(0x3c70000000000000), // 2^-56
        f64::from_bits(0x3b40000000000000), // 2^-75
    );
    let upper = z.hi + (z.lo + err);
    let lower = z.hi + (z.lo - err);
    if upper != lower {
        // Rounding is not decided at this precision: use the more accurate series.
        return j1_maclaurin_series(x);
    }
    z.to_f64()
}
/**
Generated in Sollya:
```text
pretty = proc(u) {
return ~(floor(u*1000)/1000);
};
bessel_j1 = library("./cmake-build-release/libbessel_sollya.dylib");
f = bessel_j1(x)/x;
d = [0, 0.921];
w = 1;
pf = fpminimax(f, [|0,2,4,6,8,10,12,14,16,18,20,22,24|], [|107, 107, 107, 107, 107, D...|], d, absolute, floating);
w = 1;
or_f = bessel_j1(x);
pf1 = pf * x;
err_p = -log2(dirtyinfnorm(pf1*w-or_f, d));
print ("relative error:", pretty(err_p));
for i from 0 to degree(pf) by 2 do {
print("'", coeff(pf, i), "',");
};
```
See ./notes/bessel_sollya/bessel_j1_at_zero.sollya
**/
pub(crate) fn j1_maclaurin_series(x: f64) -> f64 {
    // The five lowest coefficients as double-double bit pairs; the second word of each
    // pair is the leading part.
    const CL: [(u64, u64); 5] = [
        (0xb930000000000000, 0x3fe0000000000000),
        (0x39c8e80000000000, 0xbfb0000000000000),
        (0x3c05555554f3add7, 0x3f65555555555555),
        (0xbbac71c4eb0f8c94, 0xbf0c71c71c71c71c),
        (0xbb3f56b7a43206d4, 0x3ea6c16c16c16c17),
    ];

    // Evaluate on |x| and put the sign back at the end.
    let sign_scale = if x.is_sign_negative() { -1. } else { 1. };
    let ax = x.abs();

    let sq = DoubleDouble::from_exact_mult(ax, ax);
    // Higher-order terms in plain f64.
    let tail = f_polyeval8(
        sq.hi,
        f64::from_bits(0xbe3845c8a0ce5129),
        f64::from_bits(0x3dc27e4fb7789ea2),
        f64::from_bits(0xbd4522a43f633af1),
        f64::from_bits(0x3cc2c97589d53f97),
        f64::from_bits(0xbc3ab8151dca7912),
        f64::from_bits(0x3baf08732286d1d4),
        f64::from_bits(0xbb10ac65637413f4),
        f64::from_bits(0xbae4d8336e4f779c),
    );

    // Horner steps in double-double for CL[4] down to CL[0].
    let mut acc = DoubleDouble::mul_f64_add(sq, tail, DoubleDouble::from_bit_pair(CL[4]));
    for &pair in CL[..4].iter().rev() {
        acc = DoubleDouble::mul_add(sq, acc, DoubleDouble::from_bit_pair(pair));
    }
    let p = DoubleDouble::quick_mult_f64(acc, ax);

    // Rounding test with the bound p.hi * 2^-66 + 2^-95.
    let err = f_fmla(
        p.hi,
        f64::from_bits(0x3bd0000000000000), // 2^-66
        f64::from_bits(0x3a00000000000000), // 2^-95
    );
    let upper = p.hi + (p.lo + err);
    let lower = p.hi + (p.lo - err);
    if upper == lower {
        p.to_f64() * sign_scale
    } else {
        // The hard path takes the original signed argument.
        j1_maclaurin_series_hard(x)
    }
}
/**
Taylor expansion at 0
Generated by SageMath:
```python
def print_expansion_at_0():
print(f"static C: [DyadicFloat128; 13] = ")
from mpmath import mp, j1, taylor, expm1
poly = taylor(lambda val: j1(val), 0, 26)
real_i = 0
print("[")
for i in range(1, len(poly), 2):
print_dyadic(poly[i])
real_i = real_i + 1
print("],")
print("];")
mp.prec = 180
print_expansion_at_0()
```
**/
#[cold]
#[inline(never)]
fn j1_maclaurin_series_hard(x: f64) -> f64 {
    // Coefficients of the odd powers x^1, x^3, ..., x^25 in 128-bit dyadic form.
    static C: [DyadicFloat128; 13] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -128,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -131,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -136,
            mantissa: 0xaaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaab_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -142,
            mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -148,
            mantissa: 0xb60b60b6_0b60b60b_60b60b60_b60b60b6_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -155,
            mantissa: 0xc22e4506_72894ab6_cd8efb11_d33f5618_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -162,
            mantissa: 0x93f27dbb_c4fae397_780b69f5_333c725b_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -170,
            mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -178,
            mantissa: 0x964bac6d_7ae67d8d_aec68405_485dea03_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -187,
            mantissa: 0xd5c0f53a_fe6fa17f_8c7b0b68_39691f4e_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -196,
            mantissa: 0xf8bb4be7_8e7896b0_58fee362_01a4370c_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -205,
            mantissa: 0xf131bdf7_cff8d02e_e1ef6820_f9d58ab6_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -214,
            mantissa: 0xc5e72c48_0d1aec75_3caa2e0d_edd008ca_u128,
        },
    ];

    // Work on |x| and reapply the sign of the input to the final value.
    let sign_scale = if x.is_sign_negative() { -1. } else { 1. };
    let ax = x.abs();

    let rx = DyadicFloat128::new_from_f64(ax);
    let sq = rx * rx;

    // Horner scheme in x^2, starting from the highest coefficient.
    let mut acc = C[12];
    for &coeff in C[..12].iter().rev() {
        acc = sq * acc + coeff;
    }

    // Multiply by x once more to obtain the odd series.
    (acc * rx).fast_as_f64() * sign_scale
}
/// Evaluates J1 from a series stored around the nearest tabulated point.
///
/// This method searches a small range of `J1_ZEROS` for the nearest zero or extremum,
/// then picks the series expansion stored for that point and evaluates the polynomial
/// at the offset from it. When the rounding test at the end fails, the evaluation is
/// repeated with more double-double terms by `j1_small_argument_dd`.
#[inline]
pub(crate) fn j1_small_argument_fast(x: f64) -> f64 {
static SIGN: [f64; 2] = [1., -1.];
// The work is done on |x|; this factor restores the sign of the input.
let sign_scale = SIGN[x.is_sign_negative() as usize];
let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
// let avg_step = 74.60109 / 47.0;
// let inv_step = 1.0 / avg_step;
const INV_STEP: f64 = 0.6300176043004198;
// Estimated fractional position of |x| in the table.
let fx = x_abs * INV_STEP;
const J1_ZEROS_COUNT: f64 = (J1_ZEROS.len() - 1) as f64;
// SAFETY: `x_abs` has its sign bit cleared, so `fx` is non-negative or NaN. `f64::min`
// returns the non-NaN operand, so the converted value always lies in
// `0.0..=J1_ZEROS_COUNT`: it is finite and fits in `usize` after truncation.
let idx0 = unsafe { fx.min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
// SAFETY: same argument as above; `ceil` keeps a non-negative value non-negative and
// the result is clamped to `J1_ZEROS_COUNT` before the conversion.
let idx1 = unsafe { fx.ceil().min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
// The two neighbouring table entries that bracket the estimate.
let found_zero0 = DoubleDouble::from_bit_pair(J1_ZEROS[idx0]);
let found_zero1 = DoubleDouble::from_bit_pair(J1_ZEROS[idx1]);
let dist0 = (found_zero0.hi - x_abs).abs();
let dist1 = (found_zero1.hi - x_abs).abs();
// Keep whichever candidate is closer to |x|.
let (found_zero, idx, dist) = if dist0 < dist1 {
(found_zero0, idx0, dist0)
} else {
(found_zero1, idx1, dist1)
};
// The first table entry has no stored expansion (the coefficient tables are indexed
// with `idx - 1`); the Maclaurin series is used there instead.
if idx == 0 {
return j1_maclaurin_series_fast(x);
}
// Offset of |x| from the chosen point, as a double-double.
let r = DoubleDouble::full_add_f64(-found_zero, x_abs);
// We hit exact zero, value, better to return it directly
if dist == 0. {
return f64::from_bits(J1_ZEROS_VALUE[idx]) * sign_scale;
}
// Very close to the tabulated point the Taylor coefficient table is used instead of
// the regular one.
let is_zero_too_close = dist.abs() < 1e-3;
let c = if is_zero_too_close {
&J1_COEFFS_TAYLOR[idx - 1]
} else {
&J1_COEFFS[idx - 1]
};
// Terms 5..=23 are evaluated in plain f64, using only the leading word (`.1`) of each
// coefficient pair.
let p = f_polyeval19(
r.hi,
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
f64::from_bits(c[9].1),
f64::from_bits(c[10].1),
f64::from_bits(c[11].1),
f64::from_bits(c[12].1),
f64::from_bits(c[13].1),
f64::from_bits(c[14].1),
f64::from_bits(c[15].1),
f64::from_bits(c[16].1),
f64::from_bits(c[17].1),
f64::from_bits(c[18].1),
f64::from_bits(c[19].1),
f64::from_bits(c[20].1),
f64::from_bits(c[21].1),
f64::from_bits(c[22].1),
f64::from_bits(c[23].1),
);
// Terms 4..=0 are folded in with double-double Horner steps.
let mut z = DoubleDouble::mul_f64_add(r, p, DoubleDouble::from_bit_pair(c[4]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[3]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[2]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[1]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[0]));
// Error bound for the rounding test: z.hi * 2^-56 + 2^-64.
let err = f_fmla(
z.hi,
f64::from_bits(0x3c70000000000000), // 2^-56
f64::from_bits(0x3bf0000000000000), // 2^-64
);
// Accept the result only if both ends of the error interval round to the same double.
let ub = z.hi + (z.lo + err);
let lb = z.hi + (z.lo - err);
if ub == lb {
return z.to_f64() * sign_scale;
}
// Undecided: redo the evaluation with more double-double terms.
j1_small_argument_dd(sign_scale, r, c)
}
fn j1_small_argument_dd(sign_scale: f64, r: DoubleDouble, c0: &[(u64, u64); 24]) -> f64 {
let c = &c0[15..];
let p0 = f_polyeval9(
r.to_f64(),
f64::from_bits(c[0].1),
f64::from_bits(c[1].1),
f64::from_bits(c[2].1),
f64::from_bits(c[3].1),
f64::from_bits(c[4].1),
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
);
let c = c0;
let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0]));
let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo);
let err = f_fmla(
p.hi,
f64::from_bits(0x3c10000000000000), // 2^-62
f64::from_bits(0x3a00000000000000), // 2^-95
);
let ub = p.hi + (p.lo + err);
let lb = p.hi + (p.lo - err);
if ub != lb {
return j1_small_argument_path_hard(sign_scale, r, c);
}
p.to_f64() * sign_scale
}
#[cold]
#[inline(never)]
fn j1_small_argument_path_hard(sign_scale: f64, r: DoubleDouble, c: &[(u64, u64); 24]) -> f64 {
    // Full Horner evaluation with every coefficient taken as a double-double,
    // renormalising the accumulator after each step.
    let mut acc = DoubleDouble::from_bit_pair(c[23]);
    for &pair in c[..23].iter().rev() {
        let step = DoubleDouble::mul_add(r, acc, DoubleDouble::from_bit_pair(pair));
        acc = DoubleDouble::from_exact_add(step.hi, step.lo);
    }
    acc.to_f64() * sign_scale
}
/*
Evaluates:
J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
discarding 1*PI/2 using identities gives:
J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is:
J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))
This method is required for situations where x*x or 1/(x*x) will overflow
*/
#[cold]
#[inline(never)]
fn j1_asympt_hard(x: f64) -> f64 {
    // sqrt(2/PI) and -PI/4 in 128-bit dyadic form.
    const SQRT_2_OVER_PI: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -128,
        mantissa: 0xcc42299e_a1b28468_7e59e280_5d5c7180_u128,
    };
    const MPI_OVER_4: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Neg,
        exponent: -128,
        mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
    };

    // Evaluate on |x|; the sign of the input is reapplied to the final value.
    let sign_scale = if x.is_sign_negative() { -1. } else { 1. };
    let ax = x.abs();

    let ax_wide = DyadicFloat128::new_from_f64(ax);
    let inv = DyadicFloat128::accurate_reciprocal(ax);

    let phase_corr = bessel_1_asympt_alpha_hard(inv);
    let amplitude = bessel_1_asympt_beta_hard(inv);

    // (x mod 2*PI) - PI/4 - alpha(x)
    let reduced = rem2pi_f128(ax_wide);
    let shift = MPI_OVER_4 - phase_corr;
    let sine = sin_f128_small(reduced + shift);

    // sqrt(2/PI) * 1/sqrt(x) * beta(x) * sin(...)
    let weighted = amplitude * sine;
    let inv_sqrt = bessel_rsqrt_hard(ax, inv);
    let prefactor = SQRT_2_OVER_PI * inv_sqrt;
    let product = prefactor * weighted;
    product.fast_as_f64() * sign_scale
}
#[cfg(test)]
mod tests {
use super::*;
// Spot checks of `f_j1` against fixed reference values, covering tiny, moderate and
// very large arguments, both signs, and the special values.
#[test]
fn test_j1() {
// Arguments at or below f64::EPSILON.
assert_eq!(f_j1(0.000000000000000000000000000000001241), 6.205e-34);
assert_eq!(f_j1(0.0000000000000000000000000000004321), 2.1605e-31);
assert_eq!(f_j1(0.00000000000000000004321), 2.1605e-20);
// Arguments below the 74.60109 switch-over to the asymptotic path.
assert_eq!(f_j1(73.81695991658546), -0.06531447184607607);
assert_eq!(f_j1(0.01), 0.004999937500260416);
assert_eq!(f_j1(0.9), 0.4059495460788057);
// A very large argument.
assert_eq!(
f_j1(162605674999778540000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008686943178258183
);
// The same magnitude with both signs: the results differ only in sign.
assert_eq!(f_j1(3.831705970207517), -1.8501090915423025e-15);
assert_eq!(f_j1(-3.831705970207517), 1.8501090915423025e-15);
// A negative argument of very large magnitude.
assert_eq!(f_j1(-6.1795701510782757E+307), 8.130935041593236e-155);
// Tiny arguments of both signs.
assert_eq!(
f_j1(0.000000000000000000000000000000000000008827127),
0.0000000000000000000000000000000000000044135635
);
assert_eq!(
f_j1(-0.000000000000000000000000000000000000008827127),
-0.0000000000000000000000000000000000000044135635
);
assert_eq!(f_j1(5.4), -0.3453447907795863);
// Arguments just above the 74.60109 switch-over.
assert_eq!(
f_j1(77.743162408196766932633181568235159),
0.09049267898021947
);
assert_eq!(
f_j1(84.027189586293545175976760219782591),
0.0870430264022591
);
// Special values: infinities give 0, NaN gives NaN.
assert_eq!(f_j1(f64::NEG_INFINITY), 0.0);
assert_eq!(f_j1(f64::INFINITY), 0.0);
assert!(f_j1(f64::NAN).is_nan());
}
}

1543
vendor/pxfm/src/bessel/j1_coeffs.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1282
vendor/pxfm/src/bessel/j1_coeffs_taylor.rs vendored Normal file

File diff suppressed because it is too large Load Diff

422
vendor/pxfm/src/bessel/j1f.rs vendored Normal file
View File

@@ -0,0 +1,422 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j1_coeffs::{J1_ZEROS, J1_ZEROS_VALUE};
use crate::bessel::j1f_coeffs::J1F_COEFFS;
use crate::bessel::trigo_bessel::sin_small;
use crate::double_double::DoubleDouble;
use crate::polyeval::{f_polyeval7, f_polyeval10, f_polyeval12, f_polyeval14};
use crate::sincos_reduce::rem2pif_any;
/// Bessel function of the first kind of order 1, `J1(x)`, in single precision.
///
/// Max ULP 0.5
///
/// Special values: a zero argument is returned unchanged, an infinite argument
/// returns `0.` and a NaN argument returns NaN.
///
/// Note about accuracy:
/// - Close to zero the Bessel function takes tiny values, so testing against MPFR must be
///   done in exactly the same precision, since any nearest representable number has
///   ULP > 0.5. For example `J1(0.000000000000000000000000000000000000023509886)` in single
///   precision has an error of at least 0.72 ULP for any number with extended precision
///   that would be represented in f32.
pub fn f_j1f(x: f32) -> f32 {
    // Bits with the sign shifted out: zero stays zero, inf/NaN end up at the top.
    let ux = x.to_bits().wrapping_shl(1);
    if ux >= 0xffu32 << 24 || ux == 0 {
        // |x| == 0, |x| == inf, |x| == NaN
        return if ux == 0 {
            // |x| == 0
            x
        } else if x.is_infinite() {
            0.
        } else {
            x + f32::NAN // x == NaN
        };
    }

    let ax = x.to_bits() & 0x7fff_ffff;
    if ax < 0x429533c2u32 {
        // |x| < 74.60109
        return if ax >= 0x3e800000u32 {
            // 0.25 <= |x| < 74.60109
            small_argument_path(x)
        } else if ax <= 0x34000000u32 {
            // |x| <= f32::EPSILON
            // taylor series for J1(x) ~ x/2 + O(x^3)
            x * 0.5
        } else {
            // f32::EPSILON < |x| < 0.25
            poly_near_zero(x)
        };
    }

    // Exceptional cases: two magnitudes with a hard-coded result. The table holds the
    // value for the positive argument; a negative argument gets the negated value.
    let exceptional = match ax {
        0x6ef9be45 => Some(-f32::from_bits(0x187d8a8f)),
        0x7f0e5a38 => Some(f32::from_bits(0x131f680b)),
        _ => None,
    };
    if let Some(value) = exceptional {
        return if x.is_sign_negative() { -value } else { value };
    }

    j1f_asympt(x) as f32
}
#[inline]
fn j1f_rsqrt(x: f64) -> f64 {
    // 1/sqrt(x), formed as (1/x) * sqrt(x).
    let inv_x = 1. / x;
    let root = x.sqrt();
    inv_x * root
}
/*
Evaluates:
J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
discarding 1*PI/2 using identities gives:
J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is:
J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))
*/
#[inline]
fn j1f_asympt(x: f32) -> f64 {
    const SQRT_2_OVER_PI: f64 = f64::from_bits(0x3fe9884533d43651);
    const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);

    // Evaluate on |x| and reapply the sign of the input to the result.
    let sign_scale: f64 = if x.is_sign_negative() { -1. } else { 1. };
    let ax = f32::from_bits(x.to_bits() & 0x7fff_ffff);
    let axd = f64::from(ax);

    let phase_corr = j1f_asympt_alpha(axd);
    let amplitude = j1f_asympt_beta(axd);
    // x mod 2*PI, reduced from the single-precision argument.
    let reduced = rem2pif_any(ax);

    // (x mod 2*PI) - PI/4 - alpha(x)
    let shift = MPI_OVER_4 - phase_corr;
    let sine = sin_small(reduced + shift);

    // sqrt(2/PI) * 1/sqrt(x) * beta(x) * sin(...)
    let weighted = amplitude * sine;
    let prefactor = SQRT_2_OVER_PI * j1f_rsqrt(axd);
    prefactor * weighted * sign_scale
}
/**
Note expansion generation below: this is negative series expressed in Sage as positive,
so before any real evaluation `x=1/x` should be applied.
Generated by SageMath:
```python
def binomial_like(n, m):
prod = QQ(1)
z = QQ(4)*(n**2)
for k in range(1,m + 1):
prod *= (z - (2*k - 1)**2)
return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))
R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()
def Pn_asymptotic(n, y, terms=10):
# now y = 1/x
return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )
def Qn_asymptotic(n, y, terms=10):
return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )
P = Pn_asymptotic(1, x, 50)
Q = Qn_asymptotic(1, x, 50)
R_series = (-Q/P)
# alpha is atan(R_series) so we're doing Taylor series atan expansion on R_series
arctan_series_Z = sum([QQ(-1)**k * x**(QQ(2)*k+1) / RealField(700)(RealField(700)(2)*k+1) for k in range(25)])
alpha_series = arctan_series_Z(R_series)
# see the series
print(alpha_series)
```
See notes/bessel_asympt.ipynb for generation
**/
#[inline]
pub(crate) fn j1f_asympt_alpha(x: f64) -> f64 {
    // Series coefficients, decoded from their IEEE-754 bit patterns at compile time and
    // handed to `f_polyeval12` in table order.
    const ALPHA: [f64; 12] = [
        f64::from_bits(0xbfd8000000000000),
        f64::from_bits(0x3fc5000000000000),
        f64::from_bits(0xbfd7bccccccccccd),
        f64::from_bits(0x4002f486db6db6db),
        f64::from_bits(0xc03e9fbf40000000),
        f64::from_bits(0x4084997b55945d17),
        f64::from_bits(0xc0d4a914195269d9),
        f64::from_bits(0x412cd1b53816aec1),
        f64::from_bits(0xc18aa4095d419351),
        f64::from_bits(0x41ef809305f11b9d),
        f64::from_bits(0xc2572e6809ed618b),
        f64::from_bits(0x42c4c5b6057839f9),
    ];
    // The polynomial is in (1/x)^2 and the result is multiplied by 1/x once more.
    let inv_x = 1. / x;
    let inv_x_sq = inv_x * inv_x;
    let series = f_polyeval12(
        inv_x_sq, ALPHA[0], ALPHA[1], ALPHA[2], ALPHA[3], ALPHA[4], ALPHA[5], ALPHA[6], ALPHA[7],
        ALPHA[8], ALPHA[9], ALPHA[10], ALPHA[11],
    );
    series * inv_x
}
/**
Note expansion generation below: this is negative series expressed in Sage as positive,
so before any real evaluation `x=1/x` should be applied
Generated by SageMath:
```python
def binomial_like(n, m):
prod = QQ(1)
z = QQ(4)*(n**2)
for k in range(1,m + 1):
prod *= (z - (2*k - 1)**2)
return prod / (QQ(2)**(2*m) * (ZZ(m).factorial()))
R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()
def Pn_asymptotic(n, y, terms=10):
# now y = 1/x
return sum( (-1)**m * binomial_like(n, 2*m) / (QQ(2)**(2*m)) * y**(QQ(2)*m) for m in range(terms) )
def Qn_asymptotic(n, y, terms=10):
return sum( (-1)**m * binomial_like(n, 2*m + 1) / (QQ(2)**(2*m + 1)) * y**(QQ(2)*m + 1) for m in range(terms) )
P = Pn_asymptotic(1, x, 50)
Q = Qn_asymptotic(1, x, 50)
def sqrt_series(s):
val = S.valuation()
lc = S[val] # Leading coefficient
b = lc.sqrt() * x**(val // 2)
for _ in range(5):
b = (b + S / b) / 2
b = b
return b
S = (P**2 + Q**2).truncate(50)
b_series = sqrt_series(S).truncate(30)
# see the beta series
print(b_series)
```
See notes/bessel_asympt.ipynb for generation
**/
#[inline]
pub(crate) fn j1f_asympt_beta(x: f64) -> f64 {
    // Series coefficients, decoded from their IEEE-754 bit patterns at compile time.
    // The first entry is exactly 1.0; entries are handed to `f_polyeval10` in table order.
    const BETA: [f64; 10] = [
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x3fc8000000000000),
        f64::from_bits(0xbfc8c00000000000),
        f64::from_bits(0x3fe9c50000000000),
        f64::from_bits(0xc01ef5b680000000),
        f64::from_bits(0x40609860dd400000),
        f64::from_bits(0xc0abae9b7a06e000),
        f64::from_bits(0x41008711d41c1428),
        f64::from_bits(0xc15ab70164c8be6e),
        f64::from_bits(0x41bc1055e24f297f),
    ];
    // The polynomial variable is (1/x)^2.
    let inv_x = 1. / x;
    let inv_x_sq = inv_x * inv_x;
    f_polyeval10(
        inv_x_sq, BETA[0], BETA[1], BETA[2], BETA[3], BETA[4], BETA[5], BETA[6], BETA[7], BETA[8],
        BETA[9],
    )
}
/**
Generated in Sollya:
```text
pretty = proc(u) {
return ~(floor(u*1000)/1000);
};
bessel_j1 = library("./cmake-build-release/libbessel_sollya.dylib");
f = bessel_j1(x)/x;
d = [0, 0.921];
w = 1;
pf = fpminimax(f, [|0,2,4,6,8,10,12|], [|D...|], d, absolute, floating);
w = 1;
or_f = bessel_j1(x);
pf1 = pf * x;
err_p = -log2(dirtyinfnorm(pf1*w-or_f, d));
print ("relative error:", pretty(err_p));
for i from 0 to degree(pf) by 2 do {
print("'", coeff(pf, i), "',");
};
```
See ./notes/bessel_sollya/bessel_j1f_at_zero.sollya
**/
#[inline]
fn poly_near_zero(x: f32) -> f32 {
    // Coefficients of the polynomial for J1(x)/x in x^2, decoded at compile time.
    // The first entry is exactly 0.5.
    const COEFFS: [f64; 7] = [
        f64::from_bits(0x3fe0000000000000),
        f64::from_bits(0xbfaffffffffffffc),
        f64::from_bits(0x3f65555555554089),
        f64::from_bits(0xbf0c71c71c2a74ae),
        f64::from_bits(0x3ea6c16bbd1dc5c1),
        f64::from_bits(0xbe384562afb69e7d),
        f64::from_bits(0x3dc248d0d0221cd0),
    ];
    // Everything is computed in f64 and rounded to f32 once at the end.
    let xd = f64::from(x);
    let xd_sq = xd * xd;
    let series = f_polyeval7(
        xd_sq, COEFFS[0], COEFFS[1], COEFFS[2], COEFFS[3], COEFFS[4], COEFFS[5], COEFFS[6],
    );
    (series * xd) as f32
}
/// Evaluates J1 from a series stored around the nearest tabulated point.
///
/// This method searches a small range of `J1_ZEROS` for the nearest zero or extremum,
/// then picks the series expansion stored for that point and evaluates the polynomial
/// at the offset from it. The computation is carried out in f64 and rounded to f32 once.
#[inline]
fn small_argument_path(x: f32) -> f32 {
static SIGN: [f64; 2] = [1., -1.];
// The work is done on |x|; this factor restores the sign of the input.
let sign_scale = SIGN[x.is_sign_negative() as usize];
let x_abs = f32::from_bits(x.to_bits() & 0x7fff_ffff) as f64;
// let avg_step = 74.60109 / 47.0;
// let inv_step = 1.0 / avg_step;
const INV_STEP: f64 = 0.6300176043004198;
// Estimated fractional position of |x| in the table.
let fx = x_abs * INV_STEP;
const J1_ZEROS_COUNT: f64 = (J1_ZEROS.len() - 1) as f64;
// SAFETY: `x_abs` has its sign bit cleared, so `fx` is non-negative or NaN. `f64::min`
// returns the non-NaN operand, so the converted value always lies in
// `0.0..=J1_ZEROS_COUNT`: it is finite and fits in `usize` after truncation.
let idx0 = unsafe { fx.min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
// SAFETY: same argument as above; `ceil` keeps a non-negative value non-negative and
// the result is clamped to `J1_ZEROS_COUNT` before the conversion.
let idx1 = unsafe { fx.ceil().min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
// The two neighbouring table entries that bracket the estimate.
let found_zero0 = DoubleDouble::from_bit_pair(J1_ZEROS[idx0]);
let found_zero1 = DoubleDouble::from_bit_pair(J1_ZEROS[idx1]);
let dist0 = (found_zero0.hi - x_abs).abs();
let dist1 = (found_zero1.hi - x_abs).abs();
// Keep whichever candidate is closer to |x|.
let (found_zero, idx, dist) = if dist0 < dist1 {
(found_zero0, idx0, dist0)
} else {
(found_zero1, idx1, dist1)
};
// The first table entry has no stored expansion (`J1F_COEFFS` is indexed with
// `idx - 1`); the near-zero polynomial is used there instead.
if idx == 0 {
return poly_near_zero(x);
}
// We hit exact zero, value, better to return it directly
if dist == 0. {
return (f64::from_bits(J1_ZEROS_VALUE[idx]) * sign_scale) as f32;
}
let c = &J1F_COEFFS[idx - 1];
// Offset of |x| from the chosen point; both words of the tabulated point are subtracted.
let r = (x_abs - found_zero.hi) - found_zero.lo;
let p = f_polyeval14(
r,
f64::from_bits(c[0]),
f64::from_bits(c[1]),
f64::from_bits(c[2]),
f64::from_bits(c[3]),
f64::from_bits(c[4]),
f64::from_bits(c[5]),
f64::from_bits(c[6]),
f64::from_bits(c[7]),
f64::from_bits(c[8]),
f64::from_bits(c[9]),
f64::from_bits(c[10]),
f64::from_bits(c[11]),
f64::from_bits(c[12]),
f64::from_bits(c[13]),
);
// Reapply the sign in f64, then round to f32.
(p * sign_scale) as f32
}
#[cfg(test)]
mod tests {
use super::*;
// Spot checks of `f_j1f` against fixed reference values; the literals are inferred as f32.
#[test]
fn test_f_j1f() {
// An argument just above the 74.60109 switch-over.
assert_eq!(
f_j1f(77.743162408196766932633181568235159),
0.09049267898021947
);
// Tiny arguments of both signs.
assert_eq!(
f_j1f(-0.000000000000000000000000000000000000008827127),
-0.0000000000000000000000000000000000000044135635
);
assert_eq!(
f_j1f(0.000000000000000000000000000000000000008827127),
0.0000000000000000000000000000000000000044135635
);
assert_eq!(f_j1f(5.4), -0.3453447907795863);
assert_eq!(
f_j1f(84.027189586293545175976760219782591),
0.0870430264022591
);
// Special values: infinities give 0, NaN gives NaN.
assert_eq!(f_j1f(f32::INFINITY), 0.);
assert_eq!(f_j1f(f32::NEG_INFINITY), 0.);
assert!(f_j1f(f32::NAN).is_nan());
// A negative argument of very large magnitude.
assert_eq!(f_j1f(-1.7014118e38), 0.000000000000000000006856925);
}
}

857
vendor/pxfm/src/bessel/j1f_coeffs.rs vendored Normal file
View File

@@ -0,0 +1,857 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
Generation of the Taylor-style expansions around each zero and extremum of J1 (fitted with
Sollya's `remez`, see the script below); see [crate::bessel::j1_coeffs::J1_ZEROS]
for the start of the explanation.
Generated by SageMath and Sollya:
```python
def compute_intervals(zeros):
intervals = []
for i in range(0, len(zeros)):
if i == 0:
a = (zeros[i]) / 2 - 0.05 - zeros[i]
b = (zeros[i] + zeros[i + 1]) / 2 + 0.05 - zeros[i]
intervals.append((RealField(18)(a), RealField(18)(b), RealField(110)(zeros[i])))
elif i + 1 > len(zeros) - 1:
a = (zeros[i - 1] + zeros[i]) / 2 - 0.05 - zeros[i]
b = (zeros[i]) + 0.83 + 0.05 - zeros[i]
intervals.append((RealField(18)(a), RealField(18)(b), RealField(110)(zeros[i])))
else:
a = (zeros[i - 1] + zeros[i]) / 2 - zeros[i] - 0.05
b = (zeros[i] + zeros[i + 1]) / 2 + 0.05 - zeros[i]
intervals.append((RealField(18)(a), RealField(18)(b), RealField(110)(zeros[i])))
return intervals
intervals = compute_intervals(j1_zeros)
# print(intervals)
def build_sollya_script(a, b, zero, deg):
return f"""
prec = 500;
bessel_j1 = library("./pxfm/notes/bessel_sollya/cmake-build-release/libbessel_sollya.dylib");
f = bessel_j1(x + {zero});
d = [{a}, {b}];
pf = remez(f, {deg}, d, 1, 1e-25);
for i from 0 to degree(pf) do {{
write(coeff(pf, i)) >> "coefficients.txt";
write("\\n") >> "coefficients.txt";
}};
"""
def load_coefficients(filename):
with open(filename, "r") as f:
return [RR(line.strip()) for line in f if line.strip()]
def call_sollya_on_interval(a, b, zero, degree=12):
sollya_script = build_sollya_script(a, b, zero, degree)
with open("tmp_interval.sollya", "w") as f:
f.write(sollya_script)
import subprocess
if os.path.exists("coefficients.txt"):
os.remove("coefficients.txt")
try:
result = subprocess.run(
["sollya", "tmp_interval.sollya"],
check=True,
capture_output=True,
text=True
)
except subprocess.CalledProcessError as e:
return
degree = 13
print(f"pub(crate) static J1F_COEFFS: [[u64;{degree + 1}]; {len(intervals)}] = [")
for i in range(0, len(intervals)):
interval = intervals[i]
call_sollya_on_interval(interval[0], interval[1], interval[2], degree)
coeffs = load_coefficients(f"coefficients.txt")
print("[")
for c in coeffs:
print(double_to_hex(c) + ",")
print("],")
print("];")
```
**/
pub(crate) static J1F_COEFFS: [[u64; 14]; 47] = [
[
0x3fe29ea3d19f035d,
0xbce22d3695a081b6,
0xbfca41115c5deeab,
0x3f78d1448e710c46,
0x3f8c441a2f9a4f69,
0xbf386671c22a634e,
0xbf39e2504b2e7b5b,
0x3ee34ccc14eef789,
0x3eda49718b72405e,
0xbe810474efe3c9c6,
0xbe70fa29fb791201,
0x3e1362d76c062ab0,
0x3dfdd76f07295520,
0xbda1a753bf39cb58,
],
[
0xbc600f4743bf63e1,
0xbfd9c6cf582cbf8a,
0x3faae8a39f51ad73,
0x3fab589d1da1462b,
0xbf7537544c334c23,
0xbf624b34099ee01c,
0x3f26e4c2d53c4f46,
0x3f083a06ee794927,
0xbec9799d241e5d2b,
0xbea3382caabc394d,
0x3e617039a48bb9f6,
0x3e345a8c920dcd07,
0xbdf014c3bf3000a5,
0xbdc034aa4e0a0169,
],
[
0xbfd626ee83500bf2,
0x3cb26dfd317b25ec,
0x3fc55f6bec9ef90d,
0xbf83d23336fd2aca,
0xbf88c77a98398e83,
0x3f45cdc98dc64f81,
0x3f373576fec2e394,
0xbef24614559dc202,
0xbed7b852bb05a2ff,
0x3e90ac054c63ba46,
0x3e6ea70b302a9ba3,
0xbe23616f333984e9,
0xbdfb0a064790ebe6,
0x3db0d9e08e0394a8,
],
[
0xbc69b70cbe5811c2,
0x3fd33518b3874e8d,
0xbf95e70dc6036109,
0xbfa80c83bdeee89d,
0x3f69a4b292e2ab0d,
0x3f613fbc7d6bc462,
0xbf207358bb38afba,
0xbf0796a754e5c2d1,
0x3ec4255a5a67b552,
0x3ea3026fd5c83da8,
0xbe5d482c06555943,
0xbe34473271553ffa,
0x3dec0515656e1a0c,
0x3dbf11e1948b68d2,
],
[
0x3fd17dbf09d40d24,
0xbca5b86542306439,
0xbfc1404bf647c233,
0x3f74f4df276a170d,
0x3f85c628542932bb,
0xbf3d68ab724123eb,
0xbf356acb63c4c7d5,
0x3eec10b48d91d910,
0x3ed67eaa56c73092,
0xbe8bb65dbfeed0f6,
0xbe6d8683c673b075,
0x3e20f7b6316797c6,
0x3dfa451f70b9922a,
0xbdadce2c76e4d044,
],
[
0x3c65df812ede650d,
0xbfcff654544ebcd3,
0x3f89223ff2c07565,
0x3fa4b0c5d5da68d8,
0xbf5f91a9ee0b0e8a,
0xbf5f51c2489da5c1,
0x3f16b4c9c8efdfe6,
0x3f063c54768ebb67,
0xbebe3724b50493f9,
0xbea25c12f8827c9d,
0x3e5747d49182153b,
0x3e33e41718262cb9,
0xbde738f2d8f2cac8,
0xbdbe80a4948dbf09,
],
[
0xbfcddceb4ce1bf49,
0x3c9e79566c79eb3d,
0x3fbda52116c0a587,
0xbf6a9da4603b9358,
0xbf8331e74ea51630,
0x3f33e5cb6ecbca42,
0x3f33885fe920e6b8,
0xbee494c100626ece,
0xbed512b940a2ae49,
0x3e85a8688c9ba4ce,
0x3e6c31a31b773184,
0xbe1bd11439ffd259,
0xbdf96a9daeb33936,
0x3da9176221cc5aa0,
],
[
0xbc62383f10698557,
0x3fcbf3337873a7d9,
0xbf80c83a2d7adab7,
0xbfa251858011820e,
0x3f559eb160bdad7b,
0x3f5c5bce33b024a1,
0xbf10413e2f7af958,
0xbf04a6704d9a9d07,
0x3eb6c43df550ea17,
0x3ea16abdc27eeb92,
0xbe52576e7fc9d1e6,
0xbe332dc1c1ed3ee2,
0x3de2f6391206ebbc,
0x3dbda410af6fe5f4,
],
[
0x3fca7f63fea81f25,
0xbc9710bf367611f2,
0xbfba60afb0664019,
0x3f62c1e930937e24,
0x3f814506466cfd08,
0xbf2cca8c0c28fab3,
0xbf31df821c353039,
0x3edee8816088c0d5,
0x3ed3a365144be247,
0xbe80ed354ac60d34,
0xbe6ab31b90ea8e41,
0x3e168836e968cdd7,
0x3df8613c2e496c6b,
0xbda4daa0300cec8d,
],
[
0x3c5e59bc05abf185,
0xbfc925c6fca08f55,
0x3f786dd32e0596e8,
0x3fa09463bbd036c8,
0xbf4fda0298c57ed5,
0xbf59f4be6075f749,
0x3f0877991961e89e,
0x3f032cb00f1d1bde,
0xbeb19d8c17e4c965,
0xbea06a043bd432bf,
0x3e4d398ca8f49a5e,
0x3e3250f2ec743ceb,
0xbddf086f08c63838,
0xbdbc8e4fa8a9f9e9,
],
[
0xbfc810f50225b04a,
0x3c923e2f625151bc,
0x3fb7fdf97ac36a6f,
0xbf5c3c256a8cde19,
0xbf7f98feb7276ef1,
0x3f25f6559e6b5a2c,
0x3f3080f57a3a527d,
0xbed80c5147824d09,
0xbed256dac8ee5bae,
0x3e7af7628377d0c7,
0x3e6938ef2e239da2,
0xbe12633fe4f5465f,
0xbdf745af34a2de92,
0x3da15e1c189cc1e4,
],
[
0xbc59737d6e4fe431,
0x3fc70c511227d5aa,
0xbf72ccb0e975555d,
0xbf9e7dc08e70e9c5,
0x3f48acdc5b030cb1,
0x3f580503724ae80a,
0xbf032ee4c8c82218,
0xbf01e5d2836968fa,
0x3eac129da754f086,
0x3e9ef1612a209ee4,
0xbe47b90193cc5cb3,
0xbe316f0e2a3b6246,
0x3dd9aabe334f3655,
0x3dbb62c2bd937db8,
],
[
0x3fc633e7f7f05300,
0xbc8dba9947515d38,
0xbfb6273784c1bfc4,
0x3f563ae94ade4347,
0x3f7d4666536b9564,
0xbf216d528356c33f,
0xbf2ec0dcdab1fcc9,
0x3ed34e967676159e,
0x3ed135c5c78436c9,
0xbe75f7c3c6380689,
0xbe67dba82b616a97,
0x3e0e71b8431ababd,
0x3df62fc761d5cf41,
0xbd9d2fe54f4a496b,
],
[
0x3c558a68c87f4030,
0xbfc5664e13b70622,
0x3f6e16555e1087dd,
0x3f9c5e1ad9fb2f2d,
0xbf43d369f956c6bd,
0xbf566f4ec27a7a37,
0x3eff0de050de72b9,
0x3f00cf26431ce3a2,
0xbea6f46c2694b0df,
0xbe9d407f232dd5f2,
0x3e43a29f2e4c9c8a,
0x3e3098ca879e4471,
0xbdd585d6255bc3df,
0xbdba39fcc1ea78bf,
],
[
0xbfc4b71d4ca2cc69,
0x3c88c930c0b4c560,
0x3fb4ae245697fb03,
0xbf5215e4e1a6153c,
0xbf7b633ed6d8e543,
0x3f1c7f17b4d42a82,
0x3f2ce01b8a6eca10,
0xbecfced72e1b750f,
0xbed03c9cd706bc3a,
0x3e72450e1e5d9dd3,
0x3e66a249f63c5bc1,
0xbe0999366388212b,
0xbdf52ba007be60f3,
0x3d98ced5beb32a74,
],
[
0xbc526f6d035edf6d,
0x3fc40f90793605bb,
0xbf68c833077fb99d,
0xbf9aa0ce0421d16e,
0x3f405fa598ed8bab,
0x3f551d30d78a7993,
0xbef9c5807480a6e1,
0xbeffc1bbf50ca15b,
0x3ea32dfda14ee884,
0x3e9bc2119616c18d,
0xbe408b0d01f43d88,
0xbe2fa87db40715d8,
0x3dd24d20c9bf3988,
0x3db920af9a64d8b9,
],
[
0x3fc37dfa8f5a550a,
0xbc850d0284917193,
0xbfb3775c1a04efff,
0x3f4e2b4810a4a882,
0x3f79d151a72aa26d,
0xbf17d8e5a0a8f01d,
0xbf2b49a641814268,
0x3ecac10968085b43,
0x3ececa610eed952d,
0xbe6eefd23aebb19b,
0xbe658bda5ec8aafe,
0x3e05d77b1c39da47,
0x3df43d63ca6b9538,
0xbd9555dba21d3a76,
],
[
0x3c4fe3057c054c4c,
0xbfc2f2072e638cf3,
0x3f64df208bbd408f,
0x3f992bb5e1e159a9,
0xbf3ba181c0657121,
0xbf53fe9d5ba9fb4a,
0x3ef5d17600fd9483,
0x3efe26d373f4ffea,
0xbea0509689b62f58,
0xbe9a70f1b160bc28,
0x3e3c4fa74da61f57,
0x3e2e44cc2feeed24,
0xbdcf87b5a4255e18,
0xbdb81bebaaec3c7c,
],
[
0xbfc2768d29c69936,
0x3c822565e3c86e7f,
0x3fb271811730b057,
0xbf49a8df96a15635,
0xbf787c81cf1b96e9,
0x3f14549cdbcc339c,
0x3f29ed2567282f3d,
0xbec6e4137cf2411c,
0xbecd53321406f402,
0x3e6a98443cd6fc90,
0x3e6494adc7c6521b,
0xbe02e1d787962f20,
0xbdf3653d7772f823,
0x3d928dbabedf1d31,
],
[
0xbc4bd8c1a48b98b5,
0x3fc1ff5eec6a01cd,
0xbf61e438b722bfe0,
0xbf97ed5fffc1c711,
0x3f37b7997ba917ee,
0x3f53081def95b78f,
0xbef2c5f5ec3350b1,
0xbefcc11a59469f36,
0x3e9c2c3894e80d05,
0x3e9946d150444e47,
0xbe388ce82c32666d,
0xbe2d044ba8d28f8d,
0x3dcb7a77f047774e,
0x3db72cad88060e0b,
],
[
0x3fc194eba75b32f9,
0xbc7faef3b1a5e821,
0xbfb190f7dc273599,
0x3f462bb47a5c8cc1,
0x3f7756ef20f501d3,
0xbf1198b0baaa058c,
0xbf28be8cf854b2d7,
0x3ec3dd6f88b69c69,
0x3ecc09c72877c12b,
0xbe6728ec2da828ad,
0xbe63b897c2c7b139,
0x3e008344f3db34b5,
0x3df2a1a5ef6e57ff,
0xbd904c70dc90d3bc,
],
[
0x3c4888e51c985983,
0xbfc12dd57bf18ad9,
0x3f5f1e1e7f3937bf,
0x3f96d9afe883018e,
0xbf34a538a4802887,
0xbf52316250b44b33,
0x3ef05f11562b37ff,
0x3efb86bad38b7b43,
0xbe98a1b250bb7d2f,
0xbe983dca646511fe,
0x3e3588f1be962e6d,
0x3e2be36513882145,
0xbdc831edc515fa05,
0xbdb6520b1c14e6f6,
],
[
0xbfc0d0d36473e98c,
0x3c7bf69dcc64d467,
0x3fb0cda9974abd9e,
0xbf4367f38f204418,
0xbf7656b75e3b5a4f,
0x3f0ed82abf947b58,
0x3f27b4e5b765cd39,
0xbec171fd1a726d85,
0xbecae62a6c526e99,
0x3e64648b6f18fd5e,
0x3e62f3b53a117819,
0xbdfd2c72ca1c90f8,
0xbdf1f085bd0fab41,
0x3d8ce566a6478844,
],
[
0xbc45ca84b624bf30,
0x3fc076826cc2c191,
0xbf5b62885e006ac2,
0xbf95e7f53001e43e,
0x3f322ebeb8d9f78a,
0x3f517444a79fe500,
0xbeece06f1cc95449,
0xbefa7006e603acdb,
0x3e95c42dcf4cb755,
0x3e9750c9cbea6158,
0xbe3313f5f0d7c83f,
0xbe2ade4e1f9c8db0,
0x3dc57f801723eb46,
0x3db58a5b7895974e,
],
[
0x3fc02455675ab6d2,
0xbc78e3f9125495c0,
0xbfb021c155a72057,
0x3f412be56fc16829,
0x3f75749d556a12df,
0xbf0b51f1f9db1832,
0xbf26c96a07103bad,
0x3ebef3a7bef163ee,
0x3ec9e206eb2ce693,
0xbe6220bf8745e1a3,
0xbe6242a68bb3eb7f,
0x3df9ffc1c0ac86f0,
0x3df14fb8a5e39fac,
0xbd89d286115bf7a6,
],
[
0x3c4380441b0b0c6a,
0xbfbfa8b41711c839,
0x3f5857d39699926e,
0x3f9511c6dadaa99b,
0xbf302c289dbbcc5a,
0xbf50cc2238d1bf52,
0x3ee9b64d5a4aa86c,
0x3ef976fb01920f8b,
0xbe93693a8cc790fd,
0xbe967b9496685d1b,
0x3e310c25f77e4f25,
0x3e29f17f3e13ecd9,
0xbdc3414987038eec,
0xbdb4d3b9dc98b4b1,
],
[
0xbfbf161d0c28b48b,
0x3c765dc0b792167d,
0x3faf11d837aa6e5c,
0xbf3eab76da4d4788,
0xbf74ab329f05bdc5,
0x3f086ada57d5a903,
0x3f25f6e78e464093,
0xbebbb2720677d252,
0xbec8f8525854df7e,
0x3e603f882886871e,
0x3e61a293516bd71e,
0xbdf75995de0fcb8d,
0xbdf0bd411cec4c70,
0x3d873e4d136b8f8b,
],
[
0xbc418c91b7939a2c,
0x3fbe8727daa3daec,
0xbf55d353e285455c,
0xbf94524d4813cbac,
0x3f2d037574df02eb,
0x3f50356bb7473b5d,
0xbee7156bfaea76f5,
0xbef896d7dbd3810e,
0x3e9172c5e1abd5c6,
0x3e95baadfc18282d,
0xbe2eb240c0cc9c75,
0xbe2919d9b9e0a0b9,
0x3dc15e50952db326,
0x3db42c51c147e65d,
],
[
0x3fbe0357c158b118,
0xbc74361048923786,
0xbfadffc2fc1a90f5,
0x3f3b9b82ae081404,
0x3f73f64e05315346,
0xbf05fe4b66e63077,
0xbf2539518d55a85c,
0x3eb8f8d02bcc2897,
0x3ec825039164993f,
0xbe5d566920c2c9ab,
0xbe61111befe6e2b8,
0x3df51d4c70439a2f,
0x3df0375b8b7f66b3,
0xbd850e412a9bff06,
],
[
0x3c3fc518d24f616f,
0xbfbd8293aa55d18f,
0x3f53b6beb83f212f,
0x3f93a5ccbc12a602,
0xbf2a3765d26776da,
0xbf4f5ab33747e91e,
0x3ee4df6f1a6da3df,
0x3ef7cbd49b834b1a,
0xbe8f9607c8362a02,
0xbe950b374b4c0d92,
0x3e2bd1f7180ef6ba,
0x3e2854abbc0c7de8,
0xbdbf87db79765a71,
0xbdb3926b2e69585a,
],
[
0xbfbd0b36e5737457,
0x3c726585805a22d8,
0x3fad082ce3c6b4a2,
0xbf3905d00c5e9c91,
0xbf7352b073fcfa33,
0x3f03f1ccfed800a0,
0x3f248d74577878fa,
0xbeb6a9ef1ba885bb,
0xbec764d8b51b8b1c,
0x3e5aa78ed7e846ea,
0x3e608c46d6182272,
0xbdf33581106d6379,
0xbdef78fcff7e62ac,
0x3d832f39fadd44c1,
],
[
0xbc3cea65a1050db5,
0x3fbc96700bf039e1,
0xbf51ec0b5de4bafb,
0xbf93095734a2441c,
0x3f27d74e122576e6,
0x3f4e636fe2585c98,
0xbee2fe11959a4f56,
0xbef712e4d39f1e3a,
0x3e8cc3ac0e3e5a57,
0x3e946ad2493deefc,
0xbe295ca72f5034a9,
0xbe279fa7ce9e0732,
0x3dbcc7fe67868a0c,
0x3db30482af82a3e3,
],
[
0x3fbc29ae8400a31f,
0xbc70d624180ba1cb,
0xbfac27138da31b39,
0x3f36d141fcbed86f,
0x3f72bdc71061ff60,
0xbf0231cf645337e2,
0xbf23f0bf3a855d26,
0x3eb4b05ea24a407d,
0x3ec6b52ac7705590,
0xbe585a82e1962dc0,
0xbe60126ea6b0d3d5,
0x3df191f5eda7279d,
0x3dee96ae83ca14cd,
0xbd8191df21c5049c,
],
[
0x3c3a725871d54f1e,
0xbfbbbf246914235e,
0x3f5062daee353d6e,
0x3f927a96f174b658,
0xbf25cdb5dea7195a,
0xbf4d818348f8b2ae,
0x3ee160aab6b91ebc,
0x3ef6698d6e3dde27,
0xbe8a56325d99553d,
0xbe93d7884737e010,
0x3e273dfa1c71021f,
0x3e26f8d7f1f40cd1,
0xbdba675a7a73a904,
0xbdb28141631b6b6a,
],
[
0xbfbb5b8273b75054,
0x3c6ef081b4f49e8c,
0x3fab59418c36a598,
0xbf34eafeaa92d6ad,
0xbf7235801af9154a,
0x3f00af9747e26a9d,
0x3f23611db02b9d63,
0xbeb2fbe420b9b6ee,
0xbec613cc016f8c79,
0x3e565cfa070daeea,
0x3e5f4465aa2f3924,
0xbdf0262c878a66b9,
0xbdedc58772bf3be9,
0x3d802a5900529d85,
],
[
0xbc384aa4fbafc099,
0x3fbaf9cb49c4f934,
0xbf4e1d930b512b68,
0xbf91f7a8fec6eb30,
0x3f240a553105f569,
0x3f4cb20c812efe23,
0xbedff5195120ac4b,
0xbef5cdc48eb38532,
0x3e883b07bbc753fc,
0x3e934fb5f8f40030,
0xbe2566435ca657bc,
0xbe265e90a4422b0e,
0x3db85513c8865e5e,
0x3db2077cf853887e,
],
[
0x3fba9e13a0db6429,
0xbc6c8fc49071e774,
0xbfaa9c1ca2161ab5,
0x3f3344a09efdef03,
0x3f71b82c43097eb4,
0xbefebfb97beaa2a2,
0xbf22dcdb1b1095b7,
0x3eb180048a016cca,
0x3ec57eee38f33b34,
0xbe54a0cd96cea7fb,
0xbe5e74d5d6cab4bf,
0x3dedd0d2df39d680,
0x3ded0395b2b002c8,
0xbd7ddf4961633a0a,
],
[
0x3c366129d7cdda38,
0xbfba4407e04298d1,
0x3f4bcc9df0cefa78,
0x3f917f0266db20d1,
0xbf2280a052210834,
0xbf4bf2ada1f36d37,
0x3edd83d57dbfecdc,
0x3ef53dd97231158c,
0xbe8663c0bacd7255,
0xbe92d1fb6736ed8c,
0x3e23c94599131ea8,
0x3e25cf6504993c56,
0xbdb683e80faa06e8,
0xbdb19631ce482d25,
],
[
0xbfb9ef3bb2213b0a,
0x3c6a7968b4e09650,
0x3fa9ed82007a9965,
0xbf31d2fdeeb2c55d,
0xbf71446866fe7a9f,
0x3efc73b6851770db,
0x3f22628de4e7b12f,
0xbeb03303cb6b68e6,
0xbec4f50f5682c560,
0x3e531ae61db4a107,
0x3e5db4387e5cde8e,
0xbdeba1fde369500d,
0xbdec4f2d9a6ca338,
0x3d7bb4d1335b858d,
],
[
0xbc34b2fde0d60fa7,
0x3fb99be744018c90,
0xbf49c3f52a2af15f,
0xbf910f5ca51f983b,
0x3f2126c8e8c80fca,
0x3f4b416f7d4ef26a,
0xbedb5e2e533b6e90,
0xbef4b86226f8434d,
0x3e84c505ef1872eb,
0x3e925d2f3adca31c,
0xbe225d5434e81821,
0xbe254a1c99a03af6,
0x3db4e94bd07d8a6f,
0x3db12c7cbd69804c,
],
[
0x3fb94d3276914e50,
0xbc68a054fb407a48,
0xbfa94bac1950e23d,
0x3f308d4ff8f228ce,
0x3f70d90d29bf518f,
0xbefa6d56164ba1ad,
0xbf21f107d97a6716,
0x3eae1a62755de03b,
0x3ec474ea4fd020bb,
0xbe51c2774b54b19c,
0xbe5d00e34f95d0da,
0x3de9b39b1acf50d9,
0x3deba6dd96aebb23,
0xbd79c96d87e11eb2,
],
[
0x3c3336a57f397478,
0xbfb8ffc9bd24fe07,
0x3f47f7d46ab331c5,
0x3f90a7a725d3fb50,
0xbf1fea1728ee3283,
0xbf4a9cac69f01f7f,
0x3ed977f48dd3df20,
0x3ef43c2d8dc63c79,
0xbe8355d08ef44310,
0xbe91f054ae49cd66,
0x3e211ab3e0c31baf,
0x3e24cdaac969d4dd,
0xbdb37cc6ee5acaa3,
0xbdb0c99a10f9a149,
],
[
0xbfb8b67a2481077c,
0x3c66ff282f533f22,
0x3fa8b51f21068dcb,
0xbf2ed935c7af3be8,
0xbf707522a502e55b,
0x3ef8a1960639d120,
0x3f21874a473e56eb,
0xbeac10cf4666ac95,
0xbec3fd6b83d91ed3,
0x3e509072f594172b,
0x3e5c5969c0715bb8,
0xbde7fbce7fa1d168,
0xbdeb09679444c7c3,
0x3d7813cf0f88ee50,
],
[
0xbc31df60e9bfe4d4,
0x3fb86e51be0a9153,
0xbf465ed1b387e0dc,
0xbf9046fc5a218a13,
0x3f1dca617fec07ec,
0x3f4a030022145da6,
0xbed7c761870dcbbb,
0xbef3c83888dc1ceb,
0x3e820edd9a880dbb,
0x3e918a9391ba3964,
0xbe1ff64d38db85c5,
0xbe245927a9c55d79,
0x3db2377b4d9f1923,
0x3db06cdd9727d79f,
],
[
0x3fb829d06fee9265,
0xbc6580de45a47453,
0xbfa8289a526d76b3,
0x3f2cd680355cdc28,
0x3f7017d70f5091c7,
0xbef707978e429e47,
0xbf21247ce0bc7ead,
0x3eaa3f6135c91ad4,
0x3ec38da7a255e74a,
0xbe4efe445dfcd93f,
0xbe5bbc925c9c4b66,
0x3de6729121dc2aec,
0x3dea75b6fa634294,
0xbd768bad7f03cc70,
],
[
0x3c30ac9cf88bc679,
0xbfb7e656efb009ad,
0x3f44f15066f3d3cc,
0x3f8fd932c26aacb2,
0xbf1be460dd833bb1,
0xbf49733b5917b1ec,
0x3ed64488c387546b,
0x3ef35ba58b547387,
0xbe80ea46c863072a,
0xbe912b31edd1db5a,
0x3e1df32c5fd2f995,
0x3e23ebca247be24d,
0xbdb113cb7b7c70db,
0xbdb015b1107de244,
],
[
0xbfb7a62320798174,
0x3c9957b1463c023c,
0x3fa7a50ca4504ab9,
0xbf2b095ccb52d0c4,
0xbf6f80ef11d944d8,
0x3ef59822dd4acc8e,
0x3f20c7e6a7116068,
0xbea89e01408239eb,
0xbec324d470ec229a,
0x3e4d13ff680fb32a,
0x3e5b2943a9554368,
0xbde5283c9b463c57,
0xbde9e8488fabd47c,
0x3d7a620fb02a1ce9,
],
];

396
vendor/pxfm/src/bessel/jincpi.rs vendored Normal file
View File

@@ -0,0 +1,396 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![allow(clippy::excessive_precision)]
use crate::bessel::alpha1::bessel_1_asympt_alpha_fast;
use crate::bessel::beta1::bessel_1_asympt_beta_fast;
use crate::bessel::j1_coeffs::{J1_COEFFS, J1_ZEROS, J1_ZEROS_VALUE};
use crate::bessel::j1_coeffs_taylor::J1_COEFFS_TAYLOR;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::polyeval::{f_polyeval9, f_polyeval19};
use crate::round::RoundFinite;
use crate::sin_helper::sin_dd_small_fast;
/// Normalized jinc 2*J1(PI\*x)/(pi\*x)
///
/// Returns `1.0` when `|x| <= f64::EPSILON`, `0.0` for either infinity and NaN for a NaN input.
/// The sign of `x` is discarded before any of the evaluation paths is entered.
pub fn f_jincpi(x: f64) -> f64 {
    // Shifting the sign bit out lets the range checks below work on the magnitude only.
    let sign_free_bits = x.to_bits().wrapping_shl(1);
    if sign_free_bits <= 0x7960000000000000u64 {
        // |x| <= f64::EPSILON
        return 1.0;
    }
    if sign_free_bits >= 0x7ffu64 << 53 {
        // |x| == inf or x == NaN
        return if x.is_infinite() { 0. } else { x + f64::NAN };
    }

    let abs_bits: u64 = x.to_bits() & 0x7fff_ffff_ffff_ffff;
    let abs_x = f64::from_bits(abs_bits);
    if abs_bits < 0x4052a6784230fcf8u64 {
        // |x| < 74.60109
        if abs_bits < 0x3fd3333333333333 {
            // |x| < 0.3
            return jincpi_near_zero(abs_x);
        }
        // The zero/extremum table lookup is used only while PI*|x| stays below the same bound.
        if abs_x * std::f64::consts::PI < 74.60109 {
            return jinc_small_argument_fast(abs_x);
        }
    }
    jinc_asympt_fast(abs_x)
}
/*
Asymptotic branch of the normalized jinc: returns 2*J1(x)/x with x = PI*ox.

J1 itself is evaluated as:
J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
shifting the phase by PI/2 turns the cosine into a sine:
J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
to avoid losing the small term (-PI/4 - alpha(x)) next to a large x, the argument is
reduced first, so the expansion actually used is:
J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))

Because x = PI*ox, the reduction modulo 2*PI is carried out on ox modulo 2 and only
the remainder is multiplied by PI.
*/
#[inline]
fn jinc_asympt_fast(ox: f64) -> f64 {
// PI as a double-double constant (first word is the low-order correction, second the leading word).
const PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3ca1a62633145c07),
f64::from_bits(0x400921fb54442d18),
);
// x = PI * ox, the actual Bessel argument.
let x = DoubleDouble::quick_mult_f64(PI, ox);
// sqrt(2/PI) as a double-double constant.
const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc8cbc0d30ebfd15),
f64::from_bits(0x3fe9884533d43651),
);
// -PI/4 as a double-double constant.
const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc81a62633145c07),
f64::from_bits(0xbfe921fb54442d18),
);
// Argument reduction: the Bessel argument is PI*ox, so
// k = round((ox*PI) / (2*PI)) = round(ox / 2)
let kd = (ox * 0.5).round_finite();
// rem = ox - 2*k (presumably fused as kd * -2 + ox by f_fmla); the reduced angle is PI*rem.
let rem = f_fmla(kd, -2., ox);
let angle = DoubleDouble::quick_mult_f64(PI, rem);
// 1/x feeds the asymptotic alpha/beta helpers and the final division by x.
let recip = x.recip();
let alpha = bessel_1_asympt_alpha_fast(recip);
let beta = bessel_1_asympt_beta_fast(recip);
// Without full subtraction cancellation happens sometimes
// NOTE: despite its name, x0pi34 holds (-PI/4 - alpha).
let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
// r0 = (x mod 2*PI) - PI/4 - alpha
let r0 = DoubleDouble::full_dd_add(angle, x0pi34);
let m_sin = sin_dd_small_fast(r0);
// z0 = beta * sin(r0)
let z0 = DoubleDouble::quick_mult(beta, m_sin);
// scale = sqrt(2/PI) / sqrt(x)
let dx_sqrt = x.fast_sqrt();
let scale = DoubleDouble::div(SQRT_2_OVER_PI, dx_sqrt);
// p approximates J1(x); multiplying by 1/x and doubling yields 2*J1(x)/x.
let p = DoubleDouble::quick_mult(scale, z0);
DoubleDouble::quick_mult(p, recip).to_f64() * 2.
}
#[inline]
pub(crate) fn jincpi_near_zero(x: f64) -> f64 {
// Polynomial Generated by Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=BesselJ[1,x*Pi]/(x*Pi)
// {err,approx}=MiniMaxApproximation[f[z],{z,{2^-23,0.3},7,7},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const P: [(u64, u64); 8] = [
(0xbb2bddffe9450ca6, 0x3fe0000000000000),
(0x3c3b0b0a7393eccb, 0xbfce4cd3c3c87615),
(0xbc7f9f784e0594a6, 0xbfe043283b1e383f),
(0xbc6af77bca466875, 0x3fcee46673cf919f),
(0xbc0b62837b038ea8, 0x3fc0b7cc55c9a4af),
(0x3c5c08841871f124, 0xbfb002b65231dcdd),
(0xbc26cf2d89ea63bc, 0xbf849022a7a0712b),
(0xbbe535d492c0ac1c, 0x3f740b48910d5105),
];
const Q: [(u64, u64); 8] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3c4aba6577f3253e, 0xbfde4cd3c3c87615),
(0x3c52f58f82e3438c, 0x3fcbd0a475006cf9),
(0x3c36e496237d6b49, 0xbfb9f4cea13b06e9),
(0xbbbbf3e4ef3a28fe, 0x3f967ed0cee85392),
(0x3c267ac442bb3bcf, 0xbf846e192e22f862),
(0x3bd84e9888993cb0, 0x3f51e0fff3cfddee),
(0x3bd7c0285797bd8e, 0xbf3ea7a621fa1c8c),
];
let x2 = DoubleDouble::from_exact_mult(x, x);
let x4 = x2 * x2;
let p0 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[1]),
x,
DoubleDouble::from_bit_pair(P[0]),
);
let p1 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[3]),
x,
DoubleDouble::from_bit_pair(P[2]),
);
let p2 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[5]),
x,
DoubleDouble::from_bit_pair(P[4]),
);
let p3 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[7]),
x,
DoubleDouble::from_bit_pair(P[6]),
);
let q0 = DoubleDouble::mul_add(x2, p1, p0);
let q1 = DoubleDouble::mul_add(x2, p3, p2);
let p_num = DoubleDouble::mul_add(x4, q1, q0);
let p0 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[1]),
x,
DoubleDouble::from_bit_pair(Q[0]),
);
let p1 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[3]),
x,
DoubleDouble::from_bit_pair(Q[2]),
);
let p2 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[5]),
x,
DoubleDouble::from_bit_pair(Q[4]),
);
let p3 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[7]),
x,
DoubleDouble::from_bit_pair(Q[6]),
);
let q0 = DoubleDouble::mul_add(x2, p1, p0);
let q1 = DoubleDouble::mul_add(x2, p3, p2);
let p_den = DoubleDouble::mul_add(x4, q1, q0);
DoubleDouble::quick_mult_f64(DoubleDouble::div(p_num, p_den), 2.).to_f64()
}
/// Evaluates `2*J1(pi*x)/(pi*x)` for moderate `x` from expansions stored around the zeros and
/// extrema of J1.
///
/// The scaled argument `pi*x` is mapped to the nearest entry of `J1_ZEROS`, the polynomial stored
/// for that entry is evaluated at the offset from it, and the result is divided by `pi*x` and
/// doubled. A cheaper evaluation is tried first; `j1_small_argument_dd` is used when the error
/// interval of the cheap result does not round to a single `f64`.
///
/// NOTE(review): the unchecked float-to-int conversions below assume `x` is finite and
/// non-negative. The call in `f_jincpi` passes `|x|` with `pi*|x| < 74.60109`; confirm that any
/// other caller upholds the same precondition.
#[inline]
pub(crate) fn jinc_small_argument_fast(x: f64) -> f64 {
// PI as a double-double constant (first word is the low-order correction, second the leading word).
const PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3ca1a62633145c07),
f64::from_bits(0x400921fb54442d18),
);
// let avg_step = 74.60109 / 47.0;
// let inv_step = 1.0 / avg_step;
// dx = PI * x, the actual Bessel argument.
let dx = DoubleDouble::quick_mult_f64(PI, x);
const INV_STEP: f64 = 0.6300176043004198;
// Estimated (fractional) position of dx in the table of zeros/extrema.
let fx = dx.hi * INV_STEP;
const J1_ZEROS_COUNT: f64 = (J1_ZEROS.len() - 1) as f64;
// Two candidate entries: fx truncated and fx rounded up, both capped at the last index.
// NOTE(review): `to_int_unchecked` is undefined behavior for NaN, infinite or out-of-range
// values; `min` bounds the value from above, the lower bound relies on fx >= 0.
let idx0 = unsafe { fx.min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
let idx1 = unsafe { fx.ceil().min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
let found_zero0 = DoubleDouble::from_bit_pair(J1_ZEROS[idx0]);
let found_zero1 = DoubleDouble::from_bit_pair(J1_ZEROS[idx1]);
// Keep whichever candidate lies closer to dx (a tie selects the second candidate).
let dist0 = (found_zero0.hi - dx.hi).abs();
let dist1 = (found_zero1.hi - dx.hi).abs();
let (found_zero, idx, dist) = if dist0 < dist1 {
(found_zero0, idx0, dist0)
} else {
(found_zero1, idx1, dist1)
};
// Entry 0 has no stored expansion (coefficient rows are indexed by idx - 1);
// the dedicated near-zero approximation handles it.
if idx == 0 {
return jincpi_near_zero(x);
}
// Offset of dx from the chosen expansion point, in double-double.
let r = DoubleDouble::quick_dd_sub(dx, found_zero);
// The leading word of dx coincides with the tabulated point: return the tabulated J1 value
// directly, scaled as 2 * value / dx (presumably what `from_f64_div_dd` computes).
if dist == 0. {
return DoubleDouble::quick_mult_f64(
DoubleDouble::from_f64_div_dd(f64::from_bits(J1_ZEROS_VALUE[idx]), dx),
2.,
)
.to_f64();
}
// Very close to the expansion point the Taylor table is used instead of the regular one.
// `dist` is already an absolute value, so the extra `.abs()` is a no-op.
let is_zero_too_close = dist.abs() < 1e-3;
let c = if is_zero_too_close {
&J1_COEFFS_TAYLOR[idx - 1]
} else {
&J1_COEFFS[idx - 1]
};
// Terms c[5]..=c[23] are evaluated in plain f64 using only the second word of each pair.
let p = f_polyeval19(
r.hi,
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
f64::from_bits(c[9].1),
f64::from_bits(c[10].1),
f64::from_bits(c[11].1),
f64::from_bits(c[12].1),
f64::from_bits(c[13].1),
f64::from_bits(c[14].1),
f64::from_bits(c[15].1),
f64::from_bits(c[16].1),
f64::from_bits(c[17].1),
f64::from_bits(c[18].1),
f64::from_bits(c[19].1),
f64::from_bits(c[20].1),
f64::from_bits(c[21].1),
f64::from_bits(c[22].1),
f64::from_bits(c[23].1),
);
// The five lowest-order terms are folded in with double-double Horner steps.
let mut z = DoubleDouble::mul_f64_add(r, p, DoubleDouble::from_bit_pair(c[4]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[3]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[2]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[1]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[0]));
// jinc normalization: 2 * J1(dx) / dx.
z = DoubleDouble::quick_mult_f64(DoubleDouble::div(z, dx), 2.);
// Error estimate for the fast path, combined from z.hi, 2^-56 and 2^-64
// (presumably z.hi * 2^-56 + 2^-64).
let err = f_fmla(
z.hi,
f64::from_bits(0x3c70000000000000), // 2^-56
f64::from_bits(0x3bf0000000000000), // 2^-64
);
// If both ends of the error interval round to the same f64 the fast result is accepted.
let ub = z.hi + (z.lo + err);
let lb = z.hi + (z.lo - err);
if ub == lb {
return z.to_f64();
}
// Otherwise redo the evaluation with more terms in double-double arithmetic.
j1_small_argument_dd(r, c, dx)
}
fn j1_small_argument_dd(r: DoubleDouble, c0: &[(u64, u64); 24], inv_scale: DoubleDouble) -> f64 {
let c = &c0[15..];
let p0 = f_polyeval9(
r.to_f64(),
f64::from_bits(c[0].1),
f64::from_bits(c[1].1),
f64::from_bits(c[2].1),
f64::from_bits(c[3].1),
f64::from_bits(c[4].1),
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
);
let c = c0;
let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0]));
let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo);
let z = DoubleDouble::div(p, inv_scale);
DoubleDouble::quick_mult_f64(z, 2.).to_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for `f_jincpi`: every `(input, expected)` pair must match exactly,
    /// and a NaN input must produce NaN.
    #[test]
    fn test_jincpi() {
        let cases: [(f64, f64); 14] = [
            (f64::EPSILON, 1.0),
            (0.5000000000020244, 0.7217028449014163),
            (73.81695991658546, -0.0004417546638317049),
            (0.01, 0.9998766350182722),
            (0.9, 0.28331697846510623),
            (3.831705970207517, -0.036684415010255086),
            (-3.831705970207517, -0.036684415010255086),
            (0.000000000000000000000000000000000000008827127, 1.0),
            (-0.000000000000000000000000000000000000008827127, 1.0),
            (5.4, -0.010821736808448256),
            (
                77.743162408196766932633181568235159,
                -0.00041799098646950523,
            ),
            (
                84.027189586293545175976760219782591,
                -0.00023927934929850555,
            ),
            (f64::NEG_INFINITY, 0.0),
            (f64::INFINITY, 0.0),
        ];
        for (input, expected) in cases {
            assert_eq!(f_jincpi(input), expected);
        }
        assert!(f_jincpi(f64::NAN).is_nan());
    }
}

249
vendor/pxfm/src/bessel/jincpif.rs vendored Normal file
View File

@@ -0,0 +1,249 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::bessel::j1_coeffs::{J1_ZEROS, J1_ZEROS_VALUE};
use crate::bessel::j1f::{j1f_asympt_alpha, j1f_asympt_beta};
use crate::bessel::j1f_coeffs::J1F_COEFFS;
use crate::bessel::trigo_bessel::sin_small;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::polyeval::{f_polyeval6, f_polyeval14};
use crate::round::RoundFinite;
/// Normalized jinc 2*J1(PI\*x)/(pi\*x)
///
/// ULP 0.5
///
/// Returns `1.0` when `|x| <= f32::EPSILON`, `0.0` for either infinity and NaN for a NaN input.
pub fn f_jincpif(x: f32) -> f32 {
    // Shifting the sign bit out lets the range checks below work on the magnitude only.
    let sign_free_bits = x.to_bits().wrapping_shl(1);
    if sign_free_bits <= 0x6800_0000u32 {
        // |x| <= f32::EPSILON
        return 1.;
    }
    if sign_free_bits >= 0xffu32 << 24 {
        // |x| == inf or x == NaN
        return if x.is_infinite() { 0. } else { x + f32::NAN };
    }

    let abs_bits = x.to_bits() & 0x7fff_ffff;
    let abs_x = f32::from_bits(abs_bits);
    if abs_bits < 0x429533c2u32 {
        // |x| < 74.60109
        if abs_bits <= 0x3e800000u32 {
            // |x| <= 0.25
            return jincf_near_zero(abs_x);
        }
        // The zero/extremum table lookup is used only while PI*|x| stays below the same bound.
        if abs_x * std::f32::consts::PI < 74.60109 {
            return jincpif_small_argument(abs_x);
        }
    }
    jincpif_asympt(abs_x) as f32
}
/// Rational approximation of the normalized jinc for arguments near zero.
///
/// The quotient of the two polynomials approximates `J1(pi*x)/(pi*x)` (see the
/// generator snippet inside the body); the result is that quotient doubled
/// and rounded to `f32`.
#[inline]
fn jincf_near_zero(x: f32) -> f32 {
    // Generated in Wolfram Mathematica:
    // <<FunctionApproximations`
    // ClearAll["Global`*"]
    // f[x_]:=BesselJ[1,x*Pi]/(x*Pi)
    // {err,approx}=MiniMaxApproximation[f[z],{z,{2^-23,0.3},6,0},WorkingPrecision->60]
    // poly=Numerator[approx][[1]];
    // coeffs=CoefficientList[poly,z];
    // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
    //
    // NOTE(review): the snippet above requests orders {6, 0}, while the code
    // evaluates a 6-coefficient numerator and a 6-coefficient denominator -
    // confirm the generator parameters.

    // Bit patterns of the numerator coefficients, in the order they are
    // handed to `f_polyeval6`.
    const NUM: [u64; 6] = [
        0x3fe0000000000002,
        0xbfd46cd1822a5aa0,
        0xbfde583c923dc6f4,
        0x3fd3834f47496519,
        0x3fb8118468756e6f,
        0xbfafaff09f13df88,
    ];
    // Bit patterns of the denominator coefficients, same ordering.
    const DEN: [u64; 6] = [
        0x3ff0000000000000,
        0xbfe46cd1822a4cb0,
        0x3fd2447a026f477a,
        0xbfc6bdf2192404e5,
        0x3fa0cf182218e448,
        0xbf939ab46c3f7a7d,
    ];

    let z = x as f64;
    let numerator = f_polyeval6(
        z,
        f64::from_bits(NUM[0]),
        f64::from_bits(NUM[1]),
        f64::from_bits(NUM[2]),
        f64::from_bits(NUM[3]),
        f64::from_bits(NUM[4]),
        f64::from_bits(NUM[5]),
    );
    let denominator = f_polyeval6(
        z,
        f64::from_bits(DEN[0]),
        f64::from_bits(DEN[1]),
        f64::from_bits(DEN[2]),
        f64::from_bits(DEN[3]),
        f64::from_bits(DEN[4]),
        f64::from_bits(DEN[5]),
    );
    let quotient = numerator / denominator;
    (quotient * 2.) as f32
}
/// Small-range evaluation: finds the nearest tabulated zero or extremum,
/// picks the series expansion stored for that point and evaluates the
/// polynomial at the offset from it.
///
/// The argument is scaled by pi in `f64` before the lookup; the polynomial
/// value is then divided by the scaled argument and doubled.
#[inline]
fn jincpif_small_argument(ox: f32) -> f32 {
    const PI: f64 = f64::from_bits(0x400921fb54442d18);
    // let avg_step = 74.60109 / 47.0;
    // let inv_step = 1.0 / avg_step;
    const INV_STEP: f64 = 0.6300176043004198;
    const LAST_INDEX: f64 = (J1_ZEROS.len() - 1) as f64;

    let scaled = ox as f64 * PI;
    let magnitude = f64::from_bits(scaled.to_bits() & 0x7fff_ffff_ffff_ffff);
    let slot = magnitude * INV_STEP;

    // SAFETY: `magnitude` has its sign bit cleared, so `slot` is never
    // negative; `min` caps the value at `LAST_INDEX` (and yields `LAST_INDEX`
    // when `slot` is NaN), so the operand is finite and fits in `usize`.
    let lower = unsafe { slot.min(LAST_INDEX).to_int_unchecked::<usize>() };
    // SAFETY: same argument as above; `ceil` keeps the value non-negative and
    // `min` caps it at `LAST_INDEX`.
    let upper = unsafe { slot.ceil().min(LAST_INDEX).to_int_unchecked::<usize>() };

    let lower_point = DoubleDouble::from_bit_pair(J1_ZEROS[lower]);
    let upper_point = DoubleDouble::from_bit_pair(J1_ZEROS[upper]);
    let lower_gap = (lower_point.hi - magnitude).abs();
    let upper_gap = (upper_point.hi - magnitude).abs();

    // Keep whichever candidate is closer; ties go to the upper one.
    let (nearest, idx, gap) = if lower_gap < upper_gap {
        (lower_point, lower, lower_gap)
    } else {
        (upper_point, upper, upper_gap)
    };

    if idx == 0 {
        // Closest table entry is the first one: use the near-zero rational.
        return jincf_near_zero(ox);
    }

    // We hit exact zero, value, better to return it directly
    if gap == 0. {
        return (f64::from_bits(J1_ZEROS_VALUE[idx]) / scaled * 2.) as f32;
    }

    let coeffs = &J1F_COEFFS[idx - 1];
    // Offset from the tabulated point, subtracting its high and low parts in turn.
    let offset = (magnitude - nearest.hi) - nearest.lo;
    let series = f_polyeval14(
        offset,
        f64::from_bits(coeffs[0]),
        f64::from_bits(coeffs[1]),
        f64::from_bits(coeffs[2]),
        f64::from_bits(coeffs[3]),
        f64::from_bits(coeffs[4]),
        f64::from_bits(coeffs[5]),
        f64::from_bits(coeffs[6]),
        f64::from_bits(coeffs[7]),
        f64::from_bits(coeffs[8]),
        f64::from_bits(coeffs[9]),
        f64::from_bits(coeffs[10]),
        f64::from_bits(coeffs[11]),
        f64::from_bits(coeffs[12]),
        f64::from_bits(coeffs[13]),
    );
    (series / scaled * 2.) as f32
}
/*
   Asymptotic branch of the normalized jinc, returned in f64.

   Evaluates:
     J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
   discarding 1*PI/2 using identities gives:
     J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
   to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is:
     J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))

   Here the J1 argument is PI * x, so the reduction modulo 2*PI is carried out
   on x itself (modulo 2) before the multiplication by PI. The J1 value is
   finally divided by PI * x and doubled.
*/
#[inline]
pub(crate) fn jincpif_asympt(x: f32) -> f64 {
    const PI: f64 = f64::from_bits(0x400921fb54442d18);
    const SQRT_2_OVER_PI: f64 = f64::from_bits(0x3fe9884533d43651);
    const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);

    let xd = x as f64;
    let pix = xd * PI;

    let alpha = j1f_asympt_alpha(pix);
    let beta = j1f_asympt_beta(pix);

    // argument reduction assuming x here value is already multiple of PI.
    // k = round((x*Pi) / (pi*2))
    let k = (xd * 0.5).round_finite();
    // reduced = (x - 2 * k) * Pi
    let reduced = f_fmla(k, -2., xd) * PI;

    let phase = MPI_OVER_4 - alpha;
    let sine = sin_small(reduced + phase);
    let weighted_sine = beta * sine;
    let amplitude = SQRT_2_OVER_PI * j1f_rsqrt(pix);
    let j1 = amplitude * weighted_sine;
    (j1 / pix) * 2.
}
#[cfg(test)]
mod tests {
use super::*;
// Spot checks of `f_jincpif` against fixed expected values: large arguments
// of both signs, small arguments, inputs at or below f32::EPSILON, and the
// infinite / NaN inputs.
#[test]
fn test_jincpif() {
// Negating the argument leaves the expected value unchanged.
assert_eq!(f_jincpif(-102.59484), 0.00024380769);
assert_eq!(f_jincpif(102.59484), 0.00024380769);
assert_eq!(f_jincpif(100.08199), -0.00014386141);
assert_eq!(f_jincpif(0.27715185), 0.9081822);
assert_eq!(f_jincpif(0.007638072), 0.99992806);
// Inputs with |x| <= f32::EPSILON are expected to give exactly 1.
assert_eq!(f_jincpif(-f32::EPSILON), 1.0);
assert_eq!(f_jincpif(f32::EPSILON), 1.0);
assert_eq!(
f_jincpif(0.000000000000000000000000000000000000008827127),
1.0
);
assert_eq!(f_jincpif(5.4), -0.010821743);
assert_eq!(
f_jincpif(77.743162408196766932633181568235159),
-0.00041799102
);
assert_eq!(
f_jincpif(-77.743162408196766932633181568235159),
-0.00041799102
);
assert_eq!(
f_jincpif(84.027189586293545175976760219782591),
-0.00023927793
);
// Infinite inputs give 0, NaN stays NaN.
assert_eq!(f_jincpif(f32::INFINITY), 0.);
assert_eq!(f_jincpif(f32::NEG_INFINITY), 0.);
assert!(f_jincpif(f32::NAN).is_nan());
// `assert_eq!` compares floats with `==`, so `-0.0` here also accepts `+0.0`.
assert_eq!(f_jincpif(-1.7014118e38), -0.0);
}
}

643
vendor/pxfm/src/bessel/k0.rs vendored Normal file
View File

@@ -0,0 +1,643 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::exponents::rational128_exp;
use crate::logs::{fast_log_d_to_dd, log_dd};
use crate::polyeval::f_polyeval3;
/// Modified Bessel function of the second kind of order 0, `K0(x)`.
///
/// Special inputs:
/// * `+0` and `-0` return `+inf`;
/// * `+inf` returns `0`;
/// * `-inf`, NaN and every negative finite `x` return NaN;
/// * `x >= 709.7827128933841` returns `0`.
///
/// Otherwise `x <= 1` is evaluated by `k0_small_dd` and larger `x` by
/// `k0_asympt`.
///
/// Max ULP 0.5
pub fn f_k0(x: f64) -> f64 {
    let bits = x.to_bits();

    // A set sign bit makes the raw pattern compare above 0x7ff << 52, so this
    // single test catches +0, every negative value, both infinities and NaN.
    let needs_special_case = bits == 0 || bits >= 0x7ffu64 << 52;
    if needs_special_case {
        if bits.wrapping_shl(1) == 0 {
            // |x| == 0
            return f64::INFINITY;
        }
        if x.is_infinite() {
            return if x.is_sign_positive() { 0. } else { f64::NAN };
        }
        // x == NaN or x < 0 (finite): both end up as NaN
        return x + f64::NAN;
    }

    // From here on x is positive and finite.
    if bits >= 0x40862e42fefa39f0u64 {
        // x >= 709.7827128933841
        // NOTE(review): by the large-x behaviour sqrt(pi/(2x)) * exp(-x), K0 at
        // this threshold is roughly 2.6e-310, which is still a representable
        // subnormal - confirm that flushing to zero here is intended.
        return 0.;
    }

    if bits <= 0x3ff0000000000000 {
        // x <= 1
        k0_small_dd(x).to_f64()
    } else {
        k0_asympt(x)
    }
}
/**
Computes I0 on interval [0; 1]
as rational approximation I0 = 1 + (x/2)^2 * Pn((x/2)^2)/Qm((x/2)^2)

The three lowest-order coefficients of each polynomial are applied as
double-double pairs; the higher-order tail is evaluated in plain f64 on the
high word of (x/2)^2.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},5,5},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0_0_to_1_fast(x: f64) -> DoubleDouble {
    const P: [(u64, u64); 3] = [
        (0xbae20452afd5045b, 0x3ff0000000000000),
        (0xbc5b6ff3f140da20, 0x3fc93c83592c03de),
        (0x3c25b350e9128d49, 0x3f904f33ef2de455),
    ];
    const Q: [(u64, u64); 3] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x3c323fa63bef2b4e, 0xbfab0df29b4ff089),
        (0x3bfedbdf64ed3110, 0x3f564662064157d2),
    ];

    // Halving only changes the exponent (exact barring underflow in the
    // subnormal range).
    let half = x * 0.5;
    let z = DoubleDouble::from_exact_mult(half, half);

    // Numerator: f64 tail first, then the three double-double Horner steps.
    let num_tail = f_polyeval3(
        z.hi,
        f64::from_bits(0x3f433805a2fabaaa),
        f64::from_bits(0x3ee5897e7f554966),
        f64::from_bits(0x3e731401f0bb5de4),
    );
    let num = DoubleDouble::mul_f64_add(z, num_tail, DoubleDouble::from_bit_pair(P[2]));
    let num = DoubleDouble::mul_add(z, num, DoubleDouble::from_bit_pair(P[1]));
    let num = DoubleDouble::mul_add(z, num, DoubleDouble::from_bit_pair(P[0]));

    // Denominator: same scheme.
    let den_tail = f_polyeval3(
        z.hi,
        f64::from_bits(0xbef6bdbb484fd0a4),
        f64::from_bits(0x3e8d6ced53309351),
        f64::from_bits(0xbe13cff13854e945),
    );
    let den = DoubleDouble::mul_f64_add(z, den_tail, DoubleDouble::from_bit_pair(Q[2]));
    let den = DoubleDouble::mul_add(z, den, DoubleDouble::from_bit_pair(Q[1]));
    let den = DoubleDouble::mul_add(z, den, DoubleDouble::from_bit_pair(Q[0]));

    let ratio = DoubleDouble::div(num, den);
    let correction = DoubleDouble::quick_mult(ratio, z);
    DoubleDouble::full_add_f64(correction, 1.)
}
/**
Fast double-double evaluation of K0 for small arguments.

K0(x) + log(x) * I0(x) = P(x^2)
hence
K0(x) = P(x^2) - log(x)*I0(x)

P is a rational function. Its three lowest-order coefficients (numerator and
denominator) are applied in double-double, the higher-order tail in plain f64.
The result is accepted only if widening it by a relative 2^-63 does not change
its rounding to f64; otherwise `k0_small_hard` recomputes it.

Generated in Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselK[0,x]+Log[x]BesselI[0,x]
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},5,5},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
pub(crate) fn k0_small_dd(x: f64) -> DoubleDouble {
    const P: [(u64, u64); 6] = [
        (0x3c1be095d044e896, 0x3fbdadb014541eb2),
        (0x3c7321baa1d0a2d9, 0x3fd1b9f19bc9019a),
        (0xbc33ce33a244e5bd, 0x3f94ec39f8744183),
        (0x3bd7008dfc623255, 0x3f3d85175b25727d),
        (0xbb4aa2a1c4905d30, 0x3ed007a860ef3235),
        (0xbae8daa77abd6f7f, 0x3e4839e32c19f31a),
    ];
    const Q: [(u64, u64); 3] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0xbc2a82a292acdc83, 0xbf91be3a25c968d6),
        (0xbb9d2c37183a6496, 0x3f23bac6961619d8),
    ];

    let z = DoubleDouble::from_exact_mult(x, x);

    // Numerator tail in f64, using the second word of P[3], P[4] and P[5].
    let num_tail = f_polyeval3(
        z.hi,
        f64::from_bits(P[3].1),
        f64::from_bits(P[4].1),
        f64::from_bits(P[5].1),
    );
    let num = DoubleDouble::mul_f64_add(z, num_tail, DoubleDouble::from_bit_pair(P[2]));
    let num = DoubleDouble::mul_add(z, num, DoubleDouble::from_bit_pair(P[1]));
    let num = DoubleDouble::mul_add(z, num, DoubleDouble::from_bit_pair(P[0]));

    // Denominator tail in f64; its constant term is exactly 1.0.
    let den_tail = f_polyeval3(
        z.hi,
        f64::from_bits(0xbeac315b81faa1bf),
        f64::from_bits(0x3e2ab2d2fbae0863),
        f64::from_bits(0xbd9be23550f83df7),
    );
    let den = DoubleDouble::mul_f64_add(z, den_tail, DoubleDouble::from_bit_pair(Q[2]));
    let den = DoubleDouble::mul_add(z, den, DoubleDouble::from_bit_pair(Q[1]));
    let den = DoubleDouble::mul_add_f64(z, den, f64::from_bits(0x3ff0000000000000));

    let rational = DoubleDouble::div(num, den);
    let log_x = fast_log_d_to_dd(x);
    let i0 = i0_0_to_1_fast(x);
    let k0 = DoubleDouble::mul_add(log_x, -i0, rational);

    // Rounding test with a relative error bound of 2^-63.
    let err = k0.hi * f64::from_bits(0x3c00000000000000); // 2^-63
    let upper = k0.hi + (k0.lo + err);
    let lower = k0.hi + (k0.lo - err);
    if upper != lower {
        // Both bounds must round to the same f64 (a NaN also lands here).
        return k0_small_hard(x, i0);
    }
    k0
}
/**
Accurate fallback for K0 on small arguments: every coefficient of the rational
function is applied in double-double, and `log_dd` supplies the logarithm.
`vi` is the I0(x) value already computed by the caller.

K0(x) + log(x) * I0(x) = P(x^2)
hence
K0(x) = P(x^2) - log(x)*I0(x)

Generated in Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselK[0,x]+Log[x]BesselI[0,x]
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},5,5},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn k0_small_hard(x: f64, vi: DoubleDouble) -> DoubleDouble {
    const P: [(u64, u64); 6] = [
        (0x3c1be095d044e896, 0x3fbdadb014541eb2),
        (0x3c7321baa1d0a2d9, 0x3fd1b9f19bc9019a),
        (0xbc33ce33a244e5bd, 0x3f94ec39f8744183),
        (0x3bd7008dfc623255, 0x3f3d85175b25727d),
        (0xbb4aa2a1c4905d30, 0x3ed007a860ef3235),
        (0xbae8daa77abd6f7f, 0x3e4839e32c19f31a),
    ];
    const Q: [(u64, u64); 6] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0xbc2a82a292acdc83, 0xbf91be3a25c968d6),
        (0xbb9d2c37183a6496, 0x3f23bac6961619d8),
        (0xbb32032e14c6c2b2, 0xbeac315b81faa1bf),
        (0x3aa1a1dc04bfba96, 0x3e2ab2d2fbae0863),
        (0x3a3e0f678099fcff, 0xbd9be23550f83df7),
    ];

    let z = DoubleDouble::from_exact_mult(x, x);

    // Horner scheme from the highest coefficient down: P[5], then P[4]..P[0].
    let numerator = P[..5]
        .iter()
        .rev()
        .fold(DoubleDouble::from_bit_pair(P[5]), |acc, &pair| {
            DoubleDouble::mul_add(z, acc, DoubleDouble::from_bit_pair(pair))
        });

    // Same for the denominator, Q[5] then Q[4]..Q[1]; the constant term 1.0 is
    // added in a final step as a plain f64.
    let den_upper = Q[1..5]
        .iter()
        .rev()
        .fold(DoubleDouble::from_bit_pair(Q[5]), |acc, &pair| {
            DoubleDouble::mul_add(z, acc, DoubleDouble::from_bit_pair(pair))
        });
    let denominator =
        DoubleDouble::mul_add_f64(z, den_upper, f64::from_bits(0x3ff0000000000000));

    let rational = DoubleDouble::div(numerator, denominator);
    let log_x = log_dd(x);
    DoubleDouble::mul_add(log_x, -vi, rational)
}
/**
Fast double-double evaluation of K0 for x > 1.

Generated in Wolfram
Computes sqrt(x)*exp(x)*K0(x)=Pn(1/x)/Qm(1/x)
hence
K0(x) = Pn(1/x)/Qm(1/x) / (sqrt(x) * exp(x))

Both polynomials have 12 coefficients and are evaluated in 1/x by pairing
neighbouring coefficients and combining the pairs with (1/x)^2, (1/x)^4 and
(1/x)^8. The divisor is built as `e * sqrt * e` with `e = i0_exp(x * 0.5)`
(presumably exp(x/2) in double-double - confirm against `i0_exp`).
If widening the result by a relative 2^-62 changes its rounding to f64,
`k0_asympt_hard` recomputes it.

```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},11,11},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k0_asympt(x: f64) -> f64 {
    const P: [(u64, u64); 12] = [
        (0xbc9a6a11d237114e, 0x3ff40d931ff62706),
        (0x3cdd614ddf4929e5, 0x4040645168c3e483),
        (0xbd1ecf9ea0af6ab2, 0x40757419a703a2ab),
        (0xbd3da3551fb27770, 0x409d4e65365522a2),
        (0xbd564d58855d1a46, 0x40b6dd32f5a199d9),
        (0xbd6cf055ca963a8e, 0x40c4fd2368f19618),
        (0x3d4b6cdfbdc058df, 0x40c68faa11ebcd59),
        (0x3d5b4ce4665bfa46, 0x40bb6fbe08e0a8ea),
        (0xbd4316909063be15, 0x40a1953103a5be31),
        (0x3d12f3f8edf41af0, 0x4074d2cb001e175c),
        (0xbcd7bba36540264f, 0x40316cffcad5f8f9),
        (0xbc6bf28dfdd5d37d, 0x3fc2f487fe78b8d7),
    ];
    const Q: [(u64, u64); 12] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0xbcb9e8a5b17e696a, 0x403a485acd64d64a),
        (0x3cd2e2e9c87f71f7, 0x4071518092320ecb),
        (0xbd0d05bdb9431a2f, 0x4097e57e4c22c08e),
        (0x3d5207068ab19ba9, 0x40b2ebadb2db62f9),
        (0xbd64e37674083471, 0x40c1c0e4e9d6493d),
        (0x3d3efb7a9a62b020, 0x40c3b94e8d62cdc7),
        (0x3d47d6ce80a2114b, 0x40b93c2fd39e868e),
        (0xbd1dfda61f525861, 0x40a1877a53a7f8d8),
        (0x3d1236ff523dfcfa, 0x4077c3a10c2827de),
        (0xbcc889997c9b0fe7, 0x4039a1d80b11c4a1),
        (0x3c7ded0e8d73dddc, 0x3fdafe4913722123),
    ];

    let inv = DoubleDouble::from_quick_recip(x);
    let half_exp = i0_exp(x * 0.5);
    let sqrt_x = DoubleDouble::from_sqrt(x);

    // Powers of 1/x used to combine the coefficient pairs.
    let inv2 = DoubleDouble::quick_mult(inv, inv);
    let inv4 = DoubleDouble::quick_mult(inv2, inv2);
    let inv8 = DoubleDouble::quick_mult(inv4, inv4);

    // Numerator: pairs P[2k] + P[2k + 1] / x.
    let n0 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(P[1]),
        DoubleDouble::from_bit_pair(P[0]),
    );
    let n1 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(P[3]),
        DoubleDouble::from_bit_pair(P[2]),
    );
    let n2 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(P[5]),
        DoubleDouble::from_bit_pair(P[4]),
    );
    let n3 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(P[7]),
        DoubleDouble::from_bit_pair(P[6]),
    );
    let n4 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(P[9]),
        DoubleDouble::from_bit_pair(P[8]),
    );
    let n5 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(P[11]),
        DoubleDouble::from_bit_pair(P[10]),
    );
    let n_low = DoubleDouble::mul_add(inv2, n1, n0);
    let n_mid = DoubleDouble::mul_add(inv2, n3, n2);
    let n_high = DoubleDouble::mul_add(inv2, n5, n4);
    let n_low8 = DoubleDouble::mul_add(inv4, n_mid, n_low);
    let numerator = DoubleDouble::mul_add(inv8, n_high, n_low8);

    // Denominator: same scheme; the constant term is the plain f64 1.0.
    let d0 = DoubleDouble::mul_add_f64(
        inv,
        DoubleDouble::from_bit_pair(Q[1]),
        f64::from_bits(0x3ff0000000000000),
    );
    let d1 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(Q[3]),
        DoubleDouble::from_bit_pair(Q[2]),
    );
    let d2 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(Q[5]),
        DoubleDouble::from_bit_pair(Q[4]),
    );
    let d3 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(Q[7]),
        DoubleDouble::from_bit_pair(Q[6]),
    );
    let d4 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(Q[9]),
        DoubleDouble::from_bit_pair(Q[8]),
    );
    let d5 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(Q[11]),
        DoubleDouble::from_bit_pair(Q[10]),
    );
    let d_low = DoubleDouble::mul_add(inv2, d1, d0);
    let d_mid = DoubleDouble::mul_add(inv2, d3, d2);
    let d_high = DoubleDouble::mul_add(inv2, d5, d4);
    let d_low8 = DoubleDouble::mul_add(inv4, d_mid, d_low);
    let denominator = DoubleDouble::mul_add(inv8, d_high, d_low8);

    let rational = DoubleDouble::div(numerator, denominator);
    let k0 = DoubleDouble::div(rational, half_exp * sqrt_x * half_exp);

    // Rounding test with a relative error bound of 2^-62.
    let err = k0.hi * f64::from_bits(0x3c10000000000000); // 2^-62
    let upper = k0.hi + (k0.lo + err);
    let lower = k0.hi + (k0.lo - err);
    if upper == lower {
        k0.to_f64()
    } else {
        k0_asympt_hard(x)
    }
}
/**
Accurate fallback for K0 on x > 1, carried out in `DyadicFloat128`.

Generated in Wolfram
Computes sqrt(x)*exp(x)*K0(x)=Pn(1/x)/Qm(1/x)
hence
K0(x) = Pn(1/x)/Qm(1/x) / (sqrt(x) * exp(x))

Both 15-coefficient polynomials are evaluated in 1/x with a Horner scheme,
starting from the last table entry. The quotient is multiplied by the
reciprocal of `rational128_exp(x)` and by the value of `bessel_rsqrt_hard`
(presumably 1/sqrt(x) - confirm against that helper).

```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},14,14},WorkingPrecision->90]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline(never)]
#[cold]
fn k0_asympt_hard(x: f64) -> f64 {
    static P: [DyadicFloat128; 15] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0xa06c98ff_b1382cb2_be520f51_a7b8f970_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -122,
            mantissa: 0xc84d8d0c_7faeef84_e56abccc_3d70f8a2_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -118,
            mantissa: 0xd1a71096_3da22280_35768c9e_0d3ddf42_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -115,
            mantissa: 0xf202e474_3698aabb_05688da0_ea1a088d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -112,
            mantissa: 0xaaa01830_8138af4d_1137b2dd_11a216f5_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -110,
            mantissa: 0x99e0649f_320bca1a_c7adadb3_f5d8498e_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xb4d81657_de1baf00_918cbc76_c6974e96_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -108,
            mantissa: 0x8a9a28c8_a61c2c7a_12416d56_51c0b3d3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -108,
            mantissa: 0x88a079f1_d9bd4582_6353316c_3aeb9dc9_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xa82e10eb_9dc6225a_ef6223e7_54aa254d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0xf5fc07fe_6b652e8a_0b9e74ba_d0c56118_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0xc5288444_c7354b24_4a4e1663_86488928_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -116,
            mantissa: 0x96d3d226_a220ae6e_d6cca1ae_40f01e27_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -121,
            mantissa: 0xa7ab931b_499c4964_499c1091_4ab9673d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -129,
            mantissa: 0xf8373d1a_9ff3f9c6_e5cfbe0a_85ccc131_u128,
        },
    ];
    static Q: [DyadicFloat128; 15] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -122,
            mantissa: 0xa05190f4_dcf0d35c_277e0f21_0635c538_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -118,
            mantissa: 0xa8837381_94c38992_86c0995d_5e5fa474_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -115,
            mantissa: 0xc3a4f279_9297e905_f59cc065_75959de8_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -112,
            mantissa: 0x8b05ade4_03432e06_881ce37d_a907216d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0xfd77f85e_35626f21_355ae728_01b78cbe_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0x972ed117_254970eb_661121dc_a4462d2f_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xec9d204a_9294ab57_2ef500d5_59d701b7_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xf033522d_cae45860_53a01453_c56da895_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0x9a33640c_9896ead5_1ce040d7_b36544f3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0xefe714fa_49da0166_fdf8bc68_57b77fa0_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0xd323b84c_214196b0_e25b8931_930fea0d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -116,
            mantissa: 0xbbb5240b_346642d8_010383cb_1e8a607e_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -120,
            mantissa: 0x88dcfa2a_f9f7d2ab_dd017994_8fae7e87_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0xc891477c_526e0f5e_74c4ae9f_9d8732b5_u128,
        },
    ];

    let inv = DyadicFloat128::accurate_reciprocal(x);
    let exp_x = rational128_exp(x);
    let inv_sqrt = bessel_rsqrt_hard(x, inv);

    // Horner scheme in 1/x: start from entry 14, then fold in entries 13..=0.
    let numerator = P[..14]
        .iter()
        .rev()
        .fold(P[14], |acc, &coeff| inv * acc + coeff);
    let denominator = Q[..14]
        .iter()
        .rev()
        .fold(Q[14], |acc, &coeff| inv * acc + coeff);

    let rational = numerator * denominator.reciprocal();
    let k0 = rational * exp_x.reciprocal() * inv_sqrt;
    k0.fast_as_f64()
}
#[cfg(test)]
mod tests {
use super::*;
// Spot checks of `f_k0`: arguments up to 1, arguments above 1, and the
// zero / negative / infinite inputs.
#[test]
fn test_k0() {
// x <= 1
assert_eq!(f_k0(0.11), 2.3332678776741127);
assert_eq!(f_k0(0.643), 0.7241025575342853);
assert_eq!(f_k0(0.964), 0.4433737413379138);
// x > 1
assert_eq!(f_k0(2.964), 0.03621679838808167);
assert_eq!(f_k0(423.43), 7.784461905543397e-186);
// Both zeros give +inf; negative inputs (finite or infinite) give NaN.
assert_eq!(f_k0(0.), f64::INFINITY);
assert_eq!(f_k0(-0.), f64::INFINITY);
assert!(f_k0(-0.5).is_nan());
assert!(f_k0(f64::NEG_INFINITY).is_nan());
assert_eq!(f_k0(f64::INFINITY), 0.);
}
}

434
vendor/pxfm/src/bessel/k0e.rs vendored Normal file
View File

@@ -0,0 +1,434 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::bessel::k0::k0_small_dd;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
/// Exponentially scaled modified Bessel function of the second kind of order 0.
///
/// Computes K0(x)exp(x)
///
/// Special inputs:
/// * `+0` and `-0` return `+inf`;
/// * `+inf` returns `0`;
/// * `-inf`, NaN and every negative finite `x` return NaN.
///
/// Otherwise `x <= 1` multiplies `k0_small_dd(x)` by `i0_exp(x)` and larger
/// `x` is handled by `k0e_asympt`.
pub fn f_k0e(x: f64) -> f64 {
    let bits = x.to_bits();

    // A set sign bit makes the raw pattern compare above 0x7ff << 52, so this
    // single test catches +0, every negative value, both infinities and NaN.
    if bits == 0 || bits >= 0x7ffu64 << 52 {
        if bits.wrapping_shl(1) == 0 {
            // |x| == 0
            return f64::INFINITY;
        }
        if x.is_infinite() {
            return if x.is_sign_positive() { 0. } else { f64::NAN };
        }
        // x == NaN or x < 0 (finite): both end up as NaN
        return x + f64::NAN;
    }

    if bits > 0x3ff0000000000000 {
        // x > 1
        return k0e_asympt(x);
    }

    // 0 < x <= 1: scale the small-argument K0 by the exponential factor.
    let k0 = k0_small_dd(x);
    let scale = i0_exp(x);
    DoubleDouble::quick_mult(scale, k0).to_f64()
}
/**
Fast double-double evaluation of K0(x)exp(x) for x > 1.

Generated in Wolfram
Computes sqrt(x)*exp(x)*K0(x)=Pn(1/x)/Qm(1/x)
hence
K0(x)exp(x) = Pn(1/x)/Qm(1/x) / sqrt(x)

Both polynomials have 12 coefficients and are evaluated in 1/x by pairing
neighbouring coefficients and combining the pairs with (1/x)^2, (1/x)^4 and
(1/x)^8. If widening the result by a relative 2^-62 changes its rounding to
f64, `k0e_asympt_hard` recomputes it.

```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},11,11},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k0e_asympt(x: f64) -> f64 {
    const P: [(u64, u64); 12] = [
        (0xbc9a6a11d237114e, 0x3ff40d931ff62706),
        (0x3cdd614ddf4929e5, 0x4040645168c3e483),
        (0xbd1ecf9ea0af6ab2, 0x40757419a703a2ab),
        (0xbd3da3551fb27770, 0x409d4e65365522a2),
        (0xbd564d58855d1a46, 0x40b6dd32f5a199d9),
        (0xbd6cf055ca963a8e, 0x40c4fd2368f19618),
        (0x3d4b6cdfbdc058df, 0x40c68faa11ebcd59),
        (0x3d5b4ce4665bfa46, 0x40bb6fbe08e0a8ea),
        (0xbd4316909063be15, 0x40a1953103a5be31),
        (0x3d12f3f8edf41af0, 0x4074d2cb001e175c),
        (0xbcd7bba36540264f, 0x40316cffcad5f8f9),
        (0xbc6bf28dfdd5d37d, 0x3fc2f487fe78b8d7),
    ];
    const Q: [(u64, u64); 12] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0xbcb9e8a5b17e696a, 0x403a485acd64d64a),
        (0x3cd2e2e9c87f71f7, 0x4071518092320ecb),
        (0xbd0d05bdb9431a2f, 0x4097e57e4c22c08e),
        (0x3d5207068ab19ba9, 0x40b2ebadb2db62f9),
        (0xbd64e37674083471, 0x40c1c0e4e9d6493d),
        (0x3d3efb7a9a62b020, 0x40c3b94e8d62cdc7),
        (0x3d47d6ce80a2114b, 0x40b93c2fd39e868e),
        (0xbd1dfda61f525861, 0x40a1877a53a7f8d8),
        (0x3d1236ff523dfcfa, 0x4077c3a10c2827de),
        (0xbcc889997c9b0fe7, 0x4039a1d80b11c4a1),
        (0x3c7ded0e8d73dddc, 0x3fdafe4913722123),
    ];

    let inv = DoubleDouble::from_quick_recip(x);
    let sqrt_x = DoubleDouble::from_sqrt(x);

    // Powers of 1/x used to combine the coefficient pairs.
    let inv2 = DoubleDouble::quick_mult(inv, inv);
    let inv4 = DoubleDouble::quick_mult(inv2, inv2);
    let inv8 = DoubleDouble::quick_mult(inv4, inv4);

    // Numerator: pairs P[2k] + P[2k + 1] / x.
    let n0 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(P[1]),
        DoubleDouble::from_bit_pair(P[0]),
    );
    let n1 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(P[3]),
        DoubleDouble::from_bit_pair(P[2]),
    );
    let n2 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(P[5]),
        DoubleDouble::from_bit_pair(P[4]),
    );
    let n3 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(P[7]),
        DoubleDouble::from_bit_pair(P[6]),
    );
    let n4 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(P[9]),
        DoubleDouble::from_bit_pair(P[8]),
    );
    let n5 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(P[11]),
        DoubleDouble::from_bit_pair(P[10]),
    );
    let n_low = DoubleDouble::mul_add(inv2, n1, n0);
    let n_mid = DoubleDouble::mul_add(inv2, n3, n2);
    let n_high = DoubleDouble::mul_add(inv2, n5, n4);
    let n_low8 = DoubleDouble::mul_add(inv4, n_mid, n_low);
    let numerator = DoubleDouble::mul_add(inv8, n_high, n_low8);

    // Denominator: same scheme; the constant term is the plain f64 1.0.
    let d0 = DoubleDouble::mul_add_f64(
        inv,
        DoubleDouble::from_bit_pair(Q[1]),
        f64::from_bits(0x3ff0000000000000),
    );
    let d1 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(Q[3]),
        DoubleDouble::from_bit_pair(Q[2]),
    );
    let d2 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(Q[5]),
        DoubleDouble::from_bit_pair(Q[4]),
    );
    let d3 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(Q[7]),
        DoubleDouble::from_bit_pair(Q[6]),
    );
    let d4 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(Q[9]),
        DoubleDouble::from_bit_pair(Q[8]),
    );
    let d5 = DoubleDouble::mul_add(
        inv,
        DoubleDouble::from_bit_pair(Q[11]),
        DoubleDouble::from_bit_pair(Q[10]),
    );
    let d_low = DoubleDouble::mul_add(inv2, d1, d0);
    let d_mid = DoubleDouble::mul_add(inv2, d3, d2);
    let d_high = DoubleDouble::mul_add(inv2, d5, d4);
    let d_low8 = DoubleDouble::mul_add(inv4, d_mid, d_low);
    let denominator = DoubleDouble::mul_add(inv8, d_high, d_low8);

    let rational = DoubleDouble::div(numerator, denominator);
    let k0e = DoubleDouble::div(rational, sqrt_x);

    // Rounding test with a relative error bound of 2^-62.
    let err = k0e.hi * f64::from_bits(0x3c10000000000000); // 2^-62
    let upper = k0e.hi + (k0e.lo + err);
    let lower = k0e.hi + (k0e.lo - err);
    if upper == lower {
        k0e.to_f64()
    } else {
        k0e_asympt_hard(x)
    }
}
/**
Generated in Wolfram
Computes sqrt(x)*exp(x)*K0(x)=Pn(1/x)/Qm(1/x)
hence
K0(x)exp(x) = Pn(1/x)/Qm(1/x) / sqrt(x)
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},14,14},WorkingPrecision->90]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline(never)]
#[cold]
fn k0e_asympt_hard(x: f64) -> f64 {
static P: [DyadicFloat128; 15] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0xa06c98ff_b1382cb2_be520f51_a7b8f970_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -122,
mantissa: 0xc84d8d0c_7faeef84_e56abccc_3d70f8a2_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -118,
mantissa: 0xd1a71096_3da22280_35768c9e_0d3ddf42_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0xf202e474_3698aabb_05688da0_ea1a088d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -112,
mantissa: 0xaaa01830_8138af4d_1137b2dd_11a216f5_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0x99e0649f_320bca1a_c7adadb3_f5d8498e_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xb4d81657_de1baf00_918cbc76_c6974e96_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -108,
mantissa: 0x8a9a28c8_a61c2c7a_12416d56_51c0b3d3_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -108,
mantissa: 0x88a079f1_d9bd4582_6353316c_3aeb9dc9_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xa82e10eb_9dc6225a_ef6223e7_54aa254d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -111,
mantissa: 0xf5fc07fe_6b652e8a_0b9e74ba_d0c56118_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -113,
mantissa: 0xc5288444_c7354b24_4a4e1663_86488928_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -116,
mantissa: 0x96d3d226_a220ae6e_d6cca1ae_40f01e27_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -121,
mantissa: 0xa7ab931b_499c4964_499c1091_4ab9673d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -129,
mantissa: 0xf8373d1a_9ff3f9c6_e5cfbe0a_85ccc131_u128,
},
];
static Q: [DyadicFloat128; 15] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -122,
mantissa: 0xa05190f4_dcf0d35c_277e0f21_0635c538_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -118,
mantissa: 0xa8837381_94c38992_86c0995d_5e5fa474_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0xc3a4f279_9297e905_f59cc065_75959de8_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -112,
mantissa: 0x8b05ade4_03432e06_881ce37d_a907216d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -111,
mantissa: 0xfd77f85e_35626f21_355ae728_01b78cbe_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0x972ed117_254970eb_661121dc_a4462d2f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xec9d204a_9294ab57_2ef500d5_59d701b7_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xf033522d_cae45860_53a01453_c56da895_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0x9a33640c_9896ead5_1ce040d7_b36544f3_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -111,
mantissa: 0xefe714fa_49da0166_fdf8bc68_57b77fa0_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -113,
mantissa: 0xd323b84c_214196b0_e25b8931_930fea0d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -116,
mantissa: 0xbbb5240b_346642d8_010383cb_1e8a607e_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -120,
mantissa: 0x88dcfa2a_f9f7d2ab_dd017994_8fae7e87_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0xc891477c_526e0f5e_74c4ae9f_9d8732b5_u128,
},
];
let recip = DyadicFloat128::accurate_reciprocal(x);
let r_sqrt = bessel_rsqrt_hard(x, recip);
let mut p0 = P[14];
for i in (0..14).rev() {
p0 = recip * p0 + P[i];
}
let mut q = Q[14];
for i in (0..14).rev() {
q = recip * q + Q[i];
}
let v = p0 * q.reciprocal();
let r = v * r_sqrt;
r.fast_as_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins `f_k0e` on ordinary positive arguments and on the special inputs.
    #[test]
    fn test_k0() {
        // (argument, expected result) pairs, checked in order.
        let finite_cases: [(f64, f64); 7] = [
            (0.00060324324, 7.533665613459802),
            (0.11, 2.6045757643537244),
            (0.643, 1.3773725807788395),
            (0.964, 1.1625987432322884),
            (2.964, 0.7017119941259377),
            (423.43, 0.06088931243251448),
            (4324235240321.43, 6.027056776336986e-7),
        ];
        for (arg, expected) in finite_cases {
            assert_eq!(f_k0e(arg), expected);
        }

        // The high-precision fallback must agree on the same argument.
        assert_eq!(k0e_asympt_hard(423.43), 0.06088931243251448);

        // Zeros of either sign give +inf.
        for zero in [0., -0.] {
            assert_eq!(f_k0e(zero), f64::INFINITY);
        }
        // Negative arguments give NaN.
        for negative in [-0.5, f64::NEG_INFINITY] {
            assert!(f_k0e(negative).is_nan());
        }
        assert_eq!(f_k0e(f64::INFINITY), 0.);
    }
}

184
vendor/pxfm/src/bessel/k0ef.rs vendored Normal file
View File

@@ -0,0 +1,184 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0f::i0f_small;
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::logs::fast_logf;
use crate::polyeval::{f_estrin_polyeval7, f_estrin_polyeval8};
/// Exponentially scaled modified Bessel function of the second kind of order 0
///
/// Computes K0(x)exp(x)
///
/// Special cases: returns `+inf` for `x == ±0`, `0` for `x == +inf`, and NaN
/// for NaN input and for any negative argument (including `-inf`).
///
/// Max ULP 0.5
pub fn f_k0ef(x: f32) -> f32 {
    let ux = x.to_bits();
    if ux >= 0xffu32 << 23 || ux == 0 {
        // x == +0, x == +inf, x is NaN, or the sign bit is set (-0, x < 0, -inf)
        if ux.wrapping_shl(1) == 0 {
            // |x| == 0
            return f32::INFINITY;
        }
        if x.is_infinite() {
            return if x.is_sign_positive() { 0. } else { f32::NAN };
        }
        return x + f32::NAN; // x is NaN or a negative finite number
    }
    // x is positive and finite from here on, so comparing bit patterns
    // is the same as comparing values.
    if ux <= 0x3f800000u32 {
        // x <= 1.0
        if ux <= 0x34000000u32 {
            // x <= f32::EPSILON
            // taylor series for K0(x)exp(x) ~ (-euler_gamma + log(2) - log(x)) + (-euler_gamma + log(2) - log(x)) * x
            let dx = x as f64;
            let log_x = fast_logf(x);
            // -euler_gamma + log(2)
            const M_EULER_GAMMA_P_LOG2: f64 = f64::from_bits(0x3fbdadb014541eb2);
            let c1 = -log_x + M_EULER_GAMMA_P_LOG2;
            // c1 + c1 * x
            return f_fmla(c1, dx, c1) as f32;
        }
        return k0ef_small(x);
    }
    k0ef_asympt(x)
}
/**
Small-argument branch of `f_k0ef`, which calls it for `f32::EPSILON < x <= 1`.

Based on
K0(x) + log(x) * I0(x) = P(x^2)
so that
K0(x) * exp(x) = (P(x^2) - log(x) * I0(x)) * exp(x)

Polynomial generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselK[0,x]+Log[x]BesselI[0,x]
g[z_]:=f[Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},6,0},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k0ef_small(x: f32) -> f32 {
    let xd = x as f64;
    // P(x^2)
    let poly = f_estrin_polyeval7(
        xd * xd,
        f64::from_bits(0x3fbdadb014541ece),
        f64::from_bits(0x3fd1dadb01453e9c),
        f64::from_bits(0x3f99dadb01491ac7),
        f64::from_bits(0x3f4bb90e82a4f609),
        f64::from_bits(0x3eef4749ebd25b10),
        f64::from_bits(0x3e85d5b5668593af),
        f64::from_bits(0x3e15233b0788618b),
    );
    let bessel_i0 = i0f_small(x);
    let ln_x = fast_logf(x);
    // K0(x) = P(x^2) - I0(x) * log(x)
    let k0 = f_fmla(-bessel_i0, ln_x, poly);
    // Apply the exponential scaling.
    (k0 * core_expf(x)) as f32
}
/**
Branch of `f_k0ef` for `x > 1`.

Rational approximant in 1/x, generated in Wolfram:
sqrt(x)*exp(x)*K0(x)=Pn(1/x)/Qm(1/x)
and therefore
K0(x)exp(x) = Pn(1/x)/Qm(1/x) / sqrt(x)
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[0,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{2^-33,1},7,7},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k0ef_asympt(x: f32) -> f32 {
    let xd = x as f64;
    let inv_x = 1. / xd;
    // Pn(1/x)
    let numerator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff40d931ff62701),
        f64::from_bits(0x402d8410a60e2ced),
        f64::from_bits(0x404e9f18049bf704),
        f64::from_bits(0x405c07682282783c),
        f64::from_bits(0x4057379c68ce6d5e),
        f64::from_bits(0x403ffd64a0105c4e),
        f64::from_bits(0x400cc53ed67913b4),
        f64::from_bits(0x3faf8cc8747a5d72),
    );
    // Qm(1/x)
    let denominator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x4027ccde1d0eeb14),
        f64::from_bits(0x40492418133aa7a7),
        f64::from_bits(0x4057be8a004d0938),
        f64::from_bits(0x4054cc77d1dfef26),
        f64::from_bits(0x403fd2187097af1d),
        f64::from_bits(0x4011c77649649e55),
        f64::from_bits(0x3fc2080a5965ef9b),
    );
    // The formula above divides by sqrt(x), so this factor is used as 1/sqrt(x).
    let inv_sqrt = j1f_rsqrt(xd);
    ((numerator / denominator) * inv_sqrt) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins `f_k0ef` on the small-argument range and on the special inputs.
    #[test]
    fn test_k0f() {
        // (argument, expected result) pairs, checked in order.
        let finite_cases: [(f32, f32); 4] = [
            (2.034804e-5, 10.918679),
            (0.010260499, 4.743962),
            (0.3260499, 1.7963701),
            (0.72341, 1.3121376),
        ];
        for (arg, expected) in finite_cases {
            assert_eq!(f_k0ef(arg), expected);
        }

        // Zeros of either sign give +inf.
        for zero in [0., -0.] {
            assert_eq!(f_k0ef(zero), f32::INFINITY);
        }
        // Negative arguments give NaN.
        for negative in [-0.5, f32::NEG_INFINITY] {
            assert!(f_k0ef(negative).is_nan());
        }
        assert_eq!(f_k0ef(f32::INFINITY), 0.);
    }
}

184
vendor/pxfm/src/bessel/k0f.rs vendored Normal file
View File

@@ -0,0 +1,184 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0f::i0f_small;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::logs::fast_logf;
use crate::polyeval::{f_estrin_polyeval7, f_estrin_polyeval8};
/// Modified Bessel function of the second kind of order 0
///
/// Returns `+inf` for `x == ±0`, `0` for `x == +inf`, and NaN for NaN input
/// and for any negative argument.
///
/// Max ULP 0.5
///
/// This method has exactly one exception which is not correctly rounded with FMA.
pub fn f_k0f(x: f32) -> f32 {
    let bits = x.to_bits();
    if bits == 0 || bits >= 0xffu32 << 23 {
        // x == +0, x == +inf, x is NaN, or the sign bit is set (-0, x < 0, -inf)
        if bits.wrapping_shl(1) == 0 {
            // |x| == 0
            return f32::INFINITY;
        }
        if !x.is_infinite() {
            return x + f32::NAN; // x is NaN or a negative finite number
        }
        return if x.is_sign_positive() { 0. } else { f32::NAN };
    }
    // x is positive and finite from here on, so comparing bit patterns
    // is the same as comparing values.
    if bits >= 0x42cbc4fbu32 {
        // x >= 101.88473: the result is returned as zero
        return 0.;
    }
    if bits > 0x3f800000u32 {
        // x > 1.0
        return k0f_asympt(x);
    }
    if bits > 0x34000000u32 {
        // f32::EPSILON < x <= 1.0
        return k0f_small(x);
    }
    // x <= f32::EPSILON
    // taylor series for K0(x) ~ -euler_gamma + log(2) - log(x)
    const LOG2_MINUS_EULER_GAMMA: f64 = f64::from_bits(0x3fbdadb014541eb2);
    (-fast_logf(x) + LOG2_MINUS_EULER_GAMMA) as f32
}
/**
Small-argument branch of `f_k0f`, which calls it for `f32::EPSILON < x <= 1`.

Based on
K0(x) + log(x) * I0(x) = P(x^2)
so that
K0(x) = P(x^2) - log(x)*I0(x)

Polynomial generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselK[0,x]+Log[x]BesselI[0,x]
g[z_]:=f[Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},6,0},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k0f_small(x: f32) -> f32 {
    let xd = x as f64;
    // P(x^2)
    let poly = f_estrin_polyeval7(
        xd * xd,
        f64::from_bits(0x3fbdadb014541ece),
        f64::from_bits(0x3fd1dadb01453e9c),
        f64::from_bits(0x3f99dadb01491ac7),
        f64::from_bits(0x3f4bb90e82a4f609),
        f64::from_bits(0x3eef4749ebd25b10),
        f64::from_bits(0x3e85d5b5668593af),
        f64::from_bits(0x3e15233b0788618b),
    );
    let bessel_i0 = i0f_small(x);
    let ln_x = fast_logf(x);
    // P(x^2) - I0(x) * log(x)
    f_fmla(-bessel_i0, ln_x, poly) as f32
}
/**
Branch of `f_k0f` for `x > 1`.

Rational approximant in 1/x, generated in Wolfram:
sqrt(x)*exp(x)*K0(x)=Pn(1/x)/Qm(1/x)
and therefore
K0(x) = Pn(1/x)/Qm(1/x) / (sqrt(x) * exp(x))
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},7,7},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k0f_asympt(x: f32) -> f32 {
    let xd = x as f64;
    let inv_x = 1. / xd;
    // Pn(1/x)
    let numerator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff40d931ff62701),
        f64::from_bits(0x402d8410a62d9c17),
        f64::from_bits(0x404e9f1804dd7e54),
        f64::from_bits(0x405c076822dcd255),
        f64::from_bits(0x4057379c6932949f),
        f64::from_bits(0x403ffd64a0bd54b7),
        f64::from_bits(0x400cc53ed733fd97),
        f64::from_bits(0x3faf8cc8756944eb),
    );
    // Qm(1/x)
    let denominator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x4027ccde1d27ffc9),
        f64::from_bits(0x40492418136fb90f),
        f64::from_bits(0x4057be8a00983906),
        f64::from_bits(0x4054cc77d2379b76),
        f64::from_bits(0x403fd218713ec08d),
        f64::from_bits(0x4011c77649d3f65f),
        f64::from_bits(0x3fc2080a59e87324),
    );
    let exp_x = core_expf(x);
    let sqrt_x = xd.sqrt();
    // Undo the sqrt(x) * exp(x) scaling of the approximant.
    ((numerator / denominator) / (exp_x * sqrt_x)) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins `f_k0f` on the small-argument range and on the special inputs.
    #[test]
    fn test_k0f() {
        // (argument, expected result) pairs, checked in order.
        let finite_cases: [(f32, f32); 4] = [
            (2.034804e-5, 10.918458),
            (0.010260499, 4.695535),
            (0.3260499, 1.2965646),
            (0.72341, 0.636511734),
        ];
        for (arg, expected) in finite_cases {
            assert_eq!(f_k0f(arg), expected);
        }

        // Zeros of either sign give +inf.
        for zero in [0., -0.] {
            assert_eq!(f_k0f(zero), f32::INFINITY);
        }
        // Negative arguments give NaN.
        for negative in [-0.5, f32::NEG_INFINITY] {
            assert!(f_k0f(negative).is_nan());
        }
        assert_eq!(f_k0f(f32::INFINITY), 0.);
    }
}

644
vendor/pxfm/src/bessel/k1.rs vendored Normal file
View File

@@ -0,0 +1,644 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::exponents::rational128_exp;
use crate::logs::{fast_log_d_to_dd, log_dd};
use crate::polyeval::f_polyeval3;
/// Modified Bessel function of the second kind of order 1
///
/// Returns `+inf` for `x == ±0`, `0` for `x == +inf`, and NaN for NaN input
/// and for any negative argument.
///
/// Max ULP 0.5
pub fn f_k1(x: f64) -> f64 {
    let bits = x.to_bits();
    if bits == 0 || bits >= 0x7ffu64 << 52 {
        // x == +0, x == +inf, x is NaN, or the sign bit is set (-0, x < 0, -inf)
        if bits.wrapping_shl(1) == 0 {
            // |x| == 0
            return f64::INFINITY;
        }
        if !x.is_infinite() {
            return x + f64::NAN; // x is NaN or a negative finite number
        }
        return if x.is_sign_positive() { 0. } else { f64::NAN };
    }
    // x is positive and finite from here on, so comparing bit patterns
    // is the same as comparing values.
    if bits >= 0x4086140538aa7d38u64 {
        // x >= 706.5025494880165: the result is returned as zero
        return 0.;
    }
    if bits > 0x3ff0000000000000 {
        // x > 1
        return k1_asympt(x);
    }
    // x <= 1
    k1_small(x).to_f64()
}
// Double-double evaluation used by the small-argument K1 branches.
//
// With z = (x/2)^2 the rational R(z) = P(z)/Q(z) below is fitted to
// (BesselI[1,x]*2/x - 1 - z/2) / z^2, and the function returns
// (x/2) * (1 + z/2 + z^2 * R(z)).
//
// Generated by Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
// g[z_]:=f[2 Sqrt[z]]
// {err,approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},5,5},WorkingPrecision->60]
// poly=Numerator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
#[inline]
fn i1_fast(x: f64) -> DoubleDouble {
    // Low-order numerator coefficients, kept as bit pairs for double-double use.
    const NUM: [(u64, u64); 3] = [
        (0x3c5555555553c008, 0x3fb5555555555555),
        (0x3c06f1014b703de8, 0x3f6dfda17d0a2cef),
        (0xbbc2594d655d84db, 0x3f21b2c299108f7b),
    ];
    // Low-order denominator coefficients, kept as bit pairs for double-double use.
    const DEN: [(u64, u64); 3] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0xbc32ebd3ac0e6253, 0xbfa42c718ce308f7),
        (0xbbe1626e81e3c1bc, 0x3f482772320eab0e),
    ];

    let half_x = x * 0.5; // this is exact
    let z = DoubleDouble::from_exact_mult(half_x, half_x);

    // Numerator: the high-order tail is evaluated in plain f64 on z.hi,
    // the remaining steps run in double-double.
    let num_tail = f_polyeval3(
        z.hi,
        f64::from_bits(0x3ec37625c178f5e2),
        f64::from_bits(0x3e5843215f0d5088),
        f64::from_bits(0x3dd97f1f45f47244),
    );
    let num = DoubleDouble::mul_f64_add(z, num_tail, DoubleDouble::from_bit_pair(NUM[2]));
    let num = DoubleDouble::mul_add(z, num, DoubleDouble::from_bit_pair(NUM[1]));
    let num = DoubleDouble::mul_add(z, num, DoubleDouble::from_bit_pair(NUM[0]));

    // Denominator, same scheme.
    let den_tail = f_polyeval3(
        z.hi,
        f64::from_bits(0xbee169811ef4f4a1),
        f64::from_bits(0x3e6ebdab5dbe02a5),
        f64::from_bits(0xbdeb1dbb29fec52a),
    );
    let den = DoubleDouble::mul_f64_add(z, den_tail, DoubleDouble::from_bit_pair(DEN[2]));
    let den = DoubleDouble::mul_add(z, den, DoubleDouble::from_bit_pair(DEN[1]));
    let den = DoubleDouble::mul_add(z, den, DoubleDouble::from_bit_pair(DEN[0]));

    let ratio = DoubleDouble::div(num, den);
    let z_sqr = DoubleDouble::quick_mult(z, z);
    // 1 + z/2
    let series_head = DoubleDouble::mul_f64_add_f64(z, 0.5, 1.);
    // 1 + z/2 + z^2 * R(z)
    let series = DoubleDouble::mul_add(ratio, z_sqr, series_head);
    // Multiply by x/2.
    let half_x_dd = DoubleDouble::from_exact_mult(x, 0.5);
    DoubleDouble::quick_mult(series, half_x_dd)
}
/**
Small-argument branch of `f_k1` (called for `x <= 1`), evaluated in double-double.

The rational approximant R(x^2) = P(x^2)/Q(x^2) is fitted to
f(x) := (BesselK(1, x) - Log(x)*BesselI(1, x) - 1/x) / x
and the result is assembled as
x * R(x^2) + Log(x)*BesselI(1, x) + 1/x

The fast result is returned only when adding and subtracting the error
bound leaves the rounded value unchanged; otherwise `k1_small_hard` is used.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselK[1,x]-Log[x]BesselI[1,x]-1/x)/x
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},5,4},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
pub(crate) fn k1_small(x: f64) -> DoubleDouble {
    // Numerator coefficients as bit pairs; the second word is the leading f64 part.
    const NUM: [(u64, u64); 6] = [
        (0xbc7037c12b888927, 0xbfd3b5b6028a83d6),
        (0x3c39dba459d023e5, 0xbfb4bac288cfe0cd),
        (0x3be0575395050120, 0xbf6c4a1abe9061df),
        (0x3b755df8e375b3d4, 0xbf0c850679678599),
        (0xbb097e0ec926785f, 0xbe98c4a9b608ae1f),
        (0xbaa029f31c786e81, 0xbe104efe2246ee51),
    ];
    // Denominator coefficients as bit pairs; the second word is the leading f64 part.
    const DEN: [(u64, u64); 5] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x3c19f62e592f3e71, 0xbf8d3bd595449ca9),
        (0xbba8472b975a12d7, 0x3f194de71babe24a),
        (0xbb2eec4b611c19b5, 0xbe994a5dbec84e4d),
        (0x3a9bae2028402903, 0x3e0981ded64a954b),
    ];

    let inv_x = DoubleDouble::from_quick_recip(x);
    let x_sqr = DoubleDouble::from_exact_mult(x, x);

    // Numerator: coefficients 3..=5 are evaluated in plain f64 from their
    // leading words, the remaining steps run in double-double.
    let num_tail = f_polyeval3(
        x_sqr.hi,
        f64::from_bits(NUM[3].1),
        f64::from_bits(NUM[4].1),
        f64::from_bits(NUM[5].1),
    );
    let num = DoubleDouble::mul_f64_add(x_sqr, num_tail, DoubleDouble::from_bit_pair(NUM[2]));
    let num = DoubleDouble::mul_add(x_sqr, num, DoubleDouble::from_bit_pair(NUM[1]));
    let num = DoubleDouble::mul_add(x_sqr, num, DoubleDouble::from_bit_pair(NUM[0]));

    // Denominator: coefficients 3..=4 in plain f64, the constant term is exactly 1.
    let den_tail = f_fmla(
        x_sqr.hi,
        f64::from_bits(DEN[4].1),
        f64::from_bits(DEN[3].1),
    );
    let den = DoubleDouble::mul_f64_add(x_sqr, den_tail, DoubleDouble::from_bit_pair(DEN[2]));
    let den = DoubleDouble::mul_add(x_sqr, den, DoubleDouble::from_bit_pair(DEN[1]));
    let den = DoubleDouble::mul_add_f64(x_sqr, den, 1.0);

    let ratio = DoubleDouble::div(num, den);
    let ln_x = fast_log_d_to_dd(x);
    let bessel_i1 = i1_fast(x);
    // log(x) * I1(x) + 1/x
    let log_term = DoubleDouble::mul_add(bessel_i1, ln_x, inv_x);
    // x * R(x^2) + log(x) * I1(x) + 1/x
    let fast = DoubleDouble::mul_f64_add(ratio, x, log_term);

    // Error bound: |hi| * 2^-61 + 2^-87
    let bound = f_fmla(
        fast.hi,
        f64::from_bits(0x3c20000000000000),
        f64::from_bits(0x3a80000000000000),
    );
    let upper = fast.hi + (fast.lo + bound);
    let lower = fast.hi + (fast.lo - bound);
    if upper != lower {
        // Rounding is ambiguous within the bound: use the slow path.
        return k1_small_hard(x);
    }
    fast
}
/**
Slow path of `k1_small`: same approximant, with every Horner step done in
double-double and the logarithm taken from `log_dd`.

The rational approximant R(x^2) = P(x^2)/Q(x^2) is fitted to
f(x) := (BesselK(1, x) - Log(x)*BesselI(1, x) - 1/x) / x
and the result is assembled as
x * R(x^2) + Log(x)*BesselI(1, x) + 1/x

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselK[1,x]-Log[x]BesselI[1,x]-1/x)/x
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},5,4},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn k1_small_hard(x: f64) -> DoubleDouble {
    // Numerator coefficients as bit pairs, constant term first.
    const NUM: [(u64, u64); 6] = [
        (0xbc7037c12b888927, 0xbfd3b5b6028a83d6),
        (0x3c39dba459d023e5, 0xbfb4bac288cfe0cd),
        (0x3be0575395050120, 0xbf6c4a1abe9061df),
        (0x3b755df8e375b3d4, 0xbf0c850679678599),
        (0xbb097e0ec926785f, 0xbe98c4a9b608ae1f),
        (0xbaa029f31c786e81, 0xbe104efe2246ee51),
    ];
    // Denominator coefficients as bit pairs, constant term first.
    const DEN: [(u64, u64); 5] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x3c19f62e592f3e71, 0xbf8d3bd595449ca9),
        (0xbba8472b975a12d7, 0x3f194de71babe24a),
        (0xbb2eec4b611c19b5, 0xbe994a5dbec84e4d),
        (0x3a9bae2028402903, 0x3e0981ded64a954b),
    ];

    let inv_x = DoubleDouble::from_quick_recip(x);
    let x_sqr = DoubleDouble::from_exact_mult(x, x);

    // Horner's rule in x^2, starting from the highest-degree coefficient.
    let num = NUM[..5]
        .iter()
        .rev()
        .fold(DoubleDouble::from_bit_pair(NUM[5]), |acc, &coeff| {
            DoubleDouble::mul_add(x_sqr, acc, DoubleDouble::from_bit_pair(coeff))
        });
    let den = DEN[..4]
        .iter()
        .rev()
        .fold(DoubleDouble::from_bit_pair(DEN[4]), |acc, &coeff| {
            DoubleDouble::mul_add(x_sqr, acc, DoubleDouble::from_bit_pair(coeff))
        });

    let ratio = DoubleDouble::div(num, den);
    let ln_x = log_dd(x);
    let bessel_i1 = i1_fast(x);
    // log(x) * I1(x) + 1/x
    let log_term = DoubleDouble::mul_add(bessel_i1, ln_x, inv_x);
    // x * R(x^2) + log(x) * I1(x) + 1/x
    DoubleDouble::mul_f64_add(ratio, x, log_term)
}
/**
Branch of `f_k1` for `x > 1`, evaluated in double-double.

A rational approximant in 1/x for Sqrt[x] Exp[x] BesselK[1,x] is evaluated and
then divided by the computed scaling factor. If the rounding of the result is
ambiguous within the error bound, `k1_asympt_hard` is used instead.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},11,11},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k1_asympt(x: f64) -> f64 {
let recip = DoubleDouble::from_quick_recip(x);
// `e` is evaluated at x/2 and multiplied in twice below.
// NOTE(review): presumably exp(x/2), split to avoid overflow for large x; confirm against `i0_exp`.
let e = i0_exp(x * 0.5);
// Despite the name, this is built with `from_sqrt`; the result is divided by it below.
let r_sqrt = DoubleDouble::from_sqrt(x);
// Numerator coefficients as bit pairs, constant term first.
const P: [(u64, u64); 12] = [
(0xbc9a6a0690becb3b, 0x3ff40d931ff62706),
(0xbce573e1bbf2f0b7, 0x40402cebfab5721d),
(0x3d11a739b7c11e7b, 0x4074f58abc0cfbf1),
(0xbd2682a09ded0116, 0x409c8315f8facef2),
(0xbd3a19e91a120168, 0x40b65f7a4caed8b9),
(0x3d449c3d2b834543, 0x40c4fe41fdb4e7b8),
(0xbd6bdd415ac7f7e1, 0x40c7aa402d035d03),
(0x3d528412ff0d6b24, 0x40bf68faddd7d850),
(0xbd48f4bb3f61dac6, 0x40a75f5650249952),
(0xbd1dc534b275e309, 0x4081bddd259c0582),
(0xbcce5103350bd226, 0x4046c7a049014484),
(0x3c8935f8acd6c1d0, 0x3fef7524082b1859),
];
// Powers of recip for the pairwise (Estrin-style) evaluation: recip^2, recip^4, recip^8.
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
// Level 1: pair adjacent coefficients, e_k = P[2k] + recip * P[2k+1].
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
// Level 2: combine the pairs with recip^2.
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
// Level 3: combine with recip^4 and recip^8.
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
// Denominator coefficients as bit pairs, constant term first.
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3cc0d2508437b3f4, 0x40396ff483adec14),
(0xbd130a9c9f8a5338, 0x4070225588d8c15d),
(0xbceceba8fa0e65a2, 0x4095481f6684e3bb),
(0x3d4099f3c178fd2a, 0x40afedc8a778bf42),
(0xbd3a7e6a6276a3e7, 0x40bc0c060112692e),
(0x3d11538c155b16d8, 0x40bcb12bd1101782),
(0xbd5f7b04cdea2c61, 0x40b07fa363202e10),
(0xbce444ed035b66c6, 0x4093d6fe8f44f838),
(0xbcf6f88fb942b610, 0x4065c99fa44030c3),
(0xbcbd1d2aedee5bc9, 0x40207ffabeb00eea),
(0xbc39a0c8091102c9, 0x3facff3d892cd57a),
];
// Same scheme for the denominator; Q[0] is exactly 1.0, so a plain f64 is added here.
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
let z = DoubleDouble::div(p_num, p_den);
// Scaling factor e * sqrt(x) * e, renormalized with `from_exact_add` after each product.
let mut r_e = DoubleDouble::quick_mult(e, r_sqrt);
r_e = DoubleDouble::from_exact_add(r_e.hi, r_e.lo);
r_e = DoubleDouble::quick_mult(r_e, e);
r_e = DoubleDouble::from_exact_add(r_e.hi, r_e.lo);
let r = DoubleDouble::div(z, r_e);
// Error bound relative to the high word.
let err = r.hi * f64::from_bits(0x3c10000000000000); // 2^-62
let ub = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
// If the bound changes the rounded value, fall back to the slow path.
if ub != lb {
return k1_asympt_hard(x);
}
r.to_f64()
}
/**
Slow path of `k1_asympt`, evaluated with `DyadicFloat128` arithmetic.

A rational approximant in 1/x for Sqrt[x] Exp[x] BesselK[1,x] is evaluated by
Horner's rule and then rescaled.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},14,14},WorkingPrecision->70]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn k1_asympt_hard(x: f64) -> f64 {
    // Numerator coefficients of the rational approximant, constant term first.
    static NUM: [DyadicFloat128; 15] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0xa06c98ff_b1382cb2_be5210ac_f26f25d1_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -122,
            mantissa: 0xc5f546cb_659a39d0_fafbd188_36ca05b9_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -118,
            mantissa: 0xcd0b7cfa_de158d26_7084bbe9_f1bdb66d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -115,
            mantissa: 0xeac7be2f_957d1260_8849508a_2a5a8972_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -112,
            mantissa: 0xa4d14fec_fecc6444_4c7b0287_dad71a86_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -110,
            mantissa: 0x94e3180c_01df9932_ad2acd8b_bab59c05_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xb0de10f8_74918442_94a96368_8eaa4d0d_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -108,
            mantissa: 0x8adfea76_d6dbe5d9_46bfaf83_9341f4b5_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -108,
            mantissa: 0x8f0a4337_b69b602c_cf187222_f3a3379f_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xbd4c3ebf_c2db0fad_1b425641_cc470043_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -110,
            mantissa: 0x9b14d29f_9b97e3c8_c1a7b9d0_787f0ddb_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -112,
            mantissa: 0x93e670d2_07a553ef_a90d4895_cf1b5011_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -115,
            mantissa: 0x93e0ee0a_cb4d8910_6b4d3e37_f4f9df49_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -120,
            mantissa: 0xff0ce10d_5585abd1_e8a53a12_65131ad4_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -126,
            mantissa: 0xf020536d_822cbe51_c8de095a_03367c83_u128,
        },
    ];
    // Denominator coefficients of the rational approximant, constant term first.
    static DEN: [DyadicFloat128; 15] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -122,
            mantissa: 0x9c729dd5_4828a918_42807f58_d485a511_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -118,
            mantissa: 0x9ff6f631_0794001d_433ab0c5_d4c682a9_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -115,
            mantissa: 0xb3f81e8b_1e0e85a6_3928342e_c83088a1_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -113,
            mantissa: 0xf6b1c203_a60d4294_239ad045_2c67c224_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0xd7a98b14_7a499762_abde5c38_3a5b40e4_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -110,
            mantissa: 0xf4eb8b77_a2cdc686_afd1273f_d464c8b7_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xb4c1e12a_93ee86fc_930c6f94_cfa6ac3a_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -109,
            mantissa: 0xaaeaab88_32b776b7_fdd76b0f_24349f41_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -110,
            mantissa: 0xc8ec9d61_5bf2ee9b_878b4962_4a5cee85_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -111,
            mantissa: 0x8b97bab0_3351673f_22f10d40_fd1c9ff3_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -114,
            mantissa: 0xd31cb80a_bf8cbedc_b0dcf7e7_c599f79e_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -117,
            mantissa: 0x96b354c8_69197193_ea4f608f_81943988_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -122,
            mantissa: 0x989af1bb_e48b5c44_7cd09746_f15e935a_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -130,
            mantissa: 0xb7b51326_23c29ed5_8d3dcf5a_79bd9a4f_u128,
        },
    ];

    let inv_x = DyadicFloat128::accurate_reciprocal(x);
    let exp_x = rational128_exp(x);
    // NOTE(review): presumably 1/sqrt(x) in 128-bit precision; confirm against `bessel_rsqrt_hard`.
    let inv_sqrt = bessel_rsqrt_hard(x, inv_x);

    // Horner's rule in `inv_x`, starting from the highest-degree coefficient.
    let numerator = NUM[..14]
        .iter()
        .rev()
        .fold(NUM[14], |acc, &coeff| inv_x * acc + coeff);
    let denominator = DEN[..14]
        .iter()
        .rev()
        .fold(DEN[14], |acc, &coeff| inv_x * acc + coeff);

    let ratio = numerator * denominator.reciprocal();
    // Rescale by the reciprocal of `exp_x` times `inv_sqrt`.
    let scaled = ratio * (exp_x.reciprocal() * inv_sqrt);
    scaled.fast_as_f64()
}
// Regression tests for `f_k1`: exact-equality checks against pinned expected
// values, followed by the special inputs (zeros, negative argument, infinities).
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_k1() {
// Finite positive arguments; `assert_eq!` requires an exact match with the pinned value.
assert_eq!(f_k1(0.643), 1.184534109892725);
assert_eq!(f_k1(0.964), 0.6402280656771248);
assert_eq!(f_k1(2.964), 0.04192888446074039);
assert_eq!(f_k1(8.43), 9.824733212831289e-5);
assert_eq!(f_k1(16.43), 2.3142404075259965e-8);
assert_eq!(f_k1(423.43), 7.793648638470207e-186);
// Both signed zeros are expected to give +inf.
assert_eq!(f_k1(0.), f64::INFINITY);
assert_eq!(f_k1(-0.), f64::INFINITY);
// Negative arguments, including -inf, are expected to give NaN.
assert!(f_k1(-0.5).is_nan());
assert!(f_k1(f64::NEG_INFINITY).is_nan());
// +inf is expected to give 0.
assert_eq!(f_k1(f64::INFINITY), 0.);
}
}

425
vendor/pxfm/src/bessel/k1e.rs vendored Normal file
View File

@@ -0,0 +1,425 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::i0_exp;
use crate::bessel::k1::k1_small;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
/// Exponentially scaled modified Bessel function of the second kind of order 1.
///
/// Computes K1(x)exp(x)
///
/// Special inputs: `+0.0` and `-0.0` return `+inf`, `+inf` returns `0`, and
/// `-inf`, NaN and every other negative argument return NaN.
pub fn f_k1e(x: f64) -> f64 {
    // Bit pattern of +inf. Any pattern at or above it is +inf, a NaN, or has the sign bit set.
    const POS_INF_BITS: u64 = 0x7ffu64 << 52;
    // Bit pattern of 1.0.
    const ONE_BITS: u64 = 0x3ff0000000000000;

    let bits = x.to_bits();

    if bits != 0 && bits < POS_INF_BITS {
        // x is finite and strictly positive.
        if bits > ONE_BITS {
            // x > 1
            return k1e_asympt(x);
        }
        // x <= 1: multiply the small-argument evaluation by the scaling factor
        // (presumably exp(x) from `i0_exp` — confirm against that helper).
        let scale = i0_exp(x);
        let k1 = k1_small(x);
        return DoubleDouble::quick_mult(scale, k1).to_f64();
    }

    // Remaining inputs: |x| == 0, |x| == inf, NaN, or x < 0.
    if bits.wrapping_shl(1) == 0 {
        // |x| == 0
        f64::INFINITY
    } else if x.is_infinite() {
        if x.is_sign_positive() { 0. } else { f64::NAN }
    } else {
        // NaN input or a finite negative argument.
        x + f64::NAN
    }
}
/**
Evaluates K1(x)exp(x) for the `x > 1` branch of `f_k1e` using a degree 11/11
rational approximation of `Sqrt[x] Exp[x] BesselK[1,x]` in the variable `1/x`,
carried out in double-double arithmetic. If the final rounding test is
inconclusive the computation is redone by `k1e_asympt_hard`.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},11,11},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k1e_asympt(x: f64) -> f64 {
// Presumably 1/x and sqrt(x) as double-double values (the latter is used as a
// divisor at the end, despite the `r_sqrt` name) — confirm against `DoubleDouble`.
let recip = DoubleDouble::from_quick_recip(x);
let r_sqrt = DoubleDouble::from_sqrt(x);
// Numerator coefficients, lowest degree first, stored as pairs of f64 bit patterns.
// The second element of each pair carries the leading value (see Q[0] = (0, 1.0)).
const P: [(u64, u64); 12] = [
(0xbc9a6a0690becb3b, 0x3ff40d931ff62706),
(0xbce573e1bbf2f0b7, 0x40402cebfab5721d),
(0x3d11a739b7c11e7b, 0x4074f58abc0cfbf1),
(0xbd2682a09ded0116, 0x409c8315f8facef2),
(0xbd3a19e91a120168, 0x40b65f7a4caed8b9),
(0x3d449c3d2b834543, 0x40c4fe41fdb4e7b8),
(0xbd6bdd415ac7f7e1, 0x40c7aa402d035d03),
(0x3d528412ff0d6b24, 0x40bf68faddd7d850),
(0xbd48f4bb3f61dac6, 0x40a75f5650249952),
(0xbd1dc534b275e309, 0x4081bddd259c0582),
(0xbcce5103350bd226, 0x4046c7a049014484),
(0x3c8935f8acd6c1d0, 0x3fef7524082b1859),
];
// Powers of `recip` used to combine the pairwise terms below: recip^2, recip^4, recip^8.
let x2 = DoubleDouble::quick_mult(recip, recip);
let x4 = DoubleDouble::quick_mult(x2, x2);
let x8 = DoubleDouble::quick_mult(x4, x4);
// Numerator: each e_k combines two adjacent coefficients, e_k = P[2k] + recip * P[2k+1].
let e0 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
// Merge the pairs with recip^2, then recip^4, then recip^8.
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
// Denominator coefficients, same layout as P.
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3cc0d2508437b3f4, 0x40396ff483adec14),
(0xbd130a9c9f8a5338, 0x4070225588d8c15d),
(0xbceceba8fa0e65a2, 0x4095481f6684e3bb),
(0x3d4099f3c178fd2a, 0x40afedc8a778bf42),
(0xbd3a7e6a6276a3e7, 0x40bc0c060112692e),
(0x3d11538c155b16d8, 0x40bcb12bd1101782),
(0xbd5f7b04cdea2c61, 0x40b07fa363202e10),
(0xbce444ed035b66c6, 0x4093d6fe8f44f838),
(0xbcf6f88fb942b610, 0x4065c99fa44030c3),
(0xbcbd1d2aedee5bc9, 0x40207ffabeb00eea),
(0xbc39a0c8091102c9, 0x3facff3d892cd57a),
];
// Q[0] is exactly 1.0, so it is passed as a plain f64 instead of being read from the table.
let e0 = DoubleDouble::mul_add_f64(
recip,
DoubleDouble::from_bit_pair(Q[1]),
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_add(
recip,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
// Rational value P/Q, then division by `r_sqrt` to remove the Sqrt[x] factor
// that is part of the approximated function.
let z = DoubleDouble::div(p_num, p_den);
let r = DoubleDouble::div(z, r_sqrt);
// Rounding test: widen the result by a relative error bound and check whether
// both ends round to the same f64.
let err = r.hi * f64::from_bits(0x3c10000000000000); // 2^-62
let ub = r.hi + (r.lo + err);
let lb = r.hi + (r.lo - err);
if ub != lb {
// Rounding is ambiguous at this precision; recompute on the slow path.
return k1e_asympt_hard(x);
}
r.to_f64()
}
/**
Slow-path evaluation of K1(x)exp(x), called by `k1e_asympt` when its rounding
test is inconclusive. Uses a degree 14/14 rational approximation of
`Sqrt[x] Exp[x] BesselK[1,x]` in the variable `1/x`, evaluated with
`DyadicFloat128` arithmetic.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.0000000000001,1},14,14},WorkingPrecision->70]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[cold]
#[inline(never)]
fn k1e_asympt_hard(x: f64) -> f64 {
// Numerator coefficients, lowest degree first (P[0] is the constant term).
static P: [DyadicFloat128; 15] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0xa06c98ff_b1382cb2_be5210ac_f26f25d1_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -122,
mantissa: 0xc5f546cb_659a39d0_fafbd188_36ca05b9_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -118,
mantissa: 0xcd0b7cfa_de158d26_7084bbe9_f1bdb66d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0xeac7be2f_957d1260_8849508a_2a5a8972_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -112,
mantissa: 0xa4d14fec_fecc6444_4c7b0287_dad71a86_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0x94e3180c_01df9932_ad2acd8b_bab59c05_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xb0de10f8_74918442_94a96368_8eaa4d0d_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -108,
mantissa: 0x8adfea76_d6dbe5d9_46bfaf83_9341f4b5_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -108,
mantissa: 0x8f0a4337_b69b602c_cf187222_f3a3379f_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xbd4c3ebf_c2db0fad_1b425641_cc470043_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0x9b14d29f_9b97e3c8_c1a7b9d0_787f0ddb_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -112,
mantissa: 0x93e670d2_07a553ef_a90d4895_cf1b5011_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0x93e0ee0a_cb4d8910_6b4d3e37_f4f9df49_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -120,
mantissa: 0xff0ce10d_5585abd1_e8a53a12_65131ad4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -126,
mantissa: 0xf020536d_822cbe51_c8de095a_03367c83_u128,
},
];
// Denominator coefficients, lowest degree first (Q[0] is the constant term).
static Q: [DyadicFloat128; 15] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -122,
mantissa: 0x9c729dd5_4828a918_42807f58_d485a511_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -118,
mantissa: 0x9ff6f631_0794001d_433ab0c5_d4c682a9_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -115,
mantissa: 0xb3f81e8b_1e0e85a6_3928342e_c83088a1_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -113,
mantissa: 0xf6b1c203_a60d4294_239ad045_2c67c224_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -111,
mantissa: 0xd7a98b14_7a499762_abde5c38_3a5b40e4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0xf4eb8b77_a2cdc686_afd1273f_d464c8b7_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xb4c1e12a_93ee86fc_930c6f94_cfa6ac3a_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -109,
mantissa: 0xaaeaab88_32b776b7_fdd76b0f_24349f41_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -110,
mantissa: 0xc8ec9d61_5bf2ee9b_878b4962_4a5cee85_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -111,
mantissa: 0x8b97bab0_3351673f_22f10d40_fd1c9ff3_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -114,
mantissa: 0xd31cb80a_bf8cbedc_b0dcf7e7_c599f79e_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -117,
mantissa: 0x96b354c8_69197193_ea4f608f_81943988_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -122,
mantissa: 0x989af1bb_e48b5c44_7cd09746_f15e935a_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -130,
mantissa: 0xb7b51326_23c29ed5_8d3dcf5a_79bd9a4f_u128,
},
];
// Presumably 1/x and 1/sqrt(x) in extended precision — confirm against the helpers.
let recip = DyadicFloat128::accurate_reciprocal(x);
let r_sqrt = bessel_rsqrt_hard(x, recip);
// Horner evaluation of the numerator in `recip`, from the highest coefficient down.
let mut p0 = P[14];
for i in (0..14).rev() {
p0 = recip * p0 + P[i];
}
// Horner evaluation of the denominator, same scheme.
let mut q0 = Q[14];
for i in (0..14).rev() {
q0 = recip * q0 + Q[i];
}
// P/Q computed as a multiplication by the reciprocal of Q.
let v = p0 * q0.reciprocal();
// Multiply by `r_sqrt` to remove the Sqrt[x] factor of the approximated function.
let r = v * r_sqrt;
r.fast_as_f64()
}
// Regression tests for `f_k1e` and its slow path `k1e_asympt_hard`.
// NOTE(review): the test function is named `test_k1` although it exercises `f_k1e`.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_k1() {
// Finite positive arguments; `assert_eq!` requires an exact match with the pinned value.
assert_eq!(f_k1e(0.643), 2.253195748291852);
assert_eq!(f_k1e(0.964), 1.6787831013451477);
assert_eq!(f_k1e(2.964), 0.8123854795542738);
assert_eq!(f_k1e(8.43), 0.4502184086111872);
assert_eq!(f_k1e(16.43), 0.3161307996938612);
assert_eq!(f_k1e(423.43), 0.06096117017402597);
assert_eq!(f_k1e(9044.431), 0.01317914752085687);
// The slow path called directly must give the same values as `f_k1e` above.
assert_eq!(k1e_asympt_hard(16.43), 0.3161307996938612);
assert_eq!(k1e_asympt_hard(423.43), 0.06096117017402597);
assert_eq!(k1e_asympt_hard(9044.431), 0.01317914752085687);
// Both signed zeros are expected to give +inf.
assert_eq!(f_k1e(0.), f64::INFINITY);
assert_eq!(f_k1e(-0.), f64::INFINITY);
// Negative arguments, including -inf, are expected to give NaN.
assert!(f_k1e(-0.5).is_nan());
assert!(f_k1e(f64::NEG_INFINITY).is_nan());
// +inf is expected to give 0.
assert_eq!(f_k1e(f64::INFINITY), 0.);
}
}

240
vendor/pxfm/src/bessel/k1ef.rs vendored Normal file
View File

@@ -0,0 +1,240 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::logs::fast_logf;
use crate::polyeval::{f_estrin_polyeval8, f_polyeval3, f_polyeval4};
/// Modified exponentially scaled Bessel of the second kind of order 1
///
/// Computes K1(x)exp(x)
///
/// Special inputs: `+0.0` and `-0.0` return `+inf`, `+inf` returns `0`, and
/// `-inf`, NaN and every other negative argument return NaN.
///
/// Max ULP 0.5
pub fn f_k1ef(x: f32) -> f32 {
    let ux = x.to_bits();
    if ux >= 0xffu32 << 23 || ux == 0 {
        // |x| == 0, |x| == inf, |x| == NaN, x < 0
        if ux.wrapping_shl(1) == 0 {
            // |x| == 0
            return f32::INFINITY;
        }
        if x.is_infinite() {
            return if x.is_sign_positive() { 0. } else { f32::NAN };
        }
        return x + f32::NAN; // x == NaN or x < 0
    }

    // From here on x is finite and strictly positive.
    if ux <= 0x3f800000u32 {
        // x <= 1.0
        if ux <= 0x34000000u32 {
            // |x| <= f32::EPSILON
            let dx = x as f64;
            let leading_term = 1. / dx + 1.;
            if ux <= 0x3109705fu32 {
                // |x| <= 2e-9
                // taylor series for tiny K1(x)exp(x) ~ 1/x + 1 + O(x)
                return leading_term as f32;
            }
            // taylor series for small
            // K1(x)exp(x) ~ 1/x + 1 + 1/4 (1 + 2 EulerGamma - 2 Log[2] + 2 Log[x]) x + O(x^2 Log[x])
            //
            // C = 1 + 2 EulerGamma - 2 Log[2]. The bit pattern used previously
            // (0xbffd8773039049e8) decodes to about -1.8456, i.e. 2 EulerGamma - 3,
            // which does not match this coefficient. The term it feeds is far below
            // one f32 ulp of the result in this range, so the correction can only
            // matter at near-exact rounding ties.
            const C: f64 = 0.7681369686831751;
            let log_x = fast_logf(x);
            let r = f_fmla(log_x, 2., C);
            let w0 = f_fmla(dx * 0.25, r, leading_term);
            return w0 as f32;
        }
        return k1ef_small(x);
    }
    k1ef_asympt(x)
}
/**
Computes
I1(x) = x/2 * (1 + 1/2 * (x/2)^2 + (x/2)^4 * P((x/2)^2))
where P is a degree 3/2 rational approximation.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},3,2},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1f_small(x: f32) -> f64 {
    let half_x = f64::from(x) * 0.5;
    // t = (x/2)^2 is the variable of the rational approximation; t_sqr = (x/2)^4.
    let t = half_x * half_x;
    let t_sqr = t * t;

    let numerator = f_polyeval4(
        t,
        f64::from_bits(0x3fb5555555555355),
        f64::from_bits(0x3f6ebf07f0dbc49b),
        f64::from_bits(0x3f1fdc02bf28a8d9),
        f64::from_bits(0x3ebb5e7574c700a6),
    );
    let denominator = f_polyeval3(
        t,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfa39b64b6135b5a),
        f64::from_bits(0x3f3fa729bbe951f9),
    );
    let tail = numerator / denominator;

    // 1 + 1/2 * (x/2)^2, the two leading series terms kept outside the approximation.
    let head = f_fmla(0.5, t, 1.);
    f_fmla(t_sqr, tail, head) * half_x
}
/**
Evaluates K1(x)exp(x) on the small-argument branch of `f_k1ef`.

Uses a degree 3/3 rational approximation R(x^2) of
(BesselK(1, x) - Log(x)*BesselI(1, x) - 1/x)/x
and reassembles
K1(x) = x * R(x^2) + Log(x)*I1(x) + 1/x
before multiplying by the exponential factor.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselK[1, x]-Log[x]BesselI[1,x]-1/x)/x
g[z_]:=f[Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},3,3},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k1ef_small(x: f32) -> f32 {
    let xd = f64::from(x);
    let inv_x = 1. / xd;
    let x_sqr = xd * xd;

    let numerator = f_polyeval4(
        x_sqr,
        f64::from_bits(0xbfd3b5b6028a83d6),
        f64::from_bits(0xbfb3fde2c83f7cca),
        f64::from_bits(0xbf662b2e5defbe8c),
        f64::from_bits(0xbefa2a63cc5c4feb),
    );
    let denominator = f_polyeval4(
        x_sqr,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbf9833197207a7c6),
        f64::from_bits(0x3f315663bc7330ef),
        f64::from_bits(0xbeb9211958f6b8c3),
    );
    let rational = numerator / denominator;

    // Presumably exp(x) and ln(x) — confirm against `core_expf` / `fast_logf`.
    let exp_x = core_expf(x);
    let ln_x = fast_logf(x);
    let i1 = i1f_small(x);

    // Log(x)*I1(x) + 1/x, then add x * R(x^2).
    let singular = f_fmla(ln_x, i1, inv_x);
    let k1 = f_fmla(rational, xd, singular);
    (k1 * exp_x) as f32
}
/**
Evaluates K1(x)exp(x) on the `x > 1` branch of `f_k1ef` from a degree 7/7
rational approximation of Sqrt[x] Exp[x] BesselK[1,x] in the variable 1/x.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},7,7},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k1ef_asympt(x: f32) -> f32 {
    let xd = f64::from(x);
    let inv_x = 1. / xd;
    // Presumably 1/sqrt(x); it removes the Sqrt[x] factor of the approximated
    // function — confirm against `j1f_rsqrt`.
    let inv_sqrt_x = j1f_rsqrt(xd);

    let numerator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff40d931ff6270d),
        f64::from_bits(0x402d250670ed7a6c),
        f64::from_bits(0x404e517b9b494d38),
        f64::from_bits(0x405cb02b7433a838),
        f64::from_bits(0x405a03e606a1b871),
        f64::from_bits(0x4045c98d4308dbcd),
        f64::from_bits(0x401d115c4ce0540c),
        f64::from_bits(0x3fd4213e72b24b3a),
    );
    let denominator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x402681096aa3a87d),
        f64::from_bits(0x404623ab8d72ceea),
        f64::from_bits(0x40530af06ea802b2),
        f64::from_bits(0x404d526906fb9cec),
        f64::from_bits(0x403281caca389f1b),
        f64::from_bits(0x3ffdb93996948bb4),
        f64::from_bits(0x3f9a009da07eb989),
    );

    let ratio = numerator / denominator;
    (ratio * inv_sqrt_x) as f32
}
// Regression tests for `f_k1ef`.
// NOTE(review): the test function is named `test_k1f` although it exercises `f_k1ef`.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_k1f() {
// Arguments below f32::EPSILON, handled by the Taylor-series branches of `f_k1ef`.
assert_eq!(f_k1ef(0.00000000005423), 18439980000.0);
assert_eq!(f_k1ef(0.0000000043123), 231894820.0);
// Larger positive arguments; `assert_eq!` requires an exact match with the pinned value.
assert_eq!(f_k1ef(0.3), 4.125158);
assert_eq!(f_k1ef(1.89), 1.0710458);
assert_eq!(f_k1ef(5.89), 0.5477655);
assert_eq!(f_k1ef(101.89), 0.12461915);
// Both signed zeros are expected to give +inf.
assert_eq!(f_k1ef(0.), f32::INFINITY);
assert_eq!(f_k1ef(-0.), f32::INFINITY);
// Negative arguments, including -inf, are expected to give NaN.
assert!(f_k1ef(-0.5).is_nan());
assert!(f_k1ef(f32::NEG_INFINITY).is_nan());
// +inf is expected to give 0.
assert_eq!(f_k1ef(f32::INFINITY), 0.);
}
}

240
vendor/pxfm/src/bessel/k1f.rs vendored Normal file
View File

@@ -0,0 +1,240 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::logs::fast_logf;
use crate::polyeval::{f_estrin_polyeval8, f_polyeval3, f_polyeval4};
/// Modified Bessel function of the second kind of order 1, `K1(x)`.
///
/// Special inputs: `+0.0` and `-0.0` return `+inf`, `+inf` returns `0`, and
/// `-inf`, NaN and every other negative argument return NaN.
///
/// Max ULP 0.5
pub fn f_k1f(x: f32) -> f32 {
    // Bit pattern of +inf. Any pattern at or above it is +inf, a NaN, or has the sign bit set.
    const POS_INF_BITS: u32 = 0xffu32 << 23;

    let bits = x.to_bits();
    if bits == 0 || bits >= POS_INF_BITS {
        // |x| == 0, |x| == inf, |x| == NaN, x < 0
        return if bits.wrapping_shl(1) == 0 {
            f32::INFINITY
        } else if x.is_infinite() {
            if x.is_sign_positive() { 0. } else { f32::NAN }
        } else {
            x + f32::NAN
        };
    }

    // x is finite and strictly positive from here on; dispatch on magnitude,
    // largest range first.
    if bits >= 0x42cbc779u32 {
        // x >= 101.889595: the result is returned as zero
        return 0.;
    }
    if bits > 0x3f800000u32 {
        // x > 1.0
        return k1f_asympt(x);
    }
    if bits > 0x34000000u32 {
        // f32::EPSILON < x <= 1.0
        return k1f_small(x);
    }

    // |x| <= f32::EPSILON
    let xd = f64::from(x);
    let leading_term = 1. / xd;
    if bits <= 0x3109705fu32 {
        // |x| <= 2e-9
        // taylor series for tiny K1(x) ~ 1/x + O(x)
        return leading_term as f32;
    }
    // taylor series for small K1(x) ~ 1/x+1/4 (-1+2 EulerGamma-2 Log[2]+2 Log[x]) x + O(x^3)
    const C: f64 = f64::from_bits(0xbff3b5b6028a83d7); // -1+2 EulerGamma-2 Log[2]
    let bracket = f_fmla(fast_logf(x), 2., C);
    f_fmla(xd * 0.25, bracket, leading_term) as f32
}
/**
Computes
I1(x) = x/2 * (1 + 1/2 * (x/2)^2 + (x/2)^4 * P((x/2)^2))
where P is a degree 3/2 rational approximation.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[1,x]*2/x-1-1/2(x/2)^2)/(x/2)^4
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},3,2},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i1f_small(x: f32) -> f64 {
    let half_x = f64::from(x) * 0.5;
    // t = (x/2)^2 is the variable of the rational approximation; t_sqr = (x/2)^4.
    let t = half_x * half_x;
    let t_sqr = t * t;

    let numerator = f_polyeval4(
        t,
        f64::from_bits(0x3fb5555555555355),
        f64::from_bits(0x3f6ebf07f0dbc49b),
        f64::from_bits(0x3f1fdc02bf28a8d9),
        f64::from_bits(0x3ebb5e7574c700a6),
    );
    let denominator = f_polyeval3(
        t,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfa39b64b6135b5a),
        f64::from_bits(0x3f3fa729bbe951f9),
    );
    let tail = numerator / denominator;

    // 1 + 1/2 * (x/2)^2, the two leading series terms kept outside the approximation.
    let head = f_fmla(0.5, t, 1.);
    f_fmla(t_sqr, tail, head) * half_x
}
/**
Evaluates K1(x) on the small-argument branch of `f_k1f`.

Uses a degree 3/3 rational approximation R(x^2) of
(BesselK(1, x) - Log(x)*BesselI(1, x) - 1/x)/x
and reassembles
K1(x) = x * R(x^2) + Log(x)*I1(x) + 1/x

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselK[1, x]-Log[x]BesselI[1,x]-1/x)/x
g[z_]:=f[Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},3,3},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k1f_small(x: f32) -> f32 {
    let xd = f64::from(x);
    let inv_x = 1. / xd;
    let x_sqr = xd * xd;

    let numerator = f_polyeval4(
        x_sqr,
        f64::from_bits(0xbfd3b5b6028a83d6),
        f64::from_bits(0xbfb3fde2c83f7cca),
        f64::from_bits(0xbf662b2e5defbe8c),
        f64::from_bits(0xbefa2a63cc5c4feb),
    );
    let denominator = f_polyeval4(
        x_sqr,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbf9833197207a7c6),
        f64::from_bits(0x3f315663bc7330ef),
        f64::from_bits(0xbeb9211958f6b8c3),
    );
    let rational = numerator / denominator;

    // Presumably ln(x) — confirm against `fast_logf`.
    let ln_x = fast_logf(x);
    let i1 = i1f_small(x);

    // Log(x)*I1(x) + 1/x, then add x * R(x^2).
    let singular = f_fmla(ln_x, i1, inv_x);
    f_fmla(rational, xd, singular) as f32
}
/**
Evaluates K1(x) on the `x > 1` branch of `f_k1f` from a degree 7/7 rational
approximation of Sqrt[x] Exp[x] BesselK[1,x] in the variable 1/x; the result
is divided by the product of the exponential factor and sqrt(x).

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},7,7},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k1f_asympt(x: f32) -> f32 {
    let xd = f64::from(x);
    let inv_x = 1. / xd;
    // Presumably exp(x) — confirm against `core_expf`.
    let exp_x = core_expf(x);
    let sqrt_x = xd.sqrt();

    let numerator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff40d931ff6270d),
        f64::from_bits(0x402d250670ed7a6c),
        f64::from_bits(0x404e517b9b494d38),
        f64::from_bits(0x405cb02b7433a838),
        f64::from_bits(0x405a03e606a1b871),
        f64::from_bits(0x4045c98d4308dbcd),
        f64::from_bits(0x401d115c4ce0540c),
        f64::from_bits(0x3fd4213e72b24b3a),
    );
    let denominator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x402681096aa3a87d),
        f64::from_bits(0x404623ab8d72ceea),
        f64::from_bits(0x40530af06ea802b2),
        f64::from_bits(0x404d526906fb9cec),
        f64::from_bits(0x403281caca389f1b),
        f64::from_bits(0x3ffdb93996948bb4),
        f64::from_bits(0x3f9a009da07eb989),
    );

    let ratio = numerator / denominator;
    (ratio / (exp_x * sqrt_x)) as f32
}
// Regression tests for `f_k1f`: exact-equality checks against pinned expected
// values, followed by the special inputs (zeros, negative argument, infinities).
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_k1f() {
// Finite positive arguments; `assert_eq!` requires an exact match with the pinned value.
assert_eq!(f_k1f(0.3), 3.055992);
assert_eq!(f_k1f(1.89), 0.16180483);
assert_eq!(f_k1f(5.89), 0.0015156545);
// Argument expected to give exactly zero.
assert_eq!(f_k1f(101.89), 0.);
// Both signed zeros are expected to give +inf.
assert_eq!(f_k1f(0.), f32::INFINITY);
assert_eq!(f_k1f(-0.), f32::INFINITY);
// Negative arguments, including -inf, are expected to give NaN.
assert!(f_k1f(-0.5).is_nan());
assert!(f_k1f(f32::NEG_INFINITY).is_nan());
// +inf is expected to give 0.
assert_eq!(f_k1f(f32::INFINITY), 0.);
}
}

262
vendor/pxfm/src/bessel/k2f.rs vendored Normal file
View File

@@ -0,0 +1,262 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::logs::fast_logf;
use crate::polyeval::{f_estrin_polyeval5, f_estrin_polyeval8, f_polyeval4, f_polyeval11};
/// Modified Bessel function of the second kind of order 2, `K2(x)`.
///
/// Special inputs: `+0.0` and `-0.0` return `+inf`, `+inf` returns `0`, and
/// `-inf`, NaN and every other negative argument return NaN.
///
/// ulp 0.5
pub fn f_k2f(x: f32) -> f32 {
    // Bit pattern of +inf. Any pattern at or above it is +inf, a NaN, or has the sign bit set.
    const POS_INF_BITS: u32 = 0xffu32 << 23;

    let bits = x.to_bits();
    if bits == 0 || bits >= POS_INF_BITS {
        // |x| == 0, |x| == inf, |x| == NaN, x < 0
        return if bits.wrapping_shl(1) == 0 {
            // |x| == 0
            f32::INFINITY
        } else if x.is_infinite() {
            if x.is_sign_positive() { 0. } else { f32::NAN }
        } else {
            x + f32::NAN
        };
    }

    // x is finite and strictly positive from here on; dispatch on magnitude,
    // largest range first.
    if bits >= 0x42cbceefu32 {
        // |x| >= 101.90417: the result is returned as zero
        return 0.;
    }
    if bits > 0x3f800000u32 {
        // x > 1.0
        return k2f_asympt(x);
    }
    if bits > 0x3e9eb852u32 {
        // 0.31 < x <= 1.0
        return k2f_small(x);
    }
    if bits > 0x34000000u32 {
        // f32::EPSILON < x <= 0.31
        return k2f_tiny(x);
    }

    // x <= f32::EPSILON: only the 2/x^2 term is kept.
    let xd = f64::from(x);
    (2. / (xd * xd)) as f32
}
/// Evaluates K2(x) from its power series at zero; used by `f_k2f` for
/// `f32::EPSILON < x <= 0.31`.
#[inline]
fn k2f_tiny(x: f32) -> f32 {
    // Power series at zero for K2:
    //   2/x^2 - 1/2
    //   - 0.12500000000000000 (Log[x] - 0.86593151565841245) x^2
    //   - 0.010416666666666667 (Log[x] - 1.5325981823250791) x^4
    //   - 0.00032552083333333333 (Log[x] - 1.9075981823250791) x^6
    //   - 0.0000054253472222222222 (Log[x] - 2.1742648489917458) x^8 + O[x]^9
    // rewritten as
    //   -1/2 + 2/x^2 + x^2 * (c0 + c1 * x^2 + c2 * x^4 + c3 * x^6)
    // where every c_k depends on Log[x].
    let xd = f64::from(x);
    let ln_x = fast_logf(x);

    let c0 = f_fmla(-4.0000000000000000, ln_x, 3.4637260626336498) * 0.031250000000000000;
    let c1 = f_fmla(-12.000000000000000, ln_x, 18.391178187900949) * 0.00086805555555555556;
    let c2 = f_fmla(-24.000000000000000, ln_x, 45.782356375801899) * 0.000013563368055555556;
    let c3 = (ln_x - 2.1742648489917458) * (-0.0000054253472222222222);

    let x_sqr = xd * xd;
    let pole = 2. / x_sqr;
    let series = f_polyeval4(x_sqr, c0, c1, c2, c3);
    (f_fmla(series, x_sqr, pole) - 0.5) as f32
}
/**
Computes
I2(x) = x^2 * R(x^2)
where R is a degree 4/4 rational approximation of BesselI[2,x]/x^2.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=BesselI[2,x]/x^2
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.3,1},4,4},WorkingPrecision->75]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i2f_small(x: f32) -> f64 {
    let xd = f64::from(x);
    // z = x^2 is the variable of the rational approximation.
    let z = xd * xd;

    let numerator = f_estrin_polyeval5(
        z,
        f64::from_bits(0x3fc0000000000000),
        f64::from_bits(0x3f81520c0669099e),
        f64::from_bits(0x3f27310bf5c5e9b0),
        f64::from_bits(0x3eb8e2947e0a6098),
        f64::from_bits(0x3e336dfad46e2f35),
    );
    let denominator = f_estrin_polyeval5(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbf900d253bb12edc),
        f64::from_bits(0x3f1ed3d9ab228297),
        f64::from_bits(0xbea14e6660c00303),
        f64::from_bits(0x3e13eb951a6cf38f),
    );

    (numerator / denominator) * z
}
/**
Modified Bessel function of the second kind of order 2 on the small-argument range.

The logarithmic and 1/x^2 singular parts are split off and the remainder is
approximated by a rational function:

R(x^2) := (BesselK(2, x) - Log(x)*BesselI(2, x) - 2/x^2)/(1+x^2)

so the value is rebuilt as

K2(x) = R(x^2) * (1 + x^2) + Log(x) * I2(x) + 2/x^2

R is a (10, 10) rational minimax fit in z = x^2, fitted for z in [0.3, 1.0].

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselK[2,x]-Log[x]BesselI[2,x]-2/(x^2))/(1+x^2)
g[z_]:=f[Sqrt[z]]
{err,approx}=MiniMaxApproximation[g[z],{z,{0.3,1.0},10,10},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k2f_small(x: f32) -> f32 {
    let xd = x as f64;
    let z = xd * xd;
    // Numerator of R(z).
    let numerator = f_polyeval11(
        z,
        f64::from_bits(0xbfdff794c9ee3b5c),
        f64::from_bits(0xc047d3276f18e5d2),
        f64::from_bits(0xc09200ed3702875a),
        f64::from_bits(0xc0c39f395c47be27),
        f64::from_bits(0xc0e0ec95bd1a3192),
        f64::from_bits(0xc0e5973cb871c8d0),
        f64::from_bits(0xc0cdaf528de00d53),
        f64::from_bits(0xc0afe6d3009de17c),
        f64::from_bits(0xc098417b22844112),
        f64::from_bits(0x4025c45260bb1b6a),
        f64::from_bits(0x402f2bf6b95ffe0c),
    );
    // Denominator of R(z).
    let denominator = f_polyeval11(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x405879a43b253224),
        f64::from_bits(0x40a3a501408a0198),
        f64::from_bits(0x40d8172abc4a8ccc),
        f64::from_bits(0x40f9fcb05e98bdbd),
        f64::from_bits(0x4109c45b54be586b),
        f64::from_bits(0x4106ad7023dd0b90),
        f64::from_bits(0x40ed7e988d2ba5a9),
        f64::from_bits(0x40966305e1c1123a),
        f64::from_bits(0xc090832b6a87317c),
        f64::from_bits(0x403b48eb703f4644),
    );
    let rational = numerator / denominator;
    let pole_term = 2. / z;
    let log_x = fast_logf(x);
    let i2 = i2f_small(x);
    // Singular part: log(x) * I2(x) + 2/x^2.
    let singular = f_fmla(log_x, i2, pole_term);
    // Regular part R(x^2) * (1 + x^2), added on top of the singular part.
    let k2 = f_fmla(rational, f_fmla(xd, xd, 1.), singular);
    k2 as f32
}
/**
Large-argument path: evaluates a (7, 7) rational minimax fit in z = 1/x and divides
it by exp(x) * sqrt(x), i.e. the fit approximates the scaled function
sqrt(x) * exp(x) * K(x) on z in (0, 1].

NOTE(review): the script below names `BesselK[1,x]`, while the leading coefficients
stored in this function (about sqrt(pi/2), with a first-order term of about 15/8 in
1/x) correspond to order 2 - presumably the fit was produced with `BesselK[2,x]`;
confirm before regenerating the coefficients.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[x] BesselK[1,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.000000001,1},7,7},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn k2f_asympt(x: f32) -> f32 {
    let xd = x as f64;
    let inv_x = 1. / xd;
    let exp_x = core_expf(x);
    let sqrt_x = xd.sqrt();
    // Numerator of the rational fit in 1/x.
    let numerator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff40d931ff626f2),
        f64::from_bits(0x402d954dceb445df),
        f64::from_bits(0x405084ea6680d028),
        f64::from_bits(0x406242344a8ea488),
        f64::from_bits(0x406594aa56f50fea),
        f64::from_bits(0x405aa04eb4f0af1c),
        f64::from_bits(0x403dd3e8e63849ef),
        f64::from_bits(0x4004e85453648d43),
    );
    // Denominator of the rational fit in 1/x.
    let denominator = f_estrin_polyeval8(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0x4023da9f4e05358e),
        f64::from_bits(0x4040a4e4ceb523c9),
        f64::from_bits(0x404725c423c9f990),
        f64::from_bits(0x403a60c00deededc),
        f64::from_bits(0x40149975b84c3946),
        f64::from_bits(0x3fc69439846db871),
        f64::from_bits(0xbf6400819bac6f45),
    );
    let scaled = numerator / denominator;
    // Remove the sqrt(x) * exp(x) scaling that the fit was built with.
    let unscaled = scaled / (exp_x * sqrt_x);
    unscaled as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Inputs outside the domain, NaN, and the pole at zero.
    #[test]
    fn test_k2f_special_values() {
        assert!(f_k2f(-1.).is_nan());
        assert!(f_k2f(f32::NAN).is_nan());
        assert_eq!(f_k2f(0.), f32::INFINITY);
    }

    /// Reference values for ordinary positive arguments.
    #[test]
    fn test_k2f() {
        assert_eq!(f_k2f(0.65), 4.3059196);
        assert_eq!(f_k2f(1.65), 0.44830766);
    }
}

107
vendor/pxfm/src/bessel/mod.rs vendored Normal file
View File

@@ -0,0 +1,107 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![deny(unreachable_pub)]
mod alpha0;
mod alpha1;
mod bessel_exp;
mod beta0;
mod beta1;
mod i0;
mod i0e;
mod i0ef;
mod i0f;
mod i1;
mod i1e;
mod i1ef;
mod i1f;
mod i2;
mod i2f;
mod j0;
mod j0_coeffs_remez;
mod j0_coeffs_taylor;
mod j0f;
mod j0f_coeffs;
mod j1;
mod j1_coeffs;
mod j1_coeffs_taylor;
mod j1f;
mod j1f_coeffs;
mod jincpi;
mod jincpif;
mod k0;
mod k0e;
mod k0ef;
mod k0f;
mod k1;
mod k1e;
mod k1ef;
mod k1f;
mod k2f;
mod trigo_bessel;
mod y0;
mod y0_coeffs;
mod y0_coeffs_taylor;
mod y0f;
mod y0f_coeffs;
mod y1;
mod y1_coeffs;
mod y1_coeffs_taylor;
mod y1f;
mod y1f_coeffs;
pub(crate) use bessel_exp::i0_exp;
pub use i0::f_i0;
pub use i0e::f_i0e;
pub use i0ef::f_i0ef;
pub use i0f::f_i0f;
pub use i1::f_i1;
pub use i1e::f_i1e;
pub use i1ef::f_i1ef;
pub use i1f::f_i1f;
pub use i2::f_i2;
pub use i2f::f_i2f;
pub use j0::f_j0;
pub use j0f::f_j0f;
pub use j1::f_j1;
pub use j1f::f_j1f;
pub use jincpi::f_jincpi;
pub use jincpif::f_jincpif;
pub use k0::f_k0;
pub use k0e::f_k0e;
pub use k0ef::f_k0ef;
pub use k0f::f_k0f;
pub use k1::f_k1;
pub use k1e::f_k1e;
pub use k1ef::f_k1ef;
pub use k1f::f_k1f;
pub use k2f::f_k2f;
pub use y0::f_y0;
pub use y0f::f_y0f;
pub use y1::f_y1;
pub use y1f::f_y1f;

285
vendor/pxfm/src/bessel/trigo_bessel.rs vendored Normal file
View File

@@ -0,0 +1,285 @@
/*
* // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{f_fmla, min_normal_f64};
use crate::round::RoundFinite;
// Generated by SageMath:
// print("[")
// for k in range(128):
// k = RealField(150)(k) * RealField(150).pi() / RealField(150)(64)
// print(double_to_hex(k.sin()) + ",")
// print("];")
/// Table of sin(k * pi / 64) for k = 0..=127 (one full period), stored as raw
/// IEEE-754 binary64 bit patterns; decode an entry with `f64::from_bits`.
///
/// The matching cosine sits a quarter period further on:
/// cos(k * pi / 64) = sin((k + 32) * pi / 64), with the index taken modulo 128.
///
/// NOTE(review): entry 64 (sin(pi)) is `0xb69f77598338bfdf`, roughly -1.4e-45, not an
/// exact zero like entry 0 - presumably a residue of the 150-bit evaluation in the
/// generating script; confirm that this is intended.
pub(crate) static SIN_K_PI_OVER_64: [u64; 128] = [
0x0000000000000000,
0x3fa91f65f10dd814,
0x3fb917a6bc29b42c,
0x3fc2c8106e8e613a,
0x3fc8f8b83c69a60b,
0x3fcf19f97b215f1b,
0x3fd294062ed59f06,
0x3fd58f9a75ab1fdd,
0x3fd87de2a6aea963,
0x3fdb5d1009e15cc0,
0x3fde2b5d3806f63b,
0x3fe073879922ffee,
0x3fe1c73b39ae68c8,
0x3fe30ff7fce17035,
0x3fe44cf325091dd6,
0x3fe57d69348ceca0,
0x3fe6a09e667f3bcd,
0x3fe7b5df226aafaf,
0x3fe8bc806b151741,
0x3fe9b3e047f38741,
0x3fea9b66290ea1a3,
0x3feb728345196e3e,
0x3fec38b2f180bdb1,
0x3feced7af43cc773,
0x3fed906bcf328d46,
0x3fee212104f686e5,
0x3fee9f4156c62dda,
0x3fef0a7efb9230d7,
0x3fef6297cff75cb0,
0x3fefa7557f08a517,
0x3fefd88da3d12526,
0x3feff621e3796d7e,
0x3ff0000000000000,
0x3feff621e3796d7e,
0x3fefd88da3d12526,
0x3fefa7557f08a517,
0x3fef6297cff75cb0,
0x3fef0a7efb9230d7,
0x3fee9f4156c62dda,
0x3fee212104f686e5,
0x3fed906bcf328d46,
0x3feced7af43cc773,
0x3fec38b2f180bdb1,
0x3feb728345196e3e,
0x3fea9b66290ea1a3,
0x3fe9b3e047f38741,
0x3fe8bc806b151741,
0x3fe7b5df226aafaf,
0x3fe6a09e667f3bcd,
0x3fe57d69348ceca0,
0x3fe44cf325091dd6,
0x3fe30ff7fce17035,
0x3fe1c73b39ae68c8,
0x3fe073879922ffee,
0x3fde2b5d3806f63b,
0x3fdb5d1009e15cc0,
0x3fd87de2a6aea963,
0x3fd58f9a75ab1fdd,
0x3fd294062ed59f06,
0x3fcf19f97b215f1b,
0x3fc8f8b83c69a60b,
0x3fc2c8106e8e613a,
0x3fb917a6bc29b42c,
0x3fa91f65f10dd814,
0xb69f77598338bfdf,
0xbfa91f65f10dd814,
0xbfb917a6bc29b42c,
0xbfc2c8106e8e613a,
0xbfc8f8b83c69a60b,
0xbfcf19f97b215f1b,
0xbfd294062ed59f06,
0xbfd58f9a75ab1fdd,
0xbfd87de2a6aea963,
0xbfdb5d1009e15cc0,
0xbfde2b5d3806f63b,
0xbfe073879922ffee,
0xbfe1c73b39ae68c8,
0xbfe30ff7fce17035,
0xbfe44cf325091dd6,
0xbfe57d69348ceca0,
0xbfe6a09e667f3bcd,
0xbfe7b5df226aafaf,
0xbfe8bc806b151741,
0xbfe9b3e047f38741,
0xbfea9b66290ea1a3,
0xbfeb728345196e3e,
0xbfec38b2f180bdb1,
0xbfeced7af43cc773,
0xbfed906bcf328d46,
0xbfee212104f686e5,
0xbfee9f4156c62dda,
0xbfef0a7efb9230d7,
0xbfef6297cff75cb0,
0xbfefa7557f08a517,
0xbfefd88da3d12526,
0xbfeff621e3796d7e,
0xbff0000000000000,
0xbfeff621e3796d7e,
0xbfefd88da3d12526,
0xbfefa7557f08a517,
0xbfef6297cff75cb0,
0xbfef0a7efb9230d7,
0xbfee9f4156c62dda,
0xbfee212104f686e5,
0xbfed906bcf328d46,
0xbfeced7af43cc773,
0xbfec38b2f180bdb1,
0xbfeb728345196e3e,
0xbfea9b66290ea1a3,
0xbfe9b3e047f38741,
0xbfe8bc806b151741,
0xbfe7b5df226aafaf,
0xbfe6a09e667f3bcd,
0xbfe57d69348ceca0,
0xbfe44cf325091dd6,
0xbfe30ff7fce17035,
0xbfe1c73b39ae68c8,
0xbfe073879922ffee,
0xbfde2b5d3806f63b,
0xbfdb5d1009e15cc0,
0xbfd87de2a6aea963,
0xbfd58f9a75ab1fdd,
0xbfd294062ed59f06,
0xbfcf19f97b215f1b,
0xbfc8f8b83c69a60b,
0xbfc2c8106e8e613a,
0xbfb917a6bc29b42c,
0xbfa91f65f10dd814,
];
/// Range reduction modulo pi/64.
///
/// Picks `k` as `x * 64/pi` rounded by `round_finite` and returns `(y, k)` where
/// `y` is `x - k * pi/64` accumulated with two fused multiply-adds against a split
/// representation of `-pi/64`.
#[inline]
pub(crate) fn reduce_small_pi64(x: f64) -> (f64, i64) {
    // Generated in SageMath:
    // z = RealField(300)(64) / RealField(300).pi()
    // n = 32
    // x_hi = RealField(n)(z) # convert to f64
    // x_mid = RealField(n)(z - RealField(300)(x_hi))
    // x_lo = RealField(n)(z - RealField(300)(x_hi) - RealField(300)(x_mid))
    // print(double_to_hex(x_hi), ",")
    // print(double_to_hex(x_mid), ",")
    // print(double_to_hex(x_lo), ",")
    //
    // NOTE(review): the script as written starts from 64/pi, whereas the values stored
    // below are a split of -pi/64 (high, middle, low) - presumably `z` should be
    // -pi/64; confirm before regenerating. Only the high and middle parts are used.
    const NEG_PI_OVER_64: [u64; 3] =
        [0xbfa921fb54400000, 0xbd80b4611a600000, 0xbb53198a2e037073];
    // 64/pi rounded to f64.
    const SIXTY_FOUR_OVER_PI: f64 = f64::from_bits(0x40345f306dc9c883);

    let k_f = (x * SIXTY_FOUR_OVER_PI).round_finite();
    // Let y = x - k * (pi/64)
    // Then |y| < pi / 64
    // With extra rounding errors, we can bound |y| < 1.6 * 2^-7.
    let head = f_fmla(k_f, f64::from_bits(NEG_PI_OVER_64[0]), x); // Exact
    // |reduced| < 1.6*2^-7
    let reduced = f_fmla(k_f, f64::from_bits(NEG_PI_OVER_64[1]), head);
    // SAFETY: `to_int_unchecked` requires a finite value representable in `i64`.
    // Callers are expected to filter out NaN/infinite and huge inputs before this
    // call (per the original author's note), and they use only the low bits of `k`.
    let k = unsafe { k_f.to_int_unchecked::<i64>() };
    (reduced, k)
}
/// Pair of values produced by `sincos_eval_pi64` for one reduced argument.
struct SinCosPi64 {
// Polynomial approximation of sin of the reduced argument.
v_sin: f64,
// Polynomial approximation of cos of the reduced argument.
v_cos: f64,
}
/// Evaluates polynomial approximations of sin(x) and cos(x) for a reduced argument.
///
/// Both polynomials were fitted on [0, pi/64] (see the Sollya snippets below), so `x`
/// is expected to be the output of `reduce_small_pi64`.
#[inline]
fn sincos_eval_pi64(x: f64) -> SinCosPi64 {
    let x_sq = x * x;
    let x_quad = x_sq * x_sq;

    // Sin poly generated by Sollya:
    // d = [0, pi/64];
    // f_sin = sin(x)/x;
    // Q = fpminimax(f_sin, [|0, 2, 4, 6|], [|D...|], d);
    const SIN_COEFFS: [u64; 4] = [
        0x3ff0000000000000,
        0xbfc5555555555451,
        0x3f8111111072c563,
        0xbf2a01321c030841,
    ];
    // Two degree-1 halves in x^2, joined with x^4, then multiplied by x because the
    // fit is for sin(x)/x.
    let sin_low = f_fmla(
        x_sq,
        f64::from_bits(SIN_COEFFS[1]),
        f64::from_bits(SIN_COEFFS[0]),
    );
    let sin_high = f_fmla(
        x_sq,
        f64::from_bits(SIN_COEFFS[3]),
        f64::from_bits(SIN_COEFFS[2]),
    );
    let v_sin = f_fmla(x_quad, sin_high, sin_low) * x;

    // Cos poly generated by Sollya:
    // d = [0, pi/64];
    // f_cos = cos(x);
    // Q = fpminimax(f_cos, [|0, 2, 4, 6|], [|1, D...|], d);
    const COS_COEFFS: [u64; 4] = [
        0x3ff0000000000000,
        0xbfdffffffffffb6c,
        0x3fa5555553f117c1,
        0xbf56c0f056672a03,
    ];
    let cos_low = f_fmla(
        x_sq,
        f64::from_bits(COS_COEFFS[1]),
        f64::from_bits(COS_COEFFS[0]),
    );
    let cos_high = f_fmla(
        x_sq,
        f64::from_bits(COS_COEFFS[3]),
        f64::from_bits(COS_COEFFS[2]),
    );
    let v_cos = f_fmla(x_quad, cos_high, cos_low);

    SinCosPi64 { v_sin, v_cos }
}
#[inline]
pub(crate) fn sin_small(z: f64) -> f64 {
let x_e = (z.to_bits() >> 52) & 0x7ff;
const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
if x_e < E_BIAS - 26 {
return f_fmla(z, f64::from_bits(0xbc90000000000000), z);
}
let (angle_dd, k) = reduce_small_pi64(z);
let sin_cos = sincos_eval_pi64(angle_dd);
// cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 64) * pi/64).
let sk = SIN_K_PI_OVER_64[((k as u64) & 127) as usize];
let ck = SIN_K_PI_OVER_64[(((k as u64).wrapping_add(32)) & 127) as usize];
let sin_k = f64::from_bits(sk);
let cos_k = f64::from_bits(ck);
f_fmla(sin_cos.v_cos, sin_k, sin_cos.v_sin * cos_k)
}
#[inline]
pub(crate) fn cos_small(z: f64) -> f64 {
let x_e = (z.to_bits() >> 52) & 0x7ff;
const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
if x_e < E_BIAS - 27 {
// Signed zeros.
if z == 0.0 {
return 1.0;
}
// For |x| < 2^-26, |sin(x) - x| < ulp(x)/2.
return 1.0 - min_normal_f64();
}
let (angle_dd, k) = reduce_small_pi64(z);
let sin_cos = sincos_eval_pi64(angle_dd);
// cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 64) * pi/64).
let sk = SIN_K_PI_OVER_64[((k as u64).wrapping_add(64) & 127) as usize];
let ck = SIN_K_PI_OVER_64[(((k as u64).wrapping_add(32)) & 127) as usize];
let sin_k = f64::from_bits(sk);
let cos_k = f64::from_bits(ck);
f_fmla(sin_cos.v_cos, cos_k, sin_cos.v_sin * sin_k)
}

1017
vendor/pxfm/src/bessel/y0.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1517
vendor/pxfm/src/bessel/y0_coeffs.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1478
vendor/pxfm/src/bessel/y0_coeffs_taylor.rs vendored Normal file

File diff suppressed because it is too large Load Diff

336
vendor/pxfm/src/bessel/y0f.rs vendored Normal file
View File

@@ -0,0 +1,336 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::{j0f_asympt_alpha, j0f_asympt_beta, j1f_rsqrt};
use crate::bessel::trigo_bessel::sin_small;
use crate::bessel::y0f_coeffs::{Y0_ZEROS, Y0_ZEROS_VALUES, Y0F_COEFFS};
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::logs::fast_logf;
use crate::polyeval::{f_polyeval10, f_polyeval18};
use crate::sincos_reduce::rem2pif_any;
/// Bessel of the second kind of order 0 (Y0)
///
/// Special cases: `±0` gives `-inf`, `+inf` gives `0`, and negative inputs
/// (including `-inf`) and NaN give NaN.
///
/// Max ULP 0.5
pub fn f_y0f(x: f32) -> f32 {
    let bits = x.to_bits();
    // One unsigned compare catches +inf, NaN and every value with the sign bit set;
    // +0 is checked separately.
    if bits >= 0xffu32 << 23 || bits == 0 {
        if bits.wrapping_shl(1) == 0 {
            // |x| == 0
            return f32::NEG_INFINITY;
        }
        if x.is_infinite() {
            return if x.is_sign_negative() { f32::NAN } else { 0. };
        }
        // NaN input or finite negative input: both yield NaN.
        return x + f32::NAN;
    }

    if bits <= 0x4296999au32 {
        // x <= 75.3
        return if bits <= 0x3faccccdu32 {
            // x <= 1.35
            y0f_near_zero(x)
        } else if bits <= 0x40000000u32 {
            // transient zone from 1.35 to 2 have bad behavior for log poly already,
            // and not yet good to be easily covered, thus it use its own poly
            y0_transient_area(x)
        } else {
            y0f_small_argument_path(x)
        };
    }

    // Exceptions: inputs with hard-coded results instead of the asymptotic formula.
    match bits {
        0x5023e87f => f32::from_bits(0x28085b2d),
        0x48171521 => f32::from_bits(0x2bd244ba),
        0x4398c299 => f32::from_bits(0x32c730db),
        0x7f0e5a38 => f32::from_bits(0x131f680b),
        0x6ef9be45 => f32::from_bits(0x987d8a8f),
        _ => y0f_asympt(x),
    }
}
/**
Y0 near the origin (used by `f_y0f` for x <= 1.35), from the series

Y0(x) = 2/pi*(euler_gamma + log(x/2))*J0(x) - sum((-1)^m*(x/2)^(2*m)/(m!)^2*sum(1+1/2 + ... 1/m))

regrouped as

Y0(x)=log(x)*W0(x) - Z0(x)

where W0 and Z0 are even polynomials, evaluated here in x^2 with 10 coefficients each.

Generated by SageMath:
```python
from sage.all import *
R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()
N = 10 # Number of terms (adjust as needed)
gamma = RealField(300)(euler_gamma)
d2 = RealField(300)(2)
pi = RealField(300).pi()
# Define J0(x) Taylor expansion at x = 0
def j_series(n, x):
    return sum([(-1)**m * (x/2)**(ZZ(n) + ZZ(2)*ZZ(m)) / (ZZ(m).factorial() * (ZZ(m) + ZZ(n)).factorial()) for m in range(N)])
J0_series = j_series(0, x)
def z_series(x):
    return sum([(-1)**m * (x/2)**(ZZ(2)*ZZ(m)) / ZZ(m).factorial()**ZZ(2) * sum(RealField(300)(1)/RealField(300)(k) for k in range(1, m+1)) for m in range(1, N)])
W0 = (d2/pi) * J0_series
Z0 = -gamma * (d2/pi) * J0_series + RealField(300)(2).log() * (d2/pi) * J0_series + (d2/pi) * z_series(x)
# see the series
print(W0)
print(Z0)
```
**/
#[inline]
fn y0f_near_zero(x: f32) -> f32 {
    let xd = x as f64;
    let x_sq = xd * xd;
    // W0(x), the factor multiplying log(x).
    let w_poly = f_polyeval10(
        x_sq,
        f64::from_bits(0x3fe45f306dc9c883),
        f64::from_bits(0xbfc45f306dc9c883),
        f64::from_bits(0x3f845f306dc9c883),
        f64::from_bits(0xbf321bb945252402),
        f64::from_bits(0x3ed21bb945252402),
        f64::from_bits(0xbe672db9f21b0f5f),
        f64::from_bits(0x3df49a6c656d62ff),
        f64::from_bits(0xbd7ae90af76a4d0f),
        f64::from_bits(0x3cfae90af76a4d0f),
        f64::from_bits(0xbc754331c053fdad),
    );
    // Z0(x), the non-logarithmic part.
    let z_poly = f_polyeval10(
        x_sq,
        f64::from_bits(0x3fb2e4d699cbd01f),
        f64::from_bits(0xbfc6bbcb41034286),
        f64::from_bits(0x3f9075b1bbf41364),
        f64::from_bits(0xbf41a6206b7b973d),
        f64::from_bits(0x3ee3e99794203bbd),
        f64::from_bits(0xbe7bce4a600d3ea4),
        f64::from_bits(0x3e0a6ee796b871b6),
        f64::from_bits(0xbd92393d82c6b2e4),
        f64::from_bits(0x3d131085da82054c),
        f64::from_bits(0xbc8f4ed4b492ebcc),
    );
    let log_x = fast_logf(x);
    // log(x) * W0 - Z0 in a single fused step.
    f_fmla(w_poly, log_x, -z_poly) as f32
}
/// Y0 on the transient range (1.35, 2], evaluated as a degree-17 polynomial in the
/// offset from a fixed expansion point near 2.1971.
#[inline]
fn y0_transient_area(x: f32) -> f32 {
    // Expansion point 2.1971413260310170..., stored as a double-double bit pair.
    // This is the first positive zero of Y1, i.e. the first extremum of Y0 (not a
    // zero of Y0): the constant coefficient below is about 0.52 and the linear one
    // is nearly zero.
    const CENTER: DoubleDouble =
        DoubleDouble::from_bit_pair((0xbc8bd1e50d219bfd, 0x400193bed4dff243));
    let xd = x as f64;
    // Subtract the high part first, then the low correction.
    let offset = (xd - CENTER.hi) - CENTER.lo;
    /*
    Poly generated by Wolfram Matematica:
    <<FunctionApproximations`
    ClearAll["Global`*"]
    f[x_]:= BesselY[0,x + 2.1971413260310170351490335626990]
    {approx,error} = MiniMaxApproximation[f[x],{x,{ 1.35 - 2.1971413260310170351490335626990, 2 - 2.1971413260310170351490335626990 },17,0},WorkingPrecision->120]
    poly=error[[1]];
    coeffs=CoefficientList[poly,x];
    TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
    */
    let value = f_polyeval18(
        offset,
        f64::from_bits(0x3fe0aa48442f8375),
        f64::from_bits(0x3de601d3b959b8d8),
        f64::from_bits(0xbfd0aa4840bb8529),
        f64::from_bits(0x3fa439fc16d4835e),
        f64::from_bits(0x3f80d2dcd97d2b4f),
        f64::from_bits(0x3f4f833368f9f047),
        f64::from_bits(0xbf541a702ee92277),
        f64::from_bits(0x3f3abc113cf0f4da),
        f64::from_bits(0xbefac1ded6f17ba8),
        f64::from_bits(0x3f33ef372e24df82),
        f64::from_bits(0x3f3bf8b42322df40),
        f64::from_bits(0x3f4582f9daec9ca7),
        f64::from_bits(0x3f479fc07175494e),
        f64::from_bits(0x3f4477a5e32b723a),
        f64::from_bits(0x3f39fbfd6a6d6f0c),
        f64::from_bits(0x3f2760a66816527b),
        f64::from_bits(0x3f0a68fdeeba224f),
        f64::from_bits(0x3edd78c6c87089e1),
    );
    value as f32
}
/// Mid-range path (called by `f_y0f` for 2 < x <= 75.3).
///
/// Locates the nearest tabulated zero or extremum of Y0, then evaluates the stored
/// series expansion around that point at the offset of `x` from it.
#[inline]
fn y0f_small_argument_path(x: f32) -> f32 {
    // let avg_step = 74.607799 / 47.0;
    // let inv_step = 1.0 / avg_step;
    const INV_STEP: f64 = 0.6299609508652038;
    const LAST_INDEX: f64 = (Y0_ZEROS.len() - 1) as f64;

    let xd = x as f64;
    // Estimated (fractional) table position of `x`.
    let position = xd * INV_STEP;
    // SAFETY: `to_int_unchecked` needs a finite, non-negative value that fits in
    // `usize`. The only caller passes a finite x > 2, and the value is clamped to the
    // last table index.
    let below = unsafe { position.min(LAST_INDEX).to_int_unchecked::<usize>() };
    // SAFETY: same argument as above, applied to the rounded-up position.
    let above = unsafe { position.ceil().min(LAST_INDEX).to_int_unchecked::<usize>() };

    let point_below = DoubleDouble::from_bit_pair(Y0_ZEROS[below]);
    let point_above = DoubleDouble::from_bit_pair(Y0_ZEROS[above]);
    let gap_below = (point_below.hi - xd).abs();
    let gap_above = (point_above.hi - xd).abs();
    // Keep the closer candidate; ties go to the upper one.
    let (center, idx, gap) = if gap_below < gap_above {
        (point_below, below, gap_below)
    } else {
        (point_above, above, gap_above)
    };

    if idx == 0 {
        // Really should not happen here, but if it is then to log expansion
        return y0f_near_zero(x);
    }
    // We hit exact zero, value, better to return it directly
    if gap == 0. {
        return f64::from_bits(Y0_ZEROS_VALUES[idx]) as f32;
    }

    // Coefficient rows start at table point 1, hence the index shift.
    let coeffs = &Y0F_COEFFS[idx - 1];
    let offset = (xd - center.hi) - center.lo;
    let value = f_polyeval18(
        offset,
        f64::from_bits(coeffs[0]),
        f64::from_bits(coeffs[1]),
        f64::from_bits(coeffs[2]),
        f64::from_bits(coeffs[3]),
        f64::from_bits(coeffs[4]),
        f64::from_bits(coeffs[5]),
        f64::from_bits(coeffs[6]),
        f64::from_bits(coeffs[7]),
        f64::from_bits(coeffs[8]),
        f64::from_bits(coeffs[9]),
        f64::from_bits(coeffs[10]),
        f64::from_bits(coeffs[11]),
        f64::from_bits(coeffs[12]),
        f64::from_bits(coeffs[13]),
        f64::from_bits(coeffs[14]),
        f64::from_bits(coeffs[15]),
        f64::from_bits(coeffs[16]),
        f64::from_bits(coeffs[17]),
    );
    value as f32
}
/*
Large-argument path. Evaluates:
Y0 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
with x reduced by `rem2pif_any` before the phase terms are added.
*/
#[inline]
fn y0f_asympt(x: f32) -> f32 {
    const SQRT_2_OVER_PI: f64 = f64::from_bits(0x3fe9884533d43651);
    const NEG_PI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);

    let xd = x as f64;
    let alpha = j0f_asympt_alpha(xd);
    let beta = j0f_asympt_beta(xd);
    let reduced = rem2pif_any(x);
    // Phase offset -PI/4 - alpha(x), formed first and then added to the reduced x.
    let phase_shift = NEG_PI_OVER_4 - alpha;
    let phase = reduced + phase_shift;
    let sin_term = sin_small(phase);
    let amplitude = beta * sin_term;
    // sqrt(2/PI) times the reciprocal-square-root helper applied to x.
    let scale = SQRT_2_OVER_PI * j1f_rsqrt(xd);
    (scale * amplitude) as f32
}
#[cfg(test)]
mod tests {
    use crate::f_y0f;

    /// Reference values, one or more per evaluation path.
    #[test]
    fn test_y0f() {
        // x > 75.3: asymptotic path
        assert_eq!(f_y0f(90.5), 0.08254846);
        assert_eq!(f_y0f(77.5), 0.087678276);
        // 1.35 < x <= 2: transient polynomial
        assert_eq!(f_y0f(1.5), 0.3824489);
        // x <= 1.35: logarithmic series
        assert_eq!(f_y0f(0.5), -0.44451874);
    }

    /// Negative inputs, signed zeros and infinities.
    #[test]
    fn test_y0f_special_values() {
        assert!(f_y0f(-1.).is_nan());
        assert_eq!(f_y0f(0.), f32::NEG_INFINITY);
        assert_eq!(f_y0f(-0.), f32::NEG_INFINITY);
        assert_eq!(f_y0f(f32::INFINITY), 0.);
        assert!(f_y0f(f32::NEG_INFINITY).is_nan());
    }
}

1236
vendor/pxfm/src/bessel/y0f_coeffs.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1112
vendor/pxfm/src/bessel/y1.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1068
vendor/pxfm/src/bessel/y1_coeffs.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1030
vendor/pxfm/src/bessel/y1_coeffs_taylor.rs vendored Normal file

File diff suppressed because it is too large Load Diff

348
vendor/pxfm/src/bessel/y1f.rs vendored Normal file
View File

@@ -0,0 +1,348 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bessel::j0f::j1f_rsqrt;
use crate::bessel::j1f::{j1f_asympt_alpha, j1f_asympt_beta};
use crate::bessel::trigo_bessel::cos_small;
use crate::bessel::y1f_coeffs::{Y1_ZEROS, Y1_ZEROS_VALUES, Y1F_COEFFS};
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::logs::fast_logf;
use crate::polyeval::{f_polyeval10, f_polyeval18, f_polyeval19};
use crate::sincos_reduce::rem2pif_any;
/// Bessel of the second kind of order 1 (Y1)
///
/// Special cases: `±0` gives `-inf`, `+inf` gives `0`, and negative inputs
/// (including `-inf`) and NaN give NaN.
///
/// Max ULP 0.5
pub fn f_y1f(x: f32) -> f32 {
    let bits = x.to_bits();
    // One unsigned compare catches +inf, NaN and every value with the sign bit set;
    // +0 is checked separately.
    if bits >= 0xffu32 << 23 || bits == 0 {
        if bits.wrapping_shl(1) == 0 {
            // |x| == 0
            return f32::NEG_INFINITY;
        }
        if x.is_infinite() {
            return if x.is_sign_negative() { f32::NAN } else { 0. };
        }
        // NaN input or finite negative input: both yield NaN.
        return x + f32::NAN;
    }

    if bits <= 0x424e0000u32 {
        // x <= 51.5
        return if bits <= 0x3fb5c28fu32 {
            // x <= 1.42
            y1f_near_zero(x)
        } else if bits <= 0x40000000u32 {
            // transient zone from 1.42 to 2 have bad behavior for log poly already,
            // and not yet good to be easily covered, thus it use its own poly
            y1_transient_area(x)
        } else {
            y1f_small_argument_path(x)
        };
    }

    // Exceptions: inputs with hard-coded results instead of the asymptotic formula.
    match bits {
        0x47037a3d => f32::from_bits(0x2deededb),
        0x65ce46e4 => f32::from_bits(0x9eed85c4),
        0x6bf68a7b => f32::from_bits(0x9dc70a09),
        0x76d84625 => f32::from_bits(0x15d7a68b),
        0x7e3dcda0 => f32::from_bits(0x12b81111),
        _ => y1f_asympt(x),
    }
}
/**
Y1 near the origin (used by `f_y1f` for x <= 1.42), from the series

y2 = -J1(x)*log(x) + 1/x * (1 - sum((-1)^m*(H(m)+H(m-1))/(2^m*m!*(m-1)!)*x^(2*m))
Y1(x) = 2/pi*(-y2(x)+(euler_gamma - log(2))*J1(x))

regrouped as

Y1(x)=log(x)*W1(x) - Z1(x) - 2/(pi*x)

W1 and Z1 are odd in x, so each is evaluated as a 10-coefficient polynomial in x^2
multiplied by x.

Generated by SageMath:
```python
from sage.all import *
R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()
N = 16 # Number of terms (adjust as needed)
gamma = RealField(300)(euler_gamma)
d2 = RealField(300)(2)
pi = RealField(300).pi()
log2 = RealField(300)(2).log()
def j_series(n, x):
    return sum([(-1)**m * (x/2)**(ZZ(n) + ZZ(2)*ZZ(m)) / (ZZ(m).factorial() * (ZZ(m) + ZZ(n)).factorial()) for m in range(N)])
J1_series = j_series(1, x)
def harmony(m):
    return sum(RealField(300)(1)/RealField(300)(k) for k in range(1, m+1))
def z_series(x):
    return sum([(-1)**m * (x)**(ZZ(2)*ZZ(m)) / (ZZ(2)**(2*m) * ZZ(m).factorial() * (ZZ(m) - ZZ(1)).factorial()) * (harmony(m) + harmony(m - 1)) for m in range(1, N)])
W1 = d2/pi * J1_series
Z1 = -(d2/(x*pi) * z_series(x) + d2/pi * gamma * J1_series(x) - d2/pi * log2 * J1_series(x))
# see the series
print(W1)
print(Z1)
```
See ./notes/bessel_y1_taylor.ipynb for generation
**/
#[inline]
fn y1f_near_zero(x: f32) -> f32 {
    const TWO_OVER_PI: f64 = f64::from_bits(0x3fe45f306dc9c883);

    let xd = x as f64;
    let x_sq = xd * xd;
    // W1(x), the factor multiplying log(x).
    let w_series = f_polyeval10(
        x_sq,
        f64::from_bits(0x3fd45f306dc9c883),
        f64::from_bits(0xbfa45f306dc9c883),
        f64::from_bits(0x3f5b2995e7b7b604),
        f64::from_bits(0xbf021bb945252402),
        f64::from_bits(0x3e9cf9286ea1d337),
        f64::from_bits(0xbe2ee7a29824147f),
        f64::from_bits(0x3db78be9987d036d),
        f64::from_bits(0xbd3ae90af76a4d0f),
        f64::from_bits(0x3cb7eb97f85e7d62),
        f64::from_bits(0xbc31028e3376648a),
    ) * xd;
    // Z1(x), the non-logarithmic regular part.
    let z_series = f_polyeval10(
        x_sq,
        f64::from_bits(0x3fc91866143cbc8a),
        f64::from_bits(0xbfabd3975c75b4a7),
        f64::from_bits(0x3f6835b97894be5b),
        f64::from_bits(0xbf12c7dbffcde97d),
        f64::from_bits(0x3eb0a780ac776eac),
        f64::from_bits(0xbe432e5a4ddeea30),
        f64::from_bits(0x3dcf0ce34d2066a6),
        f64::from_bits(0xbd52a4e1aea45c18),
        f64::from_bits(0x3cd1474ade9154ac),
        f64::from_bits(0xbc4978ba84f218c0),
    ) * xd;
    let log_x = fast_logf(x);
    let inv_x = 1. / xd;
    // log(x) * W1 - Z1 ...
    let regular = f_fmla(w_series, log_x, -z_series);
    // ... minus the pole term 2/(pi*x).
    f_fmla(inv_x, -TWO_OVER_PI, regular) as f32
}
/// Y1 on the transient range (1.42, 2], evaluated as a degree-17 polynomial in the
/// offset from a fixed expansion point near 2.1971.
#[inline]
fn y1_transient_area(x: f32) -> f32 {
    // Expansion point 2.1971413260310170..., stored as a double-double bit pair.
    // This is the first positive zero of Y1 (not of Y0): the constant coefficient
    // below is nearly zero.
    const CENTER: DoubleDouble =
        DoubleDouble::from_bit_pair((0xbc8bd1e50d219bfd, 0x400193bed4dff243));
    let xd = x as f64;
    // Subtract the high part first, then the low correction.
    let offset = (xd - CENTER.hi) - CENTER.lo;
    /*
    Poly generated by Wolfram Matematica:
    <<FunctionApproximations`
    ClearAll["Global`*"]
    f[x_]:= BesselY[1,x + 2.1971413260310170351490335626990]
    {approx,error} = MiniMaxApproximation[f[x],{x,{1.42 - 2.1971413260310170351490335626990, 2 - 2.1971413260310170351490335626990 },17,0},WorkingPrecision->120]
    poly=error[[1]];
    coeffs=CoefficientList[poly,x];
    TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
    */
    let value = f_polyeval18(
        offset,
        f64::from_bits(0x3d9b15a8283b069b),
        f64::from_bits(0x3fe0aa484455fd09),
        f64::from_bits(0xbfbe56f80802fa38),
        f64::from_bits(0xbfa0d2ac9d0409ad),
        f64::from_bits(0xbf73a619b3551650),
        f64::from_bits(0x3f7e6c480057ecbb),
        f64::from_bits(0xbf650dc773a5df4d),
        f64::from_bits(0x3f531e9ccab7d4da),
        f64::from_bits(0xbf29b76999169b0e),
        f64::from_bits(0x3f509c829abceaf7),
        f64::from_bits(0x3f575aee5697c4d8),
        f64::from_bits(0x3f63f7f9598be176),
        f64::from_bits(0x3f67a6ae61541282),
        f64::from_bits(0x3f665e6d3de19021),
        f64::from_bits(0x3f5ee8837b9197f6),
        f64::from_bits(0x3f4e6924f270fd7e),
        f64::from_bits(0x3f32ca61e5b74925),
        f64::from_bits(0x3f0725735bc3890b),
    );
    value as f32
}
/// Mid-range path (called by `f_y1f` for 2 < x <= 51.5).
///
/// Locates the nearest tabulated zero or extremum of Y1, then evaluates the stored
/// series expansion around that point at the offset of `x` from it.
#[inline]
fn y1f_small_argument_path(x: f32) -> f32 {
    // let avg_step = 51.03 / 33.0;
    // let inv_step = 1.0 / avg_step;
    //
    // println!("inv_step {}", inv_step);
    const INV_STEP: f64 = 0.6466784244562023;
    const LAST_INDEX: f64 = (Y1_ZEROS.len() - 1) as f64;

    let xd = x as f64;
    // Estimated (fractional) table position of `x`.
    let position = xd * INV_STEP;
    // SAFETY: `to_int_unchecked` needs a finite, non-negative value that fits in
    // `usize`. The only caller passes a finite x > 2, and the value is clamped to the
    // last table index.
    let below = unsafe { position.min(LAST_INDEX).to_int_unchecked::<usize>() };
    // SAFETY: same argument as above, applied to the rounded-up position.
    let above = unsafe { position.ceil().min(LAST_INDEX).to_int_unchecked::<usize>() };

    let point_below = DoubleDouble::from_bit_pair(Y1_ZEROS[below]);
    let point_above = DoubleDouble::from_bit_pair(Y1_ZEROS[above]);
    let gap_below = (point_below.hi - xd).abs();
    let gap_above = (point_above.hi - xd).abs();
    // Keep the closer candidate; ties go to the upper one.
    let (center, idx, gap) = if gap_below < gap_above {
        (point_below, below, gap_below)
    } else {
        (point_above, above, gap_above)
    };

    if idx == 0 {
        // Really should not happen here, but if it is then to log expansion
        return y1f_near_zero(x);
    }
    // We hit exact zero, value, better to return it directly
    if gap == 0. {
        return f64::from_bits(Y1_ZEROS_VALUES[idx]) as f32;
    }

    // Coefficient rows start at table point 1, hence the index shift.
    let coeffs = &Y1F_COEFFS[idx - 1];
    let offset = (xd - center.hi) - center.lo;
    let value = f_polyeval19(
        offset,
        f64::from_bits(coeffs[0]),
        f64::from_bits(coeffs[1]),
        f64::from_bits(coeffs[2]),
        f64::from_bits(coeffs[3]),
        f64::from_bits(coeffs[4]),
        f64::from_bits(coeffs[5]),
        f64::from_bits(coeffs[6]),
        f64::from_bits(coeffs[7]),
        f64::from_bits(coeffs[8]),
        f64::from_bits(coeffs[9]),
        f64::from_bits(coeffs[10]),
        f64::from_bits(coeffs[11]),
        f64::from_bits(coeffs[12]),
        f64::from_bits(coeffs[13]),
        f64::from_bits(coeffs[14]),
        f64::from_bits(coeffs[15]),
        f64::from_bits(coeffs[16]),
        f64::from_bits(coeffs[17]),
        f64::from_bits(coeffs[18]),
    );
    value as f32
}
/// Asymptotic form of Y1.
///
/// Evaluates:
///   Y1 = sqrt(2/(PI*x)) * beta(x) * sin(x - 3*PI/4 - alpha(x))
/// Moving PI/2 out of the sine argument gives the form that is actually computed:
///   Y1 = sqrt(2/(PI*x)) * beta(x) * (-cos(x - PI/4 - alpha(x)))
#[inline]
fn y1f_asympt(x: f32) -> f32 {
    // sqrt(2/PI)
    const SQRT_2_OVER_PI: f64 = f64::from_bits(0x3fe9884533d43651);
    // -PI/4
    const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);

    let arg = x as f64;
    let alpha = j1f_asympt_alpha(arg);
    let beta = j1f_asympt_beta(arg);
    // Range-reduced argument; the constant phase and alpha are combined first, then added.
    let reduced = rem2pif_any(x);
    let phase = reduced + (MPI_OVER_4 - alpha);
    let oscillation = beta * -cos_small(phase);
    let envelope = SQRT_2_OVER_PI * j1f_rsqrt(arg);
    (envelope * oscillation) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_y1f` on a few finite arguments and on the special inputs.
    #[test]
    fn test_bessel_zero() {
        // (argument, expected result) pairs that must match exactly.
        let exact_cases: [(f32, f32); 5] = [
            (700.76, 0.024876066),
            (35.76, 0.121432826),
            (1.76, -0.24787569),
            (0.87, -0.9030042),
            (f32::INFINITY, 0.0),
        ];
        for (input, expected) in exact_cases {
            assert_eq!(f_y1f(input), expected);
        }
        // Inputs for which the result must be NaN.
        for input in [f32::NEG_INFINITY, f32::NAN] {
            assert!(f_y1f(input).is_nan());
        }
    }
}

923
vendor/pxfm/src/bessel/y1f_coeffs.rs vendored Normal file
View File

@@ -0,0 +1,923 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
Zeros and extrema of Y1 on the [0, 52] interval
Generated by SageMath:
```python
R120 = RealField(120)
zeros = []
mp.prec = 150
step = mpf("0.1")
epsilon = mpf("1e-35")
x = mpf("1.25")
previous_zero = R120(0)
y1_zeros = []
while x < mpf("52.0"):
f1 = bessely(1, x)
f2 = bessely(1, x + step)
if f1 * f2 < 0:
zero = findroot(lambda t: bessely(1, t), (x, x + step), solver='secant', tol=mp.mpf("1e-41"))
previous_zero = zero
y1_zeros.append(zero)
if previous_zero is not None and abs(x - mpf(f'{round(x)}')) < epsilon:
zeros.append(previous_zero)
x += step
y1_extrema = []
x = mpf("1.25")
while x < mpf("52.0"):
d1 = mp.diff(lambda t: bessely(1, t), x)
d2 = mp.diff(lambda t: bessely(1, t), x + step)
if d1 * d2 < 0:
extremum = findroot(lambda t: mp.diff(lambda u: bessely(1, u), t), (x, x + step), solver='secant', tol=mp.mpf("1e-41"))
y1_extrema.append(extremum)
x += step
y1_zeros.extend(y1_extrema)
y1_zeros = sorted(y1_zeros)
print(f"pub(crate) static Y1_ZEROS: [(u64, u64); {len(y1_zeros)}] = [")
print(f"(0x0, 0x0),")
for z in y1_zeros:
k = split_double_double(z)
hi = double_to_hex(k[1])
lo = double_to_hex(k[0])
print(f"({lo}, {hi}),")
print("];")
```
**/
pub(crate) static Y1_ZEROS: [(u64, u64); 33] = [
// Each entry is the bit pattern of a double-double written as `(lo, hi)`,
// matching the `({lo}, {hi})` output of the generator script.
(0x0, 0x0), // not really used, just a stub to keep the indices aligned
(0xbc8bd1e50d219bfd, 0x400193bed4dff243),
(0x3c53bac0714e4129, 0x400d76d4affba175),
(0x3cbdfe7bac228e8c, 0x4015b7fe4e87b02e),
(0x3ca7960b6b1c46ac, 0x401bc41890588553),
(0x3cb479cc068d9046, 0x40213127ae6169b4),
(0x3cc8f4ba5d68e440, 0x40243f2ee51e8c7e),
(0x3c80fc786ce06080, 0x40277f9138d43206),
(0xbcaf6ef7a3571593, 0x402a924ee4a3e52c),
(0xbcc5e091a50f8e05, 0x402dcb7d88de848b),
(0x3cc07320221cd5e5, 0x403070a7a43daae6),
(0xbcda1ee4c5487ede, 0x40320b1c695f1e3b),
(0xbcd2903124fef7e3, 0x4033971a15717510),
(0x3cd391b14410528f, 0x40353025492188cd),
(0xbcc15ec09721b746, 0x4036bcefd7de87a3),
(0x3cb52f75f025b205, 0x403854fa303820ca),
(0x3cb6f57f7696f493, 0x4039e262715f12a9),
(0xbcbcf130fbea3b24, 0x403b79acee8cfb7d),
(0xbc912142b10a5c65, 0x403d079247e8f51b),
(0xbc9e7a77047d6166, 0x403e9e480605283c),
(0x3cb1452eb07cd937, 0x40401649819af8fa),
(0xbce96beabef7ecf4, 0x4040e16907f8fb56),
(0x3cec6086fb5dd335, 0x4041a8b8a142d536),
(0x3cd2481e87adfe57, 0x404273a7b35a7aff),
(0x3cd7df5b6f701c7a, 0x40433b1ac0375e31),
(0x3cda8ffacaac8461, 0x404405e18393afb5),
(0xbce5b5acaff0a867, 0x4044cd72d2adfb0c),
(0x3cbfe463face2c1c, 0x4045981787d668db),
(0xbcefcba6ea61df1b, 0x40465fc2f7ca5b81),
(0xbce26390f25f01cb, 0x40472a4a85cc317e),
(0xbcbba46ca6ef9b6f, 0x4047f20cbfc32967),
(0xbcdcc667e557a177, 0x4048bc7b10ed3960),
(0x3cea473d4f209faf, 0x4049845158040451),
];
/**
Values of Y1 at the zeros and extrema stored in [Y1_ZEROS]
Generated by MPFR:
```text
let mut arr = vec![];
for zeros in Y1_ZEROS.iter() {
if zeros.1 == 0 {
arr.push(0);
} else {
let mpfr = Float::with_val(107, f64::from_bits(zeros.1)).y1();
arr.push(mpfr.to_f64().to_bits());
}
}
println!(
"arr: [{}]",
arr.iter()
.map(|x| format!("0x{:016x}", x))
.collect::<Vec<_>>()
.join(", ")
);
```
**/
pub(crate) static Y1_ZEROS_VALUES: [u64; 33] = [
// Index-aligned with `Y1_ZEROS`: entry `i` is the f64 bit pattern of Y1 evaluated at the
// high part of `Y1_ZEROS[i]` (see the generator snippet). Odd indices hold the tiny
// residuals at the zeros, even indices hold the values at the extrema.
0x0000000000000000, // placeholder matching the unused `Y1_ZEROS[0]` slot
0x3c7cf9f8d5e1a475,
0x3fdaabb4011ed330,
0x3ca46a40b234169c,
0xbfd36732d4b96094,
0xbc963bc010b45f46,
0x3fd00ef3745e0e3c,
0x3c5f96d32c02f147,
0xbfcc075da85beb4f,
0x3ca213201464272b,
0x3fc931a5a0ae5aa0,
0xbcb39d4c41d5839f,
0xbfc713fc51664c74,
0xbcab20138dd047ec,
0x3fc56b97f8091ac5,
0x3c8b67dbfd3bd179,
0xbfc413644356a52b,
0x3c919e290514c619,
0x3fc2f4e70d6c7e01,
0xbc719366994a86bc,
0xbfc20198200b699d,
0x3cbbec2345ac18e2,
0x3fc12f9870d68e18,
0x3ca334eb74fdcfd1,
0xbfc077eede4a0d89,
0xbcaaca291aee3c35,
0x3fbfab0b166d23d8,
0x3c8ef92aa9c8e53c,
0xbfbe891b327da16d,
0x3cb13e3c695ea088,
0x3fbd84391bb2748d,
0xbcaa1c861fdd1438,
0xbfbc97d79918527d,
];
/**
Polynomial expansions of Y1 around each zero and extremum stored in [Y1_ZEROS]
Generated by SageMath and Sollya:
```python
def compute_intervals(zeros):
intervals = []
for i in range(0, len(zeros)):
if i == 0:
a = 2 - zeros[i]
b = (zeros[i] + zeros[i + 1]) / 2 + 0.05 - zeros[i]
intervals.append((RealField(18)(a), RealField(18)(b), RealField(110)(zeros[i])))
elif i + 1 > len(zeros) - 1:
a = (zeros[i - 1] + zeros[i]) / 2 - 0.05 - zeros[i]
b = (zeros[i]) + 0.83 + 0.05 - zeros[i]
intervals.append((RealField(18)(a), RealField(18)(b), RealField(110)(zeros[i])))
else:
a = (zeros[i - 1] + zeros[i]) / 2 - zeros[i] - 0.05
b = (zeros[i] + zeros[i + 1]) / 2 + 0.05 - zeros[i]
intervals.append((RealField(18)(a), RealField(18)(b), RealField(110)(zeros[i])))
return intervals
intervals = compute_intervals(y1_zeros)
def build_sollya_script(a, b, zero, deg):
return f"""
prec = 250;
bessel_y1 = library("./notes/bessel_sollya/cmake-build-release/libbessel_sollya.dylib");
f = bessel_y1(x + {zero});
d = [{a}, {b}];
pf = remez(f, {deg}, d);
for i from 0 to degree(pf) do {{
write(coeff(pf, i)) >> "coefficients.txt";
write("\\n") >> "coefficients.txt";
}};
"""
def load_coefficients(filename):
with open(filename, "r") as f:
return [RR(line.strip()) for line in f if line.strip()]
def call_sollya_on_interval(a, b, zero, degree=12):
sollya_script = build_sollya_script(a, b, zero, degree)
with open("tmp_interval.sollya", "w") as f:
f.write(sollya_script)
import subprocess
if os.path.exists("coefficients.txt"):
os.remove("coefficients.txt")
try:
result = subprocess.run(
["sollya", "tmp_interval.sollya"],
check=True,
capture_output=True,
text=True
)
except subprocess.CalledProcessError as e:
return
degree = 18
print(f"pub(crate) static Y1F_COEFFS: [[u64;{degree + 1}]; {len(intervals)}] = [")
for i in range(0, len(intervals)):
interval = intervals[i]
call_sollya_on_interval(interval[0], interval[1], interval[2], degree)
coeffs = load_coefficients(f"coefficients.txt")
print("[")
for c in coeffs:
print(double_to_hex(c) + ",")
print("],")
print("];")
```
**/
pub(crate) static Y1F_COEFFS: [[u64; 19]; 32] = [
// Row `i` holds the f64 bit patterns of the 19 coefficients (lowest degree first) of the
// polynomial centred on `Y1_ZEROS[i + 1]`; the placeholder slot 0 of `Y1_ZEROS` has no row.
// Centred on Y1_ZEROS[1]
[
0x3bdca2ee18606a4b,
0x3fe0aa48442f014b,
0xbfbe56f82217b8f4,
0xbfa0d2af4e932400,
0xbf73a6dec3726cd5,
0x3f7e671c7d12ea48,
0xbf65429dc5c0e9d4,
0x3f517ab4af4655e4,
0xbf40b2d8647a250d,
0x3f2eea7b1b675766,
0xbf1c3fb728e7d2ff,
0x3f09d1da72e12f44,
0xbef7964bf8511e22,
0x3ee57c2a83e1f972,
0xbed33f4211a00375,
0x3ec02bcdac2103fd,
0xbea6fefcf033ab9d,
0x3e874128ed97d3bb,
0xbe57d5b1eac16658,
],
// Centred on Y1_ZEROS[2]
[
0x3fdaabb4011ed330,
0x3c54da7c52fcf446,
0xbfc8b45babe797b6,
0x3f8e147099a6f00d,
0x3f88c5af1eeb2143,
0xbf4133fa47d8ea48,
0xbf3bf8af93e7a2f0,
0x3f021d64bd4e2cd8,
0x3eb44d2c32fdaf23,
0x3eb14c3b9e7960c1,
0xbe9b8ee25c629be6,
0x3e7a85b5b497dc6c,
0xbe5bfa422fb8d949,
0x3e3f0ad81d293f5a,
0xbe20e6844c6faba7,
0x3e0214b2d826d072,
0xbde4ff658967d425,
0x3dcb549cdc774a83,
0xbda6b5df4d9c1682,
],
// Centred on Y1_ZEROS[3]
[
0x3b7ff35240713789,
0xbfd5c7c556f0c19a,
0x3fa00b9f8571ca1f,
0x3faa15d92dfe3e27,
0xbf710a329e2c23f5,
0xbf61be6db9923ac9,
0x3f2337c7e138eb84,
0x3f085b940eb5f37f,
0xbec80619146a1e65,
0xbea255e6cf4b3254,
0x3e5b62ccdc392c5a,
0x3e380b1a5a61e6b5,
0xbdfa7ec7fd0d2925,
0x3d840d04ff01d1b2,
0xbd938dc1b2e33eca,
0x3d74839c586126ca,
0xbd4b045bc7ad769b,
0x3d261d10a8575c45,
0xbd052a6cc14bcc54,
],
// Centred on Y1_ZEROS[4]
[
0xbfd36732d4b96094,
0x3b3886a5ed6fd628,
0x3fc3001c8002caf8,
0xbf7bf5a03bab4999,
0xbf8751ea028c1953,
0x3f423874cd8d0402,
0x3f364f6610d6493b,
0xbef02978de38394f,
0xbed72f0766d0d9c7,
0x3e8f2081874e556c,
0x3e6defd5dce91973,
0xbe2205c70046a2c7,
0xbdfb6432eb3ab7ea,
0x3db028a1c0572973,
0x3d807791dcab03a0,
0xbd29778204deee13,
0xbd08342db2e7148e,
0x3cc898efb37f9dad,
0xbc84e2adc305e2ab,
],
// Centred on Y1_ZEROS[5]
[
0xbac1435819592d4c,
0x3fd15f993fceab5c,
0xbf902b3933cf21b1,
0xbfa6395dfe49fcd4,
0x3f63ced2a2e69180,
0x3f607a678d6000bb,
0xbf1b50d7e1d3201e,
0xbf06f7bab104f34b,
0x3ec176e72bf94a3a,
0x3ea2becb2b6bacd1,
0xbe5a384eebfb23c2,
0xbe341e7a921f7f66,
0x3de9e3284b918a26,
0x3dbec40b21f2c78f,
0xbd726865da6190a9,
0xbd416f4fe7eed351,
0x3cf3160bd2bd6c64,
0x3cbf6d61c945b95c,
0xbc706809636e0aec,
],
// Centred on Y1_ZEROS[6]
[
0x3fd00ef3745e0e3c,
0x3aff192f298c81c3,
0xbfbfcdacdda138f2,
0x3f706cc34cd829fa,
0x3f84641bb10c16cb,
0xbf37fac943e2a16d,
0xbf34769ed32e14a2,
0x3ee80608ecda1508,
0x3ed5cc8242d77e23,
0xbe888c8f2538feb8,
0xbe6ce5908c1e5174,
0x3e1ed16257e17417,
0x3dfa30d623eda066,
0xbdaa5076123e3ecf,
0xbd814cd297d2be7e,
0x3d306166947e23e9,
0x3d01635f73179569,
0xbcaeafcf4c2f127b,
0xbc7b0828175d92fa,
],
// Centred on Y1_ZEROS[7]
[
0x3aba1488e1b7782d,
0xbfcdc14ea14e89f9,
0x3f84429fef5b5fbd,
0x3fa367d7d608e4ba,
0xbf59d6eb2bc49e35,
0xbf5dc4f991b3db86,
0x3f1315ec04d6e6bb,
0x3f0571814a1aa2f5,
0xbeba2977fa42f00f,
0xbea1e864230850b8,
0x3e54a7b82d3fa1e5,
0x3e33906609f9fe4c,
0xbde549e8b0e16969,
0xbdbe32cf2ce99d6f,
0x3d6eff542dd345c3,
0x3d415e2a9c2f4933,
0xbcf0d48dde3c3ffe,
0xbcbeac3c36b4bce2,
0x3c6af1612c5ddab0,
],
// Centred on Y1_ZEROS[8]
[
0xbfcc075da85beb4f,
0xbafcfa84f4024782,
0x3fbbdeb6ff9f55e1,
0xbf661eefb74da882,
0xbf8229ea914b846e,
0x3f30cbcc6778fd37,
0x3f32aa59f5091f7b,
0xbee1c15d5251ae54,
0xbed4583f15abd654,
0x3e831d151a12624a,
0x3e6b74e57c21e022,
0xbe19044f1339b061,
0xbdf93b1ec70c7bbc,
0x3da61a4e437e8105,
0x3d80d4305f038451,
0xbd2c3aad6f3b35c7,
0xbd010dec3a02c58c,
0x3cab15901b6d0925,
0x3c7ab2531f00c501,
],
// Centred on Y1_ZEROS[9]
[
0xbab392a85abdc950,
0x3fca7022be084d99,
0xbf7c650b6b83109a,
0xbfa163191c30aa62,
0x3f526b045287ddca,
0x3f5b17602840abf5,
0xbf0c0a9cee3c8429,
0xbf03e398cbc472de,
0x3eb3f35db1ff19f5,
0x3ea0e9b612dbc0ea,
0xbe5056babcd79a11,
0xbe32c1a8c8d768b1,
0x3de161b6a84838d0,
0x3dbd4ca9d2d67d78,
0xbd69fdd67a999eab,
0xbd4101919ce84a07,
0x3cecd91fa7851496,
0x3cbe3f8588ebbfdf,
0xbc67a4499c96e38d,
],
// Centred on Y1_ZEROS[10]
[
0x3fc931a5a0ae5aa0,
0x3afa23fd08be9891,
0xbfb919c8a3f203fa,
0x3f602a38da6262a9,
0x3f807ced48910819,
0xbf2900f33a00690a,
0xbf31278d46fd153c,
0x3edb2595529cf19f,
0x3ed2f7c2d608e0bb,
0xbe7e212d23787793,
0xbe69f3fcf3631e9c,
0x3e144fbf033f1974,
0x3df82268e7ab0cdb,
0xbda26cc2714815d4,
0xbd80418b35c32375,
0x3d28122e50410f0a,
0x3d009aba27e11464,
0xbca78943175d4e84,
0xbc7a379f959c0224,
],
// Centred on Y1_ZEROS[11]
[
0x3aaf25ce7e30cbc6,
0xbfc80781c32422e7,
0x3f754eda697a0098,
0x3f9fbe6df840847f,
0xbf4be318d61276e1,
0xbf58efee4094379c,
0x3f059145b4f0e4dd,
0x3f0282d26a74c382,
0xbeaf56c29d9ad6c8,
0xbe9fdd03174f6b47,
0x3e4a44a7907d0ec6,
0x3e31df6533090779,
0xbddc96e9cb6ee22b,
0xbdbc3439a99213c4,
0x3d65d387fc8083e0,
0x3d40830db4ec8a6e,
0xbce8ad426f9ce3f5,
0xbcbd93c0cf35d116,
0x3c649b19a5449ffa,
],
// Centred on Y1_ZEROS[12]
[
0xbfc713fc51664c74,
0xbaf73aab14face16,
0x3fb7049760cde490,
0xbf58ef5f1cbe4874,
0xbf7e5f53caf3bead,
0x3f237b0b62ddadd1,
0x3f2fd3bac08286da,
0xbed5789803de3adb,
0xbed1c0faa8999393,
0x3e7845b49b063dc7,
0x3e6886872800e226,
0xbe10b03677687883,
0xbdf7049d17bd230b,
0x3d9edd9ca057f252,
0x3d7f445f42a168e6,
0xbd24866878075342,
0xbd0015a37275b46d,
0x3ca463bd3d4059be,
0x3c79974848138496,
],
// Centred on Y1_ZEROS[13]
[
0xbaa9a62f9227c851,
0x3fc62d94d97e859c,
0xbf70bf614807033c,
0xbf9d5f857a2a6107,
0x3f46081b0b7fe572,
0x3f57307b03e248f8,
0xbf0132c0aa83d0db,
0xbf0154ed4598d2e4,
0x3ea94f64f476e3f5,
0x3e9e1272585385c0,
0xbe4588c758dd66db,
0xbe31021cdd7a4f3a,
0x3dd7cfa7a39f5d48,
0x3dbb0e00d41ec645,
0xbd6276c9a451cdb1,
0xbd3fe8cf17671ae1,
0x3ce52f1a6f7ae06f,
0x3cbcc2eb893d62ce,
0xbc61f4c0af8bd0fb,
],
// Centred on Y1_ZEROS[14]
[
0x3fc56b97f8091ac5,
0x3af48a947d2475cd,
0xbfb560fcc8c08469,
0x3f53fafa39618883,
0x3f7c49141623372f,
0xbf1f69980694fd17,
0xbf2dc5f848aa9d33,
0x3ed178fc979b779d,
0x3ed0b494a4bafca8,
0xbe73fc3884c243a5,
0xbe673afb9fb48ff7,
0x3e0bd903464b077a,
0x3df5f3bafabcdabe,
0xbd9a1c27612b5f03,
0xbd7e04553366c10e,
0x3d219970f1564c7c,
0x3cff128fbd867c78,
0xbca1b4d2be53f3ad,
0xbc78e13fb654b036,
],
// Centred on Y1_ZEROS[15]
[
0x3aa5951bb8e2b477,
0xbfc4b2a38f1ab9b4,
0x3f6b3878aadeb34d,
0x3f9b750d89a9b35f,
0xbf41f6911725a956,
0xbf55beee6fd51c8a,
0x3efc3625d7a65087,
0x3f005375a588a71f,
0xbea4ee5e4e7cafc0,
0xbe9c7b3d81b5dc31,
0x3e41fce14f464e1e,
0x3e30346643a98dcb,
0xbdd41c86191a49ce,
0xbdb9eed9da04017a,
0x3d5f8cee5e5b42b4,
0x3d3ec41075d33352,
0xbce24e44459e28b0,
0xbcbbe16f7d769c15,
0x3c5f670ad9138f1f,
],
// Centred on Y1_ZEROS[16]
[
0xbfc413644356a52b,
0xbaf22d9ab9060f8f,
0x3fb40bb88c6f2b85,
0xbf5078d13cfc400e,
0xbf7a9191262ab9d5,
0x3f1a005297618f35,
0x3f2c0cbad847a60e,
0xbecd1a72e7c35fa0,
0xbecf9a2654099c0b,
0x3e70c6b06e20d1c0,
0x3e66136d6425acf0,
0xbe0797767778226d,
0xbdf4f77b30ed58c3,
0x3d96572059bf2445,
0x3d7cd12649b82d6f,
0xbd1e6ce514a88f2d,
0xbcfdfabaf5c37514,
0x3c9eea8202989176,
0x3c782260f7596e02,
],
// Centred on Y1_ZEROS[17]
[
0xbaa27e57c2b07d4b,
0x3fc37aaceac987b9,
0xbf66afe4fe0bc0f7,
0xbf99de7a33bc3a97,
0x3f3e024f567ac487,
0x3f548843c426abe0,
0xbef7a8e14711c0f4,
0xbefeeceb341ad81c,
0x3ea1a743e05b383f,
0x3e9b143d39c8eb5f,
0xbe3e8e00011fabc3,
0xbe2ef28e31ff924c,
0x3dd137a1bd136742,
0x3db8e0878264a773,
0xbd5b3dc655a5a5f4,
0xbd3da652e8239897,
0x3cdfe34eace42448,
0x3cbafd0cc7251807,
0xbc5b9b0102453020,
],
// Centred on Y1_ZEROS[18]
[
0x3fc2f4e70d6c7e01,
0x3af022defda0ec45,
0xbfb2ef24d6f7526a,
0x3f4bc33c9dc6ec82,
0x3f7920414ee2acbe,
0xbf15f9173916a219,
0xbf2a94fdbdcec471,
0x3ec8b309990f94db,
0x3ece087ff4517bd5,
0xbe6ca22ab12c685c,
0xbe650d1f28632753,
0x3e044415529c950b,
0x3df411b8a7d9d1bc,
0xbd9354e8c7a8bfd7,
0xbd7bb16e8ee8c711,
0x3d1a881fddcb8d86,
0x3cfcecee70233b69,
0xbc9b2b6cccd3802a,
0xbc77637662fa6ba8,
],
// Centred on Y1_ZEROS[19]
[
0x3aa00f5dbb23e90b,
0xbfc2740819f1caaa,
0x3f6349369dc780bb,
0x3f98868d7401bf2e,
0xbf398cd1bebe1445,
0xbf537eef9aadeee2,
0x3ef43394c95b2d29,
0x3efd6dfcdb026013,
0xbe9e448fbc8a1c95,
0xbe99d764ee07a6b7,
0x3e3a53958c8a71d8,
0x3e2da0e1c86368ce,
0xbdcdd7f914e496e5,
0xbdb7e67ff45daf48,
0x3d57c2e32861f41c,
0x3d3c96e18ab6db69,
0xbcdc0099b11f0478,
0xbcba1dfeafeb6e19,
0x3c586b4c940f74bc,
],
// Centred on Y1_ZEROS[20]
[
0xbfc20198200b699d,
0xbaecc875d54af9d0,
0x3fb1fd242a74e630,
0xbf47cf261dfbf19a,
0xbf77e4820ec1dde4,
0x3f12e1bd281dfcba,
0x3f2950bb06c6fdf9,
0xbec54a38ab6af51a,
0xbecca94f38024fb4,
0x3e68c7e75971843f,
0x3e6423fc7e24ed40,
0xbe019fe1d8a6e0d8,
0xbdf34198c7517f5a,
0x3d90e78c95f157aa,
0x3d7aa74c4042e051,
0xbd1756942b9afcaf,
0xbcfbedc3e7dae4e4,
0x3c980b9567289463,
0x3c76a9e024cc6a52,
],
// Centred on Y1_ZEROS[21]
[
0xba9c33661811b8ff,
0x3fc192f2627a74e3,
0xbf60a846a83fecf2,
0xbf975eceaabf7f86,
0x3f3617c581be35b1,
0x3f529934b7a84483,
0xbef18123e8751889,
0xbefc1f05a2d85150,
0x3e9a4e0bc09262e9,
0x3e98be81ad44b8a4,
0xbe36f73795dfb5c7,
0xbe2c70ab155167d1,
0x3dca26218cc79400,
0x3db7011269271056,
0xbd54ec138a5f86cd,
0xbd3b98bf6fa2fe47,
0x3cd8c95d73f0c84c,
0x3cb948b2dd021429,
0xbc55bd7c63fa9765,
],
// Centred on Y1_ZEROS[22]
[
0x3fc12f9870d68e18,
0x3ae9cd1ac1fa64f9,
0xbfb12c11811945f9,
0x3f44b638f21f0f76,
0x3f76d2a897d58353,
0xbf10732e5458ba20,
0xbf2835929300df3e,
0x3ec297283816a814,
0x3ecb73adedf11a1f,
0xbe65b455b903b389,
0xbe6353f0797a3bf1,
0x3dfefc9ac10b87d9,
0x3df2853545ffa79d,
0xbd8dd8945079a88f,
0xbd79b28860cd63f8,
0x3d14b29ba2797832,
0x3cfaff02f362ca7e,
0xbc956d8436ee55ed,
0xbc75f881bb0137f5,
],
// Centred on Y1_ZEROS[23]
[
0x3a9900b85a085cfa,
0xbfc0cf3ee98f769b,
0x3f5d26e7af251f79,
0x3f965d05948a946a,
0xbf335959b8482e40,
0xbf51cff175d05c2a,
0x3eeeb59416879104,
0x3efaf7544eeac751,
0xbe9720522bb1fa69,
0xbe97c41261703475,
0x3e343fa0ea5ba663,
0x3e2b5e23abb21a5f,
0xbdc722397b59adb6,
0xbdb62f213532a0b8,
0x3d5294a89e377c98,
0x3d3aac95aead6ada,
0xbcd61abb584f3fc2,
0xbcb87f3345758fd5,
0x3c537a3b70fc94b7,
],
// Centred on Y1_ZEROS[24]
[
0xbfc077eede4a0d89,
0xbae73fb2e67b1968,
0x3fb0751548b2924d,
0xbf423b5d46a73864,
0xbf75e2467c8fb832,
0x3f0cfe5c189d6e4d,
0x3f273bbd8c7aef2c,
0xbec06974d3d04263,
0xbeca6081d36e6a0b,
0x3e6334a83cf5d21d,
0x3e6299571cb4bb1e,
0xbdfb7f5bc046450f,
0xbdf1da63b49ed896,
0x3d8a92885fb339ae,
0x3d78d1d6e93bb23d,
0xbd127ea4434f9fb5,
0xbcfa20dab6b920e9,
0x3c93389d892643a0,
0x3c7550b88147fd02,
],
// Centred on Y1_ZEROS[25]
[
0xba96582ab366c758,
0x3fc0230ba90f2871,
0xbf59ca16f0c9734e,
0xbf9579c1bdbcfc99,
0x3f3120ecfac5c017,
0x3f511dd26bbe2946,
0xbeeb37e7c9a57147,
0xbef9f01e7c19098c,
0x3e94887fe7a88a4d,
0x3e96e3723883fe87,
0xbe3204b644d485a1,
0xbe2a659b13b69c6b,
0x3dc4a40c8498625a,
0x3db56f0212f628e4,
0xbd50a0fef4ac5a44,
0xbd39d1d92cf50973,
0x3cd3d93917ae0666,
0x3cb7c222c421cbf8,
0xbc518e27cddeecfd,
],
// Centred on Y1_ZEROS[26]
[
0x3fbfab0b166d23d8,
0x3ae50cd9856106aa,
0xbfafa65c1ce7ebd6,
0x3f4035bf503ffc1f,
0x3f750d1b04713c41,
0xbf09cd14a92842a1,
0xbf265d504af5d8fe,
0x3ebd3feeb33d9cae,
0x3ec96a257062f750,
0xbe61254f302b04d0,
0xbe61f11585e02bfc,
0x3df89a7674827723,
0x3df13f0ba458182e,
0xbd87d67ae3559fb7,
0xbd78038124810666,
0x3d10a3cffab7b16e,
0x3cf952a4679b4020,
0xbc9158ffabf6b26f,
0xbc74b2e8c7ca451f,
],
// Centred on Y1_ZEROS[27]
[
0x3a9422b204fbf27f,
0xbfbf13fb0c0e6fcd,
0x3f5706ed3d935d00,
0x3f94af74cbd77bef,
0xbf2e9a9e66e5a792,
0xbf507ec9ed824fcb,
0x3ee856d4518ab29c,
0x3ef9040de830648a,
0xbe9262f69c56c4a2,
0xbe9618c94a54555e,
0x3e3029d2c8bd8b0e,
0x3e2983bca06d479e,
0xbdc28e29fd7e309a,
0xbdb4beea8ebaabe8,
0x3d4df87d00b82fa1,
0x3d39076f370434b4,
0xbcd1ef67bd03c16c,
0xbcb7115a994eb5d5,
0x3c4fd28ad1effa7c,
],
// Centred on Y1_ZEROS[28]
[
0xbfbe891b327da16d,
0xbae325aba995f36e,
0x3fae8533ce07bdb8,
0xbf3d1253218e31b0,
0xbf744e6826476498,
0x3f07271a9b5e3cb4,
0x3f2595b697c8ec04,
0xbeba46b03ecb3892,
0xbec88c173e076203,
0x3e5ed9b1754f626a,
0x3e615891ef312cd4,
0xbdf62ca3527c988f,
0xbdf0b14767922479,
0x3d85879b58ff9d05,
0x3d7745bb7346aea9,
0xbd0e20fb122a7c2a,
0xbcf89354d05f7b8c,
0x3c8f7da941b1f5c8,
0x3c741ef462ba56a3,
],
// Centred on Y1_ZEROS[29]
[
0xba923c4506ec812e,
0x3fbe018dac1c17e3,
0xbf54b994dd05c1fb,
0xbf93f9e0db07e7ef,
0x3f2b8e55b75b13ab,
0x3f4fdf68a78bb3d2,
0xbee5ee9d17106a07,
0xbef82ee6dfdfedd8,
0x3e90962d7f6d601e,
0x3e9560edce7d4b08,
0xbe2d34381cff4d39,
0xbe28b5a0e715b17d,
0x3dc0cab951aa6b0b,
0x3db41d218be1cc79,
0xbd4b2d2a51de7089,
0xbd384c06737cf8d9,
0x3cd04c96e113bd0a,
0x3cb66c4b3dc74284,
0xbc4cfe4908cedcb8,
],
// Centred on Y1_ZEROS[30]
[
0x3fbd84391bb2748d,
0x3ae17f232c3596e7,
0xbfad80edb3c4ea05,
0x3f3a431f5421f7ef,
0x3f73a282fe7b63a8,
0xbf04ed653e607bdc,
0xbf24e15832bda3c4,
0x3eb7c5832dd13719,
0x3ec7c2b80da8df7e,
0xbe5bf160ccd30135,
0xbe60cda2141b0afb,
0x3df41d31d25837e8,
0x3df02f6ca822716e,
0xbd83901f0d351db4,
0xbd7696cdd91c27b7,
0x3d0b6e1487e46414,
0x3cf7e1bb61dba0fc,
0xbc8cbb194749390a,
0xbc7394699d841c82,
],
// Centred on Y1_ZEROS[31]
[
0x3a909e71c2163ed3,
0xbfbd09b21e36c0bd,
0x3f52c84acfb586b4,
0x3f9355b904fbf7ee,
0xbf28fb570465af0d,
0xbf4edc3292ba6cfd,
0x3ee3e552ee8c2575,
0x3ef76d44f6a83510,
0xbe8e1ee2dc4a3910,
0xbe94b944bbd4ab57,
0x3e2a8c3a4ce04c1b,
0x3e27f8c9ef133245,
0xbdbe92957079603b,
0xbdb388138abed52f,
0x3d48c7ca6d80cb32,
0x3d379e45475751e8,
0xbccdc773f7abde68,
0xbcb5d2331c76adb9,
0x3c4a8aa5ca753696,
],
// Centred on Y1_ZEROS[32]
[
0xbfbc97d79918527d,
0xbae4d780a21f2057,
0x3fac95081ab2b511,
0xbf37e0b14f7d7c3f,
0xbf730688f6836a76,
0x3f030941f6e78e36,
0x3f243d5898657a6f,
0xbeb5a39a94f2ad1c,
0xbec70b18406146b4,
0x3e597607f952cd69,
0x3e604e788f4f18d9,
0xbdf2598918fbb9ee,
0xbdef701541acccd2,
0x3d81df5bcb47b899,
0x3d75f524c73bc009,
0xbd0919aede2dcbf2,
0xbcf73c65c9ce91ed,
0x3c8a553df0f1b569,
0x3c72fe9f3f0dcae2,
],
];

78
vendor/pxfm/src/bits.rs vendored Normal file
View File

@@ -0,0 +1,78 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/// Returns the unbiased exponent stored in the exponent field of `x`.
///
/// The field is read as-is, so zero and subnormals give `-127`,
/// and infinities and NaN give `128`.
#[inline]
pub(crate) const fn get_exponent_f32(x: f32) -> i32 {
    const FRACTION_BITS: u32 = 23;
    const BIAS: i32 = 127;
    // Shift the sign bit out on the left, then the fraction bits out on the right.
    let biased = (x.to_bits() << 1) >> (FRACTION_BITS + 1);
    biased as i32 - BIAS
}
/// Returns the 23 stored fraction bits of `x` (sign and exponent bits cleared).
#[inline]
pub(crate) const fn mantissa_f32(x: f32) -> u32 {
    const FRACTION_MASK: u32 = 0x007f_ffff;
    x.to_bits() & FRACTION_MASK
}
/// Returns the 52 stored fraction bits of `x` (sign and exponent bits cleared).
#[inline]
pub(crate) const fn mantissa_f64(x: f64) -> u64 {
    const FRACTION_MASK: u64 = 0x000f_ffff_ffff_ffff;
    x.to_bits() & FRACTION_MASK
}
/// Returns the unbiased exponent stored in the exponent field of `x`.
///
/// The field is read as-is, so zero and subnormals give `-1023`,
/// and infinities and NaN give `1024`.
#[inline]
pub(crate) const fn get_exponent_f64(x: f64) -> i64 {
    biased_exponent_f64(x) - 1023
}

/// Returns the raw (biased) 11-bit exponent field of `x`, in `0..=2047`.
#[inline]
pub(crate) const fn biased_exponent_f64(x: f64) -> i64 {
    ((x.to_bits() & EXP_MASK) >> 52) as i64
}

/// Builds a mask whose `len` lowest bits are set; `len >= 64` gives all ones.
#[inline]
pub(crate) const fn mask_trailing_ones(len: u64) -> u64 {
    match len {
        0 => 0,
        1..=63 => u64::MAX >> (64 - len),
        _ => u64::MAX,
    }
}

/// Mask selecting the 11 exponent bits of an `f64` bit pattern.
pub(crate) const EXP_MASK: u64 = mask_trailing_ones(11) << 52;

/// Replaces the exponent field of the `f64` bit pattern `x` with `new_exp`.
///
/// `new_exp` is the biased exponent; bits that do not fit the 11-bit field are dropped.
/// Sign and fraction bits of `x` are preserved.
#[inline]
pub(crate) fn set_exponent_f64(x: u64, new_exp: u64) -> u64 {
    let exponent_field = new_exp.wrapping_shl(52) & EXP_MASK;
    let sign_and_fraction = x & !EXP_MASK;
    sign_and_fraction | exponent_field
}
/// Returns the smallest-magnitude normal `f32`, negative when `sign` is `true`.
#[inline]
pub(crate) const fn min_normal_f32(sign: bool) -> f32 {
    // Exponent field equal to 1 with an all-zero fraction.
    const MIN_NORMAL_BITS: u32 = 0x0080_0000;
    f32::from_bits(((sign as u32) << 31) | MIN_NORMAL_BITS)
}

136
vendor/pxfm/src/ceil.rs vendored Normal file
View File

@@ -0,0 +1,136 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bits::{get_exponent_f32, get_exponent_f64};
/// Rounds `x` up to the smallest integer value that is not less than `x`.
///
/// NaN, ±infinity and ±0.0 are returned unchanged. Any other value with a
/// magnitude below one — subnormals included — gives `1.0` when positive and
/// `-0.0` when negative.
#[inline]
pub const fn ceilf(x: f32) -> f32 {
    // Only NaN, ±infinity and ±0.0 pass through untouched. `is_normal()` must not
    // be used for this test: it is also false for subnormals, which would then be
    // returned as-is instead of being rounded up to 1.0 (or -0.0).
    if !x.is_finite() || x == 0.0 {
        return x;
    }
    let is_neg = x.is_sign_negative();
    let exponent = get_exponent_f32(x);
    // With an unbiased exponent of at least the fraction length there are no
    // fractional bits left, so x is already an integer.
    const FRACTION_LENGTH: u32 = 23;
    if exponent >= FRACTION_LENGTH as i32 {
        return x;
    }
    // 0 < |x| < 1, which also covers subnormals (their exponent field is zero).
    if exponent <= -1 {
        return if is_neg { -0.0 } else { 1.0 };
    }
    // Clear the fractional bits, which truncates toward zero.
    let trim_size = (FRACTION_LENGTH as i32).wrapping_sub(exponent);
    let x_u = x.to_bits();
    let trunc_u = x_u
        .wrapping_shr(trim_size as u32)
        .wrapping_shl(trim_size as u32);
    // If x is already an integer, return it.
    if trunc_u == x_u {
        return x;
    }
    let trunc_value = f32::from_bits(trunc_u);
    // If x is negative, the ceil operation is equivalent to the trunc operation.
    if is_neg {
        return trunc_value;
    }
    // Positive non-integer: step up to the next integer.
    trunc_value + 1.0
}
/// Rounds `x` up to the smallest integer value that is not less than `x`.
///
/// NaN, ±infinity and ±0.0 are returned unchanged. Any other value with a
/// magnitude below one — subnormals included — gives `1.0` when positive and
/// `-0.0` when negative.
#[inline]
pub const fn ceil(x: f64) -> f64 {
    // Only NaN, ±infinity and ±0.0 pass through untouched. `is_normal()` must not
    // be used for this test: it is also false for subnormals, which would then be
    // returned as-is instead of being rounded up to 1.0 (or -0.0).
    if !x.is_finite() || x == 0.0 {
        return x;
    }
    let is_neg = x.is_sign_negative();
    let exponent = get_exponent_f64(x);
    // With an unbiased exponent of at least the fraction length there are no
    // fractional bits left, so x is already an integer.
    const FRACTION_LENGTH: u64 = 52;
    if exponent >= FRACTION_LENGTH as i64 {
        return x;
    }
    // 0 < |x| < 1, which also covers subnormals (their exponent field is zero).
    if exponent <= -1 {
        return if is_neg { -0.0 } else { 1.0 };
    }
    // Clear the fractional bits, which truncates toward zero.
    let trim_size = (FRACTION_LENGTH as i64).wrapping_sub(exponent);
    let x_u = x.to_bits();
    let trunc_u = x_u
        .wrapping_shr(trim_size as u32)
        .wrapping_shl(trim_size as u32);
    // If x is already an integer, return it.
    if trunc_u == x_u {
        return x;
    }
    let trunc_value = f64::from_bits(trunc_u);
    // If x is negative, the ceil operation is equivalent to the trunc operation.
    if is_neg {
        return trunc_value;
    }
    // Positive non-integer: step up to the next integer.
    trunc_value + 1.0
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `ceilf` on zero, exact integers and fractional values of both signs.
    #[test]
    fn test_ceilf() {
        let cases: [(f32, f32); 5] = [
            (0.0, 0.0),
            (10.0, 10.0),
            (10.1, 11.0),
            (-9.0, -9.0),
            (-9.5, -9.0),
        ];
        for (input, expected) in cases {
            assert_eq!(ceilf(input), expected);
        }
    }

    /// `ceil` on zero, exact integers and fractional values of both signs.
    #[test]
    fn test_ceil() {
        let cases: [(f64, f64); 5] = [
            (0.0, 0.0),
            (10.0, 10.0),
            (10.1, 11.0),
            (-9.0, -9.0),
            (-9.5, -9.0),
        ];
        for (input, expected) in cases {
            assert_eq!(ceil(input), expected);
        }
    }
}

405
vendor/pxfm/src/common.rs vendored Normal file
View File

@@ -0,0 +1,405 @@
/*
* // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bits::EXP_MASK;
use num_traits::MulAdd;
use std::ops::{Add, Mul};
#[inline]
pub(crate) fn is_integerf(x: f32) -> bool {
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "sse4.1"
),
target_arch = "aarch64"
))]
{
x.round_ties_even() == x
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "sse4.1"
),
target_arch = "aarch64"
)))]
{
let x_u = x.to_bits();
let x_e = (x_u & EXP_MASK_F32) >> 23;
let lsb = (x_u | EXP_MASK_F32).trailing_zeros();
const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
const UNIT_EXPONENT: u32 = E_BIAS + 23;
x_e + lsb >= UNIT_EXPONENT
}
}
/// Returns `true` when `x` is an odd integer.
///
/// The test is done on the bit pattern on every target. A cast-based shortcut
/// such as `(x as i32 & 1) != 0` is not equivalent: the cast truncates
/// fractions (1.5 becomes 1) and saturates (2^31 and +infinity become
/// `i32::MAX`), so non-integers and large even values would be reported as odd.
#[inline]
pub(crate) fn is_odd_integerf(x: f32) -> bool {
    const FRACTION_BITS: u32 = 23;
    const EXPONENT_MASK: u32 = 0xFF << FRACTION_BITS;
    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
    const UNIT_EXPONENT: u32 = E_BIAS + FRACTION_BITS;
    let x_u = x.to_bits();
    let x_e = (x_u & EXPONENT_MASK) >> FRACTION_BITS;
    // Position of the lowest set fraction bit, capped at 23 by OR-ing in the exponent bits.
    let lsb = (x_u | EXPONENT_MASK).trailing_zeros();
    // x is odd exactly when its lowest set bit has weight 2^0.
    x_e + lsb == UNIT_EXPONENT
}
#[inline]
pub(crate) fn is_integer(n: f64) -> bool {
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "sse4.1"
),
target_arch = "aarch64"
))]
{
n == n.round_ties_even()
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "sse4.1"
),
target_arch = "aarch64"
)))]
{
let x_u = n.to_bits();
let x_e = (x_u & EXP_MASK) >> 52;
let lsb = (x_u | EXP_MASK).trailing_zeros();
const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
const UNIT_EXPONENT: u64 = E_BIAS + 52;
x_e + lsb as u64 >= UNIT_EXPONENT
}
}
/// Returns `true` when `x` is an odd integer, decided purely from its bit pattern.
#[inline]
pub(crate) fn is_odd_integer(x: f64) -> bool {
    const FRACTION_BITS: u64 = 52;
    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
    // Biased exponent at which the lowest fraction bit has weight 2^0.
    const UNIT_EXPONENT: u64 = E_BIAS + FRACTION_BITS;

    let bits = x.to_bits();
    let biased_exponent = (bits & EXP_MASK) >> FRACTION_BITS;
    // Lowest set fraction bit; OR-ing in the exponent bits caps the count at 52.
    let lowest_set_bit = (bits | EXP_MASK).trailing_zeros() as u64;
    // x is odd exactly when its lowest set bit has weight 2^0.
    biased_exponent + lowest_set_bit == UNIT_EXPONENT
}
/// Generic multiply-accumulate: returns `acc + a * b`.
///
/// On x86/x86_64 with the `fma` feature, or aarch64 with `neon`, the work is
/// delegated to `MulAdd::mul_add`; on every other target it is a plain
/// multiplication followed by an addition.
#[inline(always)]
pub(crate) fn mlaf<T>(acc: T, a: T, b: T) -> T
where
    T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>,
{
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        acc + a * b
    }
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        a.mul_add(b, acc)
    }
}
/// Rounds `x` to the nearest whole number, halfway cases away from zero.
///
/// The value is offset by half a unit towards its own sign and then truncated
/// through `i32`, so the result is only meaningful while it fits in an `i32`
/// (the cast saturates beyond that and maps NaN to 0).
#[inline]
pub(crate) const fn rintfk(x: f32) -> f32 {
    let nudged = if x < 0. { x - 0.5 } else { x + 0.5 };
    let truncated = nudged as i32;
    truncated as f32
}
/// Unfused multiply-add for `f32`: returns `c + a * b`.
///
/// Usable in `const` contexts; the product is rounded before the addition.
#[inline(always)]
pub(crate) const fn fmlaf(a: f32, b: f32, c: f32) -> f32 {
    let product = a * b;
    c + product
}
/// Optional FMA for `f32`: computes `a * b + c`.
///
/// Uses the fused `mul_add` on x86/x86_64 with the `fma` feature or aarch64
/// with `neon`; on any other target it is a separate multiply and add.
#[inline(always)]
pub(crate) fn f_fmlaf(a: f32, b: f32, c: f32) -> f32 {
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let product = a * b;
        product + c
    }
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        a.mul_add(b, c)
    }
}
/// Optional FMA for `f64`: computes `a * b + c`.
///
/// Uses the fused `mul_add` on x86/x86_64 with the `fma` feature or aarch64
/// with `neon`; on any other target it is a separate multiply and add.
#[inline(always)]
pub(crate) fn f_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let product = a * b;
        product + c
    }
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        a.mul_add(b, c)
    }
}
/// Unfused multiply-add for `f64`: returns `c + a * b`.
///
/// Usable in `const` contexts; the product is rounded before the addition.
#[inline(always)]
pub(crate) const fn fmla(a: f64, b: f64, c: f64) -> f64 {
    let product = a * b;
    c + product
}
/// Executes mandatory FMA
/// if not available will be simulated through Dekker and Veltkamp
#[inline(always)]
pub(crate) fn dd_fmla(a: f64, b: f64, c: f64) -> f64 {
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
f_fmla(a, b, c)
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
use crate::double_double::DoubleDouble;
DoubleDouble::dd_f64_mul_add(a, b, c)
}
}
/// Mandatory FMA for `f64`: computes `a * b + c`.
///
/// With hardware support (x86/x86_64 + `fma`, or aarch64 + `neon`) this is
/// simply [`f_fmla`]. Without it the three operands are widened to
/// `DyadicFloat128`, combined there, and converted back with `fast_as_f64`.
#[inline(always)]
pub(crate) fn dyad_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        use crate::dyadic_float::DyadicFloat128;
        let product = DyadicFloat128::new_from_f64(a) * DyadicFloat128::new_from_f64(b);
        let sum = product + DyadicFloat128::new_from_f64(c);
        sum.fast_as_f64()
    }
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        f_fmla(a, b, c)
    }
}
/// Mandatory FMA for `f32`: computes `a * b + c`.
///
/// With hardware support (x86/x86_64 + `fma`, or aarch64 + `neon`) this is
/// simply [`f_fmlaf`]. Without it the arithmetic is carried out in `f64`,
/// where the product of two `f32` values is exact, and narrowed at the end.
#[inline(always)]
#[allow(unused)]
pub(crate) fn dd_fmlaf(a: f32, b: f32, c: f32) -> f32 {
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    )))]
    {
        let wide = f64::from(a) * f64::from(b) + f64::from(c);
        wide as f32
    }
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        all(target_arch = "aarch64", target_feature = "neon")
    ))]
    {
        f_fmlaf(a, b, c)
    }
}
/// Multiply-accumulate with the accumulator last: returns `a * b + c`.
///
/// Thin adapter over [`mlaf`], which takes the accumulator as its first
/// argument instead.
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn c_mlaf<T>(a: T, b: T, c: T) -> T
where
    T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>,
{
    let accumulator = c;
    mlaf(accumulator, a, b)
}
/// Returns `x` with its sign bit replaced by the sign bit of `y`.
///
/// Pure bit manipulation, so it also works for zeros, infinities and NaNs.
#[inline]
pub const fn copysignfk(x: f32, y: f32) -> f32 {
    const SIGN_BIT: u32 = 1 << 31;
    let magnitude = x.to_bits() & !SIGN_BIT;
    let sign = y.to_bits() & SIGN_BIT;
    // The two parts occupy disjoint bits, so OR and XOR combine them identically.
    f32::from_bits(magnitude | sign)
}
// #[inline]
// // Finds n in ln(𝑥)=ln(𝑎)+𝑛ln(2)
// pub(crate) const fn ilogb2kf(d: f32) -> i32 {
// (((d.to_bits() as i32) >> 23) & 0xff) - 0x7f
// }
//
// #[inline]
// // Finds a in x=a+𝑛ln(2)
// pub(crate) const fn ldexp3kf(d: f32, n: i32) -> f32 {
// f32::from_bits(((d.to_bits() as i32) + (n << 23)) as u32)
// }
/// Builds the `f32` value 2^q directly from its bit pattern.
///
/// The biased exponent `q + 127` is placed in the exponent field with an empty
/// mantissa; the result is only a power of two while `q` stays within the
/// normal exponent range.
#[inline]
pub(crate) const fn pow2if(q: i32) -> f32 {
    let biased_exponent = q.wrapping_add(0x7f) as u32;
    f32::from_bits(biased_exponent << 23)
}
/// Rounds `x` to the nearest whole number, halfway cases away from zero.
///
/// The value is offset by half a unit towards its own sign and then truncated
/// through `i64`, so the result is only meaningful while it fits in an `i64`
/// (the cast saturates beyond that and maps NaN to 0).
#[inline]
pub(crate) const fn rintk(x: f64) -> f64 {
    let nudged = if x < 0. { x - 0.5 } else { x + 0.5 };
    let truncated = nudged as i64;
    truncated as f64
}
/// Builds the `f64` value 2^q directly from its bit pattern.
///
/// The biased exponent `q + 1023` is placed in the exponent field with an
/// empty mantissa; the result is only a power of two while `q` stays within
/// the normal exponent range.
#[inline(always)]
pub(crate) const fn pow2i(q: i32) -> f64 {
    let biased_exponent = q.wrapping_add(0x3ff) as u64;
    f64::from_bits(biased_exponent << 52)
}
// #[inline]
// pub(crate) const fn ilogb2k(d: f64) -> i32 {
// (((d.to_bits() >> 52) & 0x7ff) as i32) - 0x3ff
// }
//
// #[inline]
// pub(crate) const fn ldexp3k(d: f64, e: i32) -> f64 {
// f64::from_bits(((d.to_bits() as i64) + ((e as i64) << 52)) as u64)
// }
/// Returns `x` with its sign bit replaced by the sign bit of `y`.
///
/// Pure bit manipulation, so it also works for zeros, infinities and NaNs.
#[inline]
pub const fn copysignk(x: f64, y: f64) -> f64 {
    const SIGN_BIT: u64 = 1 << 63;
    let magnitude = x.to_bits() & !SIGN_BIT;
    let sign = y.to_bits() & SIGN_BIT;
    // The two parts occupy disjoint bits, so OR and XOR combine them identically.
    f64::from_bits(magnitude | sign)
}
/// Returns the smallest positive normal `f64` (biased exponent 1, empty
/// mantissa, i.e. bit pattern `1 << 52`).
#[inline]
pub(crate) const fn min_normal_f64() -> f64 {
    // `f64::MIN_POSITIVE` is exactly the value with bit pattern 1 << 52.
    f64::MIN_POSITIVE
}
/// Returns a `u32` whose lowest `len` bits are set.
///
/// Any `len` of 32 or more yields `u32::MAX`.
#[inline]
const fn mask_trailing_ones_u32(len: u32) -> u32 {
    // `checked_shl` is `None` exactly when the shift amount is >= 32.
    match 1u32.checked_shl(len) {
        Some(top_bit) => top_bit.wrapping_sub(1),
        None => u32::MAX,
    }
}
/// Mask selecting the 8 exponent bits of an `f32` bit pattern (bits 23..=30).
pub(crate) const EXP_MASK_F32: u32 = mask_trailing_ones_u32(8) << 23;
/// Replaces the exponent field of the `f32` bit pattern `x` with `new_exp`.
///
/// `new_exp` is the raw biased exponent; bits that do not fit into the 8-bit
/// field are discarded. Sign and mantissa bits of `x` are kept as they are.
#[inline]
pub(crate) fn set_exponent_f32(x: u32, new_exp: u32) -> u32 {
    let exponent_field = new_exp.wrapping_shl(23) & EXP_MASK_F32;
    let sign_and_mantissa = x & !EXP_MASK_F32;
    sign_and_mantissa | exponent_field
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke-checks the integer / odd-integer classifiers for both widths on
    /// an odd integer, an even integer and a non-integer.
    #[test]
    fn test_is_integer() {
        // f64 classifiers
        assert!(is_integer(5.));
        assert!(is_integer(6.));
        assert!(!is_integer(6.01));
        assert!(is_odd_integer(5.));
        assert!(!is_odd_integer(6.));
        assert!(!is_odd_integer(6.01));
        // f32 classifiers
        assert!(is_integerf(5.));
        assert!(is_integerf(6.));
        assert!(!is_integerf(6.01));
        assert!(is_odd_integerf(5.));
        assert!(!is_odd_integerf(6.));
        assert!(!is_odd_integerf(6.01));
    }
}

485
vendor/pxfm/src/compound/compound_d.rs vendored Normal file
View File

@@ -0,0 +1,485 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{f_fmla, is_integer, is_odd_integer};
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::logs::{log1p_f64_dyadic, log1p_fast_dd};
use crate::pow_exec::{exp_dyadic, pow_exp_dd};
use crate::triple_double::TripleDouble;
/// Computes (1+x)^y
///
/// Outline of the implementation below:
/// 1. NaN inputs return NaN immediately.
/// 2. A block of special cases resolves `y == 0`, `x == ±0`, `x == +Inf`,
///    `x < -1`, `x == -1`, `|y|` equal to 1/2, 1 or 2, and infinite `y`.
/// 3. Integer `y` with `|y| <= 102` and moderate `|x|` is evaluated by
///    exponentiation by squaring in double-double arithmetic.
/// 4. Everything else is `exp(y * log1p(x))` in double-double arithmetic,
///    checked against an error bound, with `compound_accurate` as fallback.
pub fn f_compound(x: f64, y: f64) -> f64 {
/*
Rules from IEEE 754-2019 for compound (x, n) with n integer:
(a) compound (x, 0) is 1 for x >= -1 or quiet NaN
(b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
(c) compound (-1, n) is +0 for n > 0
(d) compound (+/-0, n) is 1
(e) compound (+Inf, n) is +Inf for n > 0
(f) compound (+Inf, n) is +0 for n < 0
(g) compound (x, n) is qNaN and signals the invalid exception for x < -1
(h) compound (qNaN, n) is qNaN for n <> 0.
*/
let x_sign = x.is_sign_negative();
let y_sign = y.is_sign_negative();
// Bit patterns with the sign bit cleared.
let x_abs = x.to_bits() & 0x7fff_ffff_ffff_ffff;
let y_abs = y.to_bits() & 0x7fff_ffff_ffff_ffff;
const MANTISSA_MASK: u64 = (1u64 << 52) - 1;
// Zero exactly when |y| is a power of two, zero or infinity.
let y_mant = y.to_bits() & MANTISSA_MASK;
let x_u = x.to_bits();
let x_a = x_abs;
let y_a = y_abs;
// Any NaN input (quiet or signaling) yields NaN. Because of this early
// return, every later NaN check in this function is unreachable.
if x.is_nan() || y.is_nan() {
return f64::NAN;
}
// Sign multiplier handed to the exp stage: 1.0, or -1.0 when set below.
let mut s = 1.0;
// Same values as x_abs / y_abs.
let ax = x.to_bits() & 0x7fff_ffff_ffff_ffff;
let ay = y.to_bits() & 0x7fff_ffff_ffff_ffff;
// The double precision number that is closest to 1 is (1 - 2^-53), which has
// log2(1 - 2^-53) ~ -1.715...p-53.
// So if |y| > |1075 / log2(1 - 2^-53)|, and x is finite:
// |y * log2(x)| = 0 or > 1075.
// Hence, x^y will either overflow or underflow if x is not zero.
// NOTE(review): f64::MIN.to_bits() is 0xFFEF_FFFF_FFFF_FFFF, so the last two
// clauses together hold for every bit pattern of x and this block appears to
// be entered unconditionally — confirm whether a tighter bound was intended.
if y_mant == 0
|| y_a > 0x43d7_4910_d52d_3052
|| x_u == 1f64.to_bits()
|| x_u >= f64::INFINITY.to_bits()
|| x_u < f64::MIN.to_bits()
{
// Exceptional exponents.
// (a) compound(x, 0) is 1
if y == 0.0 {
return 1.0;
}
// (h) compound(qNaN, n) is qNaN for n ≠ 0
// (unreachable: NaN inputs already returned above)
if x.is_nan() {
if y != 0. {
return x;
} // propagate qNaN
return 1.0;
}
// (d) compound(±0, n) is 1
if x == 0.0 {
return 1.0;
}
// (e, f) compound(+Inf, n)
if x.is_infinite() && x > 0.0 {
return if y > 0. { x } else { 0.0 };
}
// (g) compound(x, n) is qNaN and signals invalid for x < -1
// (this also covers x == -Inf)
if x < -1.0 {
// Optional: raise invalid explicitly
return f64::NAN;
}
// (b, c) compound(-1, n)
if x == -1.0 {
return if y < 0. { f64::INFINITY } else { 0.0 };
}
// From here on x is finite, non-zero and greater than -1.
// Closed forms for |y| = 1/2, 1 and 2.
match y_a {
0x3fe0_0000_0000_0000 => {
// |y| = 1/2: sqrt(1 + x), or its reciprocal for y < 0.
// TODO: speed up x^(-1/2) with rsqrt(x) when available.
// (unreachable: x == 0 already returned above)
if x == 0.0 {
return 1.0;
}
let z = DoubleDouble::from_full_exact_add(x, 1.0).sqrt();
return if y_sign {
z.recip().to_f64()
} else {
z.to_f64()
};
}
0x3ff0_0000_0000_0000 => {
// |y| = 1: 1 + x, or 1 / (1 + x) for y < 0.
return if y_sign {
// Same field values as the constants named ONE/ONES elsewhere
// in this crate — presumably the dyadic encoding of 1.0.
const ONES: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
};
let z = DyadicFloat128::new_from_f64(x) + ONES;
z.reciprocal().fast_as_f64()
} else {
DoubleDouble::from_full_exact_add(x, 1.0).to_f64()
};
}
0x4000_0000_0000_0000 => {
// |y| = 2: (1 + x)^2, or its reciprocal for y < 0.
let z0 = DoubleDouble::from_full_exact_add(x, 1.0);
let z = DoubleDouble::quick_mult(z0, z0);
return if y_sign {
z.recip().to_f64()
} else {
// NOTE(review): copies the sign of x onto (1+x)^2, so the result
// is negative for -1 < x < 0 — confirm this is intended.
f64::copysign(z.to_f64(), x)
};
}
_ => {}
}
// |y| > |1075 / log2(1 - 2^-53)|.
if y_a >= 0x7ff0_0000_0000_0000 {
// y is inf or nan
// (the NaN sub-branches are unreachable: NaN inputs already returned)
if y_mant != 0 {
// y is NaN
// pow(1, NaN) = 1
// pow(x, NaN) = NaN
return if x_u == 1f64.to_bits() { 1.0 } else { y };
}
// Now y is +-Inf
if f64::from_bits(x_abs).is_nan() {
// pow(NaN, +-Inf) = NaN
return x;
}
// (unreachable: x == 0 already returned above)
if x == 0.0 && y_sign {
// pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
return f64::INFINITY;
}
// pow (|x| < 1, -inf) = +inf
// pow (|x| < 1, +inf) = 0.0
// pow (|x| > 1, -inf) = 0.0
// pow (|x| > 1, +inf) = +inf
// NOTE(review): this compares |x| (not |1 + x|) against 1, following
// the pow rules quoted above — confirm this is the intended rule for
// compound, e.g. for -1 < x < 0 with y = +Inf.
return if (x_a < 1f64.to_bits()) == y_sign {
f64::INFINITY
} else {
0.0
};
}
// y is finite and non-zero.
// The next three checks (x == 0, |x| == Inf, x NaN) cannot fire here: zero,
// +Inf, x < -1 (which includes -Inf) and NaN have all returned already.
if x == 0.0 {
let out_is_neg = x_sign && is_odd_integer(y);
if y_sign {
// pow(0, negative number) = inf
return if out_is_neg {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
// pow(0, positive number) = 0
return if out_is_neg { -0.0 } else { 0.0 };
}
if x_a == f64::INFINITY.to_bits() {
let out_is_neg = x_sign && is_odd_integer(y);
if y_sign {
return if out_is_neg { -0.0 } else { 0.0 };
}
return if out_is_neg {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
if x_a > f64::INFINITY.to_bits() {
// x is NaN.
// pow (aNaN, 0) is already taken care above.
return x;
}
// x is finite and negative (here: -1 < x < 0).
// NOTE(review): this applies pow-style rules to x itself: a non-integer y
// returns NaN and an odd integer y sets s = -1, although 1 + x is positive
// on this range. The in-file test
// `f_compound(-0.4324324, 126.4324324).is_nan()` pins the NaN result —
// confirm the sign handling is intended for compound.
if x_sign {
if is_integer(y) {
if is_odd_integer(y) {
// sign = -1.0;
static CS: [f64; 2] = [1.0, -1.0];
// set sign to 1 for y even, to -1 for y odd
// (|y| >= 2^53 is treated as even)
let y_parity = if (y.abs()) >= f64::from_bits(0x4340000000000000) {
0usize
} else {
(y as i64 & 0x1) as usize
};
s = CS[y_parity];
}
} else {
// pow( negative, non-integer ) = NaN
return f64::NAN;
}
}
// y is finite and non-zero.
if x_u == 1f64.to_bits() {
// NOTE(review): for x == 1 the mathematical value is (1 + 1)^y = 2^y, yet
// this returns 2.0 for every remaining y (|y| = 1/2, 1, 2 and infinite y
// were handled above). The in-file test
// `f_compound(1., 1.0000000000000284) == 2.` pins this value — verify
// against the intended semantics before changing.
return 2.0;
}
// The next three checks repeat the ones above and are likewise unreachable.
if x == 0.0 {
let out_is_neg = x_sign && is_odd_integer(y);
if y_sign {
// pow(0, negative number) = inf
return if out_is_neg {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
// pow(0, positive number) = 0
return if out_is_neg { -0.0 } else { 0.0 };
}
if x_a == f64::INFINITY.to_bits() {
let out_is_neg = x_sign && is_odd_integer(y);
if y_sign {
return if out_is_neg { -0.0 } else { 0.0 };
}
return if out_is_neg {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
if x_a > f64::INFINITY.to_bits() {
// x is NaN.
// pow (aNaN, 0) is already taken care above.
return x;
}
// Biased exponents of the smaller and larger of |x| and |y|.
let min_abs = f64::min(f64::from_bits(ax), f64::from_bits(ay)).to_bits();
let max_abs = f64::max(f64::from_bits(ax), f64::from_bits(ay)).to_bits();
let min_exp = min_abs.wrapping_shr(52);
let max_exp = max_abs.wrapping_shr(52);
// If either magnitude sits within 128 binades of the bottom or the top of
// the exponent range, skip the fast paths and use the accurate path.
if max_exp > 0x7ffu64 - 128u64 || min_exp < 128u64 {
let scale_up = min_exp < 128u64;
let scale_down = max_exp > 0x7ffu64 - 128u64;
// One of the two flags is always set here (same tests as the enclosing
// `if`), so this always takes the hard path.
if scale_up || scale_down {
return compound_accurate(x, y, s);
}
}
}
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
let straight_path_precondition: bool = true;
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
let straight_path_precondition: bool = y.is_sign_positive();
// this is correct only for positive exponent number without FMA,
// otherwise reciprocal may overflow.
// y is integer and in [-102;102] and |x|<2^10
// (0x4059800000000000 is 102.0, 0x4090000000000000 is 2^10,
// 0x3cc0_0000_0000_0000 is 2^-51)
if is_integer(y)
&& y_a <= 0x4059800000000000u64
&& x_a <= 0x4090000000000000u64
&& x_a > 0x3cc0_0000_0000_0000
&& straight_path_precondition
{
// This `s` is the running square of (1 + x); it shadows the sign
// multiplier `s` declared above for the rest of this block.
let mut s = DoubleDouble::from_full_exact_add(1.0, x);
// The guard above established that y is an integer with |y| <= 102.
let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
// exponentiation by squaring: O(log(y)) complexity
let mut acc = if iter_count % 2 != 0 {
s
} else {
DoubleDouble::new(0., 1.)
};
while {
iter_count >>= 1;
iter_count
} != 0
{
s = DoubleDouble::mult(s, s);
if iter_count % 2 != 0 {
acc = DoubleDouble::mult(acc, s);
}
}
let dz = if y.is_sign_negative() {
acc.recip()
} else {
acc
};
// Rounding test: accept the result only if shifting it by a relative
// 2^-59 in either direction rounds to the same double.
let ub = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), -dz.hi, dz.lo); // 2^-59
let lb = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), dz.hi, dz.lo); // 2^-59
if ub == lb {
return dz.to_f64();
}
return mul_fixed_power_hard(x, y);
}
// General path: approximate log1p(x) in double-double.
let l = log1p_fast_dd(x);
// Biased exponent of y: very small (< 0x36) or very large (>= 0x7f5)
// exponents go straight to the accurate path.
let ey = ((y.to_bits() >> 52) & 0x7ff) as i32;
if ey < 0x36 || ey >= 0x7f5 {
return compound_accurate(x, y, s);
}
// r = y * log1p(x); pow_exp_dd then evaluates the exponential, taking s.
let r = DoubleDouble::quick_mult_f64(l, y);
let res = pow_exp_dd(r, s);
// Rounding test with a relative bound of 2^-64 (0x3bf0000000000000).
let res_min = res.hi + f_fmla(f64::from_bits(0x3bf0000000000000), -res.hi, res.lo);
let res_max = res.hi + f_fmla(f64::from_bits(0x3bf0000000000000), res.hi, res.lo);
if res_min == res_max {
return res_max;
}
compound_accurate(x, y, s)
}
/// Slow, high-precision fallback for `(1 + x)^y`.
///
/// Evaluates `exp(y * log1p(x))` in 128-bit dyadic arithmetic and converts the
/// result to `f64`. `s` is the sign multiplier chosen by the caller: a value of
/// exactly `-1.0` makes the result negative, anything else leaves it positive.
#[cold]
fn compound_accurate(x: f64, y: f64, s: f64) -> f64 {
    let wide_y = DyadicFloat128::new_from_f64(y);
    let scaled_log = log1p_f64_dyadic(x) * wide_y;
    let mut power = exp_dyadic(scaled_log);
    // Underflow: half of the smallest subnormal (with the sign of `s`), which
    // rounds to a signed zero under round-to-nearest.
    if power.exponent < -1075 {
        let tiniest = f64::from_bits(1);
        return 0.5 * (s * tiniest);
    }
    // NOTE(review): the overflow side returns 1.0 rather than an infinity —
    // confirm this is intended.
    if power.exponent >= 1025 {
        return 1.0;
    }
    power.sign = if s != -1.0 {
        DyadicSign::Pos
    } else {
        DyadicSign::Neg
    };
    power.fast_as_f64()
}
/// Accurate fallback for `(1 + x)^y` with a small integer `y`.
///
/// Runs binary exponentiation (square-and-multiply) on `1 + x` in
/// triple-double arithmetic and takes the reciprocal at the end when `y` is
/// negative.
#[cold]
#[inline(never)]
fn mul_fixed_power_hard(x: f64, y: f64) -> f64 {
    let mut square = TripleDouble::from_full_exact_add(1.0, x);
    // SAFETY: the only caller in this file (`f_compound`) gets here after
    // checking that `y` is an integer with |y| <= 102, so `|y|` is a finite,
    // non-negative integral value that fits in `usize`.
    let mut remaining = unsafe { y.abs().to_int_unchecked::<usize>() };
    // Start from (1 + x) when the lowest exponent bit is set, else from 1.
    let mut product = if remaining & 1 == 1 {
        square
    } else {
        TripleDouble::new(0., 0., 1.)
    };
    remaining >>= 1;
    while remaining != 0 {
        square = TripleDouble::quick_mult(square, square);
        if remaining & 1 == 1 {
            product = TripleDouble::quick_mult(product, square);
        }
        remaining >>= 1;
    }
    if y.is_sign_negative() {
        return product.recip().to_f64();
    }
    product.to_f64()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_compound() {
assert_eq!(f_compound(4831835136., -13.),0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012780345669344118 );
assert_eq!(
f_compound(11468322278342656., 2.9995136260713475),
1481455956234813000000000000000000000000000000000.
);
assert_eq!(f_compound(0.9999999999999999, 3.), 7.999999999999999);
assert_eq!(
f_compound(1.0039215087890625, 10.000000000349134),
1044.2562119607103
);
assert_eq!(f_compound(10., 18.0), 5559917313492231000.0);
assert_eq!(
f_compound(131071.65137729312, 2.000001423060894),
17180328027.532265
);
assert_eq!(f_compound(2., 5.), 243.);
assert_eq!(f_compound(126.4324324, 126.4324324), 1.4985383310514043e266);
assert_eq!(f_compound(0.4324324, 126.4324324), 5.40545942023447e19);
assert!(f_compound(-0.4324324, 126.4324324).is_nan());
assert_eq!(f_compound(0.0, 0.0), 1.0);
assert_eq!(f_compound(0.0, -1. / 2.), 1.0);
assert_eq!(f_compound(-1., -1. / 2.), f64::INFINITY);
assert_eq!(f_compound(f64::INFINITY, -1. / 2.), 0.0);
assert_eq!(f_compound(f64::INFINITY, 1. / 2.), f64::INFINITY);
assert_eq!(f_compound(46.3828125, 46.3828125), 5.248159634773675e77);
}
#[test]
fn test_compound_exotic_cases() {
assert_eq!(f_compound(0.9999999850987819, -1.), 0.5000000037253046);
assert_eq!(
f_compound(22427285907987670000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
-1.),
0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004458854290718438
);
assert_eq!(f_compound(0.786438105629145, 607.999512419221),
1616461095392737200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
assert_eq!(f_compound( 1.0000002381857613, 960.8218657970428),
17228671476562465000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
assert_eq!(f_compound(1., 1.0000000000000284), 2.);
assert_eq!(f_compound(1., f64::INFINITY), f64::INFINITY);
assert_eq!(
f_compound(10.000000000000007, -8.),
0.00000000466507380209731
);
}
}

573
vendor/pxfm/src/compound/compound_m1.rs vendored Normal file
View File

@@ -0,0 +1,573 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::*;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::logs::log1p_fast_dd;
use crate::pow_exec::pow_expm1_1;
/// Computes (1+x)^y - 1
///
/// max found ULP 0.56
///
/// Outline of the implementation below:
/// 1. NaN inputs return NaN immediately.
/// 2. A block of special cases resolves `y == 0`, `x == ±0`, `x == +Inf`,
///    `x < -1`, `x == -1`, `|y|` equal to 1 or 2, and infinite `y`.
/// 3. Integer `y` with `|y| <= 102` and moderate `|x|` is evaluated by
///    exponentiation by squaring in double-double arithmetic.
/// 4. Everything else is computed from `y * log1p(x)` via `pow_expm1_1`.
pub fn f_compound_m1(x: f64, y: f64) -> f64 {
/*
Rules from IEEE 754-2019 for compound (x, n) with n integer:
(a) compound (x, 0) is 1 for x >= -1 or quiet NaN
(b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
(c) compound (-1, n) is +0 for n > 0
(d) compound (+/-0, n) is 1
(e) compound (+Inf, n) is +Inf for n > 0
(f) compound (+Inf, n) is +0 for n < 0
(g) compound (x, n) is qNaN and signals the invalid exception for x < -1
(h) compound (qNaN, n) is qNaN for n <> 0.
(this function returns the values above minus one)
*/
let x_sign = x.is_sign_negative();
let y_sign = y.is_sign_negative();
// Bit patterns with the sign bit cleared.
let x_abs = x.to_bits() & 0x7fff_ffff_ffff_ffff;
let y_abs = y.to_bits() & 0x7fff_ffff_ffff_ffff;
const MANTISSA_MASK: u64 = (1u64 << 52) - 1;
// Zero exactly when |y| is a power of two, zero or infinity.
let y_mant = y.to_bits() & MANTISSA_MASK;
let x_u = x.to_bits();
let x_a = x_abs;
let y_a = y_abs;
// Any NaN input (quiet or signaling) yields NaN. Because of this early
// return, every later NaN check in this function is unreachable.
if x.is_nan() || y.is_nan() {
return f64::NAN;
}
// Sign multiplier handed to `pow_expm1_1`: 1.0, or -1.0 when set below.
let mut s = 1.0;
// The double precision number that is closest to 1 is (1 - 2^-53), which has
// log2(1 - 2^-53) ~ -1.715...p-53.
// So if |y| > |1075 / log2(1 - 2^-53)|, and x is finite:
// |y * log2(x)| = 0 or > 1075.
// Hence, x^y will either overflow or underflow if x is not zero.
// NOTE(review): f64::MIN.to_bits() is 0xFFEF_FFFF_FFFF_FFFF, so the last two
// clauses together hold for every bit pattern of x and this block appears to
// be entered unconditionally — confirm whether a tighter bound was intended.
if y_mant == 0
|| y_a > 0x43d7_4910_d52d_3052
|| x_u == 1f64.to_bits()
|| x_u >= f64::INFINITY.to_bits()
|| x_u < f64::MIN.to_bits()
{
// Exceptional exponents.
// (a) compound(x, 0) - 1 = 0
if y == 0.0 {
return 0.0;
}
// (h) compound(qNaN, n) is qNaN for n ≠ 0
// (unreachable: NaN inputs already returned above)
if x.is_nan() {
if y != 0. {
return x;
} // propagate qNaN
return 0.0;
}
// (d) compound(±0, n) is 1, so the result is 0
if x == 0.0 {
return 0.0;
}
// (e, f) compound(+Inf, n)
if x.is_infinite() && x > 0.0 {
return if y > 0. { x } else { -1.0 };
}
// (g) compound(x, n) is qNaN and signals invalid for x < -1
// (this also covers x == -Inf)
if x < -1.0 {
// Optional: raise invalid explicitly
return f64::NAN;
}
// (b, c) compound(-1, n)
if x == -1.0 {
return if y < 0. { f64::INFINITY } else { -1.0 };
}
// From here on x is finite, non-zero and greater than -1.
// Closed forms for |y| = 1 and 2 (the |y| = 1/2 case is disabled below).
match y_a {
// 0x3fe0_0000_0000_0000 => {
// if x == 0.0 {
// return 0.0;
// }
// let z = Dekker::from_full_exact_add(x, 1.0).sqrt();
// if y_sign {
// const M_ONES: DyadicFloat128 = DyadicFloat128 {
// sign: DyadicSign::Neg,
// exponent: -127,
// mantissa: 0x80000000_00000000_00000000_00000000_u128,
// };
// let z = DyadicFloat128::new_from_f64(z.to_f64());
// (z.reciprocal() + M_ONES).fast_as_f64()
// } else {
// const M_ONES: DyadicFloat128 = DyadicFloat128 {
// sign: DyadicSign::Neg,
// exponent: -127,
// mantissa: 0x80000000_00000000_00000000_00000000_u128,
// };
// let z = DyadicFloat128::new_from_f64(z.to_f64());
// (z + M_ONES).fast_as_f64()
// };
// }
0x3ff0_0000_0000_0000 => {
// |y| = 1: the result is x itself, or 1 / (1 + x) - 1 for y < 0.
return if y_sign {
let z = DyadicFloat128::new_from_f64(x);
// ONES / M_ONES differ only in sign — presumably the dyadic
// encodings of +1.0 and -1.0.
const ONES: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
};
const M_ONES: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
};
let p = (z + ONES).reciprocal() + M_ONES;
p.fast_as_f64()
} else {
x
};
}
0x4000_0000_0000_0000 => {
// |y| = 2: (1 + x)^2 - 1, or 1 / (1 + x)^2 - 1 for y < 0.
const ONES: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
};
let z0 = DyadicFloat128::new_from_f64(x) + ONES;
let z = z0 * z0;
const M_ONES: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
};
return if y_sign {
(z.reciprocal() + M_ONES).fast_as_f64()
} else {
// The sign of x is copied onto the result; (1+x)^2 - 1 has the
// sign of x whenever x > -2, which holds here.
f64::copysign((z + M_ONES).fast_as_f64(), x)
};
}
_ => {}
}
// |y| > |1075 / log2(1 - 2^-53)|.
if y_a >= 0x7ff0_0000_0000_0000 {
// y is inf or nan
// (the NaN sub-branches are unreachable: NaN inputs already returned)
if y_mant != 0 {
// y is NaN
// pow(1, NaN) = 1
// pow(x, NaN) = NaN
return if x_u == 1f64.to_bits() { 1.0 } else { y };
}
// Now y is +-Inf
if f64::from_bits(x_abs).is_nan() {
// pow(NaN, +-Inf) = NaN
return x;
}
// NOTE(review): |x| == 1 with infinite y returns 0 here, following the
// pow rule quoted below; x == -1 has already returned above, so only
// x == +1 reaches this line, where (1 + 1)^(±Inf) - 1 is +Inf or -1 —
// confirm this is intended.
if x_a == 0x3ff0_0000_0000_0000 {
// pow(+-1, +-Inf) = 1.0
return 0.0;
}
// (unreachable: x == 0 already returned above)
if x == 0.0 && y_sign {
// pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
return f64::INFINITY;
}
// pow (|x| < 1, -inf) = +inf
// pow (|x| < 1, +inf) = 0.0
// pow (|x| > 1, -inf) = 0.0
// pow (|x| > 1, +inf) = +inf
// NOTE(review): this compares |x| (not |1 + x|) against 1, following
// the pow rules quoted above — confirm this is the intended rule here.
return if (x_a < 1f64.to_bits()) == y_sign {
f64::INFINITY
} else {
-1.0
};
}
// y is finite and non-zero.
// NOTE(review): for x == 1 the mathematical value is 2^y - 1, yet this
// returns 0 for every remaining y (|y| = 1, 2 and infinite y were handled
// above) — verify against the intended semantics.
if x_u == 1f64.to_bits() {
// pow(1, y) = 1
return 0.0;
}
// The next three checks (x == 0, |x| == Inf, x NaN) cannot fire here: zero,
// +Inf, x < -1 (which includes -Inf) and NaN have all returned already.
if x == 0.0 {
let out_is_neg = x_sign && is_odd_integer(y);
if y_sign {
// pow(0, negative number) = inf
return if out_is_neg {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
// pow(0, positive number) = 0
return -1.0;
}
if x_a == f64::INFINITY.to_bits() {
let out_is_neg = x_sign && is_odd_integer(y);
if y_sign {
return if out_is_neg { -1.0 } else { 1.0 };
}
return if out_is_neg {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
if x_a > f64::INFINITY.to_bits() {
// x is NaN.
// pow (aNaN, 0) is already taken care above.
return x;
}
// x is finite and negative (here: -1 < x < 0).
// NOTE(review): this applies pow-style rules to x itself: a non-integer y
// returns NaN and an odd integer y sets s = -1, although 1 + x is positive
// on this range — confirm the sign handling is intended for compound.
if x_sign {
if is_integer(y) {
if is_odd_integer(y) {
// sign = -1.0;
static CS: [f64; 2] = [1.0, -1.0];
// set sign to 1 for y even, to -1 for y odd
// (|y| >= 2^53 is treated as even)
let y_parity = if (y.abs()) >= f64::from_bits(0x4340000000000000) {
0usize
} else {
(y as i64 & 0x1) as usize
};
s = CS[y_parity];
}
} else {
// pow( negative, non-integer ) = NaN
return f64::NAN;
}
}
// y is finite and non-zero.
// The remaining checks in this block repeat earlier ones and are
// unreachable: x == 1, x == 0, |x| == Inf and NaN have all returned.
if x_u == 1f64.to_bits() {
// pow(1, y) = 1
return 0.0;
}
if x == 0.0 {
let out_is_neg = x_sign && is_odd_integer(y);
if y_sign {
// pow(0, negative number) = inf
return if out_is_neg {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
// pow(0, positive number) = 0
return if out_is_neg { -0.0 } else { 0.0 };
}
if x_a == f64::INFINITY.to_bits() {
let out_is_neg = x_sign && is_odd_integer(y);
if y_sign {
return -1.;
}
return if out_is_neg {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
if x_a > f64::INFINITY.to_bits() {
// x is NaN.
// pow (aNaN, 0) is already taken care above.
return x;
}
}
// Evaluate (1+x)^y explicitly for small integer y. The guard below accepts
// |y| <= 102 (0x4059800000000000) and 2^-51 < |x| <= 2^10.
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
let straight_path_precondition: bool = true;
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
let straight_path_precondition: bool = y.is_sign_positive();
// this is correct only for positive exponent number without FMA,
// otherwise reciprocal may overflow.
if is_integer(y)
&& y_a <= 0x4059800000000000u64
&& x_a <= 0x4090000000000000u64
&& x_a > 0x3cc0_0000_0000_0000
&& straight_path_precondition
{
// This `s` is the running square of (1 + x); it shadows the sign
// multiplier `s` declared above for the rest of this block.
let mut s = DoubleDouble::from_full_exact_add(1.0, x);
// The guard above established that y is an integer with |y| <= 102.
let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
// exponentiation by squaring: O(log(y)) complexity
let mut acc = if iter_count % 2 != 0 {
s
} else {
DoubleDouble::new(0., 1.)
};
while {
iter_count >>= 1;
iter_count
} != 0
{
s = DoubleDouble::mult(s, s);
if iter_count % 2 != 0 {
acc = DoubleDouble::mult(acc, s);
}
}
let mut dz = if y.is_sign_negative() {
acc.recip()
} else {
acc
};
// Subtract the 1 in double-double before rounding.
dz = DoubleDouble::full_add_f64(dz, -1.);
// Rounding test: accept the result only if shifting it by a relative
// 2^-59 in either direction rounds to the same double.
let ub = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), -dz.hi, dz.lo); // 2^-59
let lb = dz.hi + f_fmla(f64::from_bits(0x3c40000000000000), dz.hi, dz.lo); // 2^-59
if ub == lb {
return dz.to_f64();
}
return mul_fixed_power_hard(x, y);
}
// approximate log1p(x)
let l = log1p_fast_dd(x);
// Biased exponent of y.
// NOTE(review): both a tiny |y| (exponent < 0x36) and a huge |y|
// (exponent >= 0x7f5) return 0 here; for huge |y| the mathematical result
// is -1 or +Inf rather than 0 — confirm this is intended.
let ey = ((y.to_bits() >> 52) & 0x7ff) as i32;
if ey < 0x36 || ey >= 0x7f5 {
return 0.;
}
// r = y * log1p(x); pow_expm1_1 then produces the final value, taking s.
let r = DoubleDouble::quick_mult_f64(l, y);
let res = pow_expm1_1(r, s);
res.to_f64()
}
/// Accurate fallback for `(1 + x)^y - 1` with a small integer `y`.
///
/// Runs binary exponentiation (square-and-multiply) on `1 + x` in 128-bit
/// dyadic arithmetic, takes the reciprocal when `y` is negative, and subtracts
/// one before converting back to `f64`.
#[cold]
#[inline(never)]
fn mul_fixed_power_hard(x: f64, y: f64) -> f64 {
    // Dyadic constants added to `x` and to the final power below.
    const ONE: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -127,
        mantissa: 1u128 << 127,
    };
    const M_ONE: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Neg,
        exponent: -127,
        mantissa: 1u128 << 127,
    };
    let mut square = DyadicFloat128::new_from_f64(x) + ONE;
    // SAFETY: the only caller in this file (`f_compound_m1`) gets here after
    // checking that `y` is an integer with |y| <= 102, so `|y|` is a finite,
    // non-negative integral value that fits in `usize`.
    let mut remaining = unsafe { y.abs().to_int_unchecked::<usize>() };
    // Start from (1 + x) when the lowest exponent bit is set, else from 1.
    let mut product = if remaining & 1 == 1 { square } else { ONE };
    remaining >>= 1;
    while remaining != 0 {
        square = square * square;
        if remaining & 1 == 1 {
            product = product * square;
        }
        remaining >>= 1;
    }
    let power = if y.is_sign_negative() {
        product.reciprocal()
    } else {
        product
    };
    (power + M_ONE).fast_as_f64()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_compound_exotic() {
assert_eq!(
f_compound_m1(0.000152587890625, -8.484374999999998),
-0.0012936766014690006
);
assert_eq!(
f_compound_m1(
0.00000000000000799360578102344,
-0.000000000000000000000001654361225106131
),
-0.000000000000000000000000000000000000013224311452909338
);
assert_eq!(
f_compound_m1( 4.517647064592699, 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000055329046628180653),
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009449932890153435
);
assert_eq!(f_compound_m1(
11944758478933760000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
-1242262631503757300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.,
), -1.);
}
#[test]
fn test_compound_m1() {
assert_eq!(
f_compound_m1(0.0000000000000009991998751296936, -4.),
-0.000000000000003996799500518764
);
assert_eq!(f_compound_m1(-0.003173828125, 25.), -0.0763960132649781);
assert_eq!(f_compound_m1(3., 2.8927001953125), 54.154259038961406);
assert_eq!(
f_compound_m1(-0.43750000000000044, 19.),
-0.9999821216263793
);
assert_eq!(
f_compound_m1(127712., -2.0000000000143525),
-0.9999999999386903
);
assert_eq!(
f_compound_m1(-0.11718749767214207, 2893226081485815000000000000000.),
-1.
);
assert_eq!(
f_compound_m1(2418441935074801400000000., 512.),
f64::INFINITY
);
assert_eq!(
f_compound_m1(32.50198364245834, 128000.00000000093),
f64::INFINITY
);
assert_eq!(
f_compound_m1(1.584716796877785, 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004168916810703412),
0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003958869879428553
);
assert_eq!(
f_compound_m1(
-0.000000000000000000000000000000001997076793037533,
366577337071337140000000000000000f64
),
-0.5190938261758579
);
assert_eq!(f_compound_m1(2.1075630259863374, 0.5), 00.7628281328553664);
assert_eq!(f_compound_m1(2.1078916412661783, 0.5), 0.7629213372315222);
assert_eq!(f_compound_m1(3.0000000000001115, -0.5), -0.500000000000007);
assert_eq!(
f_compound_m1(0.0004873839215895903, 3.),
0.0014628645098045245
);
assert_eq!(f_compound_m1(-0.483765364602732, 3.), -0.862424399516842);
assert_eq!(f_compound_m1(3.0000001192092896, -2.), -0.9375000037252902);
assert_eq!(f_compound_m1(29.38323424607434, -1.), -0.9670871115332561);
assert_eq!(f_compound_m1(-0.4375, 4.), -0.8998870849609375);
assert_eq!(
f_compound_m1(-0.0039033182037826464, 3.),
-0.011664306402886494
);
assert_eq!(
f_compound_m1(0.000000000000000000000000000000000000007715336350455947,
-262034087537726030000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
-1.,
);
assert_eq!(f_compound_m1(10.000000059604645, 10.), 25937426005.44638);
assert_eq!(f_compound_m1(10., -308.25471555814863), -1.0);
assert_eq!(
f_compound_m1(5.4172231599824623E-312, 9.4591068440831498E+164),
5.124209266851586e-147
);
assert_eq!(
f_compound_m1(5.8776567263633397E-39, 3.4223548116804511E-310),
0.0
);
assert_eq!(
f_compound_m1(5.8639503496997932E-148, -7.1936801558778956E+305),
0.0
);
assert_eq!(
f_compound_m1(0.9908447265624999,
-19032028850336152000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
-1.
);
assert_eq!(
f_compound_m1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006952247559980936,
5069789834563405000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
3.524643400695958e-163
);
assert_eq!(
f_compound_m1(1.000000000000341,
-69261261804788370000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
-1.
);
assert_eq!(
f_compound_m1(
0.0000000000000001053438024827798,
0.0000000000000001053438024827798
),
0.000000000000000000000000000000011097316721530923
);
assert_eq!(
f_compound_m1(
0.00000000000000010755285551056508,
0.00000000000000010755285551056508
),
0.00000000000000000000000000000001156761672847649
);
assert_eq!(f_compound_m1(2.4324324, 1.4324324), 4.850778380908823);
assert_eq!(f_compound_m1(2., 5.), 242.);
assert_eq!(f_compound_m1(0.4324324, 126.4324324), 5.40545942023447e19);
assert!(f_compound_m1(-0.4324324, 126.4324324).is_nan());
assert_eq!(f_compound_m1(0.0, 0.0), 0.0);
assert_eq!(f_compound_m1(0.0, -1. / 2.), 0.0);
assert_eq!(f_compound_m1(-1., -1. / 2.), f64::INFINITY);
assert_eq!(f_compound_m1(f64::INFINITY, -1. / 2.), -1.0);
assert_eq!(f_compound_m1(f64::INFINITY, 1. / 2.), f64::INFINITY);
assert_eq!(f_compound_m1(46.3828125, 46.3828125), 5.248159634773675e77);
}
}

438
vendor/pxfm/src/compound/compound_m1f.rs vendored Normal file
View File

@@ -0,0 +1,438 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::*;
use crate::compound::compoundf::{
COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, compoundf_exp2_poly2, compoundf_log2p1_accurate,
compoundf_log2p1_fast,
};
use crate::double_double::DoubleDouble;
use crate::exponents::exp2m1_accurate_tiny;
use crate::round_ties_even::RoundTiesEven;
use std::hint::black_box;
// INVLOG2 = 1/log(2) * (1 + eps1) with |eps1| < 2^-55.976
const INVLOG2: f64 = f64::from_bits(0x3ff71547652b82fe);
/// Handles the special operands of `(1 + x)^y - 1` in single precision.
///
/// Covers `x` or `y` equal to zero, infinite or NaN, and `x <= -1`.
/// The results are the IEEE 754-2019 `compound` values shifted by `-1`:
/// where `compound` yields `1` this returns `0`, where it yields `+0`
/// this returns `-1`, and `+Inf` stays `+Inf`. Invalid operands
/// (`x < -1`) give NaN; NaN operands are propagated through `x + y`.
#[cold]
#[inline(never)]
fn as_compoundm1f_special(x: f32, y: f32) -> f32 {
    // Bit pattern of +Inf once the sign bit has been shifted out.
    const INF_NO_SIGN: u32 = 0xffu32 << 24;

    let x_bits = x.to_bits();
    let y_bits = y.to_bits();
    // Magnitudes: shifting left by one discards the sign bit.
    let x_mag = x_bits.wrapping_shl(1);
    let y_mag = y_bits.wrapping_shl(1);
    let y_is_negative = (y_bits >> 31) != 0;

    // x = +/-0: (1 + 0)^y - 1 = 0 unless y is NaN.
    if x_mag == 0 {
        return if y.is_nan() { x + y } else { 0.0 };
    }
    // y = +/-0: (1 + x)^0 - 1 = 0 for x >= -1, invalid for x < -1.
    if y_mag == 0 {
        if x.is_nan() {
            return x + y;
        }
        return if x < -1.0 { f32::NAN } else { 0.0 };
    }

    let minus_one_bits = (-1.0f32).to_bits();

    // y is Inf or NaN (x = 0 was already handled above).
    if y_mag >= INF_NO_SIGN {
        // Either operand NaN: propagate.
        if x_mag > INF_NO_SIGN || y_mag != INF_NO_SIGN {
            return x + y;
        }
        // From here on y = +/-Inf and x is not NaN.
        if x_bits > minus_one_bits {
            return f32::NAN; // x < -1
        }
        if x_bits == minus_one_bits || x < 0.0 {
            // 0 <= 1 + x < 1: the power tends to 0 for y = +Inf and
            // to +Inf for y = -Inf.
            return if y_is_negative { f32::INFINITY } else { -1. };
        }
        if x > 0.0 {
            // 1 + x > 1: the power tends to +Inf for y = +Inf and to 0
            // for y = -Inf.
            return if y_is_negative { -1. } else { f32::INFINITY };
        }
        return 0.0;
    }

    // x is Inf, NaN or carries the sign bit (x <= -1 on the paths that reach here).
    if x_bits >= minus_one_bits || x_bits >= 0xffu32 << 23 {
        if x_mag == INF_NO_SIGN {
            if (x_bits >> 31) != 0 {
                return f32::NAN; // x = -Inf
            }
            // (1 + Inf)^y is +Inf for y > 0 and +0 for y < 0; then subtract one.
            let power = if y_is_negative { 1.0 / x } else { x };
            return power - 1.;
        }
        if x_mag > INF_NO_SIGN {
            return x + y; // x is NaN
        }
        if x_bits > minus_one_bits {
            return f32::NAN; // x < -1
        }
        // x = -1: 0^y is +Inf for y < 0 and 0 for y > 0.
        return if y_is_negative { f32::INFINITY } else { -1.0 };
    }
    -1.
}
/// Evaluates `z * (Q0 + Q1*z + Q2*z^2 + Q3*z^3 + Q4*z^4)`.
///
/// The polynomial has no constant term (`Q0` is `ln 2`), so the value
/// approximates `2^z - 1`. The original note gives the intended domain as
/// `|z| <= 2^-6`, with absolute error below `2^-43.540`.
#[inline]
pub(crate) fn compoundf_expf_poly(z: f64) -> f64 {
    /* Q is a degree-4 polynomial generated by Sollya (cf compoundf_expf.sollya)
    with absolute error < 2^-43.549 */
    const Q: [u64; 5] = [
        0x3fe62e42fefa39ef,
        0x3fcebfbdff8098eb,
        0x3fac6b08d7045dc3,
        0x3f83b2b276ce985d,
        0x3f55d8849c67ace4,
    ];
    let [q0, q1, q2, q3, q4] = Q.map(f64::from_bits);

    let z_squared = z * z;
    // Low part Q0 + Q1*z and high part Q2 + Q3*z + Q4*z^2 are evaluated
    // separately, then joined as high * z^2 + low.
    let low = dd_fmla(q1, z, q0);
    let high = dd_fmla(dd_fmla(q4, z, q3), z, q2);
    dd_fmla(high, z_squared, low) * z
}
/* Fast path for 2^t - 1, where t approximates y*log2(1+x).

Returns a double v approximating 2^t - 1 whose rounding to f32 passed the
rounding test below (v - err and v + err round to the same f32).
Returns the sentinel -1.0 when the fast path gives up: either v is below the
error bound (this includes every negative v), or the rounding test failed.
Callers are expected to fall back to the accurate path on -1.0.

Per the original analysis, t is assumed to approximate y*log2(1+x) with
absolute error < 2^-40.680 and to satisfy 0x1.7154759a0df53p-24 <= |t| <= 150. */
fn exp2m1_fast(t: f64) -> f64 {
// split t = k + r with k the nearest integer (ties to even)
let k = t.round_ties_even_finite(); // 0 <= |k| <= 150
let mut r = t - k; // |r| <= 1/2, exact
let mut v: u64 = (3.015625 + r).to_bits(); // 2.5 <= v <= 3.5015625
// we add 2^-6 so that i is rounded to nearest
// table index extracted from the top bits of 3.015625 + r
let i: i32 = (v >> 46) as i32 - 0x10010; // 0 <= i <= 32
r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
// now |r| <= 2^-6
// 2^t = 2^k * exp2_U[i][0] * 2^r
let mut s = f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1);
// su is the bit pattern of 2^k: biased exponent k + 0x3ff, zero mantissa
let su = unsafe { ((k.to_int_unchecked::<i64>() as u64).wrapping_add(0x3ffu64)) << 52 }; // k is already integer
s *= f64::from_bits(su);
// q_poly approximates 2^r - 1 (the polynomial has no constant term)
let q_poly = compoundf_expf_poly(r);
v = q_poly.to_bits();
/* the absolute error on exp2_U[i][0] is bounded by 2^-53.092, with
exp2_U[i][0] < 2^0.5, and that on q1(r) is bounded by 2^-43.540,
with |q1(r)| < 1.011, thus |v| < 1.43, and the absolute error on v is
bounded by ulp(v) + 2^0.5s * 2^-43.540 + 2^-53.092 * 1.011 < 2^-43.035.
Now t approximates u := y*log2(1+x) with |t-u| < 2^-40.680 thus
2^u = 2^t * (1 + eps) with eps < 2^(2^-40.680)-1 < 2^-41.208.
The total absolute error is thus bounded by 2^-43.035 + 2^-41.208
< 2^-40.849. */
let mut err: u64 = 0x3d61d00000000000; // 2^-40.849 < 0x1.1dp-41
// v = q*s + (s - 1), i.e. s*(1 + q) - 1 ~ 2^t - 1; single FMA when the target has one
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
v = f_fmla(f64::from_bits(v), s, s - 1f64).to_bits();
}
// same quantity without hardware FMA: exact product and exact sum in double-double
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let p0 = DoubleDouble::from_full_exact_add(s, -1.);
let z = DoubleDouble::from_exact_mult(f64::from_bits(v), s);
v = DoubleDouble::add(z, p0).to_f64().to_bits();
}
// in case of potential underflow, we defer to the accurate path
// (the comparison is against the unscaled error bound, so any negative v also lands here)
if f64::from_bits(v) < f64::from_bits(0x3d61d00000000000) {
return -1.0;
}
err = unsafe { err.wrapping_add((k.to_int_unchecked::<i64>() << 52) as u64) }; // scale the error by 2^k too
// rounding test: both ends of [v - err, v + err] must round to the same f32
let lb = (f64::from_bits(v) - f64::from_bits(err)) as f32;
let rb = (f64::from_bits(v) + f64::from_bits(err)) as f32;
if lb != rb {
return -1.0;
} // rounding test failed
f64::from_bits(v)
}
/// Accurate path for `2^(h + l) - 1`, where `x_dd = h + l` is a double-double
/// approximation of `y * log2(1 + x)`; the result is rounded to `f32`.
///
/// `x` and `y` are the original operands and are only used for the exact
/// shortcut `y == 1`, where the result is `x` itself.
fn compoundf_exp2m1_accurate(x_dd: DoubleDouble, x: f32, y: f32) -> f32 {
    if y == 1.0 {
        return x;
    }

    let k = x_dd.hi.round_ties_even_finite(); // |k| <= 150

    /* Easy case: h is tiny. Per the original note, the relative error between
    h and y*log2(1+x) is bounded by
    (1 + 2^-48.445) * (1 + 2^-91.120) - 1 < 2^-48.444,
    2^h rounds to 1 to nearest for |h| <= H0 := 0x1.715476af0d4d9p-25, and the
    threshold below is such that h*(1+2^-48.444) < H0. */
    if k == 0. && x_dd.hi.abs() <= f64::from_bits(0x3e6715476af0d4c8) {
        return exp2m1_accurate_tiny(x_dd.to_f64()) as f32;
    }

    let reduced_hi = x_dd.hi - k; // |reduced_hi| <= 1/2, exact
    // reduced_hi is an integer multiple of ulp(h), so this sum is exact
    let mut frac = DoubleDouble::from_exact_add(reduced_hi, x_dd.lo);

    // Adding 3.015625 (3 + 2^-6) makes the index below round to nearest.
    let index_bits = (3.015625 + frac.hi).to_bits(); // 2.5 <= value <= 3.5015625
    let idx = ((index_bits >> 46) as i32).wrapping_sub(0x10010) as usize; // 0 <= idx <= 32

    // frac.hi is near (idx-16)/2^5; remove the tabulated point (exact)
    frac.hi -= f64::from_bits(COMPOUNDF_EXP2_T[idx]);
    // now |frac.hi| <= 2^-6, and 2^(h+l) = 2^k * exp2_U[idx] * 2^frac
    frac = DoubleDouble::from_exact_add(frac.hi, frac.lo);

    /* per the original note: 0.989 < qh < 1.011, |ql| < 2^-51.959, and
    |qh + ql - 2^(h+l)| < 2^-85.210 */
    let poly = compoundf_exp2_poly2(frac);
    let table_entry = DoubleDouble::from_bit_pair(COMPOUNDF_EXP2_U[idx]);
    let product = DoubleDouble::quick_mult(table_entry, poly);
    let mut q = DoubleDouble::from_exact_add(product.hi, product.lo);

    // Bit pattern of 2^k: biased exponent k + 0x3ff moved into the exponent field.
    let scale_bits = unsafe {
        k.to_int_unchecked::<i64>()
            .wrapping_add(0x3ff)
            .wrapping_shl(52) as u64
    };
    let scale = f64::from_bits(scale_bits);
    q.hi *= scale;
    q.lo *= scale;

    // Subtract one from the high word with an exact sum and fold the rounding
    // error into the low word.
    let minus_one = DoubleDouble::from_full_exact_add(q.hi, -1.0);
    q.lo += minus_one.lo;
    q.hi = minus_one.hi;
    q.to_f64() as f32
}
// at input, exact is non-zero iff (1+x)^y is exact
// x,y=0x1.0f6f1ap+1,0x1.c643bp+5: 49 identical bits after round bit
// x,y=0x1.ef272cp+15,-0x1.746ab2p+1: 55 identical bits after round bit
// x,y=0x1.07ffcp+0,-0x1.921a8ap+4: 47 identical bits after round bit
/// Accurate fallback for `(1 + x)^y - 1`: forms `y * log2(1 + x)` as a
/// double-double and hands it to `compoundf_exp2m1_accurate`.
#[cold]
#[inline(never)]
fn compoundm1f_accurate(x: f32, y: f32) -> f32 {
    let log2_one_plus_x = compoundf_log2p1_accurate(f64::from(x));
    let exponent = DoubleDouble::quick_mult_f64(log2_one_plus_x, f64::from(y));
    compoundf_exp2m1_accurate(exponent, x, y)
}
/// Computes compound (1.0 + x)^y - 1
///
/// Max ULP 0.5
///
/// Special operands (`x <= -1`, zeros, infinities, NaNs) are delegated to
/// `as_compoundm1f_special`. Small integer `y` is evaluated directly in
/// double precision; everything else goes through `2^(y * log2(1 + x)) - 1`
/// with a fast path and an accurate fallback.
#[inline]
pub fn f_compound_m1f(x: f32, y: f32) -> f32 {
    /* Rules from IEEE 754-2019 for compound (x, n) with n integer
       (this function returns those values minus one):
       (a) compound (x, 0) is 1 for x >= -1 or quiet NaN
       (b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
       (c) compound (-1, n) is +0 for n > 0
       (d) compound (+/-0, n) is 1
       (e) compound (+Inf, n) is +Inf for n > 0
       (f) compound (+Inf, n) is +0 for n < 0
       (g) compound (x, n) is qNaN and signals the invalid exception for x < -1
       (h) compound (qNaN, n) is qNaN for n <> 0.
    */
    let mone = (-1.0f32).to_bits();
    let nx = x.to_bits();
    let ny = y.to_bits();
    if nx >= mone {
        return as_compoundm1f_special(x, y);
    } // x <= -1
    // now x > -1
    let ax: u32 = nx.wrapping_shl(1);
    let ay: u32 = ny.wrapping_shl(1);
    if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
        return as_compoundm1f_special(x, y);
    } // x=+-0 || x=+-inf/nan || y=+-0 || y=+-inf/nan

    let xd = x as f64;
    let yd = y as f64;

    // evaluate (1+x)^y explicitly for integer y in [-16,16] range and |x|<2^64
    if is_integerf(y) && ay <= 0x83000000u32 && ax <= 0xbefffffeu32 {
        if ax <= 0x62000000u32 {
            // |x| <= 2^-29 and |y| <= 16: 1 + x may not be representable, so
            // use the series (1+x)^y - 1 = y*x*(1 + (y-1)/2*x + O(x^2)).
            // The neglected relative term is below 35 * 2^-58, far beyond
            // single precision. (The constant term is NOT added back: this
            // is compound minus one.)
            return (yd * xd * (1.0 + 0.5 * (yd - 1.0) * xd)) as f32;
        }
        let mut s = xd + 1.;
        // |y| <= 16 and integral here, so the cast is exact.
        let mut iter_count = y.abs() as usize;
        // exponentiation by squaring: O(log(y)) complexity
        let mut acc = if iter_count % 2 != 0 { s } else { 1. };
        while {
            iter_count >>= 1;
            iter_count
        } != 0
        {
            s = s * s;
            if iter_count % 2 != 0 {
                acc *= s;
            }
        }
        let dz = if y.is_sign_negative() { 1. / acc } else { acc };
        return DoubleDouble::from_full_exact_add(dz, -1.).to_f64() as f32;
    }

    let l: f64 = if ax < 0x62000000u32 {
        // |x| < 2^-29
        /* |log2(1+x) - 1/log(2) * (x - x^2/2)| < 2^-59.584 * |log2(1+x)|
        (cf compoundf.sollya) */
        let t = xd - (xd * xd) * 0.5;
        /* since x is representable in binary32, x*x is exact, and so is (x * x) * 0.5.
           Thus the only error in the computation of t is the final rounding, which
           is bounded by ulp(t): t = (x - x^2/2) * (1 + eps2) with |eps2| < 2^-52
        */
        INVLOG2 * t
        /* since INVLOG2 = 1/log(2) * (1 + eps1) and
           t = (x - x^2/2) * (1 + eps2),
           let u = o(INVLOG2 * t) then u = INVLOG2 * t * (1 + eps3) with |eps3|<2^-53
           thus u = 1/log(2) * (x - x^2/2) * (1 + eps1)*(1 + eps2)*(1 + eps3)
                  = 1/log(2) * (x - x^2/2) * (1 + eps4) with |eps4| < 2^-50.954
           Now Sollya says the relative error by approximating log2(1+x) by
           1/log(2) * (x - x^2/2) for |x| < 2^-29 is bounded by 2^-59.584
           (file compoundf.sollya), thus:
           u = log2(1+x) * (1+eps4)*(1+eps5) with |eps5| < 2^-59.584
             = log2(1+x) * (1+eps6) with |eps6| < 2^-50.950 */
    } else {
        compoundf_log2p1_fast(xd)
    };
    /* l approximates log2(1+x) with relative error < 2^-47.997,
    and 2^-149 <= |l| < 128 */
    let t: u64 = (l * yd).to_bits();
    /* since 2^-149 <= |l| < 128 and 2^-149 <= |y| < 2^128, we have
    2^-298 <= |t| < 2^135, thus no underflow/overflow in double is possible.
    The relative error is bounded by (1+2^-47.997)*(1+2^-52)-1 < 2^-47.909 */
    // detect overflow/underflow
    if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
        // |t| >= 128
        if t >= 0x3018bu64 << 46 {
            // t <= -150: (1+x)^y < 2^-150 vanishes next to 1, so
            // (1+x)^y - 1 rounds to -1 (not to 0 as for plain compound).
            return -1.0;
        } else if (t >> 63) == 0 {
            // t >= 128: overflow
            return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
        }
    }
    // exp2m1_fast returns -1.0 when it cannot certify the rounding.
    let res = exp2m1_fast(f64::from_bits(t));
    if res != -1.0 {
        return res as f32;
    }
    compoundm1f_accurate(x, y)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::compound::compound_m1f::{compoundf_exp2m1_accurate, exp2m1_fast};
    use crate::double_double::DoubleDouble;
    #[test]
    fn test_compoundf() {
        assert_eq!(
            f_compound_m1f(-0.000000000000001191123, -0.000000000000001191123),
            0.0000000000000000000000000000014187741
        );
        // tiny |x| with a small integer exponent: (1+x)^y - 1 ~ y*x, and
        // 16*x is exact in f32
        assert_eq!(
            f_compound_m1f(-0.000000000000001191123, 16.),
            16. * -0.000000000000001191123f32
        );
        assert_eq!(f_compound_m1f(0.91123, 16.), 31695.21);
        assert_eq!(f_compound_m1f(0.91123, -16.), -0.99996847);
        // (1+x)^y underflows: the result must collapse to -1, not 0
        assert_eq!(f_compound_m1f(1e30, -10.5), -1.0);
    }
    #[test]
    fn test_compoundf_expm1_fast() {
        assert_eq!(exp2m1_fast(3.764), 12.585539943149435);
    }
    #[test]
    fn test_compoundf_expm1_accurate() {
        assert_eq!(
            compoundf_exp2m1_accurate(DoubleDouble::new(0., 2.74), 12., 53.),
            5.680703,
        );
    }
}

1000
vendor/pxfm/src/compound/compoundf.rs vendored Normal file

File diff suppressed because it is too large Load Diff

41
vendor/pxfm/src/compound/mod.rs vendored Normal file
View File

@@ -0,0 +1,41 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod compound_d;
mod compound_m1;
mod compound_m1f;
mod compoundf;
mod powm1;
mod powm1f;
pub use compound_d::f_compound;
pub use compound_m1::f_compound_m1;
pub use compound_m1f::f_compound_m1f;
pub use compoundf::f_compoundf;
pub use powm1::f_powm1;
pub use powm1f::f_powm1f;

224
vendor/pxfm/src/compound/powm1.rs vendored Normal file
View File

@@ -0,0 +1,224 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{is_integer, is_odd_integer};
use crate::double_double::DoubleDouble;
use crate::exponents::{EXPM1_T0, EXPM1_T1, ldexp};
use crate::pow_exec::pow_log_1;
use crate::round_ties_even::RoundTiesEven;
/// Computes x^y - 1
///
/// Special cases handled before the main evaluation:
/// - any NaN operand gives NaN;
/// - `|x| = inf`, `y = +inf` gives `+inf`; `|x| = inf`, `y = -inf` gives `-1`;
/// - `x = +inf`: `+inf` for `y > 0`, `-1` for `y < 0`, NaN for `y = 0`;
/// - `x = -inf`: NaN for non-integer `y` and for `y = 0`, `-1` for negative
///   integer `y`, `-inf` / `+inf` for positive odd / even integer `y`;
/// - `y = +/-inf` with finite `x`: decided by `|x|` against 1 (NaN when `|x| = 1`);
/// - `x = 0`: `-1` for `y > 0`, `+inf` for `y < 0`, `0` for `y = 0`;
/// - negative `x` with non-integer `y` gives NaN.
pub fn f_powm1(x: f64, y: f64) -> f64 {
    let ax: u64 = x.to_bits().wrapping_shl(1);
    let ay: u64 = y.to_bits().wrapping_shl(1);
    // filter out exceptional cases: x or y is zero, infinite or NaN
    if ax == 0 || ax >= 0x7ffu64 << 53 || ay == 0 || ay >= 0x7ffu64 << 53 {
        if x.is_nan() || y.is_nan() {
            return f64::NAN;
        }
        // Handle infinities
        if x.is_infinite() {
            if y.is_infinite() {
                // |x| = inf > 1: same rule as the finite-base case below,
                // inf^(+inf) = inf and inf^(-inf) = 0.
                return if y.is_sign_positive() {
                    f64::INFINITY
                } else {
                    -1.0
                };
            }
            return if x.is_sign_positive() {
                if y > 0.0 {
                    f64::INFINITY // inf^positive -> inf
                } else if y < 0.0 {
                    -1.0 // inf^negative -> 0, so powm1 = -1
                } else {
                    f64::NAN // inf^0 -> NaN (0^0 conventionally 1, inf^0 = NaN)
                }
            } else if !is_integer(y) {
                f64::NAN // Negative base with non-integer exponent
            } else if y < 0.0 {
                -1.0 // (-inf)^(-n) -> +/-0, so powm1 = -1
            } else if y == 0.0 {
                f64::NAN // same convention as (+inf)^0 above
            } else if is_odd_integer(y) {
                // Parity is taken from the float itself; an `as i32` cast
                // saturates and misreports large even exponents as odd.
                f64::NEG_INFINITY // (-inf)^odd = -inf
            } else {
                f64::INFINITY // (-inf)^even = +inf
            };
        }
        // Handle y infinite
        if y.is_infinite() {
            return if x.abs() > 1.0 {
                if y.is_sign_positive() {
                    f64::INFINITY
                } else {
                    -1.0
                }
            } else if x.abs() < 1.0 {
                if y.is_sign_positive() {
                    -1.0
                } else {
                    f64::INFINITY
                }
            } else {
                // |x| == 1
                f64::NAN // 1^inf or -1^inf is undefined
            };
        }
        // Handle zero base
        if x == 0.0 {
            return if y > 0.0 {
                -1.0 // 0^positive -> 0, powm1 = -1
            } else if y < 0.0 {
                f64::INFINITY // 0^negative -> inf
            } else {
                0.0 // 0^0 -> conventionally 1, powm1 = 0
            };
        }
        // finite non-zero x with y = +/-0 falls through to the general path
    }
    let y_integer = is_integer(y);
    let mut negative_parity: bool = false;
    let mut x = x;
    // Handle negative base: only integer exponents are defined
    if x < 0.0 {
        if !y_integer {
            return f64::NAN; // x < 0 and non-integer y
        }
        x = x.abs();
        if is_odd_integer(y) {
            negative_parity = true;
        }
    }
    // r ~ y * log(|x|) as a double-double
    let (mut l, _) = pow_log_1(x);
    l = DoubleDouble::from_exact_add(l.hi, l.lo);
    let r = DoubleDouble::quick_mult_f64(l, y);
    if r.hi < -37.42994775023705 {
        // |x|^y is negligible next to 1
        return -1.;
    }
    let res = powm1_expm1_1(r);
    // For x < 0 and integer y = n:
    // if n is even: x^n = |x|^n → powm1 = |x|^n - 1 (same sign as res).
    // if n is odd: x^n = -|x|^n → powm1 = -|x|^n - 1 = - (|x|^n + 1).
    // With res = |x|^n - 1 the odd case is -res - 2.
    if negative_parity {
        DoubleDouble::full_add_f64(-res, -2.).to_f64()
    } else {
        res.to_f64()
    }
}
/// Evaluates `exp(r) - 1` for a double-double argument `r`.
///
/// Small arguments are handled directly; otherwise `r` is reduced as
/// `r = k * ln(2)/2^12 + z` with integer `k`, the factor `2^(k/2^12)` is
/// rebuilt from two 64-entry tables plus a binary exponent, and the `- 1`
/// is applied before the final scaling by that exponent.
///
/// The result is returned with everything folded into `hi` (`lo` is 0).
#[inline]
pub(crate) fn powm1_expm1_1(r: DoubleDouble) -> DoubleDouble {
// magnitude bits of r.hi (sign bit cleared)
let ax = r.hi.to_bits() & 0x7fffffffffffffffu64;
// ln(2) / 2^12 split into a high and a low double
const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);
if ax <= 0x3f80000000000000 {
// |x| < 2^-7
if ax < 0x3970000000000000 {
// |x| < 2^-104
// the argument itself is returned as the value of exp(r) - 1
return r;
}
let d = crate::pow_exec::expm1_poly_dd_tiny(r);
return d;
}
// 2^12 / ln(2)
const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);
let k = (r.hi * INVLOG2).round_ties_even_finite();
// z = r - k * ln(2)/2^12 (reduced argument); presumably mul_f64_add(a, b, c) = a*b + c — confirm
let z = DoubleDouble::mul_f64_add(DoubleDouble::new(LOG2L, LOG2H), -k, r);
let bk = unsafe { k.to_int_unchecked::<i64>() }; /* Note: k is an integer, this is just a conversion. */
// split k: bits 12 and up give the binary exponent (biased here), the two
// 6-bit fields below index the tables
let mk = (bk >> 12) + 0x3ff;
let i2 = (bk >> 6) & 0x3f;
let i1 = bk & 0x3f;
let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i2 as usize]);
let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]);
let tbh = DoubleDouble::quick_mult(t1, t0);
let mut de = tbh;
// exp(k)=2^k*exp(r) + (2^k - 1)
// de = tbh + tbh * q with q from expm1_poly_fast(z)
let q = crate::pow_exec::expm1_poly_fast(z);
de = DoubleDouble::quick_mult(de, q);
de = DoubleDouble::add(tbh, de);
let ie = mk - 0x3ff;
// off = -2^(-ie): the sign bit comes from the 2048 term and the biased
// exponent is 1023 - ie, so that 2^ie * (de + off) = 2^ie * de - 1
let off: f64 = f64::from_bits((2048i64 + 1023i64).wrapping_sub(ie).wrapping_shl(52) as u64);
let e: f64;
if ie < 53 {
// exact sum with off passed first; the rounding error is kept in e
let fhz = DoubleDouble::from_exact_add(off, de.hi);
de.hi = fhz.hi;
e = fhz.lo;
} else if ie < 104 {
// same sum with the operands swapped (de.hi first)
let fhz = DoubleDouble::from_exact_add(de.hi, off);
de.hi = fhz.hi;
e = fhz.lo;
} else {
// ie >= 104: off is not added at all
e = 0.;
}
de.lo += e;
// collapse to a single double and apply the binary exponent
de.hi = ldexp(de.to_f64(), ie as i32);
de.lo = 0.;
de
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_powm1() {
assert_eq!(f_powm1(f64::INFINITY, f64::INFINITY), f64::INFINITY);
assert_eq!(f_powm1(50850368932909610000000000., 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000023201985303960773), 1.3733470789307166e-303);
assert_eq!(f_powm1(-3.375, -9671689000000000000000000.), -1.);
assert_eq!(f_powm1(1.83329e-40, 2.4645883e-32), -2.255031542428047e-30);
assert_eq!(f_powm1(3., 2.), 8.);
assert_eq!(f_powm1(3., 3.), 26.);
assert_eq!(f_powm1(5., 2.), 24.);
assert_eq!(f_powm1(5., -2.), 1. / 25. - 1.);
assert_eq!(f_powm1(-5., 2.), 24.);
assert_eq!(f_powm1(-5., 3.), -126.);
assert_eq!(
f_powm1(196560., 0.000000000000000000000000000000000000001193773),
1.4550568430468268e-38
);
}
}

260
vendor/pxfm/src/compound/powm1f.rs vendored Normal file
View File

@@ -0,0 +1,260 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::*;
use crate::compound::compound_m1f::compoundf_expf_poly;
use crate::compound::compoundf::{
COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, LOG2P1_COMPOUNDF_INV, LOG2P1_COMPOUNDF_LOG2_INV,
};
use crate::round_ties_even::RoundTiesEven;
use std::hint::black_box;
/// Fast approximation of `log2(x)` for a positive double `x`.
///
/// The argument is used as-is (no `1 + x` is formed here): it is split as
/// `x = 2^e * t` with `t` near 1, and `log2(t)` is obtained from a table of
/// reciprocals `r` plus a polynomial in `z = r*t - 1`.
///
/// NOTE(review): the numeric ranges and error bounds quoted in the comments
/// below appear to be inherited from the `log2(1 + x)` routine this was derived
/// from — confirm they still hold for this use.
#[inline]
fn powm1f_log2_fast(x: f64) -> f64 {
/* for x > 0, 1+x is exact when 2^-29 <= x < 2^53
for x < 0, 1+x is exact when -1 < x <= 2^-30 */
// double u = (x >= 0x1p53) ? x : 1.0 + x;
/* For x < 0x1p53, x + 1 is exact thus u = x+1.
For x >= 2^53, we estimate log2(x) instead of log2(1+x),
since log2(1+x) = log2(x) + log2(1+1/x),
log2(x) >= 53 and |log2(1+1/x)| < 2^-52.471, the additional relative
error is bounded by 2^-52.471/53 < 2^-58.198 */
let mut v = x.to_bits();
// m: the 52 mantissa bits of x
let m: u64 = v & 0xfffffffffffffu64;
// unbiased exponent, bumped by one when the mantissa is at or above the
// 0x6a09e667f3bcd threshold (the sqrt(2) split referred to below)
let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
// 2^e/sqrt(2) < u < 2^e*sqrt(2), with -29 <= e <= 128
// remove e from the exponent field so that v holds t
v = v.wrapping_sub((e << 52) as u64);
let t = f64::from_bits(v);
// u = 2^e*t with 1/sqrt(2) < t < sqrt(2)
// thus log2(u) = e + log2(t)
v = (f64::from_bits(v) + 2.0).to_bits(); // add 2 so that v.f is always in the binade [2, 4)
// table index taken from the top bits of t + 2
let i = (v >> 45) as i32 - 0x2002d; // 0 <= i <= 45
let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
let z = dd_fmla(r, t, -1.0); // exact, -1/64 <= z <= 1/64
// we approximates log2(t) by -log2(r) + log2(r*t)
let p = crate::compound::compoundf::log2p1_polyeval_1(z);
// p approximates log2(r*t) with rel. error < 2^-49.642, and |p| < 2^-5.459
// result: e + (tabulated -log2(r) + p)
e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p)
}
/// Computes x^y - 1
///
/// Special cases handled before the main evaluation:
/// - any NaN operand gives NaN;
/// - `|x| = inf`, `y = +inf` gives `+inf`; `|x| = inf`, `y = -inf` gives `-1`;
/// - `x = +inf`: `+inf` for `y > 0`, `-1` for `y < 0`, NaN for `y = 0`;
/// - `x = -inf`: NaN for non-integer `y` and for `y = 0`, `-1` for negative
///   integer `y`, `-inf` / `+inf` for positive odd / even integer `y`;
/// - `y = +/-inf` with finite `x`: decided by `|x|` against 1 (NaN when `|x| = 1`);
/// - `x = 0`: `-1` for `y > 0`, `+inf` for `y < 0`, `0` for `y = 0`;
/// - negative `x` with non-integer `y` gives NaN.
pub fn f_powm1f(x: f32, y: f32) -> f32 {
    let ax: u32 = x.to_bits().wrapping_shl(1);
    let ay: u32 = y.to_bits().wrapping_shl(1);
    // filter out exceptional cases: x or y is zero, infinite or NaN
    if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
        if x.is_nan() || y.is_nan() {
            return f32::NAN;
        }
        // Handle infinities
        if x.is_infinite() {
            if y.is_infinite() {
                // |x| = inf > 1: same rule as the finite-base case below,
                // inf^(+inf) = inf and inf^(-inf) = 0.
                return if y.is_sign_positive() {
                    f32::INFINITY
                } else {
                    -1.0
                };
            }
            return if x.is_sign_positive() {
                if y > 0.0 {
                    f32::INFINITY // inf^positive -> inf
                } else if y < 0.0 {
                    -1.0 // inf^negative -> 0, so powm1 = -1
                } else {
                    f32::NAN // inf^0 -> NaN (0^0 conventionally 1, inf^0 = NaN)
                }
            } else if !is_integerf(y) {
                f32::NAN // Negative base with non-integer exponent
            } else if y < 0.0 {
                -1.0 // (-inf)^(-n) -> +/-0, so powm1 = -1
            } else if y == 0.0 {
                f32::NAN // same convention as (+inf)^0 above
            } else if is_odd_integerf(y) {
                // Parity is taken from the float itself; an `as i32` cast
                // saturates and misreports large even exponents as odd.
                f32::NEG_INFINITY // (-inf)^odd = -inf
            } else {
                f32::INFINITY // (-inf)^even = +inf
            };
        }
        // Handle y infinite
        if y.is_infinite() {
            return if x.abs() > 1.0 {
                if y.is_sign_positive() {
                    f32::INFINITY
                } else {
                    -1.0
                }
            } else if x.abs() < 1.0 {
                if y.is_sign_positive() {
                    -1.0
                } else {
                    f32::INFINITY
                }
            } else {
                // |x| == 1
                f32::NAN // 1^inf or -1^inf is undefined
            };
        }
        // Handle zero base
        if x == 0.0 {
            return if y > 0.0 {
                -1.0 // 0^positive -> 0, powm1 = -1
            } else if y < 0.0 {
                f32::INFINITY // 0^negative -> inf
            } else {
                0.0 // 0^0 -> conventionally 1, powm1 = 0
            };
        }
        // finite non-zero x with y = +/-0 falls through to the general path
    }
    let y_integer = is_integerf(y);
    let mut negative_parity: bool = false;
    let mut x = x;
    // Handle negative base: only integer exponents are defined
    if x < 0.0 {
        if !y_integer {
            return f32::NAN; // x < 0 and non-integer y
        }
        x = x.abs();
        if is_odd_integerf(y) {
            negative_parity = true;
        }
    }
    let xd = x as f64;
    let yd = y as f64;
    // l approximates log2(|x|); the base is used as-is, no 1 + x is formed
    let l: f64 = powm1f_log2_fast(xd);
    // t = y * log2(|x|), so that |x|^y = 2^t
    let dt = l * yd;
    let t: u64 = dt.to_bits();
    // detect overflow/underflow
    if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
        // |t| >= 128
        if t >= 0x3018bu64 << 46 {
            // t <= -150: |x|^y is negligible next to 1
            return -1.;
        } else if (t >> 63) == 0 {
            // t >= 128: overflow
            return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
        }
    }
    let res = powm1_exp2m1_fast(dt);
    // For x < 0 and integer y = n:
    // if n is even: x^n = |x|^n → powm1 = |x|^n - 1 (same sign as res).
    // if n is odd: x^n = -|x|^n → powm1 = -|x|^n - 1 = - (|x|^n + 1).
    // With res = |x|^n - 1 the odd case is -res - 2.
    if negative_parity {
        (-res - 2.) as f32
    } else {
        res as f32
    }
}
/// Fast `2^t - 1` kernel used as the last step of `f_powm1f`.
///
/// `t` is split as `t = k + T[i] + r` with `k` the nearest integer, `T[i]` a
/// table entry and `r` a small remainder, so that `2^t = 2^k * U[i] * 2^r`.
/// The value `s = 2^k * U[i]` is built by adding `k` to the exponent field of
/// the table value, and the result is `s * q + (s - 1)` where `q` is the value
/// of `compoundf_expf_poly(r)` (presumably an approximation of `2^r - 1`;
/// confirm against that helper).
///
/// NOTE(review): the bounds in the comments below assume the caller has already
/// rejected `|t| >= 150` (as `f_powm1f` does before calling); nothing here
/// re-checks it.
#[inline]
pub(crate) fn powm1_exp2m1_fast(t: f64) -> f64 {
let k = t.round_ties_even_finite(); // 0 <= |k| <= 150
let mut r = t - k; // |r| <= 1/2, exact
let mut v: f64 = 3.015625 + r; // 2.515625 <= v <= 3.515625, i.e. v stays in the binade [2, 4)
// we add 2^-6 so that i is rounded to nearest
let i: i32 = (v.to_bits() >> 46) as i32 - 0x10010; // 0 <= i <= 32
r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
// now |r| <= 2^-6
// 2^t = 2^k * exp2_U[i][0] * 2^r
let mut s = f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1);
// SAFETY: `k` comes from `round_ties_even_finite`, so it is integral, and under
// the |k| <= 150 bound stated above it is finite and representable as i64.
let su = unsafe {
k.to_int_unchecked::<i64>().wrapping_shl(52) // k is already integer
};
// adding k << 52 to the bit pattern multiplies s by 2^k (exponent-field add)
s = f64::from_bits(s.to_bits().wrapping_add(su as u64));
let q_poly = compoundf_expf_poly(r);
v = q_poly;
// With hardware FMA a single fused step evaluates v * s + (s - 1).
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
v = f_fmla(v, s, s - 1f64);
}
// Without hardware FMA the same sum is formed in double-double arithmetic
// before rounding once to f64.
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
use crate::double_double::DoubleDouble;
let p0 = DoubleDouble::from_full_exact_add(s, -1.);
let z = DoubleDouble::from_exact_mult(v, s);
v = DoubleDouble::add(z, p0).to_f64();
}
v
}
// Unit tests for `f_powm1f`.
#[cfg(test)]
mod tests {
use super::*;
// Checks exact expected values for: a subnormal base with a tiny exponent,
// infinite arguments, a huge negative exponent, small integer powers with
// positive and negative bases, a result near the bottom of the normal f32
// range, and NaN propagation.
#[test]
fn test_powm1f() {
assert_eq!(f_powm1f(1.83329e-40, 2.4645883e-32), -2.2550315e-30);
assert_eq!(f_powm1f(f32::INFINITY, f32::INFINITY), f32::INFINITY);
assert_eq!(f_powm1f(-3.375, -9671689000000000000000000.), -1.);
assert_eq!(f_powm1f(3., 2.), 8.);
assert_eq!(f_powm1f(3., 3.), 26.);
assert_eq!(f_powm1f(5., 2.), 24.);
assert_eq!(f_powm1f(5., -2.), 1. / 25. - 1.);
assert_eq!(f_powm1f(-5., 2.), 24.);
assert_eq!(f_powm1f(-5., 3.), -126.);
assert_eq!(
f_powm1f(196560., 0.000000000000000000000000000000000000001193773),
1.455057e-38
);
assert!(f_powm1f(f32::NAN, f32::INFINITY).is_nan());
assert!(f_powm1f(f32::INFINITY, f32::NAN).is_nan());
}
}

219
vendor/pxfm/src/cosm1.rs vendored Normal file
View File

@@ -0,0 +1,219 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::polyeval::f_polyeval4;
use crate::sin::{range_reduction_small, sincos_eval};
use crate::sin_helper::sincos_eval_dd;
use crate::sin_table::SIN_K_PI_OVER_128;
use crate::sincos_reduce::LargeArgumentReduction;
/// Slow path of `f_cosm1`, taken when the fast path fails its rounding test.
///
/// `y` is the reduced argument and `sin_k` / `cos_k` are the table values for
/// the reduction index `k`; the caller reduces `x / 2`, so the sum formed here
/// is `sin(x / 2)` and the result is `-2 * sin^2(x / 2) = cos(x) - 1`.
#[cold]
#[inline(never)]
fn cosm1_accurate(y: DoubleDouble, sin_k: DoubleDouble, cos_k: DoubleDouble) -> f64 {
    let eval = sincos_eval_dd(y);
    // Angle addition with the reduced argument (|y| <= pi / 256, k integer):
    // sin(k * pi/128 + y) = sin(y) * cos(k * pi/128) + cos(y) * sin(k * pi/128)
    let sin_term = DoubleDouble::quick_mult(eval.v_cos, sin_k);
    let cos_term = DoubleDouble::quick_mult(eval.v_sin, cos_k);
    let sum = DoubleDouble::full_dd_add(sin_term, cos_term);
    // Renormalize, then apply cos(x) - 1 = -2 * sin^2(x / 2)
    let half_angle_sin = DoubleDouble::from_exact_add(sum.hi, sum.lo);
    let squared = DoubleDouble::quick_mult(half_angle_sin, half_angle_sin);
    DoubleDouble::quick_mult_f64(squared, -2.).to_f64()
}
/// Double-double fallback for `cos(x) - 1` on small arguments, used by
/// `f_cosm1` when the plain-`f64` polynomial cannot be rounded safely.
///
/// Evaluates an even polynomial in `x` by Horner's scheme on `x^2`, with the
/// leading coefficient fixed at `-1/2`.
#[cold]
fn cosm1_tiny_hard(x: f64) -> f64 {
    // Generated by Sollya:
    // d = [2^-27, 2^-7];
    // f_cosm1 = cos(x) - 1;
    // Q = fpminimax(f_cosm1, [|2,4,6,8|], [|0, 107...|], d);
    // See ./notes/cosm1_hard.sollya
    const C: [(u64, u64); 3] = [
        (0x3c453997dc8ae20d, 0x3fa5555555555555),
        (0x3bf6100c76a1827a, 0xbf56c16c16c15749),
        (0x3b918f45acdd1fb2, 0x3efa019ddf5a583a),
    ];
    let x_sq = DoubleDouble::from_exact_mult(x, x);
    // Horner steps from the highest-degree coefficient down to the x^4 one.
    let inner = DoubleDouble::mul_add(
        x_sq,
        DoubleDouble::from_bit_pair(C[2]),
        DoubleDouble::from_bit_pair(C[1]),
    );
    let middle = DoubleDouble::mul_add(x_sq, inner, DoubleDouble::from_bit_pair(C[0]));
    // 0xbfe0000000000000 is -0.5, the x^2 coefficient.
    let outer = DoubleDouble::mul_add_f64(x_sq, middle, f64::from_bits(0xbfe0000000000000));
    DoubleDouble::quick_mult(outer, x_sq).to_f64()
}
/// Computes cos(x) - 1
///
/// Strategy, selected on the biased exponent of `x`:
/// * `|x| < 2^-27`: two-term Taylor expansion `-x^2/2 + x^4/24`;
/// * `2^-27 <= |x| < 2^-7`: minimax polynomial in `x^2` with an error-bound
///   rounding test, falling back to `cosm1_tiny_hard`;
/// * otherwise: `cos(x) - 1 = -2 * sin^2(x / 2)`, where `sin(x / 2)` is
///   obtained from a reduction of `x / 2` and the `SIN_K_PI_OVER_128` table,
///   with a Ziv-style rounding test that falls back to `cosm1_accurate`.
///
/// Returns NaN for infinite or NaN input and `0.0` for `+-0`.
pub fn f_cosm1(x: f64) -> f64 {
let x_e = (x.to_bits() >> 52) & 0x7ff;
const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
let y: DoubleDouble;
let k;
let mut argument_reduction = LargeArgumentReduction::default();
// |x| < 2^16: the small range reduction (or a small-argument shortcut) applies
if x_e < E_BIAS + 16 {
// |x| < 2^-7
if x_e < E_BIAS - 7 {
// |x| < 2^-27
if x_e < E_BIAS - 27 {
// Signed zeros.
if x == 0.0 {
return 0.0;
}
// Taylor expansion for small cos(x) - 1 ~ -x^2/2 + x^4/24 + O(x^6)
let x_sqr = x * x;
const A0: f64 = -1. / 2.;
const A1: f64 = 1. / 24.;
let r0 = f_fmla(x_sqr, A1, A0);
return r0 * x_sqr;
}
// Generated by Sollya:
// d = [2^-27, 2^-7];
// f_cosm1 = (cos(x) - 1);
// Q = fpminimax(f_cosm1, [|2,4,6,8|], [|0, D...|], d);
// See ./notes/cosm1.sollya
let x2 = DoubleDouble::from_exact_mult(x, x);
let p = f_polyeval4(
x2.hi,
f64::from_bits(0xbfe0000000000000),
f64::from_bits(0x3fa5555555555555),
f64::from_bits(0xbf56c16c16b9c2b7),
f64::from_bits(0x3efa014d03f38855),
);
let r = DoubleDouble::quick_mult_f64(x2, p);
// Error bound of the fast evaluation: x * (2^-47 * x^2 + 2^-65)
let eps = x * f_fmla(
x2.hi,
f64::from_bits(0x3d00000000000000), // 2^-47
f64::from_bits(0x3be0000000000000), // 2^-65
);
// Accept the fast result only if both ends of the error interval
// round to the same f64.
let ub = r.hi + (r.lo + eps);
let lb = r.hi + (r.lo - eps);
if ub == lb {
return r.to_f64();
}
return cosm1_tiny_hard(x);
} else {
// Small range reduction, applied to x / 2 (half-angle identity below).
(y, k) = range_reduction_small(x * 0.5);
}
} else {
// Inf or NaN
if x_e > 2 * E_BIAS {
// cos(+-Inf) = NaN
return x + f64::NAN;
}
// Large range reduction, applied to x / 2 (half-angle identity below).
// k = argument_reduction.high_part(x);
(k, y) = argument_reduction.reduce(x * 0.5);
}
// Computing cos(x) - 1 as follows:
// cos(x) - 1 = -2*sin^2(x/2)
let r_sincos = sincos_eval(y);
// cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
let sk = SIN_K_PI_OVER_128[(k & 255) as usize];
let ck = SIN_K_PI_OVER_128[((k.wrapping_add(64)) & 255) as usize];
let sin_k = DoubleDouble::from_bit_pair(sk);
let cos_k = DoubleDouble::from_bit_pair(ck);
// sin(x/2) = sin(y) * cos(k*pi/128) + cos(y) * sin(k*pi/128)
let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, sin_k);
let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, cos_k);
// sin_k_cos_y is always >> cos_k_sin_y
let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
rr = DoubleDouble::from_exact_add(rr.hi, rr.lo);
// -2 * sin^2(x/2)
rr = DoubleDouble::quick_mult(rr, rr);
rr = DoubleDouble::quick_mult_f64(rr, -2.);
let rlp = rr.lo + r_sincos.err;
let rlm = rr.lo - r_sincos.err;
let r_upper = rr.hi + rlp; // (rr.lo + ERR);
let r_lower = rr.hi + rlm; // (rr.lo - ERR);
// Ziv's accuracy test
if r_upper == r_lower {
return rr.to_f64();
}
cosm1_accurate(y, sin_k, cos_k)
}
// Unit tests for `f_cosm1`.
#[cfg(test)]
mod tests {
    use super::*;

    // Covers the Taylor branch, the small-argument polynomial branch, the
    // range-reduced branch, both signed zeros and the non-finite inputs.
    #[test]
    fn f_cosm1f_test() {
        assert_eq!(f_cosm1(0.0017700195313803402), -0.000001566484161754997);
        assert_eq!(
            f_cosm1(0.0000000011641532182693484),
            -0.0000000000000000006776263578034406
        );
        assert_eq!(f_cosm1(0.006164513528517324), -0.000019000553351160402);
        assert_eq!(f_cosm1(6.2831853071795862), -2.999519565323715e-32);
        assert_eq!(f_cosm1(0.00015928394), -1.2685686744140693e-8);
        assert_eq!(f_cosm1(0.0), 0.0);
        // The original repeated the +0.0 assertion; exercise -0.0 instead.
        assert_eq!(f_cosm1(-0.0), 0.0);
        assert_eq!(f_cosm1(std::f64::consts::PI), -2.);
        assert_eq!(f_cosm1(0.5), -0.12241743810962728);
        assert_eq!(f_cosm1(0.7), -0.23515781271551153);
        assert_eq!(f_cosm1(1.7), -1.1288444942955247);
        assert!(f_cosm1(f64::INFINITY).is_nan());
        assert!(f_cosm1(f64::NEG_INFINITY).is_nan());
        assert!(f_cosm1(f64::NAN).is_nan());
        assert_eq!(f_cosm1(0.0002480338), -3.0760382813519806e-8);
    }
}

152
vendor/pxfm/src/csc.rs vendored Normal file
View File

@@ -0,0 +1,152 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::double_double::DoubleDouble;
use crate::sin::{get_sin_k_rational, range_reduction_small, sincos_eval};
use crate::sin_table::SIN_K_PI_OVER_128;
use crate::sincos_dyadic::{range_reduction_small_f128, sincos_eval_dyadic};
use crate::sincos_reduce::LargeArgumentReduction;
/// Slow path of `f_csc`: redoes the computation with the dyadic
/// (extended-precision) helpers and returns the reciprocal of `sin(x)`.
///
/// `x_e` is the biased exponent of `x` and selects which reduction to repeat;
/// `k` is the table index produced by the fast-path reduction.
#[cold]
fn csc_accurate(x: f64, argument_reduction: &mut LargeArgumentReduction, x_e: u64, k: u64) -> f64 {
    const EXP_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
    // Same exponent threshold as the fast path uses to pick a reduction.
    let reduced = if x_e >= EXP_BIAS + 16 {
        argument_reduction.accurate()
    } else {
        range_reduction_small_f128(x)
    };
    let eval = sincos_eval_dyadic(&reduced);
    // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
    let sin_k = get_sin_k_rational(k);
    let cos_k = get_sin_k_rational(k.wrapping_add(64));
    // sin(x) = sin(k * pi/128 + u)
    //        = sin(u) * cos(k*pi/128) + cos(u) * sin(k*pi/128)
    let sin_x = (sin_k * eval.v_cos) + (cos_k * eval.v_sin);
    sin_x.reciprocal().fast_as_f64()
}
/// Cosecant for double precision
///
/// ULP 0.5
///
/// Returns `+-inf` for `+-0` (sign preserved) and NaN for infinite or NaN
/// input. Tiny arguments use `1/x` or `1/x + x/6`; all others compute `sin(x)`
/// from a range reduction plus the `SIN_K_PI_OVER_128` table and take its
/// reciprocal, with a Ziv-style rounding test that falls back to
/// `csc_accurate`.
pub fn f_csc(x: f64) -> f64 {
let x_e = (x.to_bits() >> 52) & 0x7ff;
const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
let y: DoubleDouble;
let k;
let mut argument_reduction = LargeArgumentReduction::default();
// |x| < 2^16: the small range reduction (or a tiny-argument shortcut) applies
if x_e < E_BIAS + 16 {
// |x| < 2^-26
if x_e < E_BIAS - 26 {
// Signed zeros.
if x == 0.0 {
return if x.is_sign_negative() {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
// |x| < 2^-52: the plain reciprocal is returned
if x_e < E_BIAS - 52 {
return 1. / x;
}
// For |x| < 2^-26, |sin(x) - x| < ulp(x)/2.
// csc(x) ~ 1/x + x/6: the reciprocal is taken in double-double and
// x * (1/6) is added to it (0x3fc5555555555555 is 1/6).
let rcp = DoubleDouble::from_quick_recip(x);
return DoubleDouble::f64_mul_f64_add(x, f64::from_bits(0x3fc5555555555555), rcp)
.to_f64();
}
// Small range reduction.
(y, k) = range_reduction_small(x);
} else {
// Inf or NaN
if x_e > 2 * E_BIAS {
// sin(+-Inf) = NaN
return x + f64::NAN;
}
// Large range reduction.
(k, y) = argument_reduction.reduce(x);
}
let r_sincos = sincos_eval(y);
// Fast look up version, but needs 256-entry table.
// cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
let sk = SIN_K_PI_OVER_128[(k & 255) as usize];
let ck = SIN_K_PI_OVER_128[((k.wrapping_add(64)) & 255) as usize];
let sin_k = DoubleDouble::from_bit_pair(sk);
let cos_k = DoubleDouble::from_bit_pair(ck);
// sin(x) = sin(y) * cos(k*pi/128) + cos(y) * sin(k*pi/128)
let sin_k_cos_y = DoubleDouble::quick_mult(r_sincos.v_cos, sin_k);
let cos_k_sin_y = DoubleDouble::quick_mult(r_sincos.v_sin, cos_k);
// sin_k_cos_y is always >> cos_k_sin_y
let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
rr = DoubleDouble::from_exact_add(rr.hi, rr.lo);
// csc(x) = 1 / sin(x)
rr = rr.recip();
let rlp = rr.lo + r_sincos.err;
let rlm = rr.lo - r_sincos.err;
let r_upper = rr.hi + rlp; // (rr.lo + ERR);
let r_lower = rr.hi + rlm; // (rr.lo - ERR);
// Ziv's accuracy test
if r_upper == r_lower {
return rr.to_f64();
}
csc_accurate(x, &mut argument_reduction, x_e, k)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_csc() {
assert_eq!(f_csc(0.000000014901161055069778), 67108864.62500001);
assert_eq!(f_csc( 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000541722315998), f64::INFINITY);
assert_eq!(f_csc(0.0), f64::INFINITY);
assert_eq!(f_csc(-0.0), f64::NEG_INFINITY);
assert!(f_csc(f64::NAN).is_nan());
assert_eq!(f_csc(1.0), 1.1883951057781212);
assert_eq!(f_csc(-0.5), -2.085829642933488);
}
}

137
vendor/pxfm/src/cube_roots/cbrt.rs vendored Normal file
View File

@@ -0,0 +1,137 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::cube_roots::cbrtf::halley_refine_d;
use crate::double_double::DoubleDouble;
use crate::exponents::fast_ldexp;
use crate::polyeval::f_polyeval4;
/// Computes cube root
///
/// Max found ULP 0.5
///
/// The input is split into a mantissa `m` in `[1, 2)` and an exponent
/// `e = 3 * q + r`. A polynomial estimate of `m^(1/3)` scaled by `2^(r/3)` is
/// refined by one Halley step and one Newton-Raphson correction in partial
/// double-double precision, then rescaled by `2^q` with the sign of `x`.
/// Zero, infinities and NaN are returned as `x + x`.
pub fn f_cbrt(x: f64) -> f64 {
    // 1; 2^{1/3}; 2^{2/3}
    static ESCALE: [f64; 3] = [
        1.0,
        f64::from_bits(0x3ff428a2f98d728b),
        f64::from_bits(0x3ff965fea53d6e3d),
    ];
    const FRACTION_MASK: u64 = (1u64 << 52) - 1;

    let raw = x.to_bits();
    let biased = ((raw >> 52) & 0x7ff) as i32;
    if biased == 0x7ff || x == 0.0 {
        return x + x;
    }

    // Subnormals (zero was handled above) are rescaled by 2^54 first and the
    // scaling is removed from the exponent again.
    let (biased_exp, fraction) = if biased == 0 {
        let scaled = (x * f64::from_bits(0x4350000000000000)).to_bits(); // * 2^54
        (((scaled >> 52) & 0x7ff) as i32 - 54, scaled & FRACTION_MASK)
    } else {
        (biased, raw & FRACTION_MASK)
    };
    let exp = biased_exp - 1023;
    let m = f64::from_bits(fraction | (0x3ff << 52));

    // Polynomial for x^(1/3) on [1.0; 2.0]
    // Generated by Sollya:
    // d = [1.0, 2.0];
    // f_cbrt = x^(1/3);
    // Q = fpminimax(f_cbrt, 4, [|D...|], d, relative, floating);
    // See ./notes/cbrt.sollya
    let p = f_polyeval4(
        m,
        f64::from_bits(0x3fe1b0babceeaafa),
        f64::from_bits(0x3fe2c9a3e8e06a3c),
        f64::from_bits(0xbfc4dc30afb71885),
        f64::from_bits(0x3f97a8d3e05458e4),
    );

    // split exponent e = 3*q + r with r in {0,1,2}
    // div_euclid/rem_euclid keep r >= 0 for negative exponents
    let q = exp.div_euclid(3);
    let rem_scale = exp.rem_euclid(3);

    let estimate = p * ESCALE[rem_scale as usize];
    let mm = fast_ldexp(m, rem_scale); // bring mantissa into [1;8]
    let recip = 1.0 / mm;

    // One Halley's method step
    // then refine in partial double-double precision with Newton-Raphson iteration
    let y0 = halley_refine_d(estimate, mm);
    let y0_sq = DoubleDouble::from_exact_mult(y0, y0);
    let y0_cube = DoubleDouble::quick_mult_f64(y0_sq, y0);

    // Newton-Raphson step
    // h = (y0^3 - a) / a
    // y1 = y0 - 1/3 * h * y0
    let h = ((y0_cube.hi - mm) + y0_cube.lo) * recip;
    let refined = f_fmla(-f64::from_bits(0x3fd5555555555555), y0 * h, y0);

    f64::copysign(fast_ldexp(refined, q), x)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_cbrt() {
assert_eq!(f_cbrt(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005432309223745),
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000017579026781511548);
assert_eq!(f_cbrt(1.225158611559834), 1.0700336588124544);
assert_eq!(f_cbrt(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000139491540182158), 1.1173329935611586e-103);
assert_eq!(f_cbrt(27.0), 3.0);
assert_eq!(f_cbrt(64.0), 4.0);
assert_eq!(f_cbrt(125.0), 5.0);
assert_eq!(f_cbrt(216.0), 6.0);
assert_eq!(f_cbrt(343.0), 7.0);
assert_eq!(f_cbrt(512.0), 8.0);
assert_eq!(f_cbrt(729.0), 9.0);
assert_eq!(f_cbrt(-729.0), -9.0);
assert_eq!(f_cbrt(-512.0), -8.0);
assert_eq!(f_cbrt(-343.0), -7.0);
assert_eq!(f_cbrt(-216.0), -6.0);
assert_eq!(f_cbrt(-125.0), -5.0);
assert_eq!(f_cbrt(-64.0), -4.0);
assert_eq!(f_cbrt(-27.0), -3.0);
assert_eq!(f_cbrt(0.0), 0.0);
assert_eq!(f_cbrt(f64::INFINITY), f64::INFINITY);
assert_eq!(f_cbrt(f64::NEG_INFINITY), f64::NEG_INFINITY);
assert!(f_cbrt(f64::NAN).is_nan());
}
}

129
vendor/pxfm/src/cube_roots/cbrtf.rs vendored Normal file
View File

@@ -0,0 +1,129 @@
/*
* // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
/// One Halley iteration for the cube root of `a`, starting from estimate `x`:
/// `x * (x^3 + 2a) / (2x^3 + a)`, with both sums formed through `f_fmla`.
#[inline(always)]
pub(crate) fn halley_refine_d(x: f64, a: f64) -> f64 {
    let cube = x * x * x;
    let numerator = f_fmla(2., a, cube);
    let denominator = f_fmla(2., cube, a);
    x * numerator / denominator
}
/// One Halley iteration for the cube root of `a`, starting from estimate `x`:
/// `x * (x^3 + 2a) / (2x^3 + a)`.
///
/// The step is evaluated in `f64` and rounded back to `f32`. In pure `f32` the
/// intermediate product `x * (x^3 + 2a)` grows like `|a|^(4/3)`: it overflows
/// for `|a|` above roughly `3.5e28` and underflows for `|a|` below roughly
/// `1.6e-29`, even though the final quotient is representable.
#[inline(always)]
const fn halley_refine(x: f32, a: f32) -> f32 {
    let xd = x as f64;
    let ad = a as f64;
    let cube = xd * xd * xd;
    (xd * (cube + 2f64 * ad) / (2f64 * cube + ad)) as f32
}
/// Cbrt for given value for const context.
/// This is simplified version just to make a good approximation on const context.
///
/// Returns `x + x` for infinities and NaN and `x` itself for `+-0`. All other
/// inputs, subnormals included, get a bit-trick initial estimate followed by
/// two Halley iterations.
#[inline]
pub const fn cbrtf(x: f32) -> f32 {
    const SIGN_MASK: u32 = 0x80000000;
    const ABS_MASK: u32 = 0x7fffffff;
    // Exponent-bias constants of the classic "divide the bit pattern by 3"
    // estimate; B2 additionally undoes the 2^24 pre-scaling of subnormals.
    const B1: u32 = 709958130;
    const B2: u32 = 642849266;
    const TWO_EXP_24: f32 = f32::from_bits(0x4b800000);

    let bits = x.to_bits();
    let au = bits.wrapping_shl(1);
    if au >= (0xffu32 << 24) {
        return x + x; /* inf, nan */
    }
    if au == 0 {
        return x; /* +-0 */
    }

    // A zero exponent field means subnormal: dividing such a bit pattern by 3
    // is not a usable estimate, so scale into the normal range first.
    let estimate_bits = if au < (1u32 << 24) {
        let scaled = (x * TWO_EXP_24).to_bits();
        (scaled & SIGN_MASK) | ((scaled & ABS_MASK) / 3).wrapping_add(B2)
    } else {
        (bits & SIGN_MASK) | ((bits & ABS_MASK) / 3).wrapping_add(B1)
    };

    let first = halley_refine(f32::from_bits(estimate_bits), x);
    halley_refine(first, x)
}
/// Computes cube root
///
/// Peak ULP on 64 bit = 0.49999577
///
/// A bit-trick estimate (bit pattern divided by 3 plus a bias constant) is
/// refined by two Halley iterations in `f64` and rounded once to `f32`.
/// Infinities and NaN return `x + x`; `+-0` returns `x`.
#[inline]
pub fn f_cbrtf(x: f32) -> f32 {
    const SIGN_MASK: u32 = 0x80000000;
    const ABS_MASK: u32 = 0x7fffffff;

    let bits = x.to_bits();
    let doubled = bits.wrapping_shl(1);
    if doubled >= (0xffu32 << 24) {
        return x + x; /* inf, nan */
    }
    if doubled == 0 {
        return x; /* cbrt(+-0) is itself */
    }

    let estimate_bits = if (bits & ABS_MASK) < 0x00800000 {
        /* subnormal: scale into the normal range; B2 compensates for 2^24 */
        const TWO_EXP_24: f32 = f32::from_bits(0x4b800000);
        const B2: u32 = 642849266;
        let scaled = (x * TWO_EXP_24).to_bits();
        (scaled & SIGN_MASK) | ((scaled & ABS_MASK) / 3).wrapping_add(B2)
    } else {
        const B1: u32 = 709958130;
        (bits & SIGN_MASK) | ((bits & ABS_MASK) / 3).wrapping_add(B1)
    };

    let radicand = x as f64;
    let first = halley_refine_d(f32::from_bits(estimate_bits) as f64, radicand);
    halley_refine_d(first, radicand) as f32
}
// Unit tests for `f_cbrtf`.
#[cfg(test)]
mod tests {
use super::*;
// Covers zero, exact perfect cubes of both signs, infinities and NaN.
#[test]
fn test_fcbrtf() {
assert_eq!(f_cbrtf(0.0), 0.0);
assert_eq!(f_cbrtf(-27.0), -3.0);
assert_eq!(f_cbrtf(27.0), 3.0);
assert_eq!(f_cbrtf(64.0), 4.0);
assert_eq!(f_cbrtf(-64.0), -4.0);
assert_eq!(f_cbrtf(f32::NEG_INFINITY), f32::NEG_INFINITY);
assert_eq!(f_cbrtf(f32::INFINITY), f32::INFINITY);
assert!(f_cbrtf(f32::NAN).is_nan());
}
}

37
vendor/pxfm/src/cube_roots/mod.rs vendored Normal file
View File

@@ -0,0 +1,37 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod cbrt;
mod cbrtf;
mod rcbrt;
mod rcbrtf;
pub use cbrt::f_cbrt;
pub use cbrtf::{cbrtf, f_cbrtf};
pub use rcbrt::f_rcbrt;
pub use rcbrtf::f_rcbrtf;

191
vendor/pxfm/src/cube_roots/rcbrt.rs vendored Normal file
View File

@@ -0,0 +1,191 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::exponents::fast_ldexp;
use crate::polyeval::f_polyeval6;
//
// // y1 = y0 + 1/3 * y0 * (1 - a * y0 * y0 * y0)
// #[inline]
// fn raphson_step(x: f64, a: f64) -> f64 {
// let h = f_fmla(-a * x, x * x, 1.0);
// f_fmla(1. / 3. * h, x, x)
// }
/// Division-free Halley step for the reciprocal cube root of `a`, starting
/// from estimate `x`:
///
/// `y1 = y0 * (k1 - c * (k2 - k3 * c))`, with `c = a * y0^3`,
/// `k1 = 14/9`, `k2 = 7/9`, `k3 = 2/9`.
///
/// At the fixed point `c = 1` the bracket equals `14/9 - 7/9 + 2/9 = 1`.
#[inline(always)]
fn halleys_div_free(x: f64, a: f64) -> f64 {
    const K1: f64 = 14. / 9.;
    const K2: f64 = 7. / 9.;
    const K3: f64 = 2. / 9.;
    let c = a * x * x * x;
    // Horner form of k1 - k2 * c + k3 * c^2
    let inner = f_fmla(-K3, c, K2);
    let bracket = f_fmla(-c, inner, K1);
    bracket * x
}
/// Computes 1/cbrt(x)
///
/// ULP 0.5
///
/// Returns `+-0` for `+-inf`, `+-inf` for `+-0` (sign preserved) and NaN for
/// NaN. Otherwise the input is split into a mantissa `m` in `[1, 2)` and an
/// exponent `e = 3 * q + r`; a polynomial estimate of `m^(-1/3)` scaled by
/// `2^(-r/3)` is refined by one division-free Halley step and one
/// Newton-Raphson correction in double-double precision, then rescaled by
/// `2^(-q)` with the sign of `a`.
pub fn f_rcbrt(a: f64) -> f64 {
// Decompose a = m * 2^e, with m in [1, 2) (built below by forcing the
// exponent field of the mantissa bits to the bias)
let xu = a.to_bits();
let exp = ((xu >> 52) & 0x7ff) as i32;
let mut e = ((xu >> 52) & 0x7ff) as i32;
let mut mant = xu & ((1u64 << 52) - 1);
// Inf or NaN
if exp == 0x7ff {
if a.is_infinite() {
return if a.is_sign_negative() { -0.0 } else { 0.0 };
}
return a + a;
}
// Signed zeros
if exp == 0 && a == 0. {
return if a.is_sign_negative() {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
// Normalize subnormal
if exp == 0 {
let norm = a * f64::from_bits(0x4350000000000000); // * 2^54
let norm_bits = norm.to_bits();
mant = norm_bits & ((1u64 << 52) - 1);
e = ((norm_bits >> 52) & 0x7ff) as i32 - 54;
}
e -= 1023;
mant |= 0x3ff << 52;
let m = f64::from_bits(mant);
// Polynomial for x^(-1/3) on [1.0; 2.0]
// Generated by Sollya:
// d = [1.0, 2.0];
// f_inv_cbrt = x^(-1/3);
// Q = fpminimax(f_inv_cbrt, 5, [|D...|], d, relative, floating);
// See ./notes/inv_cbrt.sollya
let p = f_polyeval6(
m,
f64::from_bits(0x3ffc7f365bceaf71),
f64::from_bits(0xbff90e741fb9c896),
f64::from_bits(0x3ff3e68b9b2cd237),
f64::from_bits(0xbfe321c5eb24a185),
f64::from_bits(0x3fc3fa269b897f69),
f64::from_bits(0xbf916d6f13849fd1),
);
// split exponent e = 3*q + r with r in {0,1,2}
// use div_euclid/rem_euclid to get r >= 0
let q = e.div_euclid(3);
let rem_scale = e.rem_euclid(3);
// 1; 2^{-1/3}; 2^{-2/3}
static ESCALE: [u64; 3] = [1.0f64.to_bits(), 0x3fe965fea53d6e3d, 0x3fe428a2f98d728b];
let z = p * f64::from_bits(ESCALE[rem_scale as usize]);
let mm = fast_ldexp(m, rem_scale); // bring domain into [1;8]
// One Halley's method step
// then refine in partial double-double precision with Newton-Raphson iteration
let y0 = halleys_div_free(z, mm);
// hb = mm * y0^3 in double-double; it equals 1 when y0 is exact
let d2y = DoubleDouble::from_exact_mult(y0, y0);
let d3y = DoubleDouble::quick_mult_f64(d2y, y0);
let hb = DoubleDouble::quick_mult_f64(d3y, mm);
let y: f64;
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
))]
{
// decompose double-double in linear FMA sums
// r = (1.0 - hb.hi - hb.lo) * y0 = y0 - hb.hi * y0 - hb.lo * y0 = fma(-hb.lo, y0, fma(-hb.hi, y0, y0))
let r = f_fmla(-hb.lo, y0, f_fmla(hb.hi, -y0, y0));
// y1 = y0 + 1/3 * y0 * (1 - a * y0 * y0 * y0) = y0 + 1/3 * r
y = f_fmla(1. / 3., r, y0);
}
// Without hardware FMA the residual (1 - hb) * y0 is formed in
// double-double arithmetic instead.
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
all(target_arch = "aarch64", target_feature = "neon")
)))]
{
let m_hb = DoubleDouble::full_add_f64(-hb, 1.0);
let r = DoubleDouble::quick_mult_f64(m_hb, y0);
y = f_fmla(1. / 3., r.to_f64(), y0);
}
// 1/cbrt(2^(3q)) = 2^(-q); the sign of the result follows the input
f64::copysign(fast_ldexp(y, -q), a)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_rcbrt() {
assert_eq!(f_rcbrt(0.9999999999999717), 1.0000000000000095);
assert_eq!(f_rcbrt(-68355745214719140000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
-0.000000000000000000000000000000000000000002445728958868668);
assert_eq!(f_rcbrt(-96105972807656840000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
-0.0000000000000000000000000000000000000000000000000000000002183148143573148);
assert_eq!(f_rcbrt(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000139491540182158),
8949883389846071000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
assert_eq!(f_rcbrt(0.00008386280387617153), 22.846001824951983);
assert_eq!(f_rcbrt(-125.0), -0.2);
assert_eq!(f_rcbrt(125.0), 0.2);
assert_eq!(f_rcbrt(1.0), 1.0);
assert_eq!(f_rcbrt(-1.0), -1.0);
assert_eq!(f_rcbrt(0.0), f64::INFINITY);
assert_eq!(f_rcbrt(-27.0), -1. / 3.);
assert_eq!(
f_rcbrt(2417851639214765300000000.),
0.000000007450580596938716
);
assert_eq!(f_rcbrt(27.0), 1. / 3.);
assert_eq!(f_rcbrt(64.0), 0.25);
assert_eq!(f_rcbrt(-64.0), -0.25);
assert_eq!(f_rcbrt(f64::NEG_INFINITY), -0.0);
assert_eq!(f_rcbrt(f64::INFINITY), 0.0);
assert!(f_rcbrt(f64::NAN).is_nan());
}
}

122
vendor/pxfm/src/cube_roots/rcbrtf.rs vendored Normal file
View File

@@ -0,0 +1,122 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
// // y1 = y0 * (2+x*y0^3)/(1+2*x*y0^3)
// #[inline(always)]
// fn halley_refine_d(x: f64, a: f64) -> f64 {
// let tx = x * x * x;
// x * f_fmla(tx, a, 2.0) / f_fmla(2. * a, tx, 1.0)
// }
/// One Newton-Raphson refinement of `x` towards `a^(-1/3)`.
///
/// Evaluates `x * (4/3 - (a/3) * x^3)`, which is Newton's iteration for the root of
/// `y^(-3) - a`. `f_fmla(p, q, r)` is presumably a fused/unfused `p * q + r`.
#[inline(always)]
fn rapshon_refine_inv_cbrt(x: f64, a: f64) -> f64 {
    let neg_third_a = -1. / 3. * a;
    let x_cubed = x * x * x;
    let factor = f_fmla(neg_third_a, x_cubed, 4. / 3.);
    x * factor
}
// Division-free Halley-type step towards `a^(-1/3)`, where `x` is the current estimate y0:
// y1 = y0 * (k1 - c * (k2 - k3 * c)), c = a * y0 * y0 * y0
// k1 = 14/9, k2 = 7/9, k3 = 2/9
#[inline(always)]
fn halleys_div_free(x: f64, a: f64) -> f64 {
    const K1: f64 = 14. / 9.;
    const K2: f64 = 7. / 9.;
    const K3: f64 = 2. / 9.;
    // c is 1 when x is exactly a^(-1/3); the multiplication order is kept as a*x*x*x.
    let c = a * x * x * x;
    // inner = k2 - k3 * c
    let inner = f_fmla(-K3, c, K2);
    // outer = k1 - c * inner
    let outer = f_fmla(-c, inner, K1);
    outer * x
}
/// Computes 1/cbrt(x)
///
/// ULP 0.5
///
/// Special values: `±inf -> ±0`, `±0 -> ±inf`, NaN propagates. All other inputs get a
/// bit-level seed that is then refined in `f64` and rounded back to `f32`.
#[inline]
pub fn f_rcbrtf(x: f32) -> f32 {
    const SIGN_MASK: u32 = 0x8000_0000;
    const ABS_MASK: u32 = 0x7fff_ffff;
    const INF_BITS: u32 = 0x7f80_0000;
    const MIN_NORMAL_BITS: u32 = 0x0080_0000;
    // Magic constant of the exponent-division seed for x^(-1/3).
    const SEED_MAGIC: u32 = 0x54a21d2a;
    const TWO_EXP_24: f32 = f32::from_bits(0x4b800000);

    let bits = x.to_bits();
    let sign = bits & SIGN_MASK;
    let magnitude = bits & ABS_MASK;

    if magnitude == INF_BITS {
        // 1/cbrt(±inf) = ±0
        return f32::from_bits(sign);
    }
    if magnitude > INF_BITS {
        // NaN: quiet it and propagate
        return x + x;
    }
    if magnitude == 0 {
        // 1/cbrt(±0) = ±inf
        return f32::from_bits(sign | INF_BITS);
    }

    let seed = if magnitude < MIN_NORMAL_BITS {
        // Subnormal: scale by 2^24 into the normal range first. Since
        // 1/cbrt(x) = 2^8 / cbrt(x * 2^24), the seed exponent is raised by 8.
        let scaled = (x * TWO_EXP_24).to_bits() & ABS_MASK;
        (SEED_MAGIC + (8u32 << 23)).wrapping_sub(scaled / 3)
    } else {
        SEED_MAGIC.wrapping_sub(magnitude / 3)
    };

    let dx = x as f64;
    let y0 = f32::from_bits(sign | seed) as f64;
    // Two division-free Halley steps followed by one Newton-Raphson step, all in f64.
    let y1 = halleys_div_free(y0, dx);
    let y2 = halleys_div_free(y1, dx);
    rapshon_refine_inv_cbrt(y2, dx) as f32
}
// Exact-equality tests for `f_rcbrtf` on special values and perfect cubes.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_fcbrtf() {
// Signed zeros map to the infinity of the same sign.
assert_eq!(f_rcbrtf(0.0), f32::INFINITY);
assert_eq!(f_rcbrtf(-0.0), f32::NEG_INFINITY);
// Perfect cubes: the result carries the sign of the argument.
assert_eq!(f_rcbrtf(-27.0), -1. / 3.);
assert_eq!(f_rcbrtf(27.0), 1. / 3.);
assert_eq!(f_rcbrtf(64.0), 0.25);
assert_eq!(f_rcbrtf(-64.0), -0.25);
// Infinities map to zero. `assert_eq!` does not distinguish -0.0 from 0.0,
// so the sign of the zero is not actually verified by these two lines.
assert_eq!(f_rcbrtf(f32::NEG_INFINITY), -0.0);
assert_eq!(f_rcbrtf(f32::INFINITY), 0.0);
// NaN propagates.
assert!(f_rcbrtf(f32::NAN).is_nan());
}
}

1012
vendor/pxfm/src/double_double.rs vendored Normal file

File diff suppressed because it is too large Load Diff

881
vendor/pxfm/src/dyadic_float.rs vendored Normal file
View File

@@ -0,0 +1,881 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::bits::EXP_MASK;
use crate::common::f_fmla;
use std::ops::{Add, Mul, Sub};
/// Sign of a dyadic float, stored as a single bit value (`Pos = 0`, `Neg = 1`).
#[repr(u8)]
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
pub(crate) enum DyadicSign {
    Pos = 0,
    Neg = 1,
}

impl DyadicSign {
    /// Returns the opposite sign.
    #[inline]
    pub(crate) fn negate(self) -> Self {
        if self == DyadicSign::Pos {
            DyadicSign::Neg
        } else {
            DyadicSign::Pos
        }
    }

    /// Returns the sign as a bit: 0 for positive, 1 for negative.
    #[inline]
    pub(crate) const fn to_bit(self) -> u8 {
        self as u8
    }

    /// Sign of a product: negative exactly when the two signs differ.
    #[inline]
    pub(crate) const fn mult(self, rhs: Self) -> Self {
        if self as u8 == rhs as u8 {
            DyadicSign::Pos
        } else {
            DyadicSign::Neg
        }
    }
}
// Width in bits of the `DyadicFloat128` mantissa.
const BITS: u32 = 128;
/// Sign / exponent / 128-bit mantissa number.
///
/// The represented value is `(-1)^sign * mantissa * 2^exponent`; for example
/// `exponent: -127, mantissa: 1 << 127` is `1.0`. A value is "normalized" when the
/// top bit of `mantissa` is set (see `normalize`); most operations assume
/// normalized operands. Zero is represented by `mantissa == 0`.
#[derive(Copy, Clone, Debug)]
pub(crate) struct DyadicFloat128 {
// Sign of the value.
pub(crate) sign: DyadicSign,
// Power of two that scales `mantissa` (exponent of the least significant bit).
pub(crate) exponent: i16,
// Unsigned integer significand.
pub(crate) mantissa: u128,
}
#[inline]
pub(crate) const fn f64_from_parts(sign: DyadicSign, exp: u64, mantissa: u64) -> f64 {
let r_sign = (if sign.to_bit() == 0 { 0u64 } else { 1u64 }).wrapping_shl(63);
let r_exp = exp.wrapping_shl(52);
f64::from_bits(r_sign | r_exp | mantissa)
}
/// Returns the upper 128 bits of the full 256-bit product `a * b`.
///
/// Schoolbook multiplication on 64-bit halves; the low 128 bits of the product are
/// discarded (truncation, no rounding).
#[inline]
pub(crate) fn mulhi_u128(a: u128, b: u128) -> u128 {
    const LOW64: u128 = 0xffff_ffff_ffff_ffff;
    let (a_hi, a_lo) = (a >> 64, a & LOW64);
    let (b_hi, b_lo) = (b >> 64, b & LOW64);

    let p_ll = a_lo * b_lo;
    let p_lh = a_lo * b_hi;
    let p_hl = a_hi * b_lo;
    let p_hh = a_hi * b_hi;

    // Everything that lands in bits 64..128 of the product; its own upper half is the
    // carry into the high word.
    let middle = (p_ll >> 64)
        .wrapping_add(p_lh & LOW64)
        .wrapping_add(p_hl & LOW64);

    let upper = (p_lh >> 64)
        .wrapping_add(p_hl >> 64)
        .wrapping_add(middle >> 64);
    p_hh.wrapping_add(upper)
}
/// Unbiased binary exponent of `x`.
///
/// Zero yields 0 and subnormals yield the minimum normal exponent (-1022); every
/// other input yields its exponent field minus the bias (so inf/NaN give 1024).
#[inline]
const fn explicit_exponent(x: f64) -> i16 {
    const EXP_FIELD_MASK: u64 = (1u64 << 11) - 1;
    const EXP_BIAS: i16 = 1023;
    if x == 0. {
        return 0;
    }
    if x.is_subnormal() {
        return 1 - EXP_BIAS;
    }
    ((x.to_bits() >> 52) & EXP_FIELD_MASK) as i16 - EXP_BIAS
}
/// Significand of `x` as an integer, with the hidden leading bit made explicit.
///
/// Zero and subnormals have no hidden bit, so only their 52 fraction bits are returned.
#[inline]
const fn explicit_mantissa(x: f64) -> u64 {
    const FRACTION_MASK: u64 = (1u64 << 52) - 1;
    const HIDDEN_BIT: u64 = 1u64 << 52;
    let fraction = x.to_bits() & FRACTION_MASK;
    let has_hidden_bit = !(x.is_subnormal() || x == 0.);
    if has_hidden_bit {
        HIDDEN_BIT | fraction
    } else {
        fraction
    }
}
impl DyadicFloat128 {
#[inline]
pub(crate) const fn zero() -> Self {
Self {
sign: DyadicSign::Pos,
exponent: 0,
mantissa: 0,
}
}
#[inline]
pub(crate) const fn new_from_f64(x: f64) -> Self {
let sign = if x.is_sign_negative() {
DyadicSign::Neg
} else {
DyadicSign::Pos
};
let exponent = explicit_exponent(x) - 52;
let mantissa = explicit_mantissa(x) as u128;
let mut new_val = Self {
sign,
exponent,
mantissa,
};
new_val.normalize();
new_val
}
#[inline]
pub(crate) fn new(sign: DyadicSign, exponent: i16, mantissa: u128) -> Self {
let mut new_item = DyadicFloat128 {
sign,
exponent,
mantissa,
};
new_item.normalize();
new_item
}
/// Computes `1 / a` to more than `f64` precision.
///
/// Starts from the `f64` quotient (accurate to about 53 bits) and applies one
/// Newton step `r + r * (1 - a * r)` in 128-bit arithmetic.
#[inline]
pub(crate) fn accurate_reciprocal(a: f64) -> Self {
    const F128_ONE: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -127,
        mantissa: 0x8000_0000_0000_0000_0000_0000_0000_0000_u128,
    };
    // The seed is computed as 4/a and then scaled back by 2^-2.
    // NOTE(review): presumably this keeps the f64 quotient out of the subnormal
    // range for very large `a` - confirm.
    let mut r = DyadicFloat128::new_from_f64(4.0 / a);
    r.exponent -= 2;
    // residual = 1 - a * r
    let minus_ar = DyadicFloat128::new_from_f64(-a).quick_mul(&r);
    let residual = F128_ONE.quick_add(&minus_ar);
    // r + r * residual
    let correction = r.quick_mul(&residual);
    r.quick_add(&correction)
}
/// Computes `a / b` as `a * (1 / b)` using `accurate_reciprocal`.
#[inline]
pub(crate) fn from_div_f64(a: f64, b: f64) -> Self {
    let inv_b = DyadicFloat128::accurate_reciprocal(b);
    inv_b.quick_mul(&DyadicFloat128::new_from_f64(a))
}
/// Multiply self by integer scalar `b`.
/// Returns a new normalized DyadicFloat128.
///
/// `self` is expected to be normalized (top mantissa bit set) or zero. The product
/// is formed from the two 64-bit halves of the mantissa; bits that do not fit in
/// 128 bits are truncated, except in the carry case where the dropped bit is rounded.
#[inline]
pub(crate) fn mul_int64(&self, b: i64) -> DyadicFloat128 {
    if b == 0 {
        return DyadicFloat128::zero();
    }
    let abs_b = b.unsigned_abs();
    let sign = if (b < 0) ^ (self.sign == DyadicSign::Neg) {
        DyadicSign::Neg
    } else {
        DyadicSign::Pos
    };
    if self.mantissa == 0 {
        // A zero operand has no leading one: `leading_zeros()` below would be 128 and
        // the shift would overflow. The product is simply a (signed) zero.
        return DyadicFloat128 {
            sign,
            exponent: 0,
            mantissa: 0,
        };
    }
    // High half times b, shifted up so that its top bit is set.
    let mut hi_prod = (self.mantissa >> 64).wrapping_mul(abs_b as u128);
    let m = hi_prod.leading_zeros();
    hi_prod <<= m;
    // Low half times b, aligned to the same scale (a net shift of m - 64).
    let mut lo_prod = (self.mantissa & 0xffff_ffff_ffff_ffff).wrapping_mul(abs_b as u128);
    lo_prod = (lo_prod << (m - 1)) >> 63;
    let (mut product, overflow) = hi_prod.overflowing_add(lo_prod);
    let mut result = DyadicFloat128 {
        sign,
        exponent: self.exponent + 64 - m as i16,
        mantissa: product,
    };
    if overflow {
        // The true sum is 2^128 + product: shift it down by one bit (rounding the
        // dropped bit), restore the carried-out bit at the top and bump the exponent
        // once. The mantissa must not be shifted a second time, otherwise the result
        // comes out as half of the correct value.
        product += product & 0x1;
        result.mantissa = (product >> 1) | (1u128 << 127);
        result.exponent += 1;
    }
    result.normalize();
    result
}
/// Shifts the mantissa right by `amount` bits and raises the exponent to match.
/// Bits shifted out are dropped; shifting by 128 or more resets the value to zero.
#[inline]
fn shift_right(&mut self, amount: u32) {
    if amount >= BITS {
        self.mantissa = 0;
        self.exponent = 0;
        return;
    }
    self.mantissa >>= amount;
    self.exponent += amount as i16;
}
/// Shifts the mantissa left by `amount` bits and lowers the exponent to match.
/// Bits shifted out at the top are dropped; shifting by 128 or more resets the value to zero.
#[inline]
fn shift_left(&mut self, amount: u32) {
    if amount >= BITS {
        self.mantissa = 0;
        self.exponent = 0;
        return;
    }
    self.mantissa <<= amount;
    self.exponent -= amount as i16;
}
// Don't forget to call this on manually created values.
/// Shifts the mantissa left until its top bit is set, adjusting the exponent so the
/// value is unchanged. A zero mantissa is left as is.
#[inline]
pub(crate) const fn normalize(&mut self) {
    if self.mantissa == 0 {
        return;
    }
    let leading = self.mantissa.leading_zeros();
    self.mantissa <<= leading;
    self.exponent -= leading as i16;
}
/// Returns a copy of `self` with the sign flipped.
#[inline]
pub(crate) fn negated(&self) -> Self {
    let mut flipped = *self;
    flipped.sign = self.sign.negate();
    flipped
}
/// Computes `self - rhs` as `self + (-rhs)`.
#[inline]
pub(crate) fn quick_sub(&self, rhs: &Self) -> Self {
    let minus_rhs = rhs.negated();
    self.quick_add(&minus_rhs)
}
/// Computes `self + rhs`.
///
/// Both operands are expected to be normalized (or zero). The operand with the
/// smaller exponent is shifted right to align it, so its low bits are truncated.
/// If the exponents differ by 128 or more, the smaller operand is negligible and
/// the larger one is returned unchanged.
#[inline]
pub(crate) fn quick_add(&self, rhs: &Self) -> Self {
    if self.mantissa == 0 {
        return *rhs;
    }
    if rhs.mantissa == 0 {
        return *self;
    }
    let mut a = *self;
    let mut b = *rhs;
    // Widen to i32 so that neither the difference nor its absolute value can overflow.
    let exp_diff = a.exponent as i32 - b.exponent as i32;
    // If exponent difference is too large, the smaller operand is negligible,
    // whatever the signs are.
    if exp_diff.abs() >= BITS as i32 {
        return if exp_diff > 0 { a } else { b };
    }
    // Align exponents
    if exp_diff > 0 {
        b.shift_right(exp_diff as u32);
    } else if exp_diff < 0 {
        a.shift_right((-exp_diff) as u32);
    }
    let mut result = DyadicFloat128::zero();
    if a.sign == b.sign {
        // Addition
        result.sign = a.sign;
        result.exponent = a.exponent;
        let (sum, is_overflow) = a.mantissa.overflowing_add(b.mantissa);
        result.mantissa = sum;
        if is_overflow {
            // Mantissa addition overflow: drop the lowest bit and restore the
            // carried-out bit at the top.
            result.shift_right(1);
            result.mantissa |= 1u128 << 127;
        }
        // Result is already normalized (one operand kept its top bit).
        return result;
    }
    // Subtraction: subtract the smaller magnitude from the larger one and take
    // the sign of the larger.
    if a.mantissa >= b.mantissa {
        result.sign = a.sign;
        result.exponent = a.exponent;
        result.mantissa = a.mantissa.wrapping_sub(b.mantissa);
    } else {
        result.sign = b.sign;
        result.exponent = b.exponent;
        result.mantissa = b.mantissa.wrapping_sub(a.mantissa);
    }
    result.normalize();
    result
}
/// Computes `self * rhs`, keeping the upper 128 bits of the mantissa product
/// (the lower bits are truncated). Operands are expected to be normalized or zero.
#[inline]
pub(crate) fn quick_mul(&self, rhs: &Self) -> Self {
    let sign = if self.sign == rhs.sign {
        DyadicSign::Pos
    } else {
        DyadicSign::Neg
    };
    let mut product = DyadicFloat128 {
        sign,
        exponent: self.exponent + rhs.exponent + BITS as i16,
        mantissa: 0,
    };
    if self.mantissa == 0 || rhs.mantissa == 0 {
        return product;
    }
    product.mantissa = mulhi_u128(self.mantissa, rhs.mantissa);
    // The product of two normalized mantissas has at most one leading zero, so a
    // direct check of the top bit replaces a full normalize().
    if product.mantissa >> 127 == 0 {
        product.shift_left(1);
    }
    product
}
/// Converts to `f64`.
///
/// The top 53 mantissa bits form `d_hi`; the remaining bits are condensed into a
/// round bit and a sticky bit, which are added back through `f_fmla` as a multiple of
/// a power of two `d_lo` lying below `d_hi`, so the final floating-point addition
/// performs the rounding. Zero is returned with the stored sign.
#[inline]
pub(crate) fn fast_as_f64(&self) -> f64 {
if self.mantissa == 0 {
return if self.sign == DyadicSign::Pos {
0.
} else {
-0.0
};
}
// Assume that it is normalized, and output is also normal.
const PRECISION: u32 = 52 + 1;
// SIG_MASK - FRACTION_MASK
// Both masks are identical for f64, so IMPLICIT_MASK evaluates to 0.
const SIG_MASK: u64 = (1u64 << 52) - 1;
const FRACTION_MASK: u64 = (1u64 << 52) - 1;
const IMPLICIT_MASK: u64 = SIG_MASK - FRACTION_MASK;
const EXP_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
// Biased f64 exponent of the leading mantissa bit.
let mut exp_hi = self.exponent as i32 + ((BITS - 1) as i32 + EXP_BIAS as i32);
if exp_hi > 2 * EXP_BIAS as i32 {
// Results overflow.
let d_hi = f64_from_parts(self.sign, 2 * EXP_BIAS, IMPLICIT_MASK);
// NOTE(review): the remark about `volatile` below appears to be inherited from a
// C/C++ original; nothing is volatile here, the product is a plain multiplication.
// volatile prevents constant propagation that would result in infinity
// always being returned no matter the current rounding mode.
let two = 2.0f64;
let r = two * d_hi;
return r;
}
let mut denorm = false;
// Number of low mantissa bits that do not fit into the f64 significand.
let mut shift = BITS - PRECISION;
if exp_hi <= 0 {
// Output is denormal.
denorm = true;
shift = (BITS - PRECISION) + (1 - exp_hi) as u32;
// Work with a normal exponent for now; it is removed again at the end.
exp_hi = EXP_BIAS as i32;
}
// Biased exponent of d_lo, PRECISION + 1 binades below d_hi.
let exp_lo = exp_hi.wrapping_sub(PRECISION as i32).wrapping_sub(1);
let m_hi = if shift >= BITS {
0
} else {
self.mantissa >> shift
};
let d_hi = f64_from_parts(
self.sign,
exp_hi as u64,
(m_hi as u64 & SIG_MASK) | IMPLICIT_MASK,
);
// round bit: the first discarded bit; sticky bit: OR of all bits below it.
let round_mask = if shift > BITS {
0
} else {
1u128.wrapping_shl(shift.wrapping_sub(1))
};
let sticky_mask = round_mask.wrapping_sub(1u128);
let round_bit = (self.mantissa & round_mask) != 0;
let sticky_bit = (self.mantissa & sticky_mask) != 0;
// 0..=3: twice the round bit plus the sticky bit.
let round_and_sticky = round_bit as i32 * 2 + sticky_bit as i32;
let d_lo: f64;
if exp_lo <= 0 {
// d_lo is denormal, but the output is normal.
// Scale everything up by a power of two, round there, then scale back down.
let scale_up_exponent = 1 - exp_lo;
let scale_up_factor = f64_from_parts(
DyadicSign::Pos,
EXP_BIAS + scale_up_exponent as u64,
IMPLICIT_MASK,
);
let scale_down_factor = f64_from_parts(
DyadicSign::Pos,
EXP_BIAS - scale_up_exponent as u64,
IMPLICIT_MASK,
);
d_lo = f64_from_parts(
self.sign,
(exp_lo + scale_up_exponent) as u64,
IMPLICIT_MASK,
);
return f_fmla(d_lo, round_and_sticky as f64, d_hi * scale_up_factor)
* scale_down_factor;
}
d_lo = f64_from_parts(self.sign, exp_lo as u64, IMPLICIT_MASK);
// Still correct without FMA instructions if `d_lo` is not underflow.
let r = f_fmla(d_lo, round_and_sticky as f64, d_hi);
if denorm {
const SIG_LEN: u64 = 52;
// Exponent before rounding is in denormal range, simply clear the
// exponent field.
let clear_exp: u64 = (exp_hi as u64) << SIG_LEN;
let mut r_bits: u64 = r.to_bits() - clear_exp;
if r_bits & EXP_MASK == 0 {
// Output is denormal after rounding, clear the implicit bit for 80-bit
// long double.
// (IMPLICIT_MASK is 0 for f64, so this subtraction changes nothing here.)
r_bits -= IMPLICIT_MASK;
}
return f64::from_bits(r_bits);
}
r
}
// Approximate reciprocal - given a nonzero `a`, make a good approximation to 1/a.
// The method is Newton-Raphson iteration, based on quick_mul.
//
// Given an approximation x ≈ 1/a, each step refines it via x' = x * (2 - a * x),
// which squares the error term: if ax ≈ 1 - e, then ax' ≈ 1 - e².
#[inline]
pub(crate) fn reciprocal(self) -> DyadicFloat128 {
    // The constant 2, needed in every iteration.
    let two = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -126,
        mantissa: 0x80000000_00000000_00000000_00000000_u128,
    };
    // Seed with the f64 reciprocal, then refine twice.
    let mut estimate = DyadicFloat128::new_from_f64(1. / self.fast_as_f64());
    for _ in 0..2 {
        let ax = self.quick_mul(&estimate);
        estimate = estimate.quick_mul(&two.quick_sub(&ax));
    }
    estimate
}
// // Approximate reciprocal - given a nonzero `a`, make a good approximation to 1/a.
// // The method is Newton-Raphson iteration, based on quick_mul.
// // *This is very crude guess*
// #[inline]
// fn approximate_reciprocal(&self) -> DyadicFloat128 {
// // Given an approximation x to 1/a, a better one is x' = x(2-ax).
// //
// // You can derive this by using the Newton-Raphson formula with the function
// // f(x) = 1/x - a. But another way to see that it works is to say: suppose
// // that ax = 1-e for some small error e. Then ax' = ax(2-ax) = (1-e)(1+e) =
// // 1-e^2. So the error in x' is the square of the error in x, i.e. the number
// // of correct bits in x' is double the number in x.
//
// // An initial approximation to the reciprocal
// let mut x = DyadicFloat128 {
// sign: DyadicSign::Pos,
// exponent: -32 - self.exponent - BITS as i16,
// mantissa: self.mantissa >> (BITS - 32),
// };
// x.normalize();
//
// // The constant 2, which we'll need in every iteration
// let two = DyadicFloat128::new(DyadicSign::Pos, 1, 1);
//
// // We expect at least 31 correct bits from our 32-bit starting approximation
// let mut ok_bits = 31usize;
//
// // The number of good bits doubles in each iteration, except that rounding
// // errors introduce a little extra each time. Subtract a bit from our
// // accuracy assessment to account for that.
// while ok_bits < BITS as usize {
// x = x * (two - (*self * x));
// ok_bits = 2 * ok_bits - 1;
// }
//
// x
// }
}
impl Add<DyadicFloat128> for DyadicFloat128 {
type Output = DyadicFloat128;
#[inline]
fn add(self, rhs: DyadicFloat128) -> Self::Output {
self.quick_add(&rhs)
}
}
impl DyadicFloat128 {
/// Exponent of the top mantissa bit, i.e. `exponent + 127`. For a normalized value
/// this is `floor(log2(|x|))`.
#[inline]
pub(crate) fn biased_exponent(&self) -> i16 {
    const TOP_BIT_INDEX: i16 = BITS as i16 - 1;
    self.exponent + TOP_BIT_INDEX
}
/// Truncates towards zero and converts to `i64`, saturating at `i64::MIN` /
/// `i64::MAX` when the magnitude does not fit.
///
/// `self` is expected to be normalized or zero.
#[inline]
pub(crate) fn trunc_to_i64(&self) -> i64 {
    if self.mantissa == 0 || self.exponent <= -(BITS as i16) {
        // Zero, or absolute value of x is less than 1.
        return 0;
    }
    // floor(log2(|x|)), non-negative at this point.
    let norm_exp = self.biased_exponent();
    if norm_exp >= 63 {
        // |x| >= 2^63: only -2^63 itself is representable, everything else saturates.
        return if self.sign == DyadicSign::Neg {
            i64::MIN
        } else {
            i64::MAX
        };
    }
    // The integer part sits in the top `norm_exp + 1` bits of the mantissa; since
    // norm_exp <= 62 the shift is at least 1 and the result is below 2^63.
    let hi = self.mantissa >> 64;
    let r: i64 = (hi >> (63 - norm_exp)) as i64;
    if self.sign == DyadicSign::Neg { -r } else { r }
}
/// Rounds to the nearest integer, with ties rounded away from zero.
///
/// `self` is expected to be normalized or zero. Values without fractional bits
/// (including zero) are returned unchanged.
#[inline]
pub(crate) fn round_to_nearest(&self) -> DyadicFloat128 {
    if self.mantissa == 0 || self.exponent >= 0 {
        // Zero, or the lowest mantissa bit already has weight >= 1: nothing to round.
        return *self;
    }
    if self.exponent == -(BITS as i16) {
        // Absolute value of x is greater than or equal to 0.5 but less than 1:
        // rounds to ±1.
        return DyadicFloat128 {
            sign: self.sign,
            exponent: -(BITS as i16 - 1),
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        };
    }
    if self.exponent < -(BITS as i16) {
        // Absolute value of x is less than 0.5: rounds to (signed) zero.
        return DyadicFloat128 {
            sign: self.sign,
            exponent: 0,
            mantissa: 0u128,
        };
    }
    // Number of fractional bits in the mantissa, in 1..=127 here.
    let trim_size = (-self.exponent) as u32;
    // The highest fractional bit has weight 0.5.
    let half_bit_set = self.mantissa & (1u128 << (trim_size - 1)) != 0;
    let trunc_u: u128 = (self.mantissa >> trim_size) << trim_size;
    if trunc_u == self.mantissa {
        // Already an integer.
        return *self;
    }
    let truncated = DyadicFloat128::new(self.sign, self.exponent, trunc_u);
    if !half_bit_set {
        // Fractional part is less than 0.5 so the rounded value is the
        // same as the truncated value.
        return truncated;
    }
    // Fractional part is at least 0.5: move one unit away from zero.
    let one = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -(BITS as i16 - 1),
        mantissa: 0x8000_0000_0000_0000_0000_0000_0000_0000_u128,
    };
    if self.sign == DyadicSign::Neg {
        truncated - one
    } else {
        truncated + one
    }
}
/// Rounds to the nearest integer (see `round_to_nearest`) and converts the result to `f64`.
#[inline]
pub(crate) fn round_to_nearest_f64(&self) -> f64 {
    let rounded = self.round_to_nearest();
    rounded.fast_as_f64()
}
}
impl Sub<DyadicFloat128> for DyadicFloat128 {
type Output = DyadicFloat128;
#[inline]
fn sub(self, rhs: DyadicFloat128) -> Self::Output {
self.quick_sub(&rhs)
}
}
impl Mul<DyadicFloat128> for DyadicFloat128 {
type Output = DyadicFloat128;
#[inline]
fn mul(self, rhs: DyadicFloat128) -> Self::Output {
self.quick_mul(&rhs)
}
}
// Unit tests for `DyadicFloat128`. Several local names (`minus_0_5`, `from_div`, ...)
// are reused for unrelated values; the comments state what each value actually is.
#[cfg(test)]
mod tests {
use super::*;
// Conversion of hand-built values to f64.
#[test]
fn test_dyadic_float() {
// 2^127 * 2^-127 = 1.0
let ones = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
};
let cvt = ones.fast_as_f64();
assert_eq!(cvt, 1.0);
// -(2^127 * 2^-128) = -0.5
let minus_0_5 = DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -128,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
};
let cvt0 = minus_0_5.fast_as_f64();
assert_eq!(cvt0, -1.0 / 2.0);
// -1/24
let minus_1_f4 = DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -132,
mantissa: 0xaaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaab_u128,
};
let cvt0 = minus_1_f4.fast_as_f64();
assert_eq!(cvt0, -1.0 / 24.0);
// +1/40320 (positive, despite the variable name)
let minus_1_f8 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -143,
mantissa: 0xd00d00d0_0d00d00d_00d00d00_d00d00d0_u128,
};
let cvt0 = minus_1_f8.fast_as_f64();
assert_eq!(cvt0, 1.0 / 40320.0);
}
// Addition of operands with opposite signs: 1.0 + (-0.5) = 0.5.
#[test]
fn dyadic_float_add() {
let ones = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
};
let cvt = ones.fast_as_f64();
assert_eq!(cvt, 1.0);
let minus_0_5 = DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -128,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
};
let cvt0 = ones.quick_add(&minus_0_5).fast_as_f64();
assert_eq!(cvt0, 0.5);
}
// Multiplication: 1.0 * (-0.5) = -0.5; also checks the encoding of 2.0.
#[test]
fn dyadic_float_mul() {
let ones = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
};
let cvt = ones.fast_as_f64();
assert_eq!(cvt, 1.0);
let minus_0_5 = DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -128,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
};
let product = ones.quick_mul(&minus_0_5);
let cvt0 = product.fast_as_f64();
assert_eq!(cvt0, -0.5);
let twos = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -126,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
};
let cvt = twos.fast_as_f64();
assert_eq!(cvt, 2.0);
}
// f64 -> DyadicFloat128 -> f64 must reproduce the input exactly.
#[test]
fn dyadic_round_trip() {
let z00 = 0.0;
let zvt00 = DyadicFloat128::new_from_f64(z00);
let b00 = zvt00.fast_as_f64();
assert_eq!(b00, z00);
// Hand-built zero.
let zvt000 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: 0,
mantissa: 0,
};
let b000 = zvt000.fast_as_f64();
assert_eq!(b000, z00);
let z0 = 1.0;
let zvt0 = DyadicFloat128::new_from_f64(z0);
let b0 = zvt0.fast_as_f64();
assert_eq!(b0, z0);
let z1 = 0.5;
let zvt1 = DyadicFloat128::new_from_f64(z1);
let b1 = zvt1.fast_as_f64();
assert_eq!(b1, z1);
let z2 = -0.5;
let zvt2 = DyadicFloat128::new_from_f64(z2);
let b2 = zvt2.fast_as_f64();
assert_eq!(b2, z2);
let z3 = -532322.54324324232;
let zvt3 = DyadicFloat128::new_from_f64(z3);
let b3 = zvt3.fast_as_f64();
assert_eq!(b3, z3);
}
// Newton-Raphson reciprocal: 1/1 = 1 and 1/4 = 0.25.
#[test]
fn dyadic_float_reciprocal() {
let ones = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
}
.reciprocal();
let cvt = ones.fast_as_f64();
assert_eq!(cvt, 1.0);
// Reciprocal of 4.0 (the variable name is a leftover).
let minus_0_5 = DyadicFloat128::new_from_f64(4.).reciprocal();
let cvt0 = minus_0_5.fast_as_f64();
assert_eq!(cvt0, 0.25);
}
// Division through the accurate reciprocal: 1/4 = 0.25.
#[test]
fn dyadic_float_from_div() {
let from_div = DyadicFloat128::from_div_f64(1.0, 4.0);
let cvt = from_div.fast_as_f64();
assert_eq!(cvt, 0.25);
}
#[test]
fn dyadic_float_accurate_reciprocal() {
let from_div = DyadicFloat128::accurate_reciprocal(4.0);
let cvt = from_div.fast_as_f64();
assert_eq!(cvt, 0.25);
}
// Multiplication by an integer, covering all sign combinations used here.
#[test]
fn dyadic_float_mul_int() {
let from_div = DyadicFloat128::new_from_f64(4.0);
let m1 = from_div.mul_int64(-2);
assert_eq!(m1.fast_as_f64(), -8.0);
let from_div = DyadicFloat128::new_from_f64(-4.0);
let m1 = from_div.mul_int64(-2);
assert_eq!(m1.fast_as_f64(), 8.0);
let from_div = DyadicFloat128::new_from_f64(2.5);
let m1 = from_div.mul_int64(2);
assert_eq!(m1.fast_as_f64(), 5.0);
}
// Rounding to nearest integer; the 2.5 and ±0.5 cases show ties go away from zero.
#[test]
fn dyadic_float_round() {
let from_div = DyadicFloat128::new_from_f64(2.5);
let m1 = from_div.round_to_nearest_f64();
assert_eq!(m1, 3.0);
let from_div = DyadicFloat128::new_from_f64(0.5);
let m1 = from_div.round_to_nearest_f64();
assert_eq!(m1, 1.0);
let from_div = DyadicFloat128::new_from_f64(-0.5);
let m1 = from_div.round_to_nearest_f64();
assert_eq!(m1, -1.0);
let from_div = DyadicFloat128::new_from_f64(-0.351);
let m1 = from_div.round_to_nearest_f64();
assert_eq!(m1, (-0.351f64).round());
let from_div = DyadicFloat128::new_from_f64(0.351);
let m1 = from_div.round_to_nearest_f64();
assert_eq!(m1, 0.351f64.round());
let z00 = 25.6;
let zvt00 = DyadicFloat128::new_from_f64(z00);
let b00 = zvt00.round_to_nearest_f64();
assert_eq!(b00, 26.);
}
// Truncation towards zero for values of both signs.
#[test]
fn dyadic_int_trunc() {
let from_div = DyadicFloat128::new_from_f64(-2.5);
let m1 = from_div.trunc_to_i64();
assert_eq!(m1, -2);
let from_div = DyadicFloat128::new_from_f64(2.5);
let m1 = from_div.trunc_to_i64();
assert_eq!(m1, 2);
let from_div = DyadicFloat128::new_from_f64(0.5);
let m1 = from_div.trunc_to_i64();
assert_eq!(m1, 0);
let from_div = DyadicFloat128::new_from_f64(-0.5);
let m1 = from_div.trunc_to_i64();
assert_eq!(m1, 0);
let from_div = DyadicFloat128::new_from_f64(-0.351);
let m1 = from_div.trunc_to_i64();
assert_eq!(m1, 0);
let from_div = DyadicFloat128::new_from_f64(0.351);
let m1 = from_div.trunc_to_i64();
assert_eq!(m1, 0);
}
}

319
vendor/pxfm/src/err/erf.rs vendored Normal file
View File

@@ -0,0 +1,319 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{dd_fmla, dyad_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::err::erf_poly::{ERF_POLY, ERF_POLY_C2};
use crate::floor::FloorFinite;
/* double-double approximation of 2/sqrt(pi) to nearest */
// The first argument is the small correction term (about 2^-56 in magnitude), the
// second the leading term (about 1.128); presumably `DoubleDouble::new(lo, hi)`.
const TWO_OVER_SQRT_PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3c71ae3a914fed80),
f64::from_bits(0x3ff20dd750429b6d),
);
/// Result of the fast erf path: a double-double approximation together with its
/// error bound.
pub(crate) struct Erf {
// Double-double approximation of erf(z).
pub(crate) result: DoubleDouble,
// Bound on the maximal relative error of `result`.
pub(crate) err: f64,
}
/* for |z| < 1/8, assuming z >= 2^-61, thus no underflow can occur */
// Evaluates an odd polynomial of degree 21 in `x` with a Horner scheme in x^2 and
// returns the result as a double-double. The precision of the evaluation increases
// towards the low-degree terms: plain doubles first, then double-double accumulation
// with double coefficients, finally double-double coefficients.
#[cold]
fn cr_erf_accurate_tiny(x: f64) -> DoubleDouble {
// Coefficient table: P[0..8] holds (hi, lo) pairs for degrees 1, 3, 5, 7;
// P[8..15] holds single doubles for degrees 9, 11, ..., 21 (degree d at P[d / 2 + 4]).
static P: [u64; 15] = [
0x3ff20dd750429b6d,
0x3c71ae3a914fed80,
0xbfd812746b0379e7,
0x3c6ee12e49ca96ba,
0x3fbce2f21a042be2,
0xbc52871bc0a0a0d0,
0xbf9b82ce31288b51,
0x3c21003accf1355c,
0x3f7565bcd0e6a53f,
0xbf4c02db40040cc3,
0x3f1f9a326fa3cf50,
0xbeef4d25e3c73ce9,
0x3ebb9eb332b31646,
0xbe864a4bd5eca4d7,
0x3e6c0acc2502e94e,
];
let z2 = x * x;
let mut h = f64::from_bits(P[21 / 2 + 4]); /* degree 21 */
// Degrees 19, 17, 15, 13: plain double Horner steps in z2.
for a in (12..=19).rev().step_by(2) {
h = dd_fmla(h, z2, f64::from_bits(P[(a / 2 + 4) as usize]))
}
let mut l = 0.;
// Degrees 11 and 9: multiply (h + l) by x twice in double-double, then add the
// double coefficient with an exact addition.
for a in (8..=11).rev().step_by(2) {
let mut t = DoubleDouble::from_exact_mult(h, x);
t.lo = dd_fmla(l, x, t.lo);
let mut k = DoubleDouble::from_exact_mult(t.hi, x);
k.lo = dd_fmla(t.lo, x, k.lo);
let p = DoubleDouble::from_exact_add(f64::from_bits(P[(a / 2 + 4) as usize]), k.hi);
l = k.lo + p.lo;
h = p.hi;
}
// Degrees 7, 5, 3, 1: same scheme with double-double coefficients
// (high part P[a - 1], low part P[a]).
for a in (1..=7).rev().step_by(2) {
let mut t = DoubleDouble::from_exact_mult(h, x);
t.lo = dd_fmla(l, x, t.lo);
let mut k = DoubleDouble::from_exact_mult(t.hi, x);
k.lo = dd_fmla(t.lo, x, k.lo);
let p = DoubleDouble::from_exact_add(f64::from_bits(P[a - 1]), k.hi);
l = k.lo + p.lo + f64::from_bits(P[a]);
h = p.hi;
}
/* multiply by z */
let p = DoubleDouble::from_exact_mult(h, x);
l = dd_fmla(l, x, p.lo);
DoubleDouble::new(l, p.hi)
}
/* Assuming 0 <= z <= 0x1.7afb48dc96626p+2, put in h+l an accurate
approximation of erf(z).
Assumes z >= 2^-61, thus no underflow can occur. */
// For x >= 1/8 the argument is reduced to the centre of its interval of width 1/8
// and a degree-18 polynomial from `ERF_POLY_C2` is evaluated in the reduced
// argument; smaller x is delegated to `cr_erf_accurate_tiny`.
#[cold]
#[inline(never)]
pub(crate) fn erf_accurate(x: f64) -> DoubleDouble {
if x < 0.125
/* z < 1/8 */
{
return cr_erf_accurate_tiny(x);
}
// v = floor(8x) as a float, i = the same value as an integer table index.
let v = (8.0 * x).floor_finite();
let i: u32 = (8.0 * x) as u32;
// z = x - (v/8 + 1/16): offset from the centre of the interval [v/8, (v+1)/8).
let z = (x - 0.0625) - 0.125 * v;
/* now |z| <= 1/16 */
// Row layout: p[0..16] holds (hi, lo) pairs for degrees 0..=7,
// p[16..27] holds single doubles for degrees 8..=18 (degree d at p[8 + d]).
let p = ERF_POLY_C2[(i - 1) as usize];
let mut h = f64::from_bits(p[26]); /* degree-18 */
// Degrees 17 down to 11: plain double Horner steps.
for a in (11..=17).rev() {
h = dd_fmla(h, z, f64::from_bits(p[(8 + a) as usize])); /* degree a */
}
let mut l: f64 = 0.;
// Degrees 10 down to 8: double-double accumulation with double coefficients.
// (The local `p` shadows the coefficient row only after the row has been read.)
for a in (8..=10).rev() {
let mut t = DoubleDouble::from_exact_mult(h, z);
t.lo = dd_fmla(l, z, t.lo);
let p = DoubleDouble::from_exact_add(f64::from_bits(p[(8 + a) as usize]), t.hi);
h = p.hi;
l = p.lo + t.lo;
}
// Degrees 7 down to 0: double-double accumulation with double-double coefficients.
for a in (0..=7).rev() {
let mut t = DoubleDouble::from_exact_mult(h, z);
t.lo = dd_fmla(l, z, t.lo);
/* add p[2*a] + p[2*a+1] to t.hi + t.lo: we use two_sum() instead of
fast_two_sum because for example for i=3, the coefficient of
degree 7 is tiny (0x1.060b78c935b8ep-13) with respect to that
of degree 8 (0x1.678b51a9c4b0ap-7) */
let v = DoubleDouble::from_exact_add(f64::from_bits(p[(2 * a) as usize]), t.hi);
h = v.hi;
l = v.lo + t.lo + f64::from_bits(p[(2 * a + 1) as usize]);
}
DoubleDouble::new(l, h)
}
/* Fast path of erf. The comments in this function call the argument z; it is
the parameter `x` in the code (the local `z` defined further down is the
reduced offset from the middle of the selected interval).
Assuming 0 <= z <= 5.9215871957945065, put in h+l (returned as `result`) an
approximation of erf(z). Return err the maximal relative error:
|(h + l)/erf(z) - 1| < err*|h+l|
NOTE(review): f_erf uses `err` as a relative bound: it rounds
h + (l - err*h) and h + (l + err*h) and accepts the result when both agree.
The range assumption matters: for z >= 1/16 the table index is i - 1 with
i = (16*z) as u32, and ERF_POLY has 94 rows. */
#[inline]
pub(crate) fn erf_fast(x: f64) -> Erf {
/* we split [0,5.9215871957945065] into intervals i/16 <= z < (i+1)/16,
and for each interval, we use a minimax polynomial:
* for i=0 (0 <= z < 1/16) we use a polynomial evaluated at zero,
since if we evaluate in the middle 1/32, we will get bad accuracy
for tiny z, and moreover z-1/32 might not be exact
* for 1 <= i <= 94, we use a polynomial evaluated in the middle of
the interval, namely i/16+1/32
*/
if x < 0.0625
/* z < 1/16 */
{
/* the following is a degree-11 minimax polynomial for erf(x) on [0,1/16]
generated by Sollya, with double-double coefficients for degree 1 and 3,
and double coefficients for degrees 5 to 11 (file erf0.sollya).
The maximal relative error is 2^-68.935. */
/* z2 = x*x as an exact product (high and low parts) */
let z2 = DoubleDouble::from_exact_mult(x, x);
/* bit patterns of the coefficients: C[0],C[1] = degree 1 (pair),
C[2],C[3] = degree 3 (pair), C[4]..C[7] = degrees 5, 7, 9, 11 */
const C: [u64; 8] = [
0x3ff20dd750429b6d,
0x3c71ae3a7862d9c4,
0xbfd812746b0379e7,
0x3c6f1a64d72722a2,
0x3fbce2f21a042b7f,
0xbf9b82ce31189904,
0x3f7565bbf8a0fe0b,
0xbf4bf9f8d2c202e4,
];
/* degrees 5 to 11 are evaluated in plain double precision, as a polynomial
in z2.hi: c5 <- (C[4] + C[5]*z2h) + (C[6] + C[7]*z2h)*z2h^2 */
let z4 = z2.hi * z2.hi;
let c9 = dd_fmla(f64::from_bits(C[7]), z2.hi, f64::from_bits(C[6]));
let mut c5 = dd_fmla(f64::from_bits(C[5]), z2.hi, f64::from_bits(C[4]));
c5 = dd_fmla(c9, z4, c5);
/* compute c0[2] + c0[3] + z2h*c5 (c0 is the table C above) */
let mut t = DoubleDouble::from_exact_mult(z2.hi, c5);
let mut v = DoubleDouble::from_exact_add(f64::from_bits(C[2]), t.hi);
v.lo += t.lo + f64::from_bits(C[3]);
/* compute c0[0] + c0[1] + (z2h + z2l)*(h + l) */
t = DoubleDouble::from_exact_mult(z2.hi, v.hi);
/* keep h: v is overwritten below, and h is still needed for the z2l*h term */
let h_c = v.hi;
t.lo += dd_fmla(z2.hi, v.lo, f64::from_bits(C[1]));
v = DoubleDouble::from_exact_add(f64::from_bits(C[0]), t.hi);
v.lo += dd_fmla(z2.lo, h_c, t.lo);
/* the polynomial is odd: multiply the even part in z2 by z */
v = DoubleDouble::quick_mult_f64(v, x);
return Erf {
result: v,
err: f64::from_bits(0x3ba7800000000000),
}; /* err < 2.48658249618372e-21, cf Analyze0() */
}
/* v is floor(16*z) as a float (used for the exact subtraction below),
i is the same interval number as an integer (used to select the table row) */
let v = (16.0 * x).floor_finite();
let i: u32 = (16.0 * x) as u32;
/* i/16 <= z < (i+1)/16 */
/* For 0.0625 <= z <= 0x1.7afb48dc96626p+2, z - 0.03125 is exact:
(1) either z - 0.03125 is in the same binade as z, then 0.03125 is
an integer multiple of ulp(z), so is z - 0.03125
(2) if z - 0.03125 is in a smaller binade, both z and 0.03125 are
integer multiple of the ulp() of that smaller binade.
Also, subtracting 0.0625 * v is exact. */
let z = (x - 0.03125) - 0.0625 * v;
/* now |z| <= 1/32 */
/* row i-1 of the table holds the polynomial for the interval [i/16,(i+1)/16):
c[0],c[1] = degree 0 (pair), c[2],c[3] = degree 1 (pair),
c[4]..c[12] = degrees 2 to 10 (one double each) */
let c = ERF_POLY[(i - 1) as usize];
let z2 = z * z;
let z4 = z2 * z2;
/* the degree-10 coefficient is c[12] */
/* degrees 3 to 10 are grouped two by two: c9 = c[11] + z*c[12],
c7 = c[9] + z*c[10], c5 = c[7] + z*c[8], and c[5] + z*c[6] below */
let c9 = dd_fmla(f64::from_bits(c[12]), z, f64::from_bits(c[11]));
let mut c7 = dd_fmla(f64::from_bits(c[10]), z, f64::from_bits(c[9]));
let c5 = dd_fmla(f64::from_bits(c[8]), z, f64::from_bits(c[7]));
/* c3h, c3l <- c[5] + z*c[6] */
let mut c3 = DoubleDouble::from_exact_add(f64::from_bits(c[5]), z * f64::from_bits(c[6]));
/* c7 <- c7 + c9*z2 */
c7 = dd_fmla(c9, z2, c7);
/* c3h, c3l <- c3h, c3l + c5*z2 */
let p = DoubleDouble::from_exact_add(c3.hi, c5 * z2);
c3.hi = p.hi;
c3.lo += p.lo;
/* c3h, c3l <- c3h, c3l + c7*z4 */
let p = DoubleDouble::from_exact_add(c3.hi, c7 * z4);
c3.hi = p.hi;
c3.lo += p.lo;
/* c2h, c2l <- c[4] + z*(c3h + c3l) */
let mut t = DoubleDouble::from_exact_mult(z, c3.hi);
let mut c2 = DoubleDouble::from_exact_add(f64::from_bits(c[4]), t.hi);
c2.lo += dd_fmla(z, c3.lo, t.lo);
/* compute c[2] + c[3] + z*(c2h + c2l) */
t = DoubleDouble::from_exact_mult(z, c2.hi);
let mut v = DoubleDouble::from_exact_add(f64::from_bits(c[2]), t.hi);
v.lo += t.lo + dd_fmla(z, c2.lo, f64::from_bits(c[3]));
/* compute c[0] + c[1] + z*(h + l) */
t = DoubleDouble::from_exact_mult(z, v.hi);
t.lo = dd_fmla(z, v.lo, t.lo);
v = DoubleDouble::from_exact_add(f64::from_bits(c[0]), t.hi);
v.lo += t.lo + f64::from_bits(c[1]);
Erf {
result: v,
err: f64::from_bits(0x3ba1100000000000),
} /* err < 1.80414390200020e-21, cf analyze_p(1)
(larger values of i yield smaller error bounds) */
}
/// Error function
///
/// Max ULP 0.5
///
/// Evaluation strategy: saturated and non-finite inputs are answered
/// directly, tiny inputs use the leading term `2/sqrt(pi) * x`, and all other
/// inputs go through `erf_fast`, falling back to `erf_accurate` only when the
/// fast result and its error bound do not determine the rounding.
pub fn f_erf(x: f64) -> f64 {
    const ABS_MASK: u64 = 0x7fff_ffff_ffff_ffff;
    const SIGN_MASK: u64 = 0x8000000000000000u64;
    const INF_BITS: u64 = 0x7ff0000000000000u64;

    let x_bits = x.to_bits();
    let abs_bits = x_bits & ABS_MASK;
    let abs_x = f64::from_bits(abs_bits);

    /* erf(x) rounds to +/-1 for RNDN for |x| > 0x4017afb48dc96626 */
    if abs_bits > 0x4017afb48dc96626 {
        if abs_bits > INF_BITS {
            return x + x; /* NaN */
        }
        let signed_one = f64::copysign(1.0, x);
        if abs_bits == INF_BITS {
            return signed_one; /* +/-Inf */
        }
        /* large finite |x|: combine +/-1 with a 2^-54 correction in a single
           fused operation */
        return f_fmla(-f64::from_bits(0x3c90000000000000), signed_one, signed_one);
    }

    /* now |x| <= 0x4017afb48dc96626 */
    if abs_x < f64::from_bits(0x3c20000000000000) {
        /* x = +/-0 is returned unchanged: for x=-0 the code below would
           return +0 which is wrong */
        if x == 0. {
            return x;
        }
        /* tiny x: erf(x) ~ 2/sqrt(pi) * x + O(x^3), where the ratio of the
           O(x^3) term to the main term is in x^2/3, thus less than 2^-123 */
        let two_pow_106 = f64::from_bits(0x4690000000000000);
        let tentative = TWO_OVER_SQRT_PI.hi * x; /* tentative result */
        /* scale x by 2^106 to get out the subnormal range */
        let scaled_x = x * two_pow_106;
        let mut prod = DoubleDouble::quick_mult_f64(TWO_OVER_SQRT_PI, scaled_x);
        /* now compute the residual h + l - y; h-y*2^106 is exact since h and
           y are very close */
        prod.lo += f_fmla(-tentative, two_pow_106, prod.hi);
        /* scale the residual back by 2^-106 and add it to the tentative result */
        return dyad_fmla(prod.lo, f64::from_bits(0x3950000000000000), tentative);
    }

    /* fast path on |x|; the sign of x is then copied onto both parts */
    let fast = erf_fast(abs_x);
    let sign = x_bits & SIGN_MASK;
    let hi = f64::from_bits(fast.result.hi.to_bits() ^ sign);
    let lo = f64::from_bits(fast.result.lo.to_bits() ^ sign);

    /* round both ends of the error interval; if they agree the rounding is
       determined */
    let lower = hi + f_fmla(fast.err, -hi, lo);
    let upper = hi + f_fmla(fast.err, hi, lo);
    if lower == upper {
        return lower;
    }

    /* rounding not determined: use the accurate path on |x| and restore the
       sign (x cannot be NaN or zero here) */
    let accurate = erf_accurate(abs_x);
    if x < 0. {
        (-accurate.hi) + (-accurate.lo)
    } else {
        accurate.to_f64()
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_erf() {
assert_eq!(f_erf(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009456563898732),
0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010670589695636709);
assert_eq!(f_erf(0.), 0.);
assert_eq!(f_erf(1.), 0.8427007929497149);
assert_eq!(f_erf(0.49866735123), 0.5193279892991808);
assert_eq!(f_erf(-0.49866735123), -0.5193279892991808);
assert!(f_erf(f64::NAN).is_nan());
assert_eq!(f_erf(f64::INFINITY), 1.0);
assert_eq!(f_erf(f64::NEG_INFINITY), -1.0);
}
}

177
vendor/pxfm/src/err/erf_poly.rs vendored Normal file
View File

@@ -0,0 +1,177 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/// Polynomial coefficients used by `erf_fast` for arguments `x >= 1/16`.
///
/// There are 94 rows; `erf_fast` reads row `i - 1` where `i = (16 * x) as u32`,
/// i.e. one row per interval `i/16 <= x < (i+1)/16` for `1 <= i <= 94`.
/// Each row is a polynomial in `z = x - (i/16 + 1/32)`, the offset from the
/// middle of the interval.
///
/// Every entry is the bit pattern of an `f64` (decoded with `f64::from_bits`).
/// Column layout, as consumed by `erf_fast`:
/// * `[0]`, `[1]`: degree-0 coefficient as a (high, low) pair,
/// * `[2]`, `[3]`: degree-1 coefficient as a (high, low) pair,
/// * `[4]` to `[12]`: coefficients of degree 2 to 10, one double each.
#[rustfmt::skip]
pub(crate) static ERF_POLY: [[u64; 13]; 94] = [
[ 0x3fbb0081148a873a, 0xbc2f0295f16ba5d8, 0x3ff1e565bca400d4, 0xbc962d0ac26c78d3, 0xbfbad8189af6013d, 0xbfd7712743c42914, 0x3faaafd4760d7634, 0x3fbba14988b4127e, 0xbf91afcdb244078a, 0xbf99d72ee25cf211, 0x3f719502f7beca8f, 0x3f73b955bfd46624, 0xbf4a4e2d4d32228b ],
[ 0x3fc662a0bdf7a89f, 0xbc4ef7bc5856c2d4, 0x3ff19e5e92b964ab, 0x3c6cca4dec08a640, 0xbfc605f63767bdd6, 0xbfd6582e9b69c9a9, 0x3fb5aa32b580ec64, 0x3fb97594c2593d3e, 0xbf9c69c62749fb7f, 0xbf96fa7f611aacdc, 0x3f7bf1e628a4606e, 0x3f70e50e4329e8a9, 0xbf568ca9c1954b4c ],
[ 0x3fcf190aa85540e2, 0xbc6e522ac9f718e6, 0x3ff135e3075d076b, 0xbc6e2d8ed30e4a48, 0xbfce1e4d4ce2ccfb, 0xbfd4c04e66e0d59b, 0x3fbd2855d59988e8, 0x3fb659a35f29781a, 0xbfa2cf6266a634c8, 0xbf92ef4180b1f3fa, 0x3f823199a6da60e3, 0x3f69e80d13a3368c, 0xbf5ba4e4eff641dd ],
[ 0x3fd3c9aa8b84beda, 0x3c538ec27d3e5820, 0x3ff0ae54fa490723, 0xbc9d016b7bc67433, 0xbfd2c41f99922807, 0xbfd2b900b640a201, 0x3fc1c6c7eef8fa14, 0x3fb277ad7822021e, 0xbfa66c9b2023b9df, 0xbf8bf7e7b4e8559e, 0x3f853005de4b5751, 0x3f60737c6ba405f0, 0xbf606ccc916b15dc ],
[ 0x3fd7e15944d9d3e4, 0xbc695f819cf77862, 0x3ff00abcf3e187a9, 0x3c85860d868dc542, 0xbfd60ec3cf561a89, 0xbfd05599bafe4ecc, 0x3fc451ef6280e70f, 0x3fac06c6e434be6f, 0xbfa8e2d73679096f, 0xbf80ea4a60550d9c, 0x3f86c911882cc99c, 0x3f48c65a9990353b, 0xbf61e8a88301a7b5 ],
[ 0x3fdbccfec24855b8, 0xbc7472ab1c2b898c, 0x3fee9d5a8e4c934e, 0xbc79a002a2814a72, 0xbfd8dfd9939e37af, 0xbfcb588d8dc5bb96, 0x3fc62338788aee97, 0x3fa26cf85bc6dff9, 0xbfaa1bcaa91da902, 0xbf65b4a7d42d0f64, 0x3f86edef7de2b68d, 0xbf4037b458e2da8c, 0xbf5e8d6001a54334 ],
[ 0x3fdf86faa9428f9d, 0x3c79996c0c376e32, 0x3fecfc41e36c7df9, 0xbc79be994724ea34, 0xbfdb2c7dc535b619, 0xbfc5a9de93f9c0d5, 0x3fc7317958d24aae, 0x3f9133e02ab7d777, 0xbfaa155bbde32db8, 0x3f672049c0cc8525, 0x3f85adde5c722d85, 0xbf5b0a7ec5dc80fc, 0xbf5aa9393b806535 ],
[ 0x3fe1855a5fd3dd50, 0x3c88f6964e67d61a, 0x3feb3aafcc27502e, 0xbc7a9dd26edea8a2, 0xbfdcee5ac8e9c531, 0xbfbfa02983c853d1, 0x3fc77cd75ec73100, 0xbf5fa6f82f9333b7, 0xbfa8e0db5528e559, 0x3f800bf7062212bc, 0x3f83319e670adc9f, 0xbf658833e091aa36, 0xbf58f99b6e81e8f5 ],
[ 0x3fe32a54cb8db67b, 0xbc696221f7e18978, 0x3fe96164fafd8de3, 0x3c70887f82841acc, 0xbfde23a7ea0d187e, 0xbfb3f5ee1564be49, 0x3fc70e469de06907, 0xbf93da6878ae6fd8, 0xbfa6a0d076468415, 0x3f88cf081f1fc304, 0x3f7f6d62866525e6, 0xbf6b93149d5701a4, 0xbf51a6c1a9f7ea73 ],
[ 0x3fe4b13713ad3513, 0x3c6e944ee1b212e4, 0x3fe7791b886e7403, 0xbc6da43cb53d911c, 0xbfdecef42310f844, 0xbfa15c3c5ce705df, 0x3fc5f6890affa468, 0xbfa1da642fabd4da, 0xbfa385991202c7eb, 0x3f8fa4f37fc7c6d4, 0x3f77156b4e430998, 0xbf6f546a4377d648, 0xbf432e4e5abb1e1a ],
[ 0x3fe61955607dd15d, 0x3c898ff39319ab83, 0x3fe58a445da7c74c, 0x3c808ec8e156809b, 0xbfdef6c246a12e7e, 0x3f7e83e0da030480, 0x3fc44cc65df8bfc7, 0xbfa87d3c8dd62c82, 0xbf9f9271a8a1d4e2, 0x3f9225234c1c0a0e, 0x3f6c0b0e055a0c48, 0xbf70585251f84919, 0xbf285bfb02436e0f ],
[ 0x3fe762870f720c6f, 0x3c8118b1ba6da9a7, 0x3fe39ccc1b136d5a, 0x3c5faa9371c0dd80, 0xbfdea4feea4e5add, 0x3fa715e595343353, 0x3fc22cdbdb4cdd0c, 0xbfada50ae547e69e, 0xbf975578f87f217d, 0x3f9353319c65f251, 0x3f539db53a2d03d5, 0xbf6fc0364ce17870, 0x3f3272bc18b0f2ce ],
[ 0x3fe88d1cd474a2e0, 0x3c86f571ada77d52, 0x3fe1b7e98fe26217, 0x3c7952bd607eb12e, 0xbfdde65a22ce0587, 0x3fb40686a3f3dc2b, 0x3fbf6b0cb6926c42, 0xbfb09c7caecd317d, 0xbf8da668f759eaea, 0x3f9364e72035e80a, 0xbf4d421975736447, 0xbf6cc98454e96141, 0x3f4a8860fdf17259 ],
[ 0x3fe999d4192a5715, 0xbc8c888a5759a92c, 0x3fdfc3ee5d1524b0, 0xbc527e60faac0278, 0xbfdcc990045b293f, 0x3fbb37338e6ac814, 0x3fba0d11fe9ba61a, 0xbfb19bb2ca3816ba, 0xbf7a0b7d94791f03, 0x3f9274a59774d5e6, 0xbf664adea7b36f57, 0xbf683684bd8ef173, 0x3f538905afd229ff ],
[ 0x3fea89c850b7d54d, 0xbc8e2752ebf0cd02, 0x3fdc40b0729ed548, 0xbc7c4c1c4927306d, 0xbfdb5eaaef09de9d, 0x3fc0847c7dad86af, 0x3fb47de0a4f796ca, 0xbfb1d9de8b54a3ec, 0x3f533252fb810c7c, 0x3f90ab3e329ded2f, 0xbf712d82076274ed, 0xbf6287bb4a78d728, 0x3f557d31bd574da0 ],
[ 0x3feb5e62fce16095, 0x3c7bc3cff4400364, 0x3fd8eed36b886d93, 0x3c7ea7e17b96436d, 0xbfd9b64a06e4b100, 0x3fc2bb6e2c74d4fe, 0x3fadee322c062364, 0xbfb169960d5a983d, 0x3f7feab4ad0bfc14, 0x3f8c76eb94b07a5f, 0xbf7584474ae8f994, 0xbf588df75be9251f, 0x3f54edef50317090 ],
[ 0x3fec194b1d49a184, 0xbc66770a58b27668, 0x3fd5d4fd33729015, 0xbc76db7d76e9e97b, 0xbfd7e0f4f0454d97, 0x3fc444bc66c35bc4, 0x3fa356dbb5432550, 0xbfb0643de6e8c574, 0x3f8b2e1f789415e4, 0x3f86ba6d9f4af32f, 0xbf78138bf4573a6a, 0xbf47e6e52a583322, 0x3f50f87322fa18a3 ],
[ 0x3fecbc54b476248d, 0x3c81a5083b01ec0d, 0x3fd2f7cc3fe6f423, 0x3c79fbb4b774e85d, 0xbfd5ee8429e30a49, 0x3fc52a8395f96270, 0x3f9313759f199499, 0xbfadcf844d90282c, 0x3f91e45f25ab54a1, 0x3f8091cb68a58665, 0xbf78ea40b0ac8b7b, 0xbee6b91b1bf985f2, 0x3f5158d9c0e1c327 ],
[ 0x3fed4970f9ce00d9, 0xbc756704209fca70, 0x3fd059f59af7a906, 0xbc70ce27da57f153, 0xbfd3eda354ddd5ff, 0x3fc57b85ad436067, 0x3f58e90c2a157e8d, 0xbfaa2893b28f4033, 0x3f94d6af4484a1cb, 0x3f74ccee8c8b1f57, 0xbf783304b9e2e312, 0x3f440cb679d0a832, 0x3f4d6b5f4bdef24b ],
[ 0x3fedc29fb60715af, 0x3c8ab029f047a087, 0x3fcbf8e1b1ca2279, 0x3be0426e10a38000, 0xbfd1eb7095e57e16, 0x3fc549ea6f7a013f, 0xbf8b10f20d110552, 0xbfa61420b5b34a55, 0x3f9677b7ea46c6f2, 0x3f624f9940ffd840, 0xbf76304445e5f6ca, 0x3f5222fabfa75bb0, 0x3f3fdcf55be3c03e ],
[ 0x3fee29e22a89d766, 0x3c8bcc9d569ed217, 0x3fc7bd5c7df3fe9c, 0x3c6488f3b06e1394, 0xbfcfe674493fde22, 0x3fc4a9feacf7e222, 0xbf9a0082c90a1b0d, 0xbfa1cf0e7655f99a, 0x3f96e3396f042620, 0xbf33a2d2cdd5650d, 0xbf7334add14b9a31, 0x3f57e12864580191, 0x3f3dae75c3e2be46 ],
[ 0x3fee812fc64db369, 0x3c83c66a6a23d9a5, 0x3fc3fda6bc016994, 0x3c6586ddaff31a18, 0xbfcc1cb27861fc79, 0x3fc3b1051230b982, 0xbfa1e645a2a638ff, 0xbf9b1f643b14fd89, 0x3f964297d7a66c20, 0xbf63e365adfbccae, 0xbf6f2aa2b3ef5ec2, 0x3f5b3339ee2c8c49, 0x3f20ef5710223110 ],
[ 0x3feeca6ccd709544, 0x3c6f3de8f1953470, 0x3fc0b3f52ce8c383, 0x3c6d1234b508bcfb, 0xbfc8885019f5df29, 0x3fc274275fc87eae, 0xbfa57f7386bfd263, 0xbf930769f45aaa8b, 0x3f94c8231709cfee, 0xbf70c2c99c75913f, 0xbf67514483efc090, 0x3f5c3ebcf121a533, 0x3eede2f1801b8480 ],
[ 0x3fef0762fde45ee6, 0x3c89c3612a14fb77, 0x3fbbb1c972f23e50, 0x3c5ba69c564971e1, 0xbfc5341e3c0177b6, 0x3fc107929f6e7528, 0xbfa7e1b362eacfe6, 0xbf873b61e487b8a9, 0x3f92aa763e0343a9, 0xbf759a388fd2272d, 0xbf5eea3c7f50e8de, 0x3f5b5026fd87d0ca, 0xbf30f2c660125dc6 ],
[ 0x3fef39bc242e43e6, 0xbc8dbae0fd9b967d, 0x3fb6c7e64e7281cb, 0x3c5aa87392dc4c20, 0xbfc2274b86833f6e, 0x3fbefb890e5b6633, 0xbfa92c7dbb880b5c, 0xbf74547708842f2b, 0x3f902047ab6c08c4, 0xbf7888355239e9ec, 0xbf50313bb85e86e1, 0x3f58ced9ddf3d834, 0xbf32d520499bd799 ],
[ 0x3fef62fe80272419, 0xbc8b7c2d17fc31d3, 0x3fb297db960e4f63, 0xbc522bea9385fad9, 0xbfbecb83b087b37b, 0x3fbbce18363bbbb9, 0xbfa985aaf97891cb, 0x3f3cd95f2aa8601a, 0x3f8ab9d43270d20f, 0xbf79b93410d46789, 0xbf29b530b472cadf, 0x3f552f54de527458, 0xbf36844d43c7d693 ],
[ 0x3fef848acb544e95, 0xbc8b27aa2c376c3c, 0x3fae1d4cf1e2450a, 0xbc4783e14555c1e9, 0xbfb9e12e1fde7354, 0x3fb8a27806de834f, 0xbfa91674e13a339a, 0x3f73bc75e8f9d448, 0x3f851b4d09ac47b8, 0xbf796dc7b5f9bd66, 0x3f3e16520532bde9, 0x3f50e742b323f434, 0xbf3ac319bfed91d4 ],
[ 0x3fef9f9ba8d3c733, 0x3c8cd5790ff03ab3, 0x3fa83298d717210e, 0x3c4740e2b04276bf, 0xbfb58d101f909971, 0x3fb58f1456f7db5e, 0xbfa808d17b33b814, 0x3f80c1bdce673b10, 0x3f7f5ff1c06e9df2, 0xbf77f26b8865f398, 0x3f4f87060e6f6460, 0x3f48c6056bea9223, 0xbf3e3499a90b84f5 ],
[ 0x3fefb54641aebbc9, 0xbc879975513f67e7, 0x3fa34ac36ad8dafe, 0x3c0902fb5363d360, 0xbfb1c8ec267fe9e2, 0x3fb2a52c5d83c050, 0xbfa68541b2c0582c, 0x3f85afe422155ad5, 0x3f756303c111cd8a, 0xbf7597ead749c06a, 0x3f557b0870a7b4cf, 0x3f3ffc0efb0ac024, 0xbf39e3ea349ab39e ],
[ 0x3fefc67bcf2d7b8f, 0xbc80d2748f976e8c, 0x3f9e85c449e377f3, 0xbc3cb7ccd2616394, 0xbfad177f166cce53, 0x3fafe23b75845cdf, 0xbfa4b120f9dde895, 0x3f88d9906d138bd5, 0x3f69201b7e469e83, 0xbf72aceacb2954f0, 0x3f58d4e8140dc518, 0x3f300a33f7e93047, 0xbf372b7adfeee575 ],
[ 0x3fefd40bd6d7a785, 0x3c860d45e630998f, 0x3f97f5188610ddc8, 0xbc360e8565137ecb, 0xbfa7954423f89a51, 0x3faaf5baae337ae6, 0xbfa2ad77b77d17dc, 0x3f8a7b8c4a8d53fe, 0x3f54593adc5d737a, 0xbf6ef1cf14455c9c, 0x3f5a1a04ce289b4b, 0x3f03d14f37840954, 0xbf350b861df174ee ],
[ 0x3fefdea6e062d0c9, 0xbc764c70f379f670, 0x3f92a875b5ffab56, 0x3c0531231987c3b8, 0xbfa2f3178cd7aa03, 0x3fa68d1c45b96efe, 0xbfa09648dd332653, 0x3f8ad8b148089c02, 0xbf2f00fa01e6ca19, 0xbf68718785b34600, 0x3f59a7b0da775387, 0xbf2090258ede6532, 0xbf2b3980b454d442 ],
[ 0x3fefe6e1742f7cf6, 0xbc8cebced8a49e04, 0x3f8cd5ec93c12432, 0xbc2bb85326a5eff3, 0xbf9e2ff3aaae31e4, 0x3fa2aa4e58242520, 0xbf9d049824fc44db, 0x3f8a34eda0fc336e, 0xbf5682d8d1801582, 0xbf6239bf51e17ea8, 0x3f57e761274bf059, 0xbf301e715d70d49f, 0xbf24d89f3d9c30d5 ],
[ 0x3fefed37386190fb, 0x3c872b1549ea44ee, 0x3f861beae53b72b7, 0x3bf401790f84b248, 0xbf97d6193f2417ad, 0x3f9e947279e4a43b, 0xbf99060301092cdc, 0x3f88d14d4bdaa7f4, 0xbf61f795ac880380, 0xbf59222edb6bd145, 0x3f553f95c7b01615, 0xbf3529b07d094e1d, 0xbf15b533d0382e20 ],
[ 0x3feff20e0a7ba8c2, 0xbc603f86c5a13f78, 0x3f80d1d69569b82d, 0xbc1a5e866bd1366e, 0xbf92a8ca0dc14852, 0x3f98cc071b719c43, 0xbf954a148886e917, 0x3f86e91361df3c9e, 0xbf665c02e0d08291, 0xbf4e94b0adc3b1ca, 0x3f5210781b57b089, 0xbf37b88f8c82fbff, 0xbf068df27e9a1688 ],
[ 0x3feff5b8fb26f5f6, 0xbc87e917ec20b615, 0x3f79646f35a76624, 0xbc1f771f32fd191b, 0xbf8cf68ed932f081, 0x3f93e8735b5b73b1, 0xbf91e1611aabcbea, 0x3f84afd8cd100d70, 0xbf68c72005b1cfcf, 0xbf3c6a7216b336aa, 0x3f4d577412afc2e2, 0xbf3836a0c0e10a99, 0x3eca8f39f410252a ],
[ 0x3feff87b1913e853, 0xbc73ca98afc58454, 0x3f730499b503957f, 0xbbfd1eabb1c04f50, 0xbf86496420203331, 0x3f8fa73d7eb1b70d, 0xbf8daa3005c2d3fe, 0x3f8250942c31c3ad, 0xbf6997578dc240a8, 0xbf03904177639e63, 0x3f46a6ed488a1f54, 0xbf371cf0c5789c7d, 0x3f043cb84231ab1c ],
[ 0x3feffa89fe5b3625, 0x3c8934b2bcb7f9a3, 0x3f6c4412bf4b8f0b, 0xbbcbbcc9dca4ec60, 0xbf8100f34713740d, 0x3f88ebda0768e8e6, 0xbf8850c68e8e5c3c, 0x3f7fdac8346071b3, 0xbf6929de70d00321, 0x3f310c7101bc52d8, 0x3f4070f7e89ec1e2, 0xbf34e4b3dcf4f08d, 0x3f0f0d43b9869b19 ],
[ 0x3feffc10194fcb64, 0x3c8ea14750ac9b59, 0x3f64d78bba8ca5fd, 0x3be4d9a93566b5b4, 0xbf79ba107a459ce4, 0x3f836f273fbd909b, 0xbf83b38708f7bef7, 0x3f7b3fdff1de2112, 0xbf67d55d55d262d8, 0x3f3eae5e05e74fcc, 0x3f35ebc1e53214a9, 0xbf31fd7c1cd5d63e, 0x3f149559a04c8568 ],
[ 0x3feffd2eae369a07, 0xbc683b09df7f7db4, 0x3f5e7f232d9e2630, 0x3bfa26ac725599e5, 0xbf734c7442de142b, 0x3f7e066bed09942f, 0xbf7f914f2c60b9bb, 0x3f76f4662f6be13b, 0xbf65e664591d6604, 0x3f43a1598d880f36, 0x3f2965b2e78a4544, 0xbf2d8db42b193729, 0x3f1449172919598e ],
[ 0x3feffdff92db56e5, 0xbc78aeef4ee0690a, 0x3f56235fbd7a4345, 0xbbe11380fe434056, 0xbf6cb5e029ba8f3d, 0x3f76fa4c7ef470e9, 0xbf7903a08305eeb0, 0x3f730f12c83fdb23, 0xbf639d769a774af1, 0x3f45d79439ceaefd, 0x3f15326883e7dfeb, 0xbf27199782285958, 0x3f147181c8911603 ],
[ 0x3feffe96a78a04a9, 0xbc82816fe4528f9b, 0x3f4fe41cd9bb4eee, 0x3bde3be508cae7ec, 0xbf652d7b2896626a, 0x3f716c192d8803dc, 0xbf739bfce9b4ecc2, 0x3f6f376a554e5dec, 0xbf612e67cb7aa486, 0x3f466d6e460b1614, 0xbed54f70e4bde32b, 0xbf210e125571fe1e, 0x3f12842d46eb9f29 ],
[ 0x3fefff0312b010b5, 0x3c8155dec9cdc96b, 0x3f46caa0d3582fe9, 0xbbc97d95851163fc, 0xbf5efb729f4be121, 0x3f6a2da7cec01564, 0xbf6e6c27ad2b1ce0, 0x3f693b1f34b17723, 0xbf5d8179cd2ad34f, 0x3f45cf51e0add9bb, 0xbf116d8f4b5119c7, 0xbf1768557564f5f5, 0x3f0f4fc9dde73f24 ],
[ 0x3fefff50456dab8c, 0xbc5a197a986f0de0, 0x3f40295ef6591848, 0xbbd262bd83520706, 0xbf5679880e93e5c4, 0x3f637d38e3a705af, 0xbf675b371a264745, 0x3f64231c3bfe3e65, 0xbf58e184d4921105, 0x3f445d5b5a7f77fa, 0xbf1bf8ece4afedd2, 0xbf0ccd677aaa82f7, 0x3f09e5241d5b6b15 ],
[ 0x3fefff86cfd3e657, 0xbc72e06adb26f84e, 0x3f36be02102b3520, 0x3bb448bcfd3cfe0c, 0xbf502b15777eb7c5, 0x3f5cc1d886874d5b, 0xbf61bff70664651d, 0x3f5fc0f76c943696, 0xbf54a22286622d3e, 0x3f4268887688a6e6, 0xbf20fa2692fd7da2, 0xbefcc13d1a82f742, 0x3f04153e6537aae5 ],
[ 0x3fefffad0b901755, 0x3c670d5c9a92b65c, 0x3f2fc0d55470cf51, 0xbbc6f2b03553d4c8, 0xbf47121aff59f6a1, 0x3f5506d6992fc8ff, 0xbf5ab596015fc183, 0x3f58bdd79a098723, 0xbf50d88da9deb868, 0x3f4031cdd07e4507, 0xbf222fc41430a37d, 0xbedb5cc9546afcec, 0x3efd7ea1c7b8fdb6 ],
[ 0x3fefffc7a37857d2, 0xbc797b30fd4b6b48, 0x3f25feada379d8b7, 0xbbc0546c4da57036, 0xbf405304df546ed8, 0x3f4e79c081b79ebc, 0xbf53e5dc1062db15, 0x3f530eb20ccc1f98, 0xbf4b1b06c20a060d, 0x3f3bd52fbd55e0ef, 0xbf2214afb8835b23, 0x3ee19ae9d16650a0, 0x3ef42d933ee154fd ],
[ 0x3fefffd9fdeabcce, 0x3c80c43c3bc59762, 0x3f1e3bcf436a1a95, 0xbba6458a28a3f9b6, 0xbf36e95311166825, 0x3f45e3edf674e2db, 0xbf4d5be6d15abe3a, 0x3f4d07da13e640c2, 0xbf458106cc648748, 0x3f376c840985e5eb, 0xbf2111de112b1a2e, 0x3ef315fc34053fbd, 0x3ee939439a75a553 ],
[ 0x3fefffe68f4fa777, 0x3c32f21786b76440, 0x3f149e17724f4d41, 0x3ba747684f0023e4, 0xbf2fe48c44d2ab81, 0x3f3f2bd95d72a532, 0xbf457389188a71a9, 0x3f45decc4058f7a1, 0xbf40d559cf0f2957, 0x3f33583904af6f83, 0xbf1efd7979333337, 0x3ef904cf9fa5c1f6, 0x3eda13a094bd56a2 ],
[ 0x3fefffef1960d85d, 0xbc8f7cc78053f6ad, 0x3f0be6abbb10a5aa, 0xbb9e50b219d40126, 0xbf260403819b22b8, 0x3f35fff1dde5305e, 0xbf3f0c93c73e7f42, 0x3f404cbf67af6c26, 0xbf3a04893510426c, 0x3f2f66b51a7bc4a0, 0xbf1b410d7f2fd319, 0x3efb99f9eb427956, 0x3ebf26fcffb14441 ],
[ 0x3feffff4db27f146, 0x3c8ddecdd5e1d408, 0x3f02bb5cc22e5db6, 0x3b9c5112eca8acde, 0xbf1e258948829ed1, 0x3f2ec8a8e59d9d5b, 0xbf36425722b9f3cd, 0x3f380a83a7103b4b, 0xbf33dbb9374004f9, 0x3f2913b301d37bde, 0xbf17563b0d94459f, 0x3efbc01eea9a10be, 0xbeb3df26463df6a5 ],
[ 0x3feffff8b500e77c, 0xbc71014e1f83ed4c, 0x3ef8f4ccca7fc90d, 0x3b9a5d4ec8b9de43, 0xbf1478cffe1cd2ed, 0x3f2559f04ad4de62, 0xbf2f9e163b15c466, 0x3f318bda8b8c1315, 0xbf2df381bd3c058e, 0x3f23b94f531bb6be, 0xbf1385f32481ed94, 0x3efa414bd2b7cb3c, 0xbecac2bbe30f8767 ],
[ 0x3feffffb43555b5f, 0x3c8c17f83b8d73a2, 0x3ef07ebd2a2d2844, 0x3b9d1bbdc704f49b, 0xbf0b93e442837f52, 0x3f1d5cf1514977f3, 0xbf263f5eb46877fd, 0x3f295a0411e668b1, 0xbf2652e5f2a88269, 0x3f1e950ddb7f5444, 0xbf0ffeb9383bdb3d, 0x3ef7c24392346fdd, 0xbed1f3b3254d7230 ],
[ 0x3feffffcf23ff5fc, 0xbc8b18a8b25039c4, 0x3ee5a2adfa0b4bc4, 0x3b8eb6d61aaaf95c, 0xbf026c8826ed9e85, 0x3f140473571d5383, 0xbf1f057dbf365c0a, 0x3f22217929fed933, 0xbf207324014ddb42, 0x3f1762758a56d654, 0xbf09ba250c662e90, 0x3ef4c25759179e3d, 0xbed3e800358f1a7b ],
[ 0x3feffffe0bd3e852, 0xbc5d7ece4ab53150, 0x3edc282cd3957eda, 0x3b6eb3cf4fd14280, 0xbef86ad6df7ba401, 0x3f0b0f313eeb65a6, 0xbf156e457745d637, 0x3f19ad1f65a78253, 0xbf17f92ad8542929, 0x3f11a5578c0d30b3, 0xbf04548d876bb0a3, 0x3ef19e60bf53b25a, 0xbed3f1745170e2d3 ],
[ 0x3feffffec2641a9e, 0xbc8e7ba4fdaaa8c8, 0x3ed22df298214423, 0xbb5a9d49552152a4, 0xbef00c902a4d5e27, 0x3f022234eb745941, 0xbf0d57a2be01db67, 0x3f1200c2ffad65f1, 0xbf1147585d43f49a, 0x3f0a4b07aec797e9, 0xbeff9d088bbeff64, 0x3eed2b2be4e42422, 0xbed2bb57c0cf2941 ],
[ 0x3fefffff37d63a36, 0xbc6753e3241c01b0, 0x3ec74adc8f4064d3, 0x3b6de8a904d5c372, 0xbee4ed4228b3da96, 0x3ef81918baca1979, 0xbf03e81c09c29601, 0x3f09004afed1bde9, 0xbf08a40e183ee3fc, 0x3f0359242a8b8c58, 0xbef834b953bcb845, 0x3ee79e345fb0b20d, 0xbed0bb2d323900ce ],
[ 0x3fefffff82cdcf1b, 0x3c8046bbe9897fd5, 0x3ebd9c73698fb1dc, 0x3b588de36481dfb5, 0xbedb11017e7d5893, 0x3eefc0dfadc2c6d6, 0xbefac4e1aa499ac6, 0x3f0131810ab2e2e3, 0xbf01629d94abc864, 0x3efc22a71036c259, 0xbef244452f74de31, 0x3ee2bf17664310c1, 0xbeccd1b31a8349be ],
[ 0x3fefffffb248c39d, 0x3c89b9a41713558c, 0x3eb2acee2f5ecdb8, 0xbb32d1692a9a105c, 0xbed15cc5700a2341, 0x3ee4be757b934819, 0xbef1d6ab6f8cbf7c, 0x3ef76c5a3035bdab, 0xbef847332578dfac, 0x3ef437f23f8d25ff, 0xbeeb305e625a092d, 0x3edd3886ff986fef, 0xbec81f2189b385a2 ],
[ 0x3fefffffd01f36af, 0xbc8d41915db812ef, 0x3ea75fa8dbc84bec, 0x3b3a5cd79572a1a6, 0xbec6186d9fc357c5, 0x3edae02322e08822, 0xbee79082befd50ca, 0x3eef9c26e211b174, 0xbef0c768235c378b, 0x3eecba7164e1064f, 0xbee3f75c28c31ac8, 0x3ed663fcfff77e44, 0xbec3a6da35f36ee6 ],
[ 0x3fefffffe2ba0ea5, 0xbc826cd7908cba2b, 0x3e9d06ad6ecdf971, 0xbb3020b74d9d30fb, 0xbebbe46aa879edb2, 0x3ed143860c49d129, 0xbededabcbc3e620d, 0x3ee52139c87e9c82, 0xbee6f567cd982028, 0x3ee42ebd266abd62, 0xbedcf2f0c6adfb3e, 0x3ed0e2c0ed67786c, 0xbebf50cb81b9b190 ],
[ 0x3fefffffee3cc32c, 0x3c7e429188c949b8, 0x3e91e1e857adc568, 0x3b32439f8a1649bb, 0xbeb1769ce59fb2c8, 0x3ec5fe5d47560794, 0xbed405da04875e51, 0x3edbfc96a938083d, 0xbedf19ff5e59cbe9, 0x3edc0c4d50d275bf, 0xbed4b9df120462ae, 0x3ec916640ee35de4, 0xbeb874483d99c37e ],
[ 0x3feffffff54dab72, 0xbc8a443df643729a, 0x3e85dcd669f2cd34, 0xbb1ceb1ec59e0c28, 0xbea5b11cbd1ee799, 0x3ebbc91a6b1c1839, 0xbec9c2c5d12dfa2c, 0x3ed25d1e3c70364f, 0xbed4dbe26c88e4f7, 0x3ed347bb8350b422, 0xbecd51d3280da8a0, 0x3ec25ed8e5b466b5, 0xbeb2b9c5d3390919 ],
[ 0x3feffffff99b79d2, 0xbc758ff1c425f8de, 0x3e7a854ea14102a9, 0xbb121745e4b4fcb3, 0xbe9aba593e8384ae, 0x3eb167c252a45678, 0xbec06d78ca0424a3, 0x3ec7e0f59fcfa53d, 0xbecbb4d48383b847, 0x3eca39f3ad9a397f, 0xbec47e836879c374, 0x3eba89244d14b829, 0xbeac33e15a6dbe37 ],
[ 0x3feffffffc355dfd, 0x3c688cb60fd4511c, 0x3e6febc107d5efab, 0xbaed9ed10902067c, 0xbe9055a3c70279a4, 0x3ea59ff37766e9a7, 0xbeb4c53adb9dcc4d, 0x3ebec49242997849, 0xbec23927ad6ac54f, 0x3ec1a6e0676c7463, 0xbebc5239f6a88a96, 0x3eb2e991308bf6fa, 0xbea4e276c09fe81b ],
[ 0x3feffffffdc4ad7a, 0xbc8d75de787812d4, 0x3e630f93c3699079, 0xbaf8f941ab38e9da, 0xbe83ce2f890bb01d, 0x3e9aa5010863c83b, 0xbeaa08ef1ca16360, 0x3eb3a4a6af3cafac, 0xbeb7be1e832218f0, 0x3eb784775c30c386, 0xbeb3593046482ce3, 0x3eaa9d448178fbfd, 0xbe9e77bb85451c65 ],
[ 0x3feffffffeb24467, 0x3c8bff89ef33d6dd, 0x3e56961b8d641d07, 0xbaf74a7fc97b1544, 0xbe77d2510f1f969d, 0x3e90476b165ac852, 0xbea02d3a3b9d195e, 0x3ea8db3567bef1df, 0xbeaea3ef4e3a126b, 0x3eaf03b0861a59ac, 0xbeaa250ca467705a, 0x3ea27e9995f6dfcd, 0xbe95e77b673c6d74 ],
[ 0x3fefffffff3e8892, 0x3c5befbf8d294678, 0x3e4a8e405e651ab7, 0x3ab167a2d8cf6b18, 0xbe6c6c40e5083698, 0x3e83ba47a17512fd, 0xbe93ee334beef6ec, 0x3e9f2bf9e6c43e99, 0xbea395c08ac8e281, 0x3ea43ee4b521ccad, 0xbea178f0deeb9b20, 0x3e9964e51b0f0532, 0xbe8f0cc4ecca5c2f ],
[ 0x3fefffffff90b2e3, 0xbc7d82d94a90f1e4, 0x3e3efac5187b2864, 0x3acf1301ae680614, 0xbe60d229044adeee, 0x3e77b5bc9db47d00, 0xbe88588212e670c2, 0x3e935f42db1989fa, 0xbe98cd98865c4ff0, 0x3e9a2b8587c48078, 0xbe971aa2de99af9c, 0x3e913a89805c15d9, 0xbe85b53ca1bcf01a ],
[ 0x3fefffffffc0748f, 0x3c66ef7a9caef280, 0x3e31edfa3c5f5ccb, 0x3ac368f60e2e6cfa, 0xbe53c025a6810c37, 0x3e6c42f78a0989ad, 0xbe7d7c6c3583c6e3, 0x3e87dd6ccb5c93b4, 0xbe8f1ec2f699fdcc, 0x3e90bf7a04407a8c, 0xbe8e3aafe6dfd4e0, 0x3e871bc3a55b63f4, 0xbe7df66b11724e7c ],
[ 0x3fefffffffdbff2a, 0x3c749438981099b2, 0x3e24979ac8b28928, 0xbacc2f44bcf3ce52, 0xbe47015eec37753a, 0x3e60b487791590cf, 0xbe71b44b64c3c995, 0x3e7d23ff3ef8dd83, 0xbe8357d673d1ccfc, 0x3e853a563ce0e9e3, 0xbe83921106a960f6, 0x3e7ea527d318f96e, 0xbe746bd6cea7103d ],
[ 0x3fefffffffebc1a9, 0x3c7e0e5facabfab4, 0x3e177756ec9f78fb, 0x3aae20366d0e0306, 0xbe3a9530780ca70c, 0x3e53962ecb10df65, 0xbe651494525dee64, 0x3e71a2961b90efb0, 0xbe77d35cd0b404bf, 0x3e7aa596d9d73afb, 0xbe791493d8d43ba2, 0x3e74184505343c2d, 0xbe6b7d977f1a3402 ],
[ 0x3feffffffff4b453, 0x3c859b25048a61cc, 0x3e0a887bd2b4404f, 0xba82556d8ad4dd44, 0xbe2e78be33fb01da, 0x3e46c6ef0b68629e, 0xbe58e36e9a44c497, 0x3e65286ee37c531e, 0xbe6d146395886537, 0x3e7090902855d5f0, 0xbe6fd0d1e8fcb6df, 0x3e6a10f65c3c5a7b, 0xbe624888c323daf3 ],
[ 0x3feffffffff9bec8, 0xbc76755054654b62, 0x3dfdc479de0ef004, 0xba9c3434581af3b8, 0xbe21535aee3eb1b2, 0x3e3a4547ed264758, 0xbe4d2308d0dead0f, 0x3e5929d46a9a7edc, 0xbe6195dbfd4afd19, 0x3e646630f49ccd2f, 0xbe63fa4637c64ebc, 0x3e60b98a6e0cfc02, 0xbe58093f032972f3 ],
[ 0x3feffffffffc901c, 0x3c69c951c943961c, 0x3df0916f04b6e18d, 0x3a81bdf9650721ea, 0xbe138b90f78fbe14, 0x3e2e0d7765326885, 0xbe40e9760d0ac127, 0x3e4daad91166722d, 0xbe5513c51b9838ed, 0x3e58e27fb85ba534, 0xbe58d6f6bd99eaff, 0x3e553c31e52fff08, 0xbe4f3bfd31796bc0 ],
[ 0x3feffffffffe202d, 0x3c8a54841f566a61, 0x3de24caf2c32af16, 0x3a802e3358112fa1, 0xbe05dfa962d49548, 0x3e210ca1ff2af812, 0xbe3377c7e98dd9b4, 0x3e4156649e0b5dd2, 0xbe49092f4db426c5, 0x3e4e12a29b227972, 0xbe4e94e18d5271a9, 0x3e4aae38927ee69b, 0xbe441121b0293be1 ],
[ 0x3feffffffffefc57, 0xbc68225a9658ef84, 0x3dd40dfd87456f4f, 0xba7a6d5c55f8e63b, 0xbdf848f101ce14c8, 0x3e132fed47f8dd28, 0xbe2638ff4a6975f2, 0x3e3416d25168a6b8, 0xbe3d78fb22f58668, 0x3e42009c6b4e61ea, 0xbe42a459e59c850b, 0x3e4096a3e8dac0ea, 0xbe397fba69de37d8 ],
[ 0x3fefffffffff748e, 0x3c6ae15e36044aac, 0x3dc5ce9ab1670dd6, 0x3a4cc9bbfb723fc4, 0xbdeabf69bd9866f7, 0x3e056ae1e8abbbbf, 0xbe1927ca04d1a7a8, 0x3e2713d3b07d7a36, 0xbe31318f5d7d717b, 0x3e355ab94fdfd1f4, 0xbe368216fb90717a, 0x3e346ad5ce577d65, 0xbe30065a20073e81 ],
[ 0x3fefffffffffb5b0, 0xbc850fb19119064f, 0x3db7872d9fa10ab2, 0xba57760afdf543a4, 0xbddd39eaac4a0b47, 0x3df7b67ab8af33d6, 0xbe0c3ced54e694ea, 0x3e1a4875d8a47f12, 0xbe23e213e6f5c296, 0x3e2919137301f897, 0xbe2aea6bd9b34930, 0x3e28e06e4ab5925f, 0xbe23ed1d979421b2 ],
[ 0x3fefffffffffd8b3, 0xbc65182469c211e0, 0x3da92ff33023d5c3, 0xba42932180032bd1, 0xbdcfae4fe28d12dd, 0x3dea0a80964d6e97, 0xbdff6f47be478e2a, 0x3e0dad968cdacb13, 0xbe16ca68a8bfdb81, 0x3e1d3a79e5305b4a, 0xbe1fe1534ebf69c7, 0x3e1e01ee76d92779, 0xbe1883ed9069f3fd ],
[ 0x3fefffffffffeb60, 0xbc74d3f53e684bf8, 0x3d9ac0f5f322937a, 0xba38e8ab19224e58, 0xbdc108dc99cf03e5, 0x3ddc5db17016a0c6, 0xbdf159f41ea079c3, 0x3e009ced3e9b7204, 0xbe09e4dace066800, 0x3e10dd5e0e9749b6, 0xbe12b3aa6599d0b5, 0x3e11eb5e8e4ffe8f, 0xbe0dd8955967ed31 ],
[ 0x3feffffffffff542, 0x3c6b57ed63ed8110, 0x3d8c324c20e337e5, 0x3a253fd8abf42ed9, 0xbdb22c6b11327305, 0x3dcea5f66f89cbd4, 0xbde2ff1e0a81bedc, 0x3df270ddbd8e501f, 0xbdfd2992b5c25c93, 0x3e03492d76bdf266, 0xbe05bc7361853dde, 0x3e053121ae3f1d2e, 0xbe01fb0f7e3f242b ],
[ 0x3feffffffffffa73, 0xbc76fead614b7934, 0x3d7d7c593130dd16, 0xba08e78574fe0514, 0xbda33c1e2f16e037, 0x3dc06c53fdc74764, 0xbdd4a029a87915ac, 0x3de44bd86238ff0d, 0xbdf0474ac3a80072, 0x3df5db2a89e9bc47, 0xbdf906f4b51a7f75, 0x3df8d189784c1f50, 0xbdf571a4760f483d ],
[ 0x3feffffffffffd27, 0x3c719e1a84064c56, 0x3d6e9810295890f9, 0x3a0f998d55766fdb, 0xbd943262ab4b77b2, 0x3db1756eae580a28, 0xbdc6359d5b0d251e, 0x3dd626391bd58994, 0xbde203efc6c9f556, 0x3de88c0b111be900, 0xbdec8ca211a38811, 0x3decc911f684d612, 0xbde950e3edf09a71 ],
[ 0x3feffffffffffe8d, 0x3c5e766e2c801398, 0x3d5f7f338086a87b, 0xb9ddfa0c27b527e0, 0xbd8509f766d9f287, 0x3da268e278ede221, 0xbdb7b7b43e9a1b0e, 0x3dc7f7aadab6b398, 0xbdd3c3cc6aafba0b, 0x3ddb52c69b4ab6de, 0xbde0222c438d1182, 0x3de0888e14314f83, 0xbddd96aaea63b362 ],
[ 0x3fefffffffffff45, 0xbc85948eec884df5, 0x3d501647ba79874e, 0x3986d5d39dabc300, 0xbd75be1cf20840dc, 0x3d93418096320daf, 0xbda91e9beb94b447, 0x3db9b762261756a7, 0xbdc57f320a630c91, 0x3dce24b78ce82b11, 0xbdd2112fff5c77aa, 0x3dd2cfdd93a41786, 0xbdd11ea1f35b4d2b ],
[ 0x3fefffffffffffa2, 0x3c6d07509a1a9440, 0x3d404e15ecc7f401, 0xb9d0858e34f7a6a6, 0xbd664ac1f9b95f96, 0x3d83fa8302ade993, 0xbd9a62b70897719e, 0x3dab5c619266e9f0, 0xbdb72de32129cbb8, 0x3dc07ae94305c398, 0xbdc40c45a9e95152, 0x3dc533d127efdf16, 0xbdc39dc242ba4cda ],
[ 0x3fefffffffffffd1, 0x3c83b6fc0b729759, 0x3d3065b9616170e1, 0xb9c49459f5147526, 0xbd56acaa58a8be12, 0x3d748fb92d0947e7, 0xbd8b7ce1a1ea8ea5, 0x3d9cddc552bbebeb, 0xbda8c751cc1a5784, 0x3db1dc79b52007b0, 0xbdb60b3d17e7714c, 0x3db7ac1d379afc28, 0xbdb641ca84798564 ],
[ 0x3fefffffffffffe9, 0xbc55fe91226dd510, 0x3d205ca50205d279, 0xb9c7a281f9edb8e6, 0xbd46e18ec0d42451, 0x3d64fdb051100a15, 0xbd7c66b3f3fe565e, 0x3d8e331281475b54, 0xbd9a42e6965b2b9a, 0x3da3301ef4931960, 0xbda804fcc1524d74, 0x3daa2ef0c13a3daa, 0xbda9028a915f98d3 ],
[ 0x3feffffffffffff5, 0xbc8238f8ed17d9b3, 0x3d10330f0fd69931, 0x39ba2c00e0c6dcba, 0xbd36e8334c65749d, 0x3d5541d561058477, 0xbd6d1ac042ada69e, 0x3d7f54864c5a530e, 0xbd8b984c73c1d301, 0x3d946ec7009c291f, 0xbd99efc2df737760, 0x3d9cb12ac38f37ca, 0xbd9bd54fcd67b8d4 ],
[ 0x3feffffffffffffb, 0xbc8efa4d64f59f62, 0x3cffd3de10d6287a, 0xb99e1fdae91c5cfe, 0xbd26c073be0916e6, 0x3d455a8eab9e129a, 0xbd5d94c87c1bc304, 0x3d701db0818bec24, 0xbd7cbfbe4c0ef6ee, 0x3d859179d8c519c4, 0xbd8bc172710440bd, 0x3d8f26a4f726814e, 0xbd8ead889e052555 ],
[ 0x3feffffffffffffd, 0x3c86be96953fe014, 0x3cef05e82aae2be2, 0xb98070a8237b4337, 0xbd166b44c6d7ddb6, 0x3d35474bd9d072f3, 0xbd4dd1e8c33100cc, 0x3d60711486984913, 0xbd6db2522b66a6ce, 0x3d76919a06329739, 0xbd7d6fe8f87926e8, 0x3d80c1488010ff5c, 0xbd80bf9fa407e9ab ],
[ 0x3fefffffffffffff, 0xbc80fecc5ed770de, 0x3cde00e9148a1d52, 0x394f7a503c7a2ad8, 0xbd05eaaa4200e355, 0x3d25088b6566fced, 0xbd3dd0b48e0f634e, 0x3d50a27116d7478e, 0xbd5e6a3e1d5c214f, 0x3d6769249755a4bc, 0xbd6ef16049050b69, 0x3d71dbf2744f66db, 0xbd721c636bd8f5a9 ],
[ 0x3fefffffffffffff, 0x3c8989c6c5d51227, 0x3ccccaaea71ab110, 0x394152f323a1f3b4, 0xbcf541a2f15eb476, 0x3d149fd53e85cdf3, 0xbd2d9144beee6b4a, 0x3d40b09b02f533a1, 0xbd4ee312fcf48076, 0x3d5812ed2f01f60a, 0xbd601e6391f47ad7, 0x3d62dce8f6b8c896, 0xbd6365d5011db0df ],
];
/// Coefficient table: 47 rows of 27 raw `u64` words each.
///
/// The words are stored as integers rather than `f64` literals; they look like
/// IEEE-754 binary64 bit patterns, so the consumer presumably decodes them with
/// `f64::from_bits` (the evaluator is not visible from this table — TODO confirm).
///
/// NOTE(review): within each row the first 16 words appear to form 8
/// (high, low) pairs — a large-magnitude word followed by a word roughly
/// 2^-53 times smaller — i.e. double-double coefficients, followed by 11
/// single-word coefficients (8 * 2 + 11 = 27). Confirm against the code that
/// indexes this table before relying on that layout.
///
/// NOTE(review): read as binary64, the leading word of each row rises from
/// about 0.209 in row 0 to exactly 1.0 (`0x3ff0000000000000`) in the last row,
/// which is consistent with one polynomial per sub-interval of an `erf`
/// approximation, as the name suggests — verify the interval width and the
/// expansion point against the caller.
///
/// `#[rustfmt::skip]` keeps every row on a single line.
#[rustfmt::skip]
pub(crate) static ERF_POLY_C2: [[u64; 27]; 47] = [
[ 0x3fcac45e37fe2526, 0x3c648d48536c61e3, 0x3ff16e2d7093cd8c, 0x3c9979a52f906b4d, 0xbfca254428ddb453, 0x3c69c98838a77aea, 0xbfd59b3da8e1e176, 0xbc41f650c25d97b0, 0x3fb988648fe88219, 0xbc55aecf0c7bb6c1, 0x3fb803427310d199, 0xbc5a14576e703eb2, 0xbfa09e7bce5592c9, 0x3c3eb7c7f3e76998, 0xbf9516b205318414, 0xbc2941aa998b1fa4, 0x3f8038d3f3a16b57, 0x3f6e19d52695ad59, 0xbf59542e7ed01428, 0xbf41f9b6e46418dc, 0x3f30796a08a400f4, 0x3f12610d97c70323, 0xbf025d31d73f96d1, 0xbee05e1fa9e02f11, 0x3ed1e616f979139c, 0x3ea9b3d54f1f222a, 0xbe97ad96beea439a ],
[ 0x3fd5da9f415ff23f, 0xbc4a72e51e191950, 0x3ff05fd3ecbec298, 0xbc9f17d49717adf8, 0xbfd477c8e7ee733d, 0xbc792236432236b7, 0xbfd1917b60acab73, 0x3c7c06e6c21b4b3b, 0x3fc322a728d4ed12, 0x3c3ffa8aef321410, 0x3fb04c50a9cd2c12, 0xbc4edd0562dce396, 0xbfa7ce764eeddd86, 0x3c29afeb391c029c, 0xbf868aac5801171d, 0x3c24f9655411fc03, 0x3f862aa895f51cd3, 0x3f56c003c3cedb10, 0xbf6079502dbbafff, 0xbf1d9c7cbb799b47, 0x3f345a995aede3f4, 0x3eb0c04ea8c98fc9, 0xbf057edfa53128d0, 0x3eba96286bf3ef56, 0x3ed3c8ab12e6d24b, 0xbe97454eba0cb203, 0xbe8f02a6f6847617 ],
[ 0x3fddb081ce6e2a48, 0xbc77ff0a3296d9cb, 0x3fedd167c4cf9d2a, 0x3c844f2832f90a97, 0xbfda173acc35a985, 0xbc3c5432c9a22740, 0xbfc889a80f4ad955, 0xbc6f6123bf467942, 0x3fc6c2eea0d17b39, 0xbc61f4935c3cf5b1, 0x3f9b0645438e5d17, 0x3c37a5f08ebaf9d0, 0xbfaa3fd9fcbb6d6d, 0x3c494a1b58b5916f, 0x3f2060b78c935b8e, 0x3bb9cec375875a1c, 0x3f8678b51a9c4b0a, 0xbf51e03bfc8eebb4, 0xbf5e653535cab33f, 0x3f355f31366d2c5c, 0x3f30dcf1445cbb88, 0xbf1098913ad4dcc7, 0xbeff6e252329eeed, 0x3ee41ad0a5afe51d, 0x3ec8fd4609222f1c, 0xbeb4465926de1a35, 0xbea407a1f42b46d4 ],
[ 0x3fe25b8a88b6dd7f, 0x3c89534a3b5bd215, 0x3fea5074e2157620, 0x3c7fad8c0ef6fae6, 0xbfdd9a837e5824e4, 0xbc71d19ec86adc7c, 0xbfb9c41d1d5fae55, 0x3c374c230d6afba4, 0x3fc75bebc1b18d1c, 0x3c501ece95d4dffc, 0xbf86410ad9332666, 0xbc216523d167a40c, 0xbfa7df8890b11fa7, 0x3c4d6a99d1387564, 0x3f84a54816d3608a, 0xbc2f810ad06699cc, 0x3f818f36eb18f3d7, 0xbf68d661c030e174, 0xbf53628ede23e249, 0x3f4438eb2b3c4d27, 0x3f1fd3c13e725e91, 0xbf1991b866a32c87, 0xbee1237c600dab6f, 0x3eea9c701140d4c0, 0x3e71801e61adfdda, 0xbeb785516863e6ce, 0xbe83e033ef590125 ],
[ 0x3fe569243d2b3a9b, 0x3c78eef7012e8df4, 0x3fe681ff24b4ab04, 0xbc5dba6493354c70, 0xbfdef2bed2786b25, 0xbc7ae3f6b6b2b679, 0xbf8a4254557d722f, 0xbc20ff7bffd10053, 0x3fc532415c267962, 0x3c62eacc4bd2e841, 0xbfa558b4c55a835c, 0x3c3c21c40815d70a, 0xbfa1b7ad5b777f1b, 0xbc42115b2bd8d644, 0x3f91201d3bd0e758, 0xbc0b39b845442560, 0x3f72995e3a88a890, 0xbf70294c3e93cdb0, 0xbf3159644a564f83, 0x3f463daf9b3858ef, 0xbf03beeb4a1255ac, 0xbf180c5178c36c72, 0x3eed4f6f5bab7dfa, 0x3ee521deb6d2f46e, 0xbec4ef3208231a8b, 0xbeae7d2b4e06e4a2, 0x3e921536d5b8bdf9 ],
[ 0x3fe7fb9bfaed8078, 0x3c766cf14bcad032, 0x3fe2a8dcede3673b, 0xbc77378e2c70325e, 0xbfde5267029187c0, 0x3c7add23841b110a, 0x3fafe0796bb9d05a, 0xbc37a992e13ce574, 0x3fc0fa23021ad0ac, 0xbc417f4228359928, 0xbfafa21ebca76761, 0x3c3278ca2820f66c, 0xbf931546d5c4edb4, 0x3c136fcf151892a0, 0x3f937e5469efb7a6, 0xbc39553630321d4f, 0x3f2097966e2e87ea, 0xbf6e82ab020887a7, 0x3f4318270c11ae74, 0x3f412652e433da97, 0xbf24dc9bd6368bb8, 0xbf0c441138d4ff53, 0x3efc91d8dc5b66ec, 0x3ecf3ba57b86d474, 0xbecdd3403d11a818, 0xbe731f497a106a7c, 0x3e9436dbcc93d342 ],
[ 0x3fea1551a16aaeaf, 0x3c6a558a46df5f68, 0x3fddfca26f5bbf88, 0xbc6ddcbaf85587b6, 0xbfdc1cd84866038f, 0xbc7200885b97f453, 0x3fbe4c9975da0987, 0xbc5f162e7576c79c, 0x3fb747e31bf47af3, 0xbc56178f12d62ed9, 0xbfb1d1f00109e42a, 0x3c5002b06e023544, 0xbf647654175ceb42, 0x3bd683389ccacfa8, 0x3f91a817c594b8cb, 0x3c336ac477166efb, 0xbf6cb8acd699cca6, 0xbf657b72bf874db6, 0x3f524493dca8b6fa, 0x3f2f556774c6aaf6, 0xbf2b09ec5c8ba626, 0xbed09bd1a09f38e8, 0x3efd149c3e776976, 0xbec8f7c2a6575e92, 0xbec8391d4afaf16a, 0x3ea5a7552081d1d5, 0x3e932d1bb2d1d0ca ],
[ 0x3febbef0fbde6221, 0xbc8322c1148e0d48, 0x3fd75a91a7f4d2ed, 0x3c56eb826a9df85c, 0xbfd8d03ac274201c, 0x3c57a5c56eb7f6a0, 0x3fc3954778d6a0df, 0xbc5863eca74d1838, 0x3fa88e0f7b183fc6, 0x3c4226527d05ce39, 0xbfb0f7c15f75ee13, 0xbc156f74f3513660, 0x3f85e22cfa1aab51, 0x3c24b49a250c6474, 0x3f89ad28c5557c22, 0x3c299920b730ecd5, 0xbf7704ec5d29fc83, 0xbf523360304f19ba, 0x3f543ca3fcdf079d, 0xbf0dcb97a9e04bd4, 0xbf2735e26c43d267, 0x3f0360c3b06ffbb4, 0x3ef29a6b5798e781, 0xbeddbc35e4cf98f5, 0xbeb2f6e8e81287bb, 0x3eaeeb2fdddad355, 0x3e81ae65e387ac52 ],
[ 0x3fed0580b2cfd249, 0x3c84fca6318dfee9, 0x3fd1a0dc51a9934d, 0xbc6ca89d2d78fba4, 0xbfd4ef05a0f95eeb, 0xbc65f7c55a00231c, 0x3fc5648b5dc47417, 0xbc6fb8fa09976e07, 0x3f840fbaba44504c, 0x3bf435c75f61f1e0, 0xbfac0db89d0a41a4, 0xbc11dd02d9441b98, 0x3f9388c3ec056942, 0x3c38e7498172c914, 0x3f7aecb7463cf446, 0xbbe0d6701a009d70, 0xbf78bca53327e075, 0x3f34add4a8239f4a, 0x3f505ce4abd10484, 0xbf3183f198a0b620, 0xbf19cd1a9b9fc69b, 0x3f0d30363021af83, 0x3ecda66f2161c4c6, 0xbedf41f1f238827d, 0x3ea725a07b1177b7, 0x3ea84c3b2483eb6a, 0x3e6e30e89d6e85cd ],
[ 0x3fedf85ea8db188e, 0xbc8f71e8254d11a9, 0x3fc9cb5bd549b111, 0xbc4973e73caa1edc, 0xbfd0ed7443f85c33, 0xbc574bf040302ad8, 0x3fc5066cda84bba9, 0x3c4beb86d9e281a8, 0xbf9419fa10b6ed7d, 0x3c35157491034c58, 0xbfa3f41761d5a941, 0x3c494a1c1f7af153, 0x3f96d1d724baaae4, 0x3c3c41090a704426, 0x3f4e377f5703f7ff, 0xbbea753be0c53963, 0xbf74cc916ad63c27, 0x3f5553ef0d12719f, 0x3f426240f55987fd, 0xbf36bbf0fffb7138, 0xbee320cf6663c40d, 0x3f0a9d4850aaa197, 0xbee17036c4011c91, 0xbed441ea26a91a02, 0x3ebd81eb8e2ef452, 0x3e917d7b798a4322, 0xbe4b7b0dfb2559d0 ],
[ 0x3feea7730ed0bbb9, 0x3c82c5bd7ce1388b, 0x3fc24a7b84d38971, 0x3c6aa0c5e788ed5e, 0xbfca4b118ef01593, 0xbc3238e3e6a99de0, 0x3fc319c7a75f9187, 0x3c4a8f8fff24b0ac, 0xbfa3db5bed47faf6, 0x3c429cf699c8512c, 0xbf97019bda6c2fdd, 0x3c1dd56b84622d88, 0x3f959d3aa402c32e, 0xbc08de701f1e95e8, 0xbf6b324eab9c87a9, 0xbbec3a4329771a44, 0xbf6b4774d37d0dd6, 0x3f5c01377485a844, 0x3f1a5db5f627539b, 0xbf340d9c429b8932, 0x3f0e720d935ef7db, 0x3effc8295ac052de, 0xbeed1ccde95c6551, 0xbeb251c256ca45cb, 0x3ebe892cc5397b1b, 0xbe88f6831febdf3d, 0xbe9aa5ef30a52421 ],
[ 0x3fef21c9f12f0677, 0xbc57efe429672268, 0x3fb92470a61b6965, 0x3c5c6acd40cee352, 0xbfc3a47801c56a57, 0xbc6033705aa16f01, 0x3fc0453f90d3bd35, 0xbc6686e281ba5405, 0xbfa8a7c6a239217b, 0x3c32a988808a7222, 0xbf8075c088031ee3, 0xbc1665bd0a645f40, 0x3f916f9c9c127b80, 0xbc1e1813af47374c, 0xbf774c2fc9bdfe97, 0x3c15cf2dbe53783b, 0xbf5760c522bd5bec, 0x3f5a3cdb656adb44, 0xbf302c3c1ab0a7ba, 0xbf292892013c7e15, 0x3f16e7b268d42034, 0x3ed970751eb9359f, 0xbeeb00b549bbdf58, 0x3ec033f8545bcc6a, 0x3eb2d8b6f0a2204a, 0xbe9c1c1335b105c5, 0x3e661bbb2d003b8a ],
[ 0x3fef74a6d9a38383, 0x3c8c33a329423946, 0x3fb0bf97e95f2a64, 0xbc5446051f6fef82, 0xbfbc435059d09788, 0xbc4b93aeb5e5cf84, 0x3fba3687c1eaf1ad, 0x3c564513fb767a13, 0xbfa9647a30b16824, 0xbc486357831221be, 0x3f66981061dfbb09, 0xbbfccc83193c8742, 0x3f87e8755da47040, 0xbbec1eaeb3371490, 0xbf79be731fdab95d, 0xbc0ab79fedbfccd2, 0x3f23a95ae0a75542, 0x3f5319f780e962d8, 0xbf3b88dd51a4f261, 0xbf1037f168a8f581, 0x3f153fc5e83e3199, 0xbee9d5bf30917222, 0xbee03045c999d17a, 0x3ecb5d376e96179f, 0x3e8c66d2e5aa2274, 0xbe9aef24a52bcaca, 0x3e7b20b678e8a0c6 ],
[ 0x3fefab0dd89d1309, 0xbc8ae61bd9db1bab, 0x3fa5a08e85af27e0, 0x3c4e4f9cfc8c2382, 0xbfb399812926bc23, 0xbc5b782644df6665, 0x3fb4140efb719cb0, 0x3c308fa5a48311e8, 0xbfa7535a61a4193d, 0x3c359e0501c376b2, 0x3f8374c88c7e6abd, 0x3bfc2578bd7e3f00, 0x3f7a40709e010e77, 0xbbf18c33197d9138, 0xbf76dc078888efa7, 0x3c02b49da4c86c70, 0x3f52ee6d200993b0, 0x3f444f175e22a161, 0xbf3c2fb051c92f92, 0x3f0523035ed3964b, 0x3f0bc7b666856fc1, 0xbef574549f39ee50, 0xbebc57f3c47b39d9, 0x3ec8acc76ac31fcd, 0xbe9f70e8b7deaa9a, 0xbe8e1a28a0c1a6a6, 0x3e6bfa0e5b606c5e ],
[ 0x3fefcdacca0bfb73, 0xbc82c33d88729e43, 0x3f9b1160991ff737, 0xbc2d940a504353bc, 0xbfaa38d59456f77d, 0xbc1d625808eb9778, 0x3fad5bd91b6b0123, 0x3c222b86f5e3e16c, 0xbfa3b35dcbc80146, 0x3c482838d776d958, 0x3f89d76b0a0535c7, 0x3c260fda06bca0a0, 0x3f614c887a83a0e6, 0x3be55ef222558d68, 0xbf7117f42cc6e9f4, 0x3bed4213a7e14a18, 0x3f59b477bdad8e08, 0x3f21d219fb0e1bc8, 0xbf35bb59d3ca4fa9, 0x3f18ca373c577821, 0x3ef4a9b74153a4a3, 0xbef424a8a8831410, 0x3ec6ce0877965abc, 0x3ebc1ed3c11b1dd1, 0xbea86b0a731d831a, 0xbe55cea3996396c5, 0x3e9640950bde5eb3 ],
[ 0x3fefe307f2b503d0, 0xbc68a555000387f8, 0x3f906ae13b0d3255, 0xbc388abd7f4be982, 0xbfa0ee3844e59be7, 0xbc40b0ec94b96d83, 0x3fa48b127f8ed8a5, 0x3c3b6a1f18c2c162, 0xbf9f155b4e7d8c3b, 0x3c3adb2d99b0c1fc, 0x3f8aa2c0753d569a, 0x3c29a37b9864b8e6, 0xbf4bbf7e2795837b, 0xbbe4784a66288abf, 0xbf65478d784d271c, 0x3be27115917a7ec0, 0x3f58eae08cdf9546, 0xbf292946556037e6, 0xbf290f27ae61444c, 0x3f1b076b78538f02, 0xbedb2906f1b92d5d, 0xbeea2f66822d4a01, 0x3ed3031c4f7c4a97, 0x3e941708ced2abd0, 0xbea45ffd6deae2a8, 0x3e7e844ebdc8456a, 0x3e7c0bbf2b711595 ],
[ 0x3fefefcce6813974, 0xbc5b27cf5025d1c8, 0x3f834d7dbc76d7e5, 0x3c23780d6e7eb351, 0xbf951cc18621fc23, 0x3c2969629e4b64a6, 0x3f9b925a99886bb7, 0x3c29c8f65efdd1f4, 0xbf971e7d408c8c6f, 0xbc3c5621deaf4cfc, 0x3f87ea58080a81ef, 0xbc22f25b7f384ff3, 0xbf646eb9d203e071, 0x3beff569e38360a4, 0xbf5403333682fa5e, 0xbbe36256a95953a6, 0x3f53b37d5bd14a40, 0xbf36be130822dbdf, 0xbf103d4bcdafd553, 0x3f155848476c8142, 0xbef5492bf3c6eee6, 0xbed3823d4328e9c5, 0x3ed152fefc353e5a, 0xbea5199dbf7bc4c6, 0xbe94dda2bebe08f2, 0x3e83fb850b47210a, 0x3e6bcd1b284c4798 ],
[ 0x3feff733814af88c, 0x3c70a87238cea4fa, 0x3f75ff2750fe7820, 0xbc15f184847ca667, 0xbf896f0575a63ae5, 0x3c295f4139297a96, 0x3f91c5a643f04363, 0xbc16ea87997fba3c, 0xbf904f5caaf2196f, 0x3c119502347d3b54, 0x3f8382a146afb9d2, 0xbc0f93bde902d2d0, 0xbf695cab93aa68d2, 0x3be0f716a5fc18c4, 0xbf2d2fd90fe62928, 0xbbb4e00d5fcc484a, 0x3f49f50fb94c0b86, 0xbf37d7378074399b, 0x3efcc0c9cb9ede1e, 0x3f092a3a29471895, 0xbef7c127858c909a, 0x3eb5a72fde935a48, 0x3ec57b9d90a92106, 0xbeafdb8443754cf7, 0xbe56c7d633eab55a, 0x3e7ddcfc714a2b67, 0xbe89fedf738e84b4 ],
[ 0x3feffb5bdf67fe6f, 0x3c14e830346f6e80, 0x3f684ba3004a50d0, 0xbbf90b93d4632206, 0xbf7d9c2ea85a927d, 0xbc10bcf1ea93cfdc, 0x3f860898536e104a, 0xbc1ab6aa911c445e, 0xbf85eb1c899f0b70, 0x3c21bc22eed1f1fb, 0x3f7d854f73e74c87, 0x3c07a977a3364c40, 0xbf6897719a9d257e, 0xbbeab523e3f93994, 0x3f388cdc8b807c97, 0x3b94875acc7c06a0, 0x3f3b325a11c1f45a, 0xbf3381548f692740, 0x3f12b1fd05559bfa, 0x3ef1ed31cd6feb26, 0xbef29cf593fdf00a, 0x3ed1cea99b59228c, 0x3eaceff221e3598a, 0xbeab0ad4b899b2d9, 0x3e83761b047e21d1, 0x3e696c31c2256049, 0xbe60a714c57f7adf ],
[ 0x3feffd9f78c7524a, 0x3c804ed6ff98e45d, 0x3f5a024365f771bd, 0x3bf3c8f5202cb405, 0xbf70a9732d5284dd, 0xbc11acbd0899ce7e, 0x3f7a4bf47a43042a, 0x3c0e6cb2580d0920, 0xbf7c23802d8a5bb7, 0xbbd9963700abfc80, 0x3f74f40070668329, 0xbc1e1fe1c0e1182a, 0xbf64c9a2c9dccd04, 0x3c080fb9c9cd78c1, 0x3f44f7a50b5bc019, 0xbbd40906b7a1de3a, 0x3f218b04eb90c737, 0xbf2a4c3880c0ea69, 0x3f14b7b82a86f423, 0xbed0bc762b1c2aaa, 0xbee589d6f8892acf, 0x3ed357ab63f7bdf9, 0xbe96675858bbff5e, 0xbe9ea96dcb12a15c, 0x3e88c572fcf5610e, 0xbe3700c93da86dee, 0x3e57ae9ceb75e26e ],
[ 0x3feffed167b12ac2, 0xbc8ddc0ce3ed8fcb, 0x3f4afc85e0f82e12, 0x3bd438f22895e03e, 0xbf6221a9f326bef4, 0xbbf99642b37af330, 0x3f6e3c9aab90bcf4, 0x3bb7dcdfdccc72a0, 0xbf714b1b98141f21, 0x3c1af6edf50eba66, 0x3f6c1c19b9e63d70, 0x3bd4d1e9411f1d28, 0xbf5feac3dbeb5124, 0x3be2400e6ffbc1c8, 0x3f463e88178b0e49, 0x3be3e4ae97774f91, 0xbf04441c86c93f39, 0xbf1c8ceebc5fc50b, 0x3f1125b77a79aa6c, 0xbeeda7be990bc718, 0xbece019960474aff, 0x3ecd229185ef6279, 0xbeacea9fa10885e7, 0xbe8044fd6a2e447a, 0x3e83695f88fc641d, 0xbe60c0dc0ba0d589, 0xbe89194748828b93 ],
[ 0x3fefff6dee89352e, 0x3c8b96c0ba13851d, 0x3f3b23a5a23e4210, 0x3bc727bce1be0014, 0xbf5315107613c673, 0xbbf823f8673f5b7a, 0x3f60c243329a9ca1, 0xbbf65e361cefe652, 0xbf64630116262084, 0xbc00ea6ee40daf79, 0x3f61e84d1022e8cb, 0xbbd9b77b85eed4f0, 0xbf56b41872716325, 0x3bd3e9e001100f64, 0x3f436edde582b265, 0xbbe1cb479a94e148, 0xbf1f7870ebc38e77, 0xbf051ecfdc37801d, 0x3f0711d817e0d3b6, 0xbef0ae90d500d1d8, 0x3eaa85b1bf54920c, 0x3ebfe73958205038, 0xbead222bfef33aa4, 0x3e7833f8b13b1a4e, 0x3e7233b5a19285db, 0xbe61adcf574b7db6, 0x3e7ab10bedc44532 ],
[ 0x3fefffbb8f1049c6, 0x3c7d2c6266b51f26, 0x3f2a740684026555, 0xbba7e24cc3ac5710, 0xbf436d34c8f1c26a, 0xbbe69d73e7d1c977, 0x3f51eb6e14974a25, 0xbbf99b78600e0664, 0xbf5714eb8cc0947f, 0x3bf3613f37c7410b, 0x3f55bec08c01b1d7, 0xbbf3e3a262f6c68a, 0xbf4e4621d82dad12, 0x3bc302878843e2cc, 0x3f3e1b7b564b0e79, 0x3bcf894fc1f14d54, 0xbf224564b69716aa, 0x3ebbf8e3b47f3ccd, 0x3ef8f55a9be1a264, 0xbeeb3b76e6203281, 0x3ec713c795a07e0c, 0x3ea3bb092cfd93e0, 0xbea473b0a8333dee, 0x3e8645526869c143, 0x3e41a343e004b33d, 0xbe576c7e253faad1, 0x3e7e16080963cffe ],
[ 0x3fefffe0e0140857, 0xbc66aa36f86c14dc, 0x3f18fdc1b2dcf7b9, 0x3b87050f50b8f308, 0xbf3322484cf12daa, 0x3bd4cc0408806d4f, 0x3f427dc1bc6cfef5, 0x3beffbb5229f6bb7, 0xbf49202f465eb421, 0x3ba8f3f063b40660, 0x3f493b4c9746835f, 0xbbe04e2d6df2fce5, 0xbf430e9e6142fe9b, 0xbbd0396045094744, 0x3f3555b9d5fb4825, 0x3bd9a40d2ca5ef0b, 0xbf2055983c4ac7a6, 0x3ef68e6c75a5d068, 0x3ee2d4a50d2757ce, 0xbee1de08b56479aa, 0x3ec9110ccc7fe6fd, 0xbe8bb3184d789af8, 0xbe94629a164e82a0, 0x3e8413b087ee5e4d, 0xbe5648d7786f9fbc, 0xbe4293289f8c327d, 0xbe6c283008e726f7 ],
[ 0x3feffff2436a21dc, 0xbc83607959a29d36, 0x3f06e2367dc27f95, 0x3b6d96e6f0151020, 0xbf223c436c36fdab, 0x3bbf0d77fc600a50, 0x3f326bf00867a835, 0xbbdc92e1aecdc750, 0xbf3a51fb50b15f22, 0x3ba248227c6d2260, 0x3f3c0825378fda08, 0x3bd5a8a09c053451, 0xbf36c3dbfe0cbe4a, 0xbbde65769c33f8a1, 0x3f2c1dd1438378df, 0x3ba91bd161f34158, 0xbf194c36a9d7c0dc, 0x3efbf0aab116ca41, 0x3e86bdbd2f103930, 0xbed2b32e8d43ef25, 0x3ec3a7403459770b, 0xbea17411873320fa, 0xbe735bb2691c9b29, 0x3e798313537ed069, 0xbe5cb4b60e85a341, 0x3e02be214cf4c9eb, 0xbe8350a1a851865a ],
[ 0x3feffffa1de8c582, 0x3c8832540129302a, 0x3ef44f21e49054f2, 0x3b8f338cf4086346, 0xbf10d18811478659, 0x3bb914a7a08b6a2b, 0x3f21b964d438f622, 0x3bca52c94c56aaaf, 0xbf2a8d7851f26bf0, 0x3bcc38dbf3ee1223, 0x3f2ddd6df9b6852d, 0xbbc3b0dd7eac9b91, 0xbf29e52b7aac1644, 0x3bc904036dfb5764, 0x3f2165b2034fcab2, 0x3bc27beac4bf3866, 0xbf11b75c3332673a, 0x3ef91a253c42f4e7, 0xbed020b498095051, 0xbebade63f30809ae, 0x3eb89bb0d75e59b7, 0xbea180c78d3dca28, 0x3e6cabfd39b38553, 0x3e66013ffba86cfd, 0xbe564f2b123e1f0b, 0x3e335bf3e5021105, 0xbe8177828ffd35af ],
[ 0x3feffffd8e1a2f22, 0xbc8c10adf6b19989, 0x3ee1783ceac28910, 0xbb87f19d8ee58337, 0xbefe06a8b37e5b93, 0x3b824e8db1358f2e, 0x3f107978c7b8496b, 0x3bbf163b5580927c, 0xbf19d039884f8be5, 0x3bbfce53cd30b1eb, 0x3f1e8d1145e94a54, 0xbbbd0f6e009a99ee, 0xbf1c1f7251172a87, 0xbb83ce0f013dfe90, 0x3f1458b9e0854d68, 0xbbb897cf3950b1a7, 0xbf06eb0557245429, 0x3ef33045cf65279e, 0xbed42c8adf18ab62, 0x3e491109b80f9918, 0x3ea83a9b44249fbf, 0xbe99bcbaf0a8dfd1, 0x3e7900325b58a857, 0x3e34a3cf9c161684, 0xbe50cbcc4d0a916a, 0x3e34275e1b91f084, 0x3e839180c75350e1 ],
[ 0x3fefffff039f9e8f, 0xbc89d1bcd6174e99, 0x3ecd21397ead99cb, 0xbb46abd9c029c47c, 0xbee9f19734d29cf9, 0x3b820c4383da36c1, 0x3efd982bd41d8954, 0x3b8d9bc9988e9666, 0xbf08320fc4836be5, 0x3b7526638b9926a8, 0x3f0e0a1cb1d071f3, 0x3b9d9f5d232bab90, 0xbf0d384223047b9c, 0xbba30d0b2b8a170d, 0x3f0696daf6422bd4, 0x3baba6ac732f399e, 0xbefbb6e2d311a93f, 0x3eea4fcb0ea87efb, 0xbed1c940c5303daf, 0x3ea7469913f4e9c6, 0x3e8ef4b4f8ab67ae, 0xbe8e189c28e8e041, 0x3e7678b281d5bc55, 0xbe49c3bf4e9f2b5d, 0xbe374c9ba997ffed, 0x3e2b4b843f8c7068, 0x3e6c901764507862 ],
[ 0x3fefffff9d446ccc, 0xbc6bb06bab98bc80, 0x3eb789fb715aae95, 0xbaf226d93bf89b40, 0xbed5b333cc7f98f1, 0xbb76bd1091d25440, 0x3ee9b12fdbf90f62, 0x3b8d4b6b0ee9cf46, 0xbef5e06923144d70, 0x3b4c593194857860, 0x3efc6a071925631d, 0xbb8835ef595952e4, 0xbefd178cb0388a82, 0xbb9039272760f01c, 0x3ef7e29d33ac92b6, 0x3b921ff8b0e9d5eb, 0xbeef9203429baad6, 0x3ee094dadeee395c, 0xbeca771cf3500d9f, 0x3eab8fd1c29c21ea, 0xbe5cc8573d7de110, 0xbe7b0362da1722cb, 0x3e6e5eae518f94e9, 0xbe507963addd99a6, 0xbe23f496093d0bef, 0x3e199078a326092d, 0x3e842681ecfe4da1 ],
[ 0x3fefffffda86faa9, 0xbc7d230252d68f26, 0x3ea26f9df8519bd7, 0xbb4e339871c015b7, 0xbec1926290adc888, 0xbb6e36d23dbb2644, 0x3ed5900c02d97304, 0x3b7fa7d21e3ed616, 0xbee3166de6a8c640, 0x3b8b014157867958, 0x3ee9dfcc328729e0, 0x3b820e9fee0b7665, 0xbeebcab1ed5ec38d, 0x3b6d9003794f0fe0, 0x3ee81cd74a57ce17, 0xbb8809fde9c0f6f5, 0xbee106e95b6bf556, 0x3ed379625a71385f, 0xbec1970a5b5bd443, 0x3ea74761c8333ff2, 0xbe80864e125c9951, 0xbe5b83bf9019aa3b, 0x3e60397611c35b28, 0xbe4a25392adb29ac, 0xbe17b832af40d9d4, 0x3df62a02eb79577b, 0x3e8a6da58ffe94f4 ],
[ 0x3feffffff233ee1d, 0x3c8db123ed17221d, 0x3e8bfd7555a3bd68, 0x3b20151cf177b53a, 0xbeab8d7f804d2e73, 0x3b482b366f0bc2dc, 0x3ec17f93e5149289, 0x3b391997bfd26568, 0xbed013b0457d08fa, 0xbb60d6d5a7f06298, 0x3ed6b245d7e1d829, 0xbb79985e02c8ce3b, 0xbed98077548c6951, 0x3b701cd3f1d12c93, 0x3ed7492048ab3ceb, 0xbb70368a0dc0750e, 0xbed17506c7b39cb0, 0x3ec57e94a4c5f5a6, 0xbeb570971200d7db, 0x3ea0a0f956947b21, 0xbe81a9b7bd5bba32, 0x3e451bfc00de3146, 0x3e495b6967f79cbe, 0xbe3fe3c43cb3cf84, 0x3e32f364a7a2dc5f, 0xbdf007442a10cc14, 0xbe7ef5ab6fc5e849 ],
[ 0x3feffffffb127525, 0x3c8504f382db4102, 0x3e74980cb3c80949, 0x3b17fbdd923f8057, 0xbe94ea6ce697296f, 0x3b3ea42f9c9de533, 0x3eab771d9b6f07b8, 0xbb1e9c1ca9662fe8, 0xbeba26c653fad5b8, 0xbb5146c4cee0e898, 0x3ec3302bb89379de, 0x3b64c55b83ef7a68, 0xbec67f42e5264334, 0xbb66779da26b4197, 0x3ec58b4adafb958e, 0x3b68351251b45e84, 0xbec10f576796285a, 0x3eb66ca44250dd07, 0xbea84ee0ada37543, 0x3e953b6065291e6b, 0xbe7c09ebfd0c581c, 0x3e563062625d59c0, 0x3e1e259c60eb7b83, 0xbe2e43802ad25514, 0x3e351bcdabe8cda5, 0xbdf930fc3df6e909, 0xbe881cdd770e1c81 ],
[ 0x3feffffffe4aed5e, 0x3c4389c0f32ad0f0, 0x3e5d5f3a8dea7357, 0x3affa07c18622dd2, 0xbe7ebfb14c9170c0, 0x3b19e40632b4145d, 0x3e94d9228525f449, 0x3b2d35bd7f959136, 0xbea48b536addac5f, 0xbb461ace22b32569, 0x3eaf48ccf23a68e2, 0xbb4ee1d13c79c281, 0xbeb3183b6134cf03, 0x3b4e1f4d5fe2a06c, 0x3eb31efde2215f01, 0xbb564a7021e23fba, 0xbeafd9eeb0f18fdb, 0x3ea63414459ae298, 0xbe99dda81be20b5a, 0x3e88da7d306423c5, 0xbe7303da86a4fc28, 0x3e54f5e1327706b9, 0xbe23efb5eefcbe53, 0xbe131bc5ce1ce65d, 0xbe13eafe1b05c93f, 0xbdf47fc2d9cc851e, 0x3e7d27265006a9df ],
[ 0x3fefffffff6d1e56, 0xbc864d969b4be4c4, 0x3e444d26de513197, 0x3ae76fc20fc4b365, 0xbe65e32de7af8977, 0xbaf888fd6ae18a1c, 0x3e7e9e05b3c8f38a, 0x3b17532141b12aa7, 0xbe8f2f6fa7db5b1d, 0x3b2b3bf498e3462c, 0x3e9899dcace485eb, 0x3b11885a0ae9e878, 0xbe9f34b7eef3c9b2, 0x3b3294a3b618b470, 0x3ea04be030272d14, 0xbb4df83095e40f79, 0xbe9c73bd22571559, 0x3e94edda838439f5, 0xbe89fc860b504677, 0x3e7b0d686a260420, 0xbe672370c2fdbe10, 0x3e4ee29f0d197d25, 0xbe2b4d88d500c5be, 0x3dc96014c45b0178, 0x3e0238f19dc8fd82, 0xbde8d34d46ae6567, 0xbe454105fe4a9cd8 ],
[ 0x3fefffffffd01f89, 0xbc735e8e39884f62, 0x3e2b334fac4b9f99, 0x3ac32178ed1a4971, 0xbe4e2cec6323e50e, 0xbac0e5693f9d4908, 0x3e65c027d5bba36a, 0xbaefc46fb3cc7ae0, 0xbe76df4d024fffbe, 0xbb090fd7226ec57a, 0x3e82aaf7c205b9ea, 0x3b2dbec2005b45a8, 0xbe88902edfbfefdd, 0xbb2c353aca58d08a, 0x3e8ab2ab1b338249, 0x3b2b498186c39105, 0xbe885abe0ff198d3, 0x3e82d32f7c3621eb, 0xbe78c141c71dbc95, 0x3e6b9fa6fbb9b198, 0xbe59db5fe2c2f5b9, 0x3e43b8e07840483e, 0xbe26d95e5070d91d, 0x3dfd7616168b0e49, 0x3e1f2be0744b3a5f, 0xbdd737a375809985, 0xbe7936d4936fb865 ],
[ 0x3feffffffff0dd2b, 0x3c80df73e7d2fc98, 0x3e11a94ff571654f, 0x3abfbf537b47967d, 0xbe34251f33f5578f, 0x3ad4c9cece8f41b2, 0x3e4de6bc1f75bb9b, 0x3a894afb459a3000, 0xbe6036b5fd1c4158, 0xbb0d582afa097896, 0x3e6b58f1385def96, 0xbaf8778854601996, 0xbe72a2347efb2133, 0xbb026f9e1ef0f378, 0x3e7508db866ffe00, 0x3b164de561a68a21, 0xbe73ffea934685b9, 0x3e702ff87b2e2576, 0xbe666e54eae5fa4b, 0x3e5a9ea2195c567d, 0xbe4ae3b91fecafa1, 0x3e36bb883d2e5ed1, 0xbe1ee10e97715c11, 0x3dfe2873d2b77f1f, 0xbdeaf385ae29d57b, 0xbdbd793eecfc2513, 0x3e420d80dcfa68d1 ],
[ 0x3feffffffffb5be5, 0xbc7729d6819c7f34, 0x3df63ac6b4edc88e, 0xba7c45991835da24, 0xbe1a0ce0dc06a706, 0xbab1b72d11da9dab, 0x3e33e380dd7593a5, 0xbad8ad868a7b5674, 0xbe4638bc4fb02cba, 0x3a87a84506fcda40, 0x3e535753ad4c5875, 0x3aead190ab170366, 0xbe5b41f33cafccba, 0x3af0e3539bf61116, 0x3e5fe694e371a659, 0xbad3a84e01866ea8, 0xbe5f8af0121aa0ab, 0x3e5aa77274dab3d8, 0xbe53616fe8f6a259, 0x3e484fddf4c681a1, 0xbe3a3de05d1b8a31, 0x3e2822529aca9f83, 0xbe126c3dfba84378, 0x3df64c287a84aa09, 0xbe0107d2dac5d83b, 0xbd4e251d1ab1d873, 0x3e58f37005f17b42 ],
[ 0x3feffffffffe9eb0, 0xbc5ea527e0bef1e8, 0x3ddb1e5acf351d87, 0x3a5dc96583ba19f0, 0xbe005042a0a5f3c3, 0xbaa2023f0f13867c, 0x3e199ac8fd63c66c, 0xbaabf57c5fd0501a, 0xbe2d72344378e114, 0x3acc77758959af41, 0x3e3a6be9a123435b, 0x3acdab4af8807c36, 0xbe433aacb4bf6dea, 0x3aebd241ea49ac35, 0x3e474b732e7ceaa7, 0x3adc7c89730b0264, 0xbe47e7eab6531ccb, 0x3e450959f2daae39, 0xbe3ffed4cef94261, 0x3e351c7f99f908a2, 0xbe282b5fd5fbedfc, 0x3e17e1c8e715c978, 0xbe051536822c861b, 0x3debe7e4c220ca82, 0x3e2f5bb67c461296, 0x3da1d7cf04529bf0, 0xbe8acc021ab828c4 ],
[ 0x3fefffffffff9a1b, 0xbc66a87270d2450c, 0x3dc0084ff125639d, 0xba58ad61debedc86, 0xbde3ca42adaa26f6, 0x3a8c20c6583dccdd, 0x3dffe73513c67bf8, 0x3a920d28c0c7e686, 0xbe12dd9aa5a2bee3, 0x3aad76d7235461be, 0x3e216ef6b93944a8, 0xbacf07bd785566de, 0xbe2a2d58e9b22b26, 0xbab19e6ea91dd55e, 0x3e306389b9748f25, 0x3adfbcc52565c0be, 0xbe316cdd9eb58ba2, 0x3e2fdd861b55c500, 0xbe29457846c943d2, 0x3e2178f3905f435c, 0xbe1518cf20c53de2, 0x3e06329939a34b66, 0xbdf5ef3ad85e5d3b, 0x3ddf2b41494e49e9, 0x3e2bad43bc0b622d, 0x3da21a45fa9dcebf, 0xbe8790b3d88f69fe ],
[ 0x3fefffffffffe380, 0x3c87ce07114e4fe0, 0x3da25f9ee0b923dc, 0xba4174c43a73a4d1, 0xbdc74105146a5162, 0xba47d0740e56625c, 0x3de33cde4f35d941, 0xba72a344950797c6, 0xbdf760fe7b666392, 0x3a9a8b77c82ed644, 0x3e063a70fd66d485, 0x3aa6b87715649d6d, 0xbe11324f6fb6dfa1, 0x3ab3fc045e39915f, 0x3e163a31a36b815c, 0xba502dec9bc1a700, 0xbe18724ca8970b91, 0x3e172e290891e5de, 0xbe131fc03858aab1, 0x3e0b9e8b0e7fa253, 0xbe01821a002637bd, 0x3df37ba5f3fba5eb, 0xbde3578bf23dc654, 0x3dcfdaf2015d7b54, 0x3df7f6a435069067, 0x3d99d14ee557ec62, 0xbe555f4c743ee571 ],
[ 0x3feffffffffff845, 0x3c7b0edc5a89ab8e, 0x3d846897d4b69fc6, 0x3a2a74852415bb49, 0xbdaa77a4e7dcd735, 0xba434edb43ab7de6, 0x3dc67543695dcc12, 0xba329ae577004af8, 0xbddc05c1e2fc710e, 0x3a5dbbf42d2537a8, 0x3deb639419fedf8e, 0xba8ed72eb9e7a59e, 0xbdf5cfd7eb9bfe87, 0xba7e97db27125fc0, 0x3dfd11578959ba45, 0xba8c0635ac2b5768, 0xbe0082f9e9f7eb37, 0x3e00354ceadad8b3, 0xbdfbc2dee0154fc6, 0x3df4e11efdc66eae, 0xbdebb357c0253f64, 0x3de035f9889bc29c, 0xbdc8e7bdb10b7441, 0x3dbe364571102661, 0xbe212cffcf49a2e8, 0x3d8ee9362bcfec26, 0x3e7cc5b58dd85301 ],
[ 0x3feffffffffffdf8, 0xbc8dcf8b10ff973b, 0x3d65f8b87a31bd85, 0x39d65b265455b658, 0xbd8d2e55024a0fb5, 0x3a2444e1d84cea02, 0x3da9612cc225df4b, 0xba4c784edb664ce7, 0xbdc03ee5f38b9b4d, 0xba691ca8efa41a30, 0x3dd04f2f71e2e96b, 0xba633f36a4e51350, 0xbddab7099f99ced9, 0xba54af7a67f2110c, 0x3de2554b8f609fd1, 0xba729e641eb44218, 0xbde57c87529ca968, 0x3de5cd182c967671, 0xbde3580a2517d57a, 0x3dde3be72b1be982, 0xbdd4e9908689ad08, 0x3dc9a61979d3395b, 0xbdb7b826aadd1c89, 0x3daad3a9fc4a0d1e, 0xbe00e9325ed20970, 0x3d80722198ff452c, 0x3e5c2ef85611aa11 ],
[ 0x3fefffffffffff7b, 0x3c800fa07f7fb612, 0x3d46ed2f2515e933, 0x39d2bc1802a42b92, 0xbd6f2a6c1669c901, 0xba07b3e174cc1840, 0x3d8bc42ba38a13f8, 0x3a2460463d59d3df, 0xbda2391e135afae4, 0xba3bd08c8c5f7b18, 0x3db2c6c24550f64f, 0xba3fdc861a487110, 0xbdbf9a3c1b0d63ec, 0xba5843dc8d9ad3d5, 0x3dc6502546ab341a, 0x3a645f812e48eb98, 0xbdcaf223186006d1, 0x3dcc388dd1764f41, 0xbdc9e65a242b52aa, 0x3dc4fcd2787781eb, 0xbdbe3cbdb20a48d6, 0x3db35639e9fcd410, 0xbd55b8e97774b2c9, 0x3d966ffe6a100bc9, 0xbe15706c390c113e, 0x3d6ff0d11cf61949, 0x3e72054de347e3f8 ],
[ 0x3fefffffffffffdf, 0x3c75669e670f914c, 0x3d272fd93e036cdc, 0x39b1c553d12fbbd0, 0xbd501f450d1e61b2, 0x39cbed807e60c078, 0x3d6d68fb81b2ed89, 0x39dc7ea3c4444cc0, 0xbd83c706aa4d2328, 0x3a2d6d2d51dd414d, 0x3d94e6479565838e, 0x3a350580f36c14c1, 0xbda20e9eb83b3dd9, 0xba104b6334a32fd0, 0x3daa35b9d2fcac80, 0x3a1c07c6978bf2f0, 0xbdb04a134f6e3dcb, 0x3db196579f27ddbe, 0xbdb0ab97aa74c700, 0x3dabf68355f542b1, 0xbda49da25a547134, 0x3d9bd64993a3958e, 0xbdc1193990186399, 0x3d81c0e98335ae18, 0x3e2e08edb685494a, 0x3d5cb9fcc058465b, 0xbe894cacccfb8964 ],
[ 0x3feffffffffffff8, 0x3c70160ef15c497e, 0x3d06ba91ac734786, 0xb9af81d6fa69b5b2, 0xbd3028a39099f4db, 0xb9c83ed68de15404, 0x3d4e292863e1795e, 0xb9db292e812abb68, 0xbd64c4e690fbdd14, 0xba080991e1d4ef25, 0x3d767e6e5ac60fd1, 0x3a01d2ca68dcf0e8, 0xbd83f00d80afa00c, 0xba23e174dc7225ac, 0x3d8db88ee63eb28a, 0x3a28abd97527892f, 0xbd92fe58a1f19368, 0x3d951dbeae22a5c8, 0xbd94a4d54823e0fc, 0x3d91e432d674cfba, 0xbd8b001e26c6e764, 0x3d8328ce695259fe, 0xbdb4e492cf7d4f4c, 0x3d6aaa77d339dc00, 0x3e2366538db382e5, 0x3d4826ad7d581503, 0xbe8056e0810b14da ],
[ 0x3feffffffffffffe, 0x3c759ab24e589a30, 0x3ce5982008db1304, 0xb981cf9bda64b38a, 0xbd0f610e8cde57ac, 0xb99884dcd86f98c8, 0x3d2df2dac2f2d47f, 0xb997f27bf279d988, 0xbd451b17f95fc0b4, 0xb9e063e04485c3e7, 0x3d576996ddc975d7, 0xb9e21489d6648428, 0xbd6546155a972b18, 0xb9b9d3fb518aa7c0, 0x3d70456ed89c4f24, 0x3a1eee772fc32c5e, 0xbd755d6295aa388a, 0x3d786ead99977388, 0xbd789b3d387efa6e, 0x3d76011e175e64f8, 0xbd70cd70515af47b, 0x3d69402199dfdde7, 0xbda806743bc32b08, 0x3d530e561550364a, 0x3e170093985e2c1b, 0x3d331999a27ace63, 0xbe735f54db0f4dbc ],
[ 0x3ff0000000000000, 0xbc8a6d7d18831888, 0x3cc3e296303b2297, 0x39668cf648cfed1c, 0xbced8456ef97c744, 0x398fcded17005500, 0x3d0ccb92e6c24c8d, 0xb9aa704dc202cff2, 0xbd24c1aa8cf1229b, 0x39c652efa61e4ec2, 0x3d37918b6b83c0fb, 0x39b2fb01fb8836dc, 0xbd45f073659de44d, 0xb9e9ceb48a2d1931, 0x3d5134d070b5921e, 0x39d9af3038fcc184, 0xbd5730a2938c09dd, 0x3d5b4091041f5905, 0xbd5c3c44ab8c8421, 0x3d5a06b4f4c3044d, 0xbd5704f511555fe7, 0x3d4fe51fcfc1acba, 0x3d95e7229a07e7cd, 0x3d39f8121f6c3146, 0xbe0692b2f9b3f445, 0x3d1c8c34f73d3823, 0x3e6301e540260d52 ],
];

960
vendor/pxfm/src/err/erfc.rs vendored Normal file
View File

@@ -0,0 +1,960 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::dd_fmla;
use crate::double_double::DoubleDouble;
use crate::err::erf::{Erf, erf_accurate, erf_fast};
use crate::exponents::{EXP_REDUCE_T0, EXP_REDUCE_T1, ldexp};
use crate::round_ties_even::RoundTiesEven;
use std::hint::black_box;
/// Module-private coefficient table: 6 rows of 13 raw `u64` words each.
///
/// The words look like IEEE-754 binary64 bit patterns and are presumably
/// decoded with `f64::from_bits` by the evaluator (not visible from this
/// table — TODO confirm).
///
/// NOTE(review): in every row the first word decodes to roughly 0.5641896
/// (close to 1/sqrt(pi)) and the second word is many orders of magnitude
/// smaller, which suggests the leading coefficient is stored as a
/// (high, low) double-double pair followed by 11 plain coefficients
/// (2 + 11 = 13). The name and the near-identical leading terms across rows
/// suggest one fitted polynomial per argument range of an asymptotic
/// expansion — confirm the range boundaries and the evaluation variable
/// against the code that selects a row.
static ASYMPTOTIC_POLY: [[u64; 13]; 6] = [
[
0x3fe20dd750429b6d,
0x3c61a1feb75a48a8,
0xbfd20dd750429b6c,
0x3fdb14c2f863e403,
0xbff0ecf9db3af35d,
0x400d9eb53ca6eeed,
0xc030a945830d95c8,
0x4056e8a963e2f1f5,
0xc0829b7ccc8f396f,
0x40b15e716e83c27e,
0xc0e1cfdcfbcaf22a,
0x4111986cc7a7e8fe,
0xc1371f7540590a91,
],
[
0x3fe20dd750429ae7,
0x3c863da89e801fd4,
0xbfd20dd750400795,
0x3fdb14c2f57c490c,
0xbff0ecf95c8c9014,
0x400d9e981f2321ef,
0xc030a81482de1506,
0x4056d662420a604b,
0xc08233c96fff7772,
0x40af5d62018d3e37,
0xc0d9ae55e9554450,
0x410052901e10d139,
0xc1166465df1385f0,
],
[
0x3fe20dd75041e3fc,
0xbc7c9b491c4920fc,
0xbfd20dd74e5f1526,
0x3fdb14c1d35a40e0,
0xbff0ecdecd30e86b,
0x400d9b4e7f725263,
0xc030958b5ca8fb39,
0x40563e3179bf609c,
0xc0806bbd1cd2d0fd,
0x40a7b66eb6d1d2f2,
0xc0cce5a4b1afab75,
0x40e8b5c6ae6f773c,
0xc0f5475860326f86,
],
[
0x3fe20dd75025cfe9,
0x3c55a92eef32fb20,
0xbfd20dd71eb9d4e7,
0x3fdb14af4c25db28,
0xbff0ebc78a22b3d8,
0x400d85287a0b3399,
0xc03045f751e5ca1d,
0x4054a0d87ddea589,
0xc07ac6a0981d1eee,
0x409f44822f567956,
0xc0bcba372de71349,
0x40d1a4a19f550ca4,
0xc0d52a580455ed79,
],
[
0x3fe20dd74eb31d84,
0xbc439c4054b7c090,
0xbfd20dd561af98c4,
0x3fdb1435165d9df1,
0xbff0e6b60308e940,
0x400d3ce30c140882,
0xc02f2083e404c299,
0x40520f113d89b42a,
0xc0741433ebd89f19,
0x4092f35b6a3154f6,
0xc0ab020a4313cf3b,
0x40b90f07e92da7ee,
0xc0b6565e1d7665c3,
],
[
0x3fe20dd744b3517b,
0xbc6f77ab25e01ab4,
0xbfd20dcc62ec4024,
0x3fdb125bfa4f66c1,
0xbff0d80e65381970,
0x400ca11fbcfa65b2,
0xc02cd9eaffb88315,
0x404e010db42e0da7,
0xc06c5c85250ef6a3,
0x4085e118d9c1eeaf,
0xc098d74be13d3d30,
0x40a211b1b2b5ac83,
0xc09900be759fc663,
],
];
/// Coefficients of the ten polynomials evaluated by `erfc_asympt_accurate`,
/// stored as raw binary64 bit patterns (decoded with `f64::from_bits`).
///
/// Row `i` is selected by comparing `1/x` against the `THRESHOLD` table
/// declared inside `erfc_asympt_accurate`; its polynomial has degree `29 + 2i`.
/// Layout of a row `p`, as read there:
/// * `p[2j]`, `p[2j + 1]` for `0 <= j <= 5`: high and low parts of the
///   coefficient of degree `2j + 1` (degrees 1 to 11);
/// * `p[k]` for `12 <= k <= 20 + i`: the coefficient of degree `2k - 11`
///   (degrees 13 to `29 + 2i`), held as a single double;
/// * the remaining entries are zero padding and are never read.
static ASYMPTOTIC_POLY_ACCURATE: [[u64; 30]; 10] = [
[
0x3fe20dd750429b6d,
0x3c61ae3a912b08f0,
0xbfd20dd750429b6d,
0xbc51ae34c0606d68,
0x3fdb14c2f863e924,
0xbc796c0f4c848fc8,
0xbff0ecf9db3e71b6,
0x3c645d756bd288b0,
0x400d9eb53fad4672,
0xbcac61629de9adf2,
0xc030a945f3d147ea,
0x3cb8fec5ad7ece20,
0x4056e8c02f27ca6d,
0xc0829d1c21c363e0,
0x40b17349b70be627,
0xc0e28a6bb4686182,
0x411602d1662523ca,
0xc14ccae7625c4111,
0x4184237d064f6e0d,
0xc1bb1e5466ca3a2f,
0x41e90ae06a0f6cc1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
],
[
0x3fe20dd750429b6d,
0x3c61adaa62435c10,
0xbfd20dd750429b6d,
0xbc441516126827c8,
0x3fdb14c2f863e90b,
0x3c7a535780ba5ed4,
0xbff0ecf9db3e65d6,
0xbc9089edde27ad07,
0x400d9eb53fa52f20,
0xbcabc9737e9464ac,
0xc030a945f2cd7621,
0xbcc589f28b700332,
0x4056e8bffd7e194e,
0xc0829d18716876e2,
0x40b17312abe18250,
0xc0e287e73592805c,
0x4115ebf7394a39c1,
0xc14c2f14d46d0cf9,
0x4182af3d256f955e,
0xc1b7041659ebd7aa,
0x41e6039c232e2f71,
0xc2070ca15c5a07cb,
0,
0,
0,
0,
0,
0,
0,
0,
],
[
0x3fe20dd750429b6d,
0x3c5d3c35b5d37410,
0xbfd20dd750429b56,
0xbc7c028415f6f81b,
0x3fdb14c2f863c1cf,
0x3c51bb0de6470dbc,
0xbff0ecf9db33c363,
0x3c80f8068459eb16,
0x400d9eb53b9ce57b,
0x3ca20cce33e7d84a,
0xc030a945aa2ec4fa,
0xbcdf6e0fcd7c6030,
0x4056e8b824d2bfaa,
0xc0829cc372a6d0b0,
0x40b1703a99ddd429,
0xc0e2749f9a267cc6,
0x4115856a17271849,
0xc14a8bcb4ba9753f,
0x418035dcce882940,
0xc1b1e5d8c5e6e043,
0x41dfe3b4f365386e,
0xc20398fdef2b98fe,
0x42184234d4f4ea12,
0,
0,
0,
0,
0,
0,
0,
],
[
0x3fe20dd750429b6a,
0x3c8ae622b765e9fd,
0xbfd20dd750428f0e,
0x3c703c6c67d69513,
0x3fdb14c2f8563e8e,
0x3c6766a6bd7aa89c,
0xbff0ecf9d8dedd48,
0x3c90af52e90336e3,
0x400d9eb4aad086fe,
0x3ca640d371d54a19,
0xc030a93f1d01cfe0,
0xbcc68dbd8d9c522c,
0x4056e842e9fd5898,
0xc08299886ef1fb80,
0x40b15e0f0162c9a0,
0xc0e222dbc6b04cd8,
0x411460268db1ebdf,
0xc1474f53ce065fb3,
0x417961ca8553f870,
0xc1a8788395d13798,
0x41d35e37b25d0e81,
0xc1f707b7457c8f5e,
0x4211ff852df1c023,
0xc21b75d0ec56e2cd,
0,
0,
0,
0,
0,
0,
],
[
0x3fe20dd750429a8f,
0xbc766d8dda59bcea,
0xbfd20dd7503fdbab,
0x3c6707bdffc2b3fe,
0x3fdb14c2f6526025,
0xbc27fa4bb9541140,
0xbff0ecf99c417d45,
0xbc9748645ef7af94,
0x400d9eaa9c712a7d,
0x3ca79e478994ebb4,
0xc030a8ef11fbf141,
0x3cbb5c72d69f8954,
0x4056e4653e0455b1,
0xc08286909448e6cf,
0x40b113424ce76821,
0xc0e1346d859e76de,
0x4111f9f6cf2293bf,
0xc14258e6e3b337db,
0x41714029ecd465fb,
0xc19c530df5337a6f,
0x41c34bc4bbccd336,
0xc1e4a37c52641688,
0x420019707cec2974,
0xc21031fe736ea169,
0x420f6b3003de3ddf,
0,
0,
0,
0,
0,
],
[
0x3fe20dd75042756b,
0x3c84ad9178b56910,
0xbfd20dd74feda9e8,
0xbc78141c70bbc8d6,
0x3fdb14c2cb128467,
0xbc709aebaa106821,
0xbff0ecf603921a0b,
0x3c97d3cb5bceaf0b,
0x400d9e3e1751ca59,
0x3c76622ae5642670,
0xc030a686af57f547,
0x3cc083b320aff6b6,
0x4056cf0b6c027326,
0xc0823afcb69443d3,
0x40b03ab450d9f1b9,
0xc0de74cdb76bcab4,
0x410c671b60e607f1,
0xc138f1376d324ce4,
0x4163b64276234676,
0xc18aff0ce13c5a8e,
0x41aef20247251e87,
0xc1cc9f5662f721f6,
0x41e4687858e185e1,
0xc1f4fa507be073c2,
0x41fb99ac35ee4acc,
0xc1f16cb585ee3fa9,
0,
0,
0,
0,
],
[
0x3fe20dd7503e730d,
0x3c84e524a098a467,
0xbfd20dd7498fa6b2,
0x3c260a4e27751c80,
0x3fdb14c061bd2a0c,
0x3c695a8f847d2fc2,
0xbff0ecd0f11b8c7d,
0xbc94126deea76061,
0x400d9b1344463548,
0x3cafe09a4eca9b0e,
0xc030996ea52a87ed,
0xbca924f920db26c0,
0x40567a2264b556b0,
0xc0815dfc2c86b6b5,
0x40accc291b62efe4,
0xc0d81375a78e746a,
0x41033a6f15546329,
0xc12c1e9dc1216010,
0x4152397ea3d43fda,
0xc174661e5b2ea512,
0x4193412367ca5d45,
0xc1ade56b9d7f37c4,
0x41c2851d9722146d,
0xc1d19027baf0c3fe,
0x41d7e7b8b6ab58ac,
0xc1d4c446d56aaf22,
0x41c1492190400505,
0,
0,
0,
],
[
0x3fe20dd74ff10852,
0x3c8a32f26deff875,
0xbfd20dd6f06c491c,
0x3c770c16e1793358,
0x3fdb14a7d5e7fd4a,
0x3c7479998b54db5b,
0xbff0ebbdb3889c5f,
0xbc759b853e11369c,
0x400d89dd249d7ef8,
0xbc84b5edf0c8c314,
0xc0306526fb386114,
0xbc840d04eed7c7e0,
0x40557ff657e429ce,
0xc07ef63e90d38630,
0x40a6d4f34c4ea3da,
0xc0d04542b9e36a54,
0x40f577bf19097738,
0xc119702fe47c736d,
0x413a7ae12b54fdc6,
0xc157ca3f0f7c4fa9,
0x417225d983963cbf,
0xc1871a6eac612f9e,
0x4198086324225e1e,
0xc1a3de68670a7716,
0x41a91674de4dcbe9,
0xc1a6b44cc15b76c2,
0x419a36dae0f30d80,
0xc17cffc1747ea3dc,
0,
0,
],
[
0x3fe20dd74ba8f300,
0xbc59dd256871d210,
0xbfd20dd3593675bc,
0x3c7ec0e7ffa91ad9,
0x3fdb13eef86a077a,
0xbc74fb5d78d411b8,
0xbff0e5cf52a11f3a,
0xbc851f36c779dc8c,
0x400d4417a08b39d5,
0x3c91be9fb5956638,
0xc02f91b9f6ce80c3,
0xbccc9c99dd42829c,
0x405356439f45bb43,
0xc078c0ca12819b48,
0x409efcad2ecd6671,
0xc0c21b0af6fc1039,
0x40e327d215ee30c9,
0xc101fabda96167b0,
0x411d82e4373b315d,
0xc134ed9e2ff591e9,
0x41495c85dcd8eab5,
0xc159f016f0a3d62a,
0x41660e89d918b96f,
0xc16e97be202cba64,
0x4170d8a081619793,
0xc16c5422b4fcfc65,
0x4161131a9dc6aed1,
0xc14a457d9dced257,
0x4123605e980e8b86,
0,
],
[
0x3fe20dd7319d4d25,
0x3c82b02992c3b7ab,
0xbfd20dc29c13ab1b,
0xbc7d78d79b4ad767,
0x3fdb115a57b5ab13,
0xbc6aa8c45be0aa2e,
0xbff0d58ec437efd7,
0xbc5994f00a15e850,
0x400cb1742e229f23,
0xbca8000471d54399,
0xc02d99a5edf7b946,
0xbcbaf76ed7e35cde,
0x4050a8b71058eb28,
0xc072d88289da5bfc,
0x40943ddf24168edb,
0xc0b3e9dfc38b6d1a,
0x40d18d4df97ab3df,
0xc0eb550fc62dcab5,
0x41029cb71f116ed1,
0xc115fc9cc4e854e3,
0x41265915fd0567b1,
0xc1335eb5fca0e46d,
0x413c5261ecc0d789,
0xc14138932dc4eafc,
0x414117d4eb18facd,
0xc13af96163e35eca,
0x4130454a3a63c766,
0xc11c2ebc1d39b44a,
0x40ff3327698e0e6b,
0xc0d094febc3dff35,
],
];
/// Fast-path approximation of `exp(z)` for a double-double `z = zh + zl`
/// with `|z| < 0.000130273 < 2^-12.88` and `|zl| < 2^-42.6`.
///
/// A degree-4 polynomial is evaluated: the three upper coefficients in plain
/// double precision, the two lowest steps in double-double arithmetic.
#[inline]
fn q_1(z_dd: DoubleDouble) -> DoubleDouble {
    // Polynomial coefficients as binary64 bit patterns, lowest degree first.
    const COEFFS: [u64; 5] = [
        0x3ff0000000000000,
        0x3ff0000000000000,
        0x3fe0000000000000,
        0x3fc5555555995d37,
        0x3fa55555558489dc,
    ];
    let [c0, c1, c2, c3, c4] = COEFFS.map(f64::from_bits);

    let z_rounded = z_dd.to_f64();
    // Horner steps for degrees 4..2; the first one only needs the high part of z.
    let upper = dd_fmla(dd_fmla(c4, z_dd.hi, c3), z_rounded, c2);
    // c1 + upper * z as an exact sum, then times z, then plus c0.
    let linear = DoubleDouble::from_exact_add(c1, upper * z_rounded);
    let scaled = DoubleDouble::quick_mult(z_dd, linear);
    DoubleDouble::f64_add(c0, scaled)
}
/// Fast-path double-double exponential of the double-double `x`.
///
/// `k = round(x.hi * 2^12 / log(2))` is subtracted (times `LOG2_DD`) from `x`
/// to obtain a small remainder handled by `q_1`. The integer `k` is then split
/// into bits 0..5, bits 6..11 and the remaining high part: the two 6-bit
/// fields index `EXP_REDUCE_T1` / `EXP_REDUCE_T0`, and the high part becomes
/// the binary exponent of the final scale factor.
#[inline]
fn exp_1(x: DoubleDouble) -> DoubleDouble {
    /* |INV_LOG2 - 2^12/log(2)| < 2^-43.4 */
    const INV_LOG2: f64 = f64::from_bits(0x40b71547652b82fe);
    // Double-double multiplier removed `k` times from the argument.
    const LOG2_DD: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3bbabc9e3b39803f),
        f64::from_bits(0x3f262e42fefa39ef),
    );

    let k = (x.hi * INV_LOG2).round_ties_even_finite();
    let k_log2 = DoubleDouble::quick_f64_mult(k, LOG2_DD);
    let mut reduced = DoubleDouble::from_exact_add(x.hi - k_log2.hi, x.lo);
    reduced.lo -= k_log2.lo;

    /* Note: k is an integer, this is just a conversion. */
    let k_int: i64 = k as i64;
    let biased_exponent = (k_int >> 12).wrapping_add(0x3ff);
    let entry_t0 = &EXP_REDUCE_T0[((k_int >> 6) & 0x3f) as usize];
    let entry_t1 = &EXP_REDUCE_T1[(k_int & 0x3f) as usize];
    let t0 = DoubleDouble::new(f64::from_bits(entry_t0.0), f64::from_bits(entry_t0.1));
    let t1 = DoubleDouble::new(f64::from_bits(entry_t1.0), f64::from_bits(entry_t1.1));

    let table_product = DoubleDouble::quick_mult(t1, t0);
    let mut out = DoubleDouble::quick_mult(table_product, q_1(reduced));

    // Apply the power of two by building its bit pattern directly.
    let scale = f64::from_bits((biased_exponent as u64) << 52);
    out.hi *= scale;
    out.lo *= scale;
    out
}
/// Result of `exp_accurate`: a double-double value together with the power of
/// two that the caller still has to apply.
struct Exp {
/// Binary exponent: the integer `k` computed by `exp_accurate`; the caller
/// applies it with `ldexp`.
e: i32,
/// Double-double value of the polynomial evaluated on the reduced argument.
result: DoubleDouble,
}
/// Accurate-path exponential of the double-double `x_dd`.
///
/// The argument is reduced as `y = x - k*log(2)` with `k` the nearest integer
/// to `x.hi / log(2)`, and a degree-19 polynomial is evaluated on `y`.
/// The returned `Exp` holds that polynomial value in `result` and `k` in `e`;
/// the multiplication by `2^k` is left to the caller.
fn exp_accurate(x_dd: DoubleDouble) -> Exp {
    // Polynomial coefficients as binary64 bit patterns:
    //   degrees 0..=7  -> double-double pairs E2[2i], E2[2i + 1]
    //   degrees 8..=19 -> single doubles E2[i + 8]
    static E2: [u64; 28] = [
        0x3ff0000000000000,
        0xb960000000000000,
        0x3ff0000000000000,
        0xb9be200000000000,
        0x3fe0000000000000,
        0x3a03c00000000000,
        0x3fc5555555555555,
        0x3c655555555c78d9,
        0x3fa5555555555555,
        0x3c455555545616e2,
        0x3f81111111111111,
        0x3c011110121fc314,
        0x3f56c16c16c16c17,
        0xbbef49e06ee3a56e,
        0x3f2a01a01a01a01a,
        0x3b6b053e1eeab9c0,
        0x3efa01a01a01a01a,
        0x3ec71de3a556c733,
        0x3e927e4fb7789f66,
        0x3e5ae64567f54abe,
        0x3e21eed8eff8958b,
        0x3de6124613837216,
        0x3da93974aaf26a57,
        0x3d6ae7f4fd6d0bd9,
        0x3d2ae7e982620b25,
        0x3ce94e4ca59460d8,
        0x3ca69a2a4b7ef36d,
        0x3c6abfe1602308c9,
    ];
    const LOG2INV: f64 = f64::from_bits(0x3ff71547652b82fe);
    const LOG2_H: f64 = f64::from_bits(0x3fe62e42fefa39ef);
    /* LOG2_L ~ log(2) - LOG2_H is truncated to 38 bits, so that k*LOG2_L is
    exact (k has at most 11 bits) */
    const LOG2_L: f64 = f64::from_bits(0x3c7abc9e3b398000);
    const LOG2_TINY: f64 = f64::from_bits(0x398f97b57a079a19);

    // SAFETY: relies on the rounded product being finite and inside the `i32`
    // range; the analysis below bounds |k| by 1070 for the arguments this
    // routine is meant for. The unchecked conversion is undefined behaviour
    // for NaN or out-of-range values.
    let k: i32 = unsafe {
        (x_dd.hi * LOG2INV)
            .round_ties_even_finite()
            .to_int_unchecked::<i32>()
    };
    let neg_k = -k as f64;

    /* since |xh+xl| >= 2.92 we have |k| >= 4;
    (|k|-1/2)*log(2) <= |x| <= (|k|+1/2)*log(2) thus
    1-1/(2|k|) <= |x/(k*log(2))| <= 1+1/(2|k|) thus by Sterbenz theorem
    yh is exact too */
    let yh = dd_fmla(neg_k, LOG2_H, x_dd.hi);
    let tail = DoubleDouble::from_full_exact_add(neg_k * LOG2_L, x_dd.lo);
    let mut y = DoubleDouble::from_exact_add(yh, tail.hi);
    y.lo = dd_fmla(neg_k, LOG2_TINY, y.lo + tail.lo);
    /* now yh+yl approximates xh + xl - k*log(2) */

    /* Since |xh| <= 742, we assume |xl| <= ulp(742) = 2^-43. Then since
    |k| <= round(742/log(2)) = 1070, |yl| <= 1070*LOG2L + 2^-42 < 2^-42.7.
    Since |yh| <= log(2)/2, the contribution of yl is negligible as long
    as |i*p[i]*yh^(i-1)*yl| < 2^-104, which holds for i >= 16.
    Thus for coefficients of degree 16 or more, yl is not taken into account. */
    let mut h = f64::from_bits(E2[19 + 8]); // degree 19
    for &coeff in E2[16 + 8..=18 + 8].iter().rev() {
        // degrees 18, 17, 16
        h = dd_fmla(h, y.hi, f64::from_bits(coeff));
    }

    /* degree 15: h*(yh+yl)+E2[15 + 8] */
    let mut prod = DoubleDouble::from_exact_mult(h, y.hi);
    prod.lo = dd_fmla(h, y.lo, prod.lo);
    let mut acc = DoubleDouble::from_exact_add(f64::from_bits(E2[15 + 8]), prod.hi);
    acc.lo += prod.lo;

    /* degrees 14 down to 8: (h+l)*(yh+yl)+E2[i+8] */
    for &coeff in E2[8 + 8..=14 + 8].iter().rev() {
        let prod = DoubleDouble::quick_mult(acc, y);
        acc = DoubleDouble::from_exact_add(f64::from_bits(coeff), prod.hi);
        acc.lo += prod.lo;
    }

    /* degrees 7 down to 0: (h+l)*(yh+yl)+E2[2i]+E2[2i+1] */
    for pair in E2[..16].chunks_exact(2).rev() {
        let prod = DoubleDouble::quick_mult(acc, y);
        acc = DoubleDouble::from_exact_add(f64::from_bits(pair[0]), prod.hi);
        acc.lo += prod.lo + f64::from_bits(pair[1]);
    }

    Exp { e: k, result: acc }
}
/// Accurate path for `erfc(x)` based on the asymptotic formula
/// `erfc(x) = exp(-x^2) * p(1/x)`.
///
/// `exp(-x^2)` comes from `exp_accurate`, `p` from the row of
/// `ASYMPTOTIC_POLY_ACCURATE` selected by `1/x`, and the product is scaled by
/// the power of two returned alongside the exponential, with a correction
/// step when the result falls below the smallest normal double.
#[cold]
fn erfc_asympt_accurate(x: f64) -> f64 {
    /* subnormal exceptions */
    if x == f64::from_bits(0x403a8f7bfbd15495) {
        return dd_fmla(
            f64::from_bits(0x0000000000000001),
            -0.25,
            f64::from_bits(0x000667bd620fd95b),
        );
    }

    let x_sq = DoubleDouble::from_exact_mult(x, x);
    let exp_part = exp_accurate(DoubleDouble::new(-x_sq.lo, -x_sq.hi));

    /* 1/x as a double-double: Newton's iteration for 1/x is
    y -> y + y*(1-x*y) */
    let inv_hi = 1.0 / x;
    let inv_lo = inv_hi * dd_fmla(-x, inv_hi, 1.0);

    // Upper bounds on 1/x delimiting the rows of ASYMPTOTIC_POLY_ACCURATE.
    static THRESHOLD: [u64; 10] = [
        0x3fb4500000000000,
        0x3fbe000000000000,
        0x3fc3f00000000000,
        0x3fc9500000000000,
        0x3fcf500000000000,
        0x3fd3100000000000,
        0x3fd7100000000000,
        0x3fdbc00000000000,
        0x3fe0b00000000000,
        0x3fe3000000000000,
    ];
    // Number of leading thresholds strictly below 1/x.
    let row = THRESHOLD
        .iter()
        .take_while(|&&bits| inv_hi > f64::from_bits(bits))
        .count();
    let coeffs = &ASYMPTOTIC_POLY_ACCURATE[row];

    // (1/x)^2 as a double-double.
    let mut inv_sq = DoubleDouble::from_exact_mult(inv_hi, inv_hi);
    inv_sq.lo = dd_fmla(2.0 * inv_hi, inv_lo, inv_sq.lo);

    /* the polynomial has degree 29+2*row, and its coefficient of largest
    degree is coeffs[14+6+row] */
    let mut acc = DoubleDouble::new(0., f64::from_bits(coeffs[14 + 6 + row]));
    // Degrees 27+2*row down to 13: single-double coefficients coeffs[12..=19+row].
    for &coeff in coeffs[12..=19 + row].iter().rev() {
        let prod = DoubleDouble::quick_mult(acc, inv_sq);
        acc = DoubleDouble::from_full_exact_add(f64::from_bits(coeff), prod.hi);
        acc.lo += prod.lo;
    }
    // Degrees 11 down to 1: double-double coefficients stored as (hi, lo) pairs.
    for pair in coeffs[..12].chunks_exact(2).rev() {
        let prod = DoubleDouble::quick_mult(acc, inv_sq);
        acc = DoubleDouble::from_full_exact_add(f64::from_bits(pair[0]), prod.hi);
        acc.lo += prod.lo + f64::from_bits(pair[1]);
    }

    /* multiply by 1/x: the product approximates p(1/x), i.e. erfc(x)*exp(x^2) */
    let poly = DoubleDouble::quick_mult(acc, DoubleDouble::new(inv_lo, inv_hi));
    /* normalize before multiplying by the exponential, to reduce the number
    of exceptional cases */
    let poly = DoubleDouble::from_exact_add(poly.hi, poly.lo);
    let v = DoubleDouble::quick_mult(poly, exp_part.result);

    /* multiply by 2^e */
    let mut res = ldexp(v.to_f64(), exp_part.e);
    if res < f64::from_bits(0x0010000000000000) {
        /* for erfc(x) in the subnormal range, a special rounding is needed:
        add back corr*2^e, where corr is what the first rounding lost */
        let mut corr = v.hi - ldexp(res, -exp_part.e);
        corr += v.lo;
        res += ldexp(corr, exp_part.e);
    }
    res
}
/// Accurate path for `erfc(x)`.
///
/// Negative and small arguments are handled through `erf_accurate`
/// (`erfc(x) = 1 + erf(-x)` and `erfc(x) = 1 - erf(x)` respectively);
/// everything else goes to `erfc_asympt_accurate`.
#[cold]
fn erfc_accurate(x: f64) -> f64 {
    if x < 0. {
        // 1 + erf(-x)
        let mut sum = erf_accurate(-x);
        let head = DoubleDouble::from_exact_add(1.0, sum.hi);
        sum.lo += head.lo;
        sum.hi = head.hi;
        sum.to_f64()
    } else if x <= f64::from_bits(0x3ffb59ffb450828c) {
        // erfc(x) >= 2^-6 here: 1 - erf(x)
        let mut diff = erf_accurate(x);
        let head = DoubleDouble::from_exact_add(1.0, -diff.hi);
        diff.lo = head.lo - diff.lo;
        diff.hi = head.hi;
        diff.to_f64()
    } else {
        // now 0x1.b59ffb450828cp+0 < x < 0x1.b39dc41e48bfdp+4
        erfc_asympt_accurate(x)
    }
}
/* Fast path for 0x1.713786d9c7c09p+1 < x < 0x1.b39dc41e48bfdp+4,
using the asymptotic formula erfc(x) = exp(-x^2) * p(1/x).

Returns an `Erf` whose `result` is a double-double approximation of erfc(x)
and whose `err` is an absolute error bound on it. For
x >= 0x1.9db1bb14e15cap+4 a zero result with `err = 1.0` is returned instead. */
fn erfc_asympt_fast(x: f64) -> Erf {
/* for x >= 0x1.9db1bb14e15cap+4, erfc(x) < 2^-970, and we might encounter
underflow issues in the computation of l, thus we delegate this case
to the accurate path (the error bound of 1.0 makes the rounding test in
f_erfc fail) */
if x >= f64::from_bits(0x4039db1bb14e15ca) {
return Erf {
err: 1.0,
result: DoubleDouble::default(),
};
}
let mut u = DoubleDouble::from_exact_mult(x, x);
let e_dd = exp_1(DoubleDouble::new(-u.lo, -u.hi));
/* the assumptions from exp_1 are satisfied:
* the exact product ensures |ul| <= ulp(uh), thus |ul/uh| <= 2^-52
* since |x| < 0x1.9db1bb14e15cap+4 we have
|ul| < ulp(0x1.9db1bb14e15cap+4^2) = 2^-43 */
/* eh+el approximates exp(-x^2) with maximal relative error 2^-74.139 */
/* compute 1/x as double-double */
let yh = 1.0 / x;
/* Assume 1 <= x < 2, then 0.5 <= yh <= 1,
and yh = 1/x + eps with |eps| <= 2^-53. */
/* Newton's iteration for 1/x is y -> y + y*(1-x*y) */
let yl = yh * dd_fmla(-x, yh, 1.0);
/* x*yh-1 = x*(1/x+eps)-1 = x*eps
with |x*eps| <= 2^-52, thus the error on the FMA is bounded by
ulp(2^-52.1) = 2^-105.
Now |yl| <= |yh| * 2^-52 <= 2^-52, thus the rounding error on
yh * dd_fmla(-x, yh, 1.0) is bounded also by ulp(2^-52.1) = 2^-105.
From [6], Lemma 3.7, if yl was computed exactly, then yh+yl would differ
from 1/x by at most yh^2/theta^3*(1/x-yh)^2 for some theta in [yh,1/x]
or [1/x,yh].
Since yh, 1/x <= 1, this gives eps^2 <= 2^-106.
Adding the rounding errors, we have:
|yh + yl - 1/x| <= 2^-105 + 2^-105 + 2^-106 < 2^-103.67.
For the relative error, since |yh| >= 1/2, this gives:
|yh + yl - 1/x| < 2^-102.67 * |yh+yl|
*/
// Upper bounds on 1/x delimiting the rows of ASYMPTOTIC_POLY.
const THRESHOLD: [u64; 6] = [
0x3fbd500000000000,
0x3fc59da6ca291ba6,
0x3fcbc00000000000,
0x3fd0c00000000000,
0x3fd3800000000000,
0x3fd6300000000000,
];
// i = number of leading thresholds strictly below yh
let mut i = 0usize;
while i < THRESHOLD.len() && yh > f64::from_bits(THRESHOLD[i]) {
i += 1;
}
let p = ASYMPTOTIC_POLY[i];
u = DoubleDouble::from_exact_mult(yh, yh);
/* Since |yh| <= 1, we have |uh| <= 1 and |ul| <= 2^-53. */
u.lo = dd_fmla(2.0 * yh, yl, u.lo);
/* uh+ul approximates (yh+yl)^2, with absolute error bounded by
ulp(ul) + yl^2, where ulp(ul) is the maximal rounding error in
the FMA, and yl^2 is the neglected term.
Since |ul| <= 2^-53, ulp(ul) <= 2^-105, and since |yl| <= 2^-52,
this yields |uh + ul - (yh+yl)^2| <= 2^-105 + 2^-104 < 2^-103.41.
For the relative error, since |(yh+yl)^2| >= 1/4:
|uh + ul - (yh+yl)^2| < 2^-101.41 * |uh+ul|.
And relatively to 1/x^2:
yh + yl = 1/x * (1 + eps1) with |eps1| < 2^-102.67
uh + ul = (yh+yl)^2 * (1 + eps2) with |eps2| < 2^-101.41
This yields:
|uh + ul - 1/x^2| < 2^-100.90 * |uh+ul|.
*/
/* evaluate p(uh+ul): the three highest coefficients use only uh */
let mut zh: f64 = f64::from_bits(p[12]); // degree 23
zh = dd_fmla(zh, u.hi, f64::from_bits(p[11])); // degree 21
zh = dd_fmla(zh, u.hi, f64::from_bits(p[10])); // degree 19
/* degree 17: zh*(uh+ul)+p[9] */
let mut v = DoubleDouble::quick_f64_mult(zh, u);
let mut z_dd = DoubleDouble::from_exact_add(f64::from_bits(p[9]), v.hi);
z_dd.lo += v.lo;
/* degrees 15 down to 3: (zh+zl)*(uh+ul)+p[(a+1)/2] for a = 15, 13, ..., 3 */
for a in (3..=15).rev().step_by(2) {
v = DoubleDouble::quick_mult(z_dd, u);
z_dd = DoubleDouble::from_exact_add(f64::from_bits(p[((a + 1) / 2) as usize]), v.hi);
z_dd.lo += v.lo;
}
/* degree 1: (zh+zl)*(uh+ul)+p[0]+p[1] */
v = DoubleDouble::quick_mult(z_dd, u);
z_dd = DoubleDouble::from_exact_add(f64::from_bits(p[0]), v.hi);
z_dd.lo += v.lo + f64::from_bits(p[1]);
/* multiply by yh+yl */
u = DoubleDouble::quick_mult(z_dd, DoubleDouble::new(yl, yh));
/* now uh+ul approximates p(1/x) */
/* now multiply (uh+ul)*(eh+el) */
v = DoubleDouble::quick_mult(u, e_dd);
/* Write y = 1/x. We have the following errors:
* the maximal mathematical error is:
|erfc(x)*exp(x^2) - p(y)| < 2^-71.804 * |p(y)| (for i=3) thus
|erfc(x) - exp(-x^2)*p(y)| < 2^-71.804 * |exp(-x^2)*p(y)|
* the error in approximating exp(-x^2) by eh+el:
|eh + el - exp(-x^2)| < 2^-74.139 * |eh + el|
* the fact that we evaluate p on yh+yl instead of 1/x
this error is bounded by |p'| * |yh+yl - 1/x|, where
|yh+yl - 1/x| < 2^-102.67 * |yh+yl|, and the relative
error is bounded by |p'/p| * |yh+yl - 1/x|.
Since the maximal value of |p'/p| is bounded by 27.2 (for i=0),
this yields 27.2 * 2^-102.67 < 2^-97.9
* the rounding errors when evaluating p on yh+yl: this error is bounded
(relatively) by 2^-67.184 (for i=5), see analyze_erfc_asympt_fast()
in erfc.sage
* the rounding error in (uh+ul)*(eh+el): we assume this error is bounded
by 2^-80 (relatively)
This yields a global relative bound of:
(1+2^-71.804)*(1+2^-74.139)*(1+2^-97.9)*(1+2^-67.184)*(1+2^-80)-1
< 2^-67.115
*/
// Convert the relative bound into an absolute one; below the cutoff the
// fixed bound 2^-1022 is used instead of the product.
if v.hi >= f64::from_bits(0x044151b9a3fdd5c9) {
Erf {
err: f64::from_bits(0x3bbd900000000000) * v.hi,
result: v,
} /* 2^-67.115 < 0x1.d9p-68 */
} else {
Erf {
result: v,
err: f64::from_bits(0x0010000000000000),
} // this overestimates 0x1.d9p-68 * h
}
}
/// Fast path for `erfc(x)`: returns a double-double approximation together
/// with an absolute error bound.
///
/// Three regimes: `x < 0` uses `1 + erf(-x)`, `0 <= x <= 0x1.713786d9c7c09p+1`
/// uses `1 - erf(x)`, and larger `x` is forwarded to `erfc_asympt_fast`.
#[inline]
fn erfc_fast(x: f64) -> Erf {
    if x < 0. {
        // erfc(x) = 1 - erf(x) = 1 + erf(-x)
        let erf_part = erf_fast(-x);
        /* erf_part approximates erf(-x) with relative error bounded by
        erf_part.err <= 0x1.78p-69; convert it into an absolute error */
        let abs_err = erf_part.err * erf_part.result.hi;
        let mut sum = DoubleDouble::from_exact_add(1.0, erf_part.result.hi);
        sum.lo += erf_part.result.lo;
        /* since h <= 2, the error of the exact addition is bounded by
        2^-105*h <= 2^-104. After it, the low part is at most
        ulp(h) <= ulp(2) = 2^-51; assuming the low part of erf_fast() is at
        most 2^-51 as well, the updated low part is at most 2^-50 and the
        rounding error of the update is bounded by ulp(2^-50) = 2^-102.
        The absolute error is thus bounded by
        err + 2^-104 + 2^-102 = err + 0x1.4p-102.
        The maximal value of err here is for |x| < 0.0625, where erf_fast()
        returns 0x1.78p-69, and h=1/2, yielding err = 0x1.78p-70.
        Adding 0x1.4p-102 is thus exact. */
        Erf {
            err: abs_err + f64::from_bits(0x3994000000000000),
            result: sum,
        }
    } else if x <= f64::from_bits(0x400713786d9c7c09) {
        let erf_part = erf_fast(x);
        /* erf_part approximates erf(x) with relative error bounded by
        erf_part.err <= 0x1.78p-69; convert it into an absolute error */
        let abs_err = erf_part.err * erf_part.result.hi;
        let mut diff = DoubleDouble::from_exact_add(1.0, -erf_part.result.hi);
        diff.lo -= erf_part.result.lo;
        if x >= f64::from_bits(0x3fde861fbb24c00a) {
            /* for x >= 0x1.e861fbb24c00ap-2, erf(x) >= 1/2, thus 1-h is exact
            by Sterbenz theorem: the exact addition leaves a zero low part,
            the updated low part is just -l, and the absolute error is err */
            Erf {
                err: abs_err,
                result: diff,
            }
        } else {
            /* for x < 0x1.e861fbb24c00ap-2, the error of the exact addition is
            bounded by 2^-105*h, and since h <= 1/2, this yields 2^-106.
            After it, the low part is at most ulp(h) <= ulp(1/2) = 2^-53;
            assuming the low part of erf_fast() is at most 2^-53 as well, the
            updated low part is at most 2^-52 and the rounding error of the
            update is bounded by ulp(2^-52) = 2^-104.
            The absolute error is thus bounded by err + 2^-106 + 2^-104.
            The maximal value of err here is for x < 0.0625, where erf_fast()
            returns 0x1.78p-69, and h=1/2, yielding err = 0x1.78p-70.
            Adding 0x1.4p-104 is thus exact. */
            Erf {
                err: abs_err + f64::from_bits(0x3974000000000000),
                result: diff,
            }
        }
    } else {
        /* Now 0x1.713786d9c7c09p+1 < x < 0x1.b39dc41e48bfdp+4 thus
        erfc(x) < 0.000046. */
        erfc_asympt_fast(x)
    }
}
/// Complementary error function
///
/// Max ulp 0.5
pub fn f_erfc(x: f64) -> f64 {
    const SIGN_BIT: u64 = 0x8000_0000_0000_0000;
    const INF_BITS: u64 = 0x7ff0_0000_0000_0000;
    // 2^-54
    const TWO_POW_M54: f64 = f64::from_bits(0x3c90000000000000);

    let bits: u64 = x.to_bits();
    if bits >= SIGN_BIT {
        // sign bit set: x = -NaN or x <= 0 (excluding +0)
        if bits >= 0xc017744f8f74e94bu64 {
            // x = NaN or x <= -0x1.7744f8f74e94bp2, where erfc(x) rounds to 2
            // (to nearest)
            let neg_inf_bits = SIGN_BIT | INF_BITS;
            if bits == neg_inf_bits {
                return 2.0; // -Inf
            }
            if bits > neg_inf_bits {
                return x + x; // NaN
            }
            // rounds to 2 or below(2)
            return black_box(2.0) - black_box(TWO_POW_M54);
        }
        // for -9.8390953768041405e-17 <= x <= 0, erfc(x) rounds to 1 (to nearest)
        if x >= f64::from_bits(0xbc9c5bf891b4ef6a) {
            return dd_fmla(-x, TWO_POW_M54, 1.0);
        }
    } else {
        // sign bit clear: x = +NaN or x >= 0 (excluding -0)
        let magnitude: u64 = bits & 0x7fff_ffff_ffff_ffff;
        if magnitude >= 0x403b39dc41e48bfdu64 {
            // x = NaN or x >= 0x1.b39dc41e48bfdp+4, where erfc(x) < 2^-1075:
            // rounds to 0 or 2^-1074
            if magnitude == INF_BITS {
                return 0.0; // +Inf
            }
            if magnitude > INF_BITS {
                return x + x; // NaN
            }
            // 0 or 2^-1074 wrt rounding
            return black_box(f64::from_bits(0x0000000000000001)) * black_box(0.25);
        }
        // for 0 <= x <= 0x1.c5bf891b4ef6ap-55, erfc(x) rounds to 1 (to nearest)
        if x <= f64::from_bits(0x3c8c5bf891b4ef6a) {
            return dd_fmla(-x, TWO_POW_M54, 1.0);
        }
    }

    /* now -0x1.7744f8f74e94bp+2 < x < -0x1.c5bf891b4ef6ap-54
    or 0x1.c5bf891b4ef6ap-55 < x < 0x1.b39dc41e48bfdp+4 */
    let fast = erfc_fast(x);
    // Rounding test: accept the fast result only if both ends of the error
    // interval round to the same double.
    let lower = fast.result.hi + (fast.result.lo - fast.err);
    let upper = fast.result.hi + (fast.result.lo + fast.err);
    if lower == upper {
        lower
    } else {
        erfc_accurate(x)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks of `f_erfc` on a few finite inputs and on the special values.
    #[test]
    fn test_erfc() {
        // (input, expected) pairs compared for exact equality.
        let finite_cases: [(f64, f64); 4] = [
            (1.0, 0.15729920705028513),
            (0.5, 0.4795001221869535),
            (0.000000005, 0.9999999943581042),
            (-0.00000000000065465465423305, 1.0000000000007387),
        ];
        for (input, expected) in finite_cases {
            assert_eq!(f_erfc(input), expected);
        }
        assert!(f_erfc(f64::NAN).is_nan());
        assert_eq!(f_erfc(f64::INFINITY), 0.0);
        assert_eq!(f_erfc(f64::NEG_INFINITY), 2.0);
    }
}

421
vendor/pxfm/src/err/erfcx.rs vendored Normal file
View File

@@ -0,0 +1,421 @@
/*
* // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::double_double::DoubleDouble;
use crate::pow_exec::exp_dd_fast;
#[inline]
fn core_erfcx(x: f64) -> DoubleDouble {
if x <= 8. {
// Rational approximant for erfcx generated by Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=Exp[x^2]Erfc[x]
// {err0,approx,err1}=MiniMaxApproximation[f[z],{z,{1, 8},11,11},WorkingPrecision->75,MaxIterations->100]
// num=Numerator[approx];
// den=Denominator[approx];
// coeffs=CoefficientList[num,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// coeffs=CoefficientList[den,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const P: [(u64, u64); 12] = [
(0xbc836faeb9a312bb, 0x3ff000000000ee8e),
(0x3c91842f891bec6a, 0x4002ca20a78aaf8f),
(0x3c7916e8a1c30681, 0x4005e955f70aed5b),
(0x3cabad150d828d82, 0x4000646f5807ad07),
(0xbc6f482680d66d9c, 0x3ff1449e03ed381c),
(0xbc7188796156ae19, 0x3fdaa7e997e3b034),
(0xbc5c8af0642761e3, 0x3fbe836282058d4a),
(0xbc372829be2d072f, 0x3f99a2b2adc2ec05),
(0x3c020cc8b96000ab, 0x3f6e6cc3d120a955),
(0x3bdd138e6c136806, 0x3f3743d6735eaf13),
(0xbb9fbd22f0675122, 0x3ef1c1d36ebe29a2),
(0xb89093cc981c934c, 0xbc43c18bc6385c74),
];
let x2 = DoubleDouble::from_exact_mult(x, x);
let x4 = x2 * x2;
let x8 = x4 * x4;
let e0 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[1]),
x,
DoubleDouble::from_bit_pair(P[0]),
);
let e1 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[3]),
x,
DoubleDouble::from_bit_pair(P[2]),
);
let e2 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[5]),
x,
DoubleDouble::from_bit_pair(P[4]),
);
let e3 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[7]),
x,
DoubleDouble::from_bit_pair(P[6]),
);
let e4 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[9]),
x,
DoubleDouble::from_bit_pair(P[8]),
);
let e5 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[11]),
x,
DoubleDouble::from_bit_pair(P[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_num = DoubleDouble::mul_add(x8, f2, g0);
const Q: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc95d65be031374e, 0x400bd10c4fb1dbe5),
(0x3cb2d8f661db08a0, 0x4016a649ff973199),
(0x3ca32cbcfdc0ea93, 0x4016daab399c1ffc),
(0xbca2982868536578, 0x400fd61ab892d14c),
(0xbca2e29199e17fd9, 0x40001f56c4d495a3),
(0x3c412ce623a1790a, 0x3fe852b582135164),
(0x3c61152eaf4b0dc5, 0x3fcb760564da7cde),
(0xbc1b57ff91d81959, 0x3fa6e146988df835),
(0x3c17183d8445f19a, 0x3f7b06599b5e912f),
(0xbbd0ada61b85ff98, 0x3f449e39467b73d0),
(0xbb658d84fc735e67, 0x3eff794442532b51),
];
let e0 = DoubleDouble::mul_f64_add_f64(
DoubleDouble::from_bit_pair(Q[1]),
x,
f64::from_bits(0x3ff0000000000000),
);
let e1 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[3]),
x,
DoubleDouble::from_bit_pair(Q[2]),
);
let e2 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[5]),
x,
DoubleDouble::from_bit_pair(Q[4]),
);
let e3 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[7]),
x,
DoubleDouble::from_bit_pair(Q[6]),
);
let e4 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[9]),
x,
DoubleDouble::from_bit_pair(Q[8]),
);
let e5 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[11]),
x,
DoubleDouble::from_bit_pair(Q[10]),
);
let f0 = DoubleDouble::mul_add(x2, e1, e0);
let f1 = DoubleDouble::mul_add(x2, e3, e2);
let f2 = DoubleDouble::mul_add(x2, e5, e4);
let g0 = DoubleDouble::mul_add(x4, f1, f0);
let p_den = DoubleDouble::mul_add(x8, f2, g0);
return DoubleDouble::div(p_num, p_den);
}
// for large x erfcx(x) ~ 1/sqrt(pi) / x * R(1/x)
const ONE_OVER_SQRT_PI: DoubleDouble =
DoubleDouble::from_bit_pair((0x3c61ae3a914fed80, 0x3fe20dd750429b6d));
let r = DoubleDouble::from_quick_recip(x);
// Rational approximant generated by Wolfram:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=Exp[1/x^2]Erfc[1/x]/x*Sqrt[Pi]
// {err0,approx}=MiniMaxApproximation[f[z],{z,{2^-23,1/8},8,8},WorkingPrecision->75,MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[num,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// coeffs=CoefficientList[den,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const P: [(u64, u64); 9] = [
(0xbb1d2ee37e46a4cd, 0x3ff0000000000000),
(0x3ca2e575a4ce3d30, 0x4001303ab00c8bac),
(0xbccf38381e5ee521, 0x4030a97aeed54c9f),
(0xbcc3a2842df0dd3d, 0x4036f7733c9fd2f9),
(0xbcfeaf46506f16ed, 0x4051c5f382750553),
(0x3ccbb9f5e11d176a, 0x404ac0081e0749e0),
(0xbcf374f8966ae2a5, 0x4052082526d99a5c),
(0x3cbb5530b924f224, 0x402feabbf6571c29),
(0xbcbcdd50a3ca4776, 0x40118726e1f2d204),
];
const Q: [(u64, u64); 9] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3ca2e4613c9e0017, 0x4001303ab00c8bac),
(0xbcce5f17cf14e51d, 0x4031297aeed54c9f),
(0xbcdf7e0fed176f92, 0x40380a76e7a09bb2),
(0x3cfc57b67a2797af, 0x4053bb22e04faf3e),
(0xbcd3e63b7410b46b, 0x404ff46317ae9483),
(0xbce122c15db2653f, 0x405925ef8a428c36),
(0x3ce174ebe3e52c8e, 0x4040f49acfe692e1),
(0xbcda0e267ce6e2e6, 0x40351a07878bfbd3),
];
let mut p_num = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[8]),
r,
DoubleDouble::from_bit_pair(P[7]),
);
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[6]));
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[5]));
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[4]));
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[3]));
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[2]));
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[1]));
p_num = DoubleDouble::mul_add(p_num, r, DoubleDouble::from_bit_pair(P[0]));
let mut p_den = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[8]),
r,
DoubleDouble::from_bit_pair(Q[7]),
);
p_den = DoubleDouble::mul_add(p_den, r, DoubleDouble::from_bit_pair(Q[6]));
p_den = DoubleDouble::mul_add(p_den, r, DoubleDouble::from_bit_pair(Q[5]));
p_den = DoubleDouble::mul_add(p_den, r, DoubleDouble::from_bit_pair(Q[4]));
p_den = DoubleDouble::mul_add(p_den, r, DoubleDouble::from_bit_pair(Q[3]));
p_den = DoubleDouble::mul_add(p_den, r, DoubleDouble::from_bit_pair(Q[2]));
p_den = DoubleDouble::mul_add(p_den, r, DoubleDouble::from_bit_pair(Q[1]));
p_den = DoubleDouble::mul_add_f64(p_den, r, f64::from_bits(0x3ff0000000000000));
let v0 = DoubleDouble::quick_mult(ONE_OVER_SQRT_PI, r);
let v1 = DoubleDouble::div(p_num, p_den);
DoubleDouble::quick_mult(v0, v1)
}
/// Scaled complementary error function (exp(x^2)*erfc(x))
///
/// Evaluation strategy, selected by the value of `x`:
/// - NaN returns NaN, `+inf` returns `0`, `-inf` returns `+inf`.
/// - `|x| <= 2.2204460492503131e-18` (including `±0`) returns exactly `1`.
/// - `|x| <= f64::EPSILON` uses the linear term `1 - 2*x/sqrt(pi)`.
/// - `x <= -sqrt(709.783)` returns `+inf`.
/// - `|x| <= 1` uses a degree 10/10 rational minimax approximant evaluated in
///   double-double arithmetic.
/// - otherwise `core_erfcx(|x|)` is evaluated, and for negative `x` the result
///   is reflected with `2*exp(x^2) - erfcx(|x|)`.
pub fn f_erfcx(x: f64) -> f64 {
// Shifting the bit pattern left by one discards the sign, so `ux` orders by |x|.
let ux = x.to_bits().wrapping_shl(1);
if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 {
// x == NaN, x == inf, x == 0, |x| <= f64::EPSILON
if x.is_nan() {
return f64::NAN;
}
if x.to_bits().wrapping_shl(1) == 0 {
// x == +0 or x == -0
return 1.;
}
if x.is_infinite() {
return if x.is_sign_positive() {
0.
} else {
f64::INFINITY
};
}
if ux <= 0x7888f5c28f5c28f6u64 {
// |x| <= 2.2204460492503131e-18
return 1.;
}
// |x| <= f64::EPSILON
// First-order expansion 1 - 2*x/sqrt(pi); the constant is stored as a
// (lo, hi) pair and both parts are accumulated with fused multiply-adds.
use crate::common::f_fmla;
const M_TWO_OVER_SQRT_PI: DoubleDouble =
DoubleDouble::from_bit_pair((0xbc71ae3a914fed80, 0xbff20dd750429b6d));
return f_fmla(
M_TWO_OVER_SQRT_PI.lo,
x,
f_fmla(M_TWO_OVER_SQRT_PI.hi, x, 1.),
);
}
// NaN was handled above, so a raw bit comparison is a valid ordering of the
// remaining negative values here (larger bits == more negative).
if x.to_bits() >= 0xc03aa449ebc84dd6 {
// x <= -sqrt(709.783) ~ -26.6417
return f64::INFINITY;
}
let ax = x.to_bits() & 0x7fff_ffff_ffff_ffffu64;
if ax <= 0x3ff0000000000000u64 {
// |x| <= 1
// Rational approximant generated by Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=Exp[x^2]Erfc[x]
// {err0,approx}=MiniMaxApproximation[f[z],{z,{-1, 1},10,10},WorkingPrecision->75,MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[num,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// coeffs=CoefficientList[den,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// Numerator coefficients, lowest degree first, passed to `DoubleDouble::from_bit_pair`.
const P: [(u64, u64); 11] = [
(0xbb488611350b1950, 0x3ff0000000000000),
(0xbc86ae482c7f2342, 0x3ff9c5d39e89602f),
(0x3c6702d70b807254, 0x3ff5a4c406d6468b),
(0x3c7fe41fc43cfed5, 0x3fe708e7f401bd0c),
(0x3c73a4a355172c6d, 0x3fd0d9a0c1a7126c),
(0x3c5f4c372faa270f, 0x3fb154722e30762e),
(0xbc04c0227976379e, 0x3f88ecebb62ce646),
(0xbbdc9ea151b9eb33, 0x3f580ea84143877b),
(0xbb6dae7001a91491, 0x3f1c3c5f95579b0a),
(0x3b6aca5e82c52897, 0x3ecea4db51968d9e),
(0x3a41c4edd175d2af, 0x3dbc0dccea7fc8ed),
];
// Powers of x shared by the numerator and denominator evaluations.
let x2 = DoubleDouble::from_exact_mult(x, x);
let x4 = x2 * x2;
let x8 = x4 * x4;
// Estrin-style scheme: adjacent coefficients are paired into linear terms
// in x, then combined with x^2, x^4 and x^8.
// NOTE(review): assumes `mul_f64_add(a, b, c)` and `mul_add(a, b, c)` compute a*b + c — confirm.
let q0 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[1]),
x,
DoubleDouble::from_bit_pair(P[0]),
);
let q1 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[3]),
x,
DoubleDouble::from_bit_pair(P[2]),
);
let q2 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[5]),
x,
DoubleDouble::from_bit_pair(P[4]),
);
let q3 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[7]),
x,
DoubleDouble::from_bit_pair(P[6]),
);
let q4 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(P[9]),
x,
DoubleDouble::from_bit_pair(P[8]),
);
let r0 = DoubleDouble::mul_add(x2, q1, q0);
let r1 = DoubleDouble::mul_add(x2, q3, q2);
let s0 = DoubleDouble::mul_add(x4, r1, r0);
let s1 = DoubleDouble::mul_add(x2, DoubleDouble::from_bit_pair(P[10]), q4);
let p_num = DoubleDouble::mul_add(x8, s1, s0);
// Denominator coefficients, lowest degree first. Q[0] is exactly 1.0 and
// is supplied below as a plain f64 constant rather than read from the table.
const Q: [(u64, u64); 11] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc7bae414cad99c8, 0x4005e9d57765fdce),
(0x3c8fa553bed15758, 0x400b8c670b3fbcda),
(0x3ca6c7ad610f1019, 0x4004f2ca59958153),
(0x3c87787f336cc4e6, 0x3ff55c267090315a),
(0xbc6ef55d4b2c4150, 0x3fde8b84b64b6f4e),
(0x3c570d63c94be3a3, 0x3fbf0d5e36017482),
(0x3c1882a745ef572e, 0x3f962f73633506c1),
(0xbc0850bb6fc82764, 0x3f65593e0dc46acd),
(0xbbb9dc0097d7d776, 0x3f290545603e2f94),
(0xbb776e5781e3889d, 0x3edb29c49d18cf89),
];
// Same pairing scheme as the numerator.
let q0 = DoubleDouble::mul_f64_add_f64(
DoubleDouble::from_bit_pair(Q[1]),
x,
f64::from_bits(0x3ff0000000000000),
);
let q1 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[3]),
x,
DoubleDouble::from_bit_pair(Q[2]),
);
let q2 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[5]),
x,
DoubleDouble::from_bit_pair(Q[4]),
);
let q3 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[7]),
x,
DoubleDouble::from_bit_pair(Q[6]),
);
let q4 = DoubleDouble::mul_f64_add(
DoubleDouble::from_bit_pair(Q[9]),
x,
DoubleDouble::from_bit_pair(Q[8]),
);
let r0 = DoubleDouble::mul_add(x2, q1, q0);
let r1 = DoubleDouble::mul_add(x2, q3, q2);
let s0 = DoubleDouble::mul_add(x4, r1, r0);
let s1 = DoubleDouble::mul_add(x2, DoubleDouble::from_bit_pair(Q[10]), q4);
let p_den = DoubleDouble::mul_add(x8, s1, s0);
let v = DoubleDouble::div(p_num, p_den);
return v.to_f64();
}
// |x| > 1: evaluate on the magnitude, then reflect for negative inputs.
let mut erfcx_abs_x = core_erfcx(f64::from_bits(ax));
if x < 0. {
// For x < 0: erfcx(x) = 2*exp(x^2) - erfcx(|x|)
// Re-normalise the (hi, lo) pair before it is used in the subtraction.
erfcx_abs_x = DoubleDouble::from_exact_add(erfcx_abs_x.hi, erfcx_abs_x.lo);
// x^2 is kept as an exact double-double product for the exponential.
let d2x = DoubleDouble::from_exact_mult(x, x);
let expd2x = exp_dd_fast(d2x);
return DoubleDouble::mul_f64_add(expd2x, 2., -erfcx_abs_x).to_f64();
}
erfcx_abs_x.to_f64()
}
#[cfg(test)]
mod tests {
    use crate::f_erfcx;

    /// Checks `f_erfcx` against reference values, pairing each input with the
    /// exact `f64` result it must produce; NaN is verified separately because
    /// it never compares equal.
    #[test]
    fn test_erfcx() {
        const CASES: &[(f64, f64)] = &[
            (2.2204460492503131e-18, 1.0),
            (-2.2204460492503131e-18, 1.0),
            (-f64::EPSILON, 1.0000000000000002),
            (f64::EPSILON, 0.9999999999999998),
            (-173., f64::INFINITY),
            (-9.4324165432, 8.718049147018359e38),
            (9.4324165432, 0.059483265496416374),
            (-1.32432512125, 11.200579112797806),
            (1.32432512125, 0.3528722004785406),
            (-0.532431235, 2.0560589406595384),
            (0.532431235, 0.5994337293294584),
            (1e-26, 1.0),
            (-0.500000000023073, 1.952360489253639),
            (-175., f64::INFINITY),
            (f64::INFINITY, 0.),
            (f64::NEG_INFINITY, f64::INFINITY),
        ];
        for &(input, expected) in CASES {
            assert_eq!(f_erfcx(input), expected);
        }
        assert!(f_erfcx(f64::NAN).is_nan());
    }
}

229
vendor/pxfm/src/err/erfcxf.rs vendored Normal file
View File

@@ -0,0 +1,229 @@
/*
* // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::exponents::core_expdf;
use crate::polyeval::{f_estrin_polyeval8, f_polyeval6};
/// Evaluates `exp(x^2) * erfc(x)` in `f64` for the positive tail used by the
/// `f32` entry point.
///
/// The caller is expected to pass `x > 1`. Below 8 a degree 7/7 rational
/// approximant in `x` is used; otherwise the value is formed as
/// `(1/sqrt(pi)) * (1/x) * R(1/x)` with a degree 5/5 rational `R`.
#[inline]
fn core_erfcx(x: f32) -> f64 {
    // x here is already always > 1
    let xd = x as f64;
    if x < 8. {
        // Rational approximant generated by Wolfram Mathematica:
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=Exp[x^2]Erfc[x]
        // {err0,approx,err1}=MiniMaxApproximation[f[z],{z,{1,8},7,7},WorkingPrecision->75,MaxIterations->100]
        // num=Numerator[approx];
        // den=Denominator[approx];
        // coeffs=CoefficientList[num,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        // coeffs=CoefficientList[den,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let numerator = f_estrin_polyeval8(
            xd,
            f64::from_bits(0x3ff00000804c8f8f),
            f64::from_bits(0x3ffb7307ea8fdbeb),
            f64::from_bits(0x3ff7081ba7bc735c),
            f64::from_bits(0x3fe767338b33532a),
            f64::from_bits(0x3fce3c8288507fd6),
            f64::from_bits(0x3fa7ca2cb4ae697f),
            f64::from_bits(0x3f72b11b0dfb2348),
            f64::from_bits(0xbd9f64f0c15c479b),
        );
        let denominator = f_estrin_polyeval8(
            xd,
            f64::from_bits(0x3ff0000000000000),
            f64::from_bits(0x4006c071e850132e),
            f64::from_bits(0x400d30326bc347ee),
            f64::from_bits(0x40060d8d56bada75),
            f64::from_bits(0x3ff56643fc4580eb),
            f64::from_bits(0x3fdb0e194e72a513),
            f64::from_bits(0x3fb5154759b61be3),
            f64::from_bits(0x3f8090b063cce524),
        );
        numerator / denominator
    } else {
        // for large x erfcx(x) ~ 1/sqrt(pi) / x * R(1/x)
        const ONE_OVER_SQRT_PI: f64 = f64::from_bits(0x3fe20dd750429b6d);
        let inv_x = 1. / xd;
        // Rational approximant generated by Wolfram Mathematica:
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=Exp[1/x^2]Erfc[1/x]/x*Sqrt[Pi]
        // {err0,approx}=MiniMaxApproximation[f[z],{z,{2^-12,1/8},5,5},WorkingPrecision->75,MaxIterations->100]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // coeffs=CoefficientList[num,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        // coeffs=CoefficientList[den,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let numerator = f_polyeval6(
            inv_x,
            f64::from_bits(0x3ff0000000000002),
            f64::from_bits(0xbfd09caf2bb541c3),
            f64::from_bits(0x40132238367ae454),
            f64::from_bits(0xc0060bc62c3711b1),
            f64::from_bits(0x40024a90d229158d),
            f64::from_bits(0xc0013665d8ff3813),
        );
        let denominator = f_polyeval6(
            inv_x,
            f64::from_bits(0x3ff0000000000000),
            f64::from_bits(0xbfd09caf2bb5101d),
            f64::from_bits(0x4015223836772f2c),
            f64::from_bits(0xc00715911b5f5f5c),
            f64::from_bits(0x4010b66411ec4e1f),
            f64::from_bits(0xc00b325c767ed436),
        );
        (inv_x * ONE_OVER_SQRT_PI) * (numerator / denominator)
    }
}
/// Scaled complementary error function (exp(x^2)*erfc(x))
///
/// ulp 0.5
///
/// Special values: NaN yields NaN, `+inf` yields `0`, `-inf` and every
/// `x <= -9.382415` yield `+inf`, and `|x| <= 1.19209290e-08` yields `1`.
/// Other inputs use a first-order Taylor term for `|x| <= 2^-23`, a rational
/// approximant for `|x| <= 1`, and `core_erfcx(|x|)` beyond that, reflected
/// with `2*exp(x^2) - erfcx(|x|)` when `x` is negative.
pub fn f_erfcxf(x: f32) -> f32 {
    let bits = x.to_bits();
    // Dropping the sign bit by shifting makes the comparison depend on |x| only.
    let magnitude_key = bits.wrapping_shl(1);
    if magnitude_key <= 0x6499_999au32 {
        // |x| == 0, |x| <= 1.19209290e-08
        return 1.;
    }
    if magnitude_key >= 0xffu32 << 24 {
        // |x| == inf, |x| == NaN
        if !x.is_infinite() {
            return f32::NAN; // x == NaN
        }
        return if x.is_sign_positive() {
            0.
        } else {
            f32::INFINITY
        };
    }
    let abs_bits = bits & 0x7fff_ffff;
    if x <= -9.382415 {
        // x <= -9.382415
        return f32::INFINITY;
    }
    if abs_bits <= 0x34000000u32 {
        // |x| < ulp(1) we use taylor series at 0
        // erfcx(x) ~ 1-(2 x)/Sqrt[\[Pi]]+x^2-(4 x^3)/(3 Sqrt[\[Pi]])+x^4/2-(8 x^5)/(15 Sqrt[\[Pi]])+O[x]^6
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        ))]
        {
            // Hardware FMA available: stay in f32.
            use crate::common::f_fmlaf;
            const M_TWO_OVER_SQRT_PI: f32 = f32::from_bits(0xbf906ebb);
            return f_fmlaf(x, M_TWO_OVER_SQRT_PI, 1.);
        }
        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        )))]
        {
            // No hardware FMA: evaluate in f64 and round once at the end.
            use crate::common::f_fmla;
            const M_TWO_OVER_SQRT_PI: f64 = f64::from_bits(0xbff20dd750429b6d);
            let xd = x as f64;
            return f_fmla(xd, M_TWO_OVER_SQRT_PI, 1.) as f32;
        }
    }
    if abs_bits <= 0x3f800000u32 {
        // |x| <= 1
        let xd = x as f64;
        // Generated by Wolfram Mathematica:
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=Exp[x^2]Erfc[x]
        // {err0,approx}=MiniMaxApproximation[f[z],{z,{-1,1},7,7},WorkingPrecision->75,MaxIterations->100]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // coeffs=CoefficientList[num,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        // coeffs=CoefficientList[den,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let numerator = f_estrin_polyeval8(
            xd,
            f64::from_bits(0x3feffffffffffff8),
            f64::from_bits(0x3ff26c328bd2dc5f),
            f64::from_bits(0x3fe6f91b9fa5f58c),
            f64::from_bits(0x3fd09edf3fcf5ee1),
            f64::from_bits(0x3faddb3bcedbff91),
            f64::from_bits(0x3f7e43b5dd4b7587),
            f64::from_bits(0x3f3baab6b3e61d7b),
            f64::from_bits(0xbe83e7d629825321),
        );
        let denominator = f_estrin_polyeval8(
            xd,
            f64::from_bits(0x3ff0000000000000),
            f64::from_bits(0x40023d04ee0abc28),
            f64::from_bits(0x400252b377263d61),
            f64::from_bits(0x3ff510af7f826479),
            f64::from_bits(0x3fddfc089c4731ed),
            f64::from_bits(0x3fba79b040e28b0a),
            f64::from_bits(0x3f8aea2f3579235a),
            f64::from_bits(0x3f485d2875b4f88c),
        );
        return (numerator / denominator) as f32;
    }
    // |x| > 1: evaluate on the magnitude first.
    let tail = core_erfcx(f32::from_bits(abs_bits));
    if x < 0. {
        // For x < 0: erfcx(x) = 2*exp(x^2) - erfcx(|x|)
        let xd = x as f64;
        f_fmla(2., core_expdf(xd * xd), -tail) as f32
    } else {
        tail as f32
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_erfcxf` against reference values, pairing each input with the
    /// exact `f32` result it must produce; NaN is verified separately because
    /// it never compares equal.
    #[test]
    fn test_erfcx() {
        const CASES: &[(f32, f32)] = &[
            (5.19209290e-09, 1.0),
            (1.19209290e-08, 1.0),
            (f32::EPSILON, 0.9999999),
            (12.1, 0.046469606),
            (7.1, 0.07869752),
            (1.1, 0.40173045),
            (-0.23, 1.3232007),
            (-1.4325, 15.234794),
            (-10., f32::INFINITY),
            (f32::INFINITY, 0.),
            (f32::NEG_INFINITY, f32::INFINITY),
        ];
        for &(input, expected) in CASES {
            assert_eq!(f_erfcxf(input), expected);
        }
        assert!(f_erfcxf(f32::NAN).is_nan());
    }
}

416
vendor/pxfm/src/err/erff.rs vendored Normal file
View File

@@ -0,0 +1,416 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
// Polynomials approximating erf(x)/x on ( k/8, (k + 1)/8 ) generated by Sollya
// with:
// > P = fpminimax(erf(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|D...|],
// [k/8, (k + 1)/8]);
// for k = 0..31.
//
// Layout: one row per interval index k. Each row stores eight f64 values as
// raw IEEE-754 bit patterns (decoded with `f64::from_bits`), ordered from the
// constant term upward, i.e. the coefficients of x^0, x^2, x^4, ..., x^14.
// `f_erff` selects the row by truncating 8 * |x| to an integer.
static COEFFS: [[u64; 8]; 32] = [
[
0x3ff20dd750429b6d,
0xbfd812746b037753,
0x3fbce2f219e8596a,
0xbf9b82cdacb78fda,
0x3f756479297dfda5,
0xbf48b3ac5455ef02,
0xbf7126fcac367e3b,
0x3fb2d0bdb3ba4984,
],
[
0x3ff20dd750429b6d,
0xbfd812746b0379a8,
0x3fbce2f21a03cf2a,
0xbf9b82ce30de083e,
0x3f7565bcad3eb60f,
0xbf4c02c66f659256,
0x3f1f92f673385229,
0xbeedef402648ae90,
],
[
0x3ff20dd750429b34,
0xbfd812746b032dce,
0x3fbce2f219d84aae,
0xbf9b82ce22dcf139,
0x3f7565b9efcd4af1,
0xbf4c021f1af414bc,
0x3f1f7c6d177eff82,
0xbeec9e4410dcf865,
],
[
0x3ff20dd750426eab,
0xbfd812746ae592c7,
0x3fbce2f211525f14,
0xbf9b82ccc125e63f,
0x3f756596f261cfd3,
0xbf4bfde1ff8eeecf,
0x3f1f31a9d15dc5d8,
0xbeea5a4362844b3c,
],
[
0x3ff20dd75039c705,
0xbfd812746777e74d,
0x3fbce2f17af98a1b,
0xbf9b82be4b817cbe,
0x3f7564bec2e2962e,
0xbf4bee86f9da3558,
0x3f1e9443689dc0cc,
0xbee79c0f230805d8,
],
[
0x3ff20dd74f811211,
0xbfd81274371a3e8f,
0x3fbce2ec038262e5,
0xbf9b8265b82c5e1f,
0x3f75615a2e239267,
0xbf4bc63ae023dceb,
0x3f1d87c2102f7e06,
0xbee49584bea41d62,
],
[
0x3ff20dd746d063e3,
0xbfd812729a8a950f,
0x3fbce2cb0a2df232,
0xbf9b80eca1f51278,
0x3f75572e26c46815,
0xbf4b715e5638b65e,
0x3f1bfbb195484968,
0xbee177a565c15c52,
],
[
0x3ff20dd701b44486,
0xbfd812691145f237,
0x3fbce23a06b8cfd9,
0xbf9b7c1dc7245288,
0x3f753e92f7f397dd,
0xbf4ad97cc4acf0b2,
0x3f19f028b2b09b71,
0xbedcdc4da08da8c1,
],
[
0x3ff20dd5715ac332,
0xbfd8123e680bd0eb,
0x3fbce0457aded691,
0xbf9b6f52d52bed40,
0x3f750c291b84414c,
0xbf49ea246b1ad4a9,
0x3f177654674e0ca0,
0xbed737c11a1bcebb,
],
[
0x3ff20dce6593e114,
0xbfd811a59c02eadc,
0x3fbcdab53c7cd7d5,
0xbf9b526d2e321eed,
0x3f74b1d32cd8b994,
0xbf48963143ec0a1e,
0x3f14ad5700e4db91,
0xbed231e100e43ef2,
],
[
0x3ff20db48bfd5a62,
0xbfd80fdd84f9e308,
0x3fbccd340d462983,
0xbf9b196a29287680,
0x3f74210c2c13a0f7,
0xbf46dbdfb4ff71ae,
0x3f11bca2d17fbd71,
0xbecbca36f90c7cf5,
],
[
0x3ff20d64b2f8f508,
0xbfd80b4d4f19fa8b,
0x3fbcb088197262e3,
0xbf9ab51fd02e5b99,
0x3f734e1e5e81a632,
0xbf44c66377b502ce,
0x3f0d9ad25066213c,
0xbec4b0df7dd0cfa1,
],
[
0x3ff20c8fc1243576,
0xbfd8010cb2009e27,
0x3fbc7a47e9299315,
0xbf9a155be5683654,
0x3f7233502694997b,
0xbf426c94b7d81300,
0x3f08094f1de25fb9,
0xbebe0e3d776c6eef,
],
[
0x3ff20a9bd1611bc1,
0xbfd7ec7fbce83f90,
0x3fbc1d757d7317b7,
0xbf992c160cd589f0,
0x3f70d307269cc5c2,
0xbf3fda5b0d2d1879,
0x3f02fdd7b3b14a7f,
0xbeb54eed4a26af5a,
],
[
0x3ff20682834f943d,
0xbfd7c73f747bf5a9,
0x3fbb8c2db4a9ffd1,
0xbf97f0e4ffe989ec,
0x3f6e7061eae4166e,
0xbf3ad36e873fff2d,
0x3efd39222396128e,
0xbead83dacec5ea6b,
],
[
0x3ff1feb8d12676d7,
0xbfd7898347284afe,
0x3fbaba3466b34451,
0xbf9663adc573e2f9,
0x3f6ae99fb17c3e08,
0xbf3602f950ad5535,
0x3ef5e9717490609d,
0xbea3fca107bbc8d5,
],
[
0x3ff1f12fe3c536fa,
0xbfd72b1d1f22e6d3,
0x3fb99fc0eed4a896,
0xbf948db0a87bd8c6,
0x3f673e368895aa61,
0xbf319b35d5301fc8,
0x3ef007987e4bb033,
0xbe9a7edcd4c2dc70,
],
[
0x3ff1db7b0df84d5d,
0xbfd6a4e4a41cde02,
0x3fb83bbded16455d,
0xbf92809b3b36977e,
0x3f639c08bab44679,
0xbf2b7b45a70ed119,
0x3ee6e99b36410e7b,
0xbe913619bb7ebc0c,
],
[
0x3ff1bb1c85c4a527,
0xbfd5f23b99a249a3,
0x3fb694c91fa0d12c,
0xbf9053e1ce11c72d,
0x3f602bf72c50ea78,
0xbf24f478fb56cb02,
0x3ee005f80ecbe213,
0xbe85f2446bde7f5b,
],
[
0x3ff18dec3bd51f9d,
0xbfd5123f58346186,
0x3fb4b8a1ca536ab4,
0xbf8c4243015cc723,
0x3f5a1a8a01d351ef,
0xbf1f466b34f1d86b,
0x3ed5f835eea0bf6a,
0xbe7b83165b939234,
],
[
0x3ff152804c3369f4,
0xbfd4084cd4afd4bc,
0x3fb2ba2e836e47aa,
0xbf8800f2dfc6904b,
0x3f54a6daf0669c59,
0xbf16e326ab872317,
0x3ecd9761a6a755a5,
0xbe70fca33f9dd4b5,
],
[
0x3ff1087ad68356aa,
0xbfd2dbb044707459,
0x3fb0aea8ceaa0384,
0xbf840b516d52b3d2,
0x3f500c9e05f01d22,
0xbf1076afb0dc0ff7,
0x3ec39fadec400657,
0xbe64b5761352e7e3,
],
[
0x3ff0b0a7a8ba4a22,
0xbfd196990d22d4a1,
0x3fad5551e6ac0c4d,
0xbf807cce1770bd1a,
0x3f4890347b8848bf,
0xbf0757ec96750b6a,
0x3eb9b258a1e06bce,
0xbe58fc6d22da7572,
],
[
0x3ff04ce2be70fb47,
0xbfd0449e4b0b9cac,
0x3fa97f7424f4b0e7,
0xbf7ac825439c42f4,
0x3f428f5f65426dfb,
0xbf005b699a90f90f,
0x3eb0a888eecf4593,
0xbe4deace2b32bb31,
],
[
0x3fefbf9fb0e11cc8,
0xbfcde2640856545a,
0x3fa5f5b1f47f8510,
0xbf7588bc71eb41b9,
0x3f3bc6a0a772f56d,
0xbef6b9fad1f1657a,
0x3ea573204ba66504,
0xbe41d38065c94e44,
],
[
0x3feed8f18c99e031,
0xbfcb4cb6acd903b4,
0x3fa2c7f3dddd6fc1,
0xbf713052067df4e0,
0x3f34a5027444082f,
0xbeef672bab0e2554,
0x3e9b83c756348cc9,
0xbe3534f1a1079499,
],
[
0x3fedebd33044166d,
0xbfc8d7cd9053f7d8,
0x3f9ff9957fb3d6e7,
0xbf6b50be55de0f36,
0x3f2e92c8ec53a628,
0xbee5a4b88d508007,
0x3e91a27737559e26,
0xbe2942ae62cb2c14,
],
[
0x3fecfdbf0386f3bd,
0xbfc68e33d93b0dc4,
0x3f9b2683d58f53de,
0xbf65a9174e70d26f,
0x3f269ddd326d49cd,
0xbeddd8f397a8219c,
0x3e86a755016ad4dd,
0xbe1e366e0139187d,
],
[
0x3fec132adb8d7464,
0xbfc475a899f61b46,
0x3f970a431397a77c,
0xbf612e3d35beeee2,
0x3f20c16b05738333,
0xbed4a47f873e144e,
0x3e7d3d494c698c02,
0xbe12302c59547fe5,
],
[
0x3feb2f5fd05555e7,
0xbfc28feefbe03ec7,
0x3f93923acbb3a676,
0xbf5b4ff793cd6358,
0x3f18ea0eb8c913bc,
0xbeccb31ec2baceb1,
0x3e730011e7e80c04,
0xbe0617710635cb1d,
],
[
0x3fea54853cd9593e,
0xbfc0dbdbaea4dc8e,
0x3f90a93e2c20a0fd,
0xbf55c969ff401ea8,
0x3f129e0cc64fe627,
0xbec4160d8e9d3c2a,
0x3e68e7b67594624a,
0xbdfb1cf2c975b09b,
],
[
0x3fe983ceece09ff8,
0xbfbeacc78f7a2d00,
0x3f8c74418410655f,
0xbf51756a050e441e,
0x3f0bff3650f7f548,
0xbebc56c0217d3ada,
0x3e607b4918d0b489,
0xbdf0d4be8c1c50f8,
],
];
/// Error function
///
/// Max ulp 0.5
///
/// NaN is returned unchanged and `±inf` maps to `±1`. Finite inputs with
/// `|x| >= 4` return `±1` adjusted by a tiny opposite-signed term. Everything
/// else is evaluated in `f64` as `x * P_k(x^2)`, where `P_k` is the degree-7
/// polynomial from row `k = trunc(8 * |x|)` of `COEFFS`.
#[inline]
pub fn f_erff(x: f32) -> f32 {
    let abs_bits = x.to_bits() & 0x7fff_ffffu32;
    if abs_bits >= 0x4080_0000u32 {
        // |x| >= 4, infinity or NaN.
        static SATURATED: [f32; 2] = [1.0, -1.0];
        static NUDGE: [f32; 2] = [f32::from_bits(0xb3000000), f32::from_bits(0x33000000)];
        let negative = x.is_sign_negative() as usize;
        if abs_bits > 0x7f80_0000 {
            // NaN propagates unchanged.
            return x;
        }
        if abs_bits == 0x7f80_0000u32 {
            return SATURATED[negative];
        }
        return SATURATED[negative] + NUDGE[negative];
    }
    // Polynomial approximation:
    // erf(x) ~ x * (c0 + c1 * x^2 + c2 * x^4 + ... + c7 * x^14)
    const EIGHT: u32 = 3 << 23;
    // SAFETY: `abs_bits < 0x4080_0000` here, so the biased exponent is at most
    // 128; adding 3 to it keeps the value finite, non-negative and below 32
    // (zero and subnormal inputs become a tiny normal value that truncates to
    // 0), so the conversion to `usize` is in range.
    let row = unsafe { f32::from_bits(abs_bits.wrapping_add(EIGHT)).to_int_unchecked::<usize>() };
    let coeffs = COEFFS[row];
    let xd = x as f64;
    let x2 = xd * xd;
    let x4 = x2 * x2;
    let x8 = x4 * x4;
    // Estrin-style evaluation: pair coefficients in x^2, then merge with x^4 and x^8.
    let pair01 = f_fmla(x2, f64::from_bits(coeffs[1]), f64::from_bits(coeffs[0]));
    let pair23 = f_fmla(x2, f64::from_bits(coeffs[3]), f64::from_bits(coeffs[2]));
    let pair45 = f_fmla(x2, f64::from_bits(coeffs[5]), f64::from_bits(coeffs[4]));
    let pair67 = f_fmla(x2, f64::from_bits(coeffs[7]), f64::from_bits(coeffs[6]));
    let low = f_fmla(x4, pair23, pair01);
    let high = f_fmla(x4, pair67, pair45);
    (xd * f_fmla(x8, high, low)) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_erff` against reference values; NaN is verified separately
    /// because it never compares equal.
    #[test]
    fn f_erff_test() {
        const CASES: &[(f32, f32)] = &[
            (0.0, 0.0),
            (1.0, 0.8427008),
            (0.5, 0.5204999),
            (f32::INFINITY, 1.0),
            (f32::NEG_INFINITY, -1.0),
        ];
        for &(input, expected) in CASES {
            assert_eq!(f_erff(input), expected);
        }
        assert!(f_erff(f32::NAN).is_nan());
    }
}

345
vendor/pxfm/src/err/erffc.rs vendored Normal file
View File

@@ -0,0 +1,345 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{dd_fmla, f_fmla};
use std::hint::black_box;
// Table of 128 f64 values stored as raw IEEE-754 bit patterns and decoded with
// `f64::from_bits`. The entries increase from 1.0 (index 0) through sqrt(2)
// (index 64), consistent with entry k holding 2^(k/128).
// `f_erfcf` reads it as `ERR0[(j & 127)]`, i.e. with the low seven bits of its
// exponent index.
static ERR0: [u64; 128] = [
0x3ff0000000000000,
0x3ff0163da9fb3335,
0x3ff02c9a3e778061,
0x3ff04315e86e7f85,
0x3ff059b0d3158574,
0x3ff0706b29ddf6de,
0x3ff0874518759bc8,
0x3ff09e3ecac6f383,
0x3ff0b5586cf9890f,
0x3ff0cc922b7247f7,
0x3ff0e3ec32d3d1a2,
0x3ff0fb66affed31b,
0x3ff11301d0125b51,
0x3ff12abdc06c31cc,
0x3ff1429aaea92de0,
0x3ff15a98c8a58e51,
0x3ff172b83c7d517b,
0x3ff18af9388c8dea,
0x3ff1a35beb6fcb75,
0x3ff1bbe084045cd4,
0x3ff1d4873168b9aa,
0x3ff1ed5022fcd91d,
0x3ff2063b88628cd6,
0x3ff21f49917ddc96,
0x3ff2387a6e756238,
0x3ff251ce4fb2a63f,
0x3ff26b4565e27cdd,
0x3ff284dfe1f56381,
0x3ff29e9df51fdee1,
0x3ff2b87fd0dad990,
0x3ff2d285a6e4030b,
0x3ff2ecafa93e2f56,
0x3ff306fe0a31b715,
0x3ff32170fc4cd831,
0x3ff33c08b26416ff,
0x3ff356c55f929ff1,
0x3ff371a7373aa9cb,
0x3ff38cae6d05d866,
0x3ff3a7db34e59ff7,
0x3ff3c32dc313a8e5,
0x3ff3dea64c123422,
0x3ff3fa4504ac801c,
0x3ff4160a21f72e2a,
0x3ff431f5d950a897,
0x3ff44e086061892d,
0x3ff46a41ed1d0057,
0x3ff486a2b5c13cd0,
0x3ff4a32af0d7d3de,
0x3ff4bfdad5362a27,
0x3ff4dcb299fddd0d,
0x3ff4f9b2769d2ca7,
0x3ff516daa2cf6642,
0x3ff5342b569d4f82,
0x3ff551a4ca5d920f,
0x3ff56f4736b527da,
0x3ff58d12d497c7fd,
0x3ff5ab07dd485429,
0x3ff5c9268a5946b7,
0x3ff5e76f15ad2148,
0x3ff605e1b976dc09,
0x3ff6247eb03a5585,
0x3ff6434634ccc320,
0x3ff6623882552225,
0x3ff68155d44ca973,
0x3ff6a09e667f3bcd,
0x3ff6c012750bdabf,
0x3ff6dfb23c651a2f,
0x3ff6ff7df9519484,
0x3ff71f75e8ec5f74,
0x3ff73f9a48a58174,
0x3ff75feb564267c9,
0x3ff780694fde5d3f,
0x3ff7a11473eb0187,
0x3ff7c1ed0130c132,
0x3ff7e2f336cf4e62,
0x3ff80427543e1a12,
0x3ff82589994cce13,
0x3ff8471a4623c7ad,
0x3ff868d99b4492ed,
0x3ff88ac7d98a6699,
0x3ff8ace5422aa0db,
0x3ff8cf3216b5448c,
0x3ff8f1ae99157736,
0x3ff9145b0b91ffc6,
0x3ff93737b0cdc5e5,
0x3ff95a44cbc8520f,
0x3ff97d829fde4e50,
0x3ff9a0f170ca07ba,
0x3ff9c49182a3f090,
0x3ff9e86319e32323,
0x3ffa0c667b5de565,
0x3ffa309bec4a2d33,
0x3ffa5503b23e255d,
0x3ffa799e1330b358,
0x3ffa9e6b5579fdbf,
0x3ffac36bbfd3f37a,
0x3ffae89f995ad3ad,
0x3ffb0e07298db666,
0x3ffb33a2b84f15fb,
0x3ffb59728de5593a,
0x3ffb7f76f2fb5e47,
0x3ffba5b030a1064a,
0x3ffbcc1e904bc1d2,
0x3ffbf2c25bd71e09,
0x3ffc199bdd85529c,
0x3ffc40ab5fffd07a,
0x3ffc67f12e57d14b,
0x3ffc8f6d9406e7b5,
0x3ffcb720dcef9069,
0x3ffcdf0b555dc3fa,
0x3ffd072d4a07897c,
0x3ffd2f87080d89f2,
0x3ffd5818dcfba487,
0x3ffd80e316c98398,
0x3ffda9e603db3285,
0x3ffdd321f301b460,
0x3ffdfc97337b9b5f,
0x3ffe264614f5a129,
0x3ffe502ee78b3ff6,
0x3ffe7a51fbc74c83,
0x3ffea4afa2a490da,
0x3ffecf482d8e67f1,
0x3ffefa1bee615a27,
0x3fff252b376bba97,
0x3fff50765b6e4540,
0x3fff7bfdad9cbe14,
0x3fffa7c1819e90d8,
0x3fffd3c22b8f71f1,
];
// Two coefficient sets for the main path of `f_erfcf`, stored as raw f64 bit
// patterns. Row 0 is used while the bit pattern of |x| is <= 0x40051000 and
// row 1 above that. Within a row, as consumed by `f_erfcf`:
//   [0], [1]  parameters a, b of the change of variable z = (|x| - a) / (|x| + b)
//   [2]       constant term of the series in z
//   [3..=15]  remaining coefficients, lowest power of z first
static ERFC_COEFFS: [[u64; 16]; 2] = [
[
0x3fec162355429b28,
0x400d99999999999a,
0x3fdda951cece2b85,
0xbff70ef6cff4bcc4,
0x4003d7f7b3d617de,
0xc009d0aa47537c51,
0x4009754ea9a3fcb1,
0xc0027a5453fcc015,
0x3ff1ef2e0531aeba,
0xbfceca090f5a1c06,
0xbfb7a3cd173a063c,
0x3fb30fa68a68fddd,
0x3f555ad9a326993a,
0xbf907e7b0bb39fbf,
0x3f52328706c0e950,
0x3f6d6aa0b7b19cfe,
],
[
0x401137c8983f8516,
0x400799999999999a,
0x3fc05b53aa241333,
0xbfca3f53872bf870,
0x3fbde4c30742c9d5,
0xbfacb24bfa591986,
0x3f9666aec059ca5f,
0xbf7a61250eb26b0b,
0x3f52b28b7924b34d,
0x3f041b13a9d45013,
0xbf16dd5e8a273613,
0x3ef09ce8ea5e8da5,
0x3ed33923b4102981,
0xbec1dfd161e3f984,
0xbe8c87618fcae3b3,
0x3e8e8a6ffa0ba2c7,
],
];
/// Complementary error function
///
/// Max ULP 0.5
///
/// Special values: NaN propagates (`x + x`), `+inf` gives `0` and `-inf`
/// gives `2`. Inputs outside `[-0x1.ea8f94p+1, 0x1.41bbf8p+3)` return a
/// saturated result built from a tiny perturbation of `2` or `0`. For
/// `|x| <= 0x1.7p-4` an odd polynomial is subtracted from 1. Everything else
/// is computed in `f64` as `exp(-x^2)` times a series in a transformed
/// variable, and reflected with `2 - erfc(|x|)` for negative `x`.
pub fn f_erfcf(x: f32) -> f32 {
let ax = f32::from_bits(x.to_bits() & 0x7fff_ffff);
let axd = ax as f64;
let x2 = axd * axd;
// `t` is the raw bit pattern, `at` its magnitude part and `sgn` the sign bit.
let t = x.to_bits();
let at = t & 0x7fff_ffff;
let sgn = t >> 31;
// Selects the row of ERFC_COEFFS used on the main path.
let i: i64 = (at > 0x40051000) as i64;
/* for x < -0x1.ea8f94p+1, erfc(x) rounds to 2 (to nearest) */
if t > 0xc07547cau32 {
// x < -0x1.ea8f94p+1
if t >= 0xff800000u32 {
// -Inf or NaN
if t == 0xff800000u32 {
return 2.0;
} // -Inf
return x + x; // NaN
}
// `black_box` keeps the subtraction from being folded at compile time.
return black_box(2.0) - black_box(f32::from_bits(0x33000000)); // rounds to 2 or nextbelow(2)
}
/* at is the absolute value of x
for x >= 0x1.41bbf8p+3, erfc(x) < 2^-150, thus rounds to 0 or to 2^-149
depending on the rounding mode */
if at >= 0x4120ddfcu32 {
// |x| >= 0x1.41bbf8p+3
if at >= 0x7f800000u32 {
// +Inf or NaN
if at == 0x7f800000u32 {
return 0.0;
} // +Inf
return x + x; // NaN
}
// 0x1p-149f * 0.25f rounds to 0 or 2^-149 depending on rounding
return black_box(f32::from_bits(0x00000001)) * black_box(0.25);
}
if at <= 0x3db80000u32 {
// |x| <= 0x1.7p-4
if t == 0xb76c9f62u32 {
// x = -0x1.d93ec4p-17
return black_box(f32::from_bits(0x3f800085)) + black_box(f32::from_bits(0x33000000)); // exceptional case
}
/* for |x| <= 0x1.c5bf88p-26. erfc(x) rounds to 1 (to nearest) */
if at <= 0x32e2dfc4u32 {
// |x| <= 0x1.c5bf88p-26
if at == 0 {
return 1.0;
}
// Tiny offset indexed by sign: negative for x > 0, positive for x < 0.
static D: [f32; 2] = [f32::from_bits(0xb2800000), f32::from_bits(0x33000000)];
return 1.0 + D[sgn as usize];
}
/* around 0, erfc(x) behaves as 1 - (odd polynomial) */
// Coefficients of x, x^3, x^5, x^7, x^9 as f64 bit patterns.
const C: [u64; 5] = [
0x3ff20dd750429b6d,
0xbfd812746b03610b,
0x3fbce2f218831d2f,
0xbf9b82c609607dcb,
0x3f7553af09b8008e,
];
// Horner evaluation in x^2, then multiplied by the signed x.
let fw0 = f_fmla(x2, f64::from_bits(C[4]), f64::from_bits(C[3]));
let fw1 = f_fmla(x2, fw0, f64::from_bits(C[2]));
let fw2 = f_fmla(x2, fw1, f64::from_bits(C[1]));
let f0 = x as f64 * f_fmla(x2, fw2, f64::from_bits(C[0]));
return (1.0 - f0) as f32;
}
/* now -0x1.ea8f94p+1 <= x <= 0x1.41bbf8p+3, with |x| > 0x1.7p-4 */
// ILN2 ~ 1/ln(2); LN2H + LN2L ~ ln(2)/128 split into high and low parts.
const ILN2: f64 = f64::from_bits(0x3ff71547652b82fe);
const LN2H: f64 = f64::from_bits(0x3f762e42fefa0000);
const LN2L: f64 = f64::from_bits(0x3d0cf79abd6f5dc8);
// NOTE(review): the constant offset appears to be chosen so that `j` can be
// read from the mantissa bits of `jt` as a (negative) integer close to
// -128 * x^2 / ln(2) — confirm against the reference implementation.
let jt = dd_fmla(x2, ILN2, -(1024. + f64::from_bits(0x3f70000000000000))).to_bits();
let j: i64 = ((jt << 12) as i64) >> 48;
// `sf` is the bit pattern of a power of two with exponent `j >> 7`; the sign
// bit is set when x is negative so the final product is subtracted from 2.
let sf = ((j >> 7) as u64)
.wrapping_add(0x3ffu64 | (sgn as u64) << 11)
.wrapping_shl(52);
// Coefficients close to -1/2, 1/6, -1/24 and 1/120.
const CH: [u64; 4] = [
0xbfdffffffffff333,
0x3fc5555555556a14,
0xbfa55556666659b4,
0x3f81111074cc7b22,
];
// Reduced argument: d = x^2 + j * ln(2)/128.
let d = f_fmla(LN2L, j as f64, f_fmla(LN2H, j as f64, x2));
let d2 = d * d;
// Table factor selected by the low seven bits of `j`.
let e0 = f64::from_bits(ERR0[(j & 127) as usize]);
// f = d + d^2 * (CH[0] + CH[1]*d + d^2 * (CH[2] + CH[3]*d)), so that
// e0 - f * e0 below is approximately e0 * exp(-d).
let fw0 = f_fmla(d, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
let fw1 = f_fmla(d, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
let fw2 = f_fmla(d2, fw0, fw1);
let f = f_fmla(d2, fw2, d);
// Series factor: change of variable z = (|x| - ct[0]) / (|x| + ct[1]),
// then s = ct[2] + z * (polynomial in z with coefficients ct[3..]).
let ct = ERFC_COEFFS[i as usize];
let z = (axd - f64::from_bits(ct[0])) / (axd + f64::from_bits(ct[1]));
let z2 = z * z;
let z4 = z2 * z2;
let z8 = z4 * z4;
let c = &ct[3..];
let sw0 = f_fmla(z, f64::from_bits(c[1]), f64::from_bits(c[0]));
let sw1 = f_fmla(z, f64::from_bits(c[3]), f64::from_bits(c[2]));
let sw2 = f_fmla(z, f64::from_bits(c[5]), f64::from_bits(c[4]));
let sw3 = f_fmla(z, f64::from_bits(c[7]), f64::from_bits(c[6]));
let zw0 = f_fmla(z2, sw1, sw0);
let zw1 = f_fmla(z2, sw3, sw2);
let sw4 = f_fmla(z, f64::from_bits(c[9]), f64::from_bits(c[8]));
let sw5 = f_fmla(z, f64::from_bits(c[11]), f64::from_bits(c[10]));
let zw2 = f_fmla(z4, zw1, zw0);
let zw3 = f_fmla(z2, sw5, sw4);
let zw4 = f_fmla(z4, f64::from_bits(c[12]), zw3);
let mut s = f_fmla(z8, zw4, zw2);
s = f_fmla(z, s, f64::from_bits(ct[2]));
// Offset added to the signed product: 0 for x >= 0, 2 for x < 0.
static OFF: [f64; 2] = [0., 2.];
let r = (f64::from_bits(sf) * f_fmla(-f, e0, e0)) * s;
let y = OFF[sgn as usize] + r;
y as f32
}
#[cfg(test)]
mod tests {
    use crate::f_erfcf;

    /// Checks `f_erfcf` against reference values in a fixed order.
    #[test]
    fn test_erfc() {
        // `None` marks an input whose result must be NaN.
        let cases: [(f32, Option<f32>); 6] = [
            (0.0, Some(1.0)),
            (0.5, Some(0.47950011)),
            (1.0, Some(0.1572992)),
            (f32::NAN, None),
            (f32::INFINITY, Some(0.0)),
            (f32::NEG_INFINITY, Some(2.0)),
        ];
        for (input, expected) in cases {
            let actual = f_erfcf(input);
            match expected {
                Some(value) => assert_eq!(actual, value),
                None => assert!(actual.is_nan()),
            }
        }
    }
}

692
vendor/pxfm/src/err/inverf.rs vendored Normal file
View File

@@ -0,0 +1,692 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::logs::fast_log_dd;
use crate::polyeval::{f_polyeval4, f_polyeval5};
#[cold]
fn inverf_0p06_to_0p75(x: f64) -> f64 {
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
const P: [(u64, u64); 10] = [
(0xbc3e06eda42202a0, 0x3f93c2fc5d00e0c8),
(0xbc6eb374406b33b4, 0xbfc76fcfd022e3ff),
(0xbc857822d7ffd282, 0x3fe6f8443546010a),
(0x3c68269c66dfb28a, 0xbff80996754ceb79),
(0x3c543dce8990a9f9, 0x3ffcf778d5ef0504),
(0xbc72fc55f73765f6, 0xbff433be821423d0),
(0xbc66d05fb37c8592, 0x3fdf15f19e9d8da4),
(0x3c56dfb85e83a2c5, 0xbfb770b6827e0829),
(0x3bff1472ecdfa403, 0x3f7a98a2980282bb),
(0x3baffb33d69d6276, 0xbf142a246fd2c07c),
];
let x2 = DoubleDouble::from_exact_mult(x, x);
let vz = DoubleDouble::full_add_f64(x2, -0.5625);
let vx2 = vz * vz;
let vx4 = vx2 * vx2;
let vx8 = vx4 * vx4;
let p0 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let p1 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let p2 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let p3 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let p4 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let q0 = DoubleDouble::mul_add(vx2, p1, p0);
let q1 = DoubleDouble::mul_add(vx2, p3, p2);
let r0 = DoubleDouble::mul_add(vx4, q1, q0);
let num = DoubleDouble::mul_add(vx8, p4, r0);
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
const Q: [(u64, u64); 10] = [
(0xbc36337f24e57cb9, 0x3f92388d5d757e3a),
(0xbc63dfae43d60e0b, 0xbfc6ca7da581358c),
(0xbc77656389bd0e62, 0x3fe7c82ce417b4e0),
(0xbc93679667bef2f0, 0xbffad58651fd1a51),
(0x3ca2c6cb9eb17fb4, 0x4001bdb67e93a242),
(0xbc9b58961ba253bc, 0xbffbdaeff6fbb81c),
(0x3c7861f549c6aa61, 0x3fe91b12cf47da3a),
(0xbc696dfd665b2f5e, 0xbfc7c5d0ffb7f1da),
(0x3c1552b0ec0ba7b3, 0x3f939ada247f7609),
(0xbbcaa226fb7b30a8, 0xbf41be65038ccfe6),
];
let p0 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[1]),
DoubleDouble::from_bit_pair(Q[0]),
);
let p1 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let p2 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let p3 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let p4 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let q0 = DoubleDouble::mul_add(vx2, p1, p0);
let q1 = DoubleDouble::mul_add(vx2, p3, p2);
let r0 = DoubleDouble::mul_add(vx4, q1, q0);
let den = DoubleDouble::mul_add(vx8, p4, r0);
let r = DoubleDouble::div(num, den);
let k = DoubleDouble::quick_mult_f64(r, x);
k.to_f64()
}
#[inline]
fn inverf_asympt_small(z: DoubleDouble, zeta_sqrt: DoubleDouble, x: f64) -> f64 {
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
// {err0, approx,err1}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},10,10},WorkingPrecision->90]
// num=Numerator[approx];
// den=Denominator[approx];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const P: [(u64, u64); 11] = [
(0x3c936555853a8b2c, 0x3ff0001df06a2515),
(0x3cea488e802db3c3, 0x404406ba373221da),
(0xbce27d42419754e3, 0x407b0442e38a9597),
(0xbd224a407624cbdf, 0x409c9277e31ef446),
(0x3d4f16ce65d6fea0, 0x40aec3ec005b1d8a),
(0x3d105bc37bc61b58, 0x40b46be8f860f4d9),
(0x3d5ca133dcdecaa0, 0x40b3826e6a32dad7),
(0x3d1d52013ba8aa38, 0x40aae93a603cf3ea),
(0xbd07a75306df0fc3, 0x4098ab8357dc2e51),
(0x3d1bb6770bb7a27e, 0x407ebead00879010),
(0xbbfcbff4a9737936, 0x3f8936117ccbff83),
];
let z2 = DoubleDouble::quick_mult(z, z);
let z4 = DoubleDouble::quick_mult(z2, z2);
let z8 = DoubleDouble::quick_mult(z4, z4);
let q0 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[1]),
z,
DoubleDouble::from_bit_pair(P[0]),
);
let q1 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[3]),
z,
DoubleDouble::from_bit_pair(P[2]),
);
let q2 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[5]),
z,
DoubleDouble::from_bit_pair(P[4]),
);
let q3 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[7]),
z,
DoubleDouble::from_bit_pair(P[6]),
);
let q4 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[9]),
z,
DoubleDouble::from_bit_pair(P[8]),
);
let r0 = DoubleDouble::mul_add(z2, q1, q0);
let r1 = DoubleDouble::mul_add(z2, q3, q2);
let s0 = DoubleDouble::mul_add(z4, r1, r0);
let s1 = DoubleDouble::mul_add(z2, DoubleDouble::from_bit_pair(P[10]), q4);
let num = DoubleDouble::mul_add(z8, s1, s0);
// See numerator generation above:
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const Q: [(u64, u64); 11] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc75b1109d4a3262, 0x40440782efaab17f),
(0x3d1f7775b207d84f, 0x407b2da74b0d39f2),
(0xbd3291fdbab49501, 0x409dac8d9e7c90b2),
(0xbd58d8fdd27707a9, 0x40b178dfeffa3192),
(0xbd57fc74ad705ce0, 0x40bad19b686f219f),
(0x3d4075510031f2cd, 0x40be70a598208cea),
(0xbd5442e109152efb, 0x40b9683ef36ae330),
(0x3d5398192933962e, 0x40b04b7c4c3ca8ee),
(0x3d2d04d03598e303, 0x409bd0080799fbf1),
(0x3d2a988eb552ef44, 0x40815a46f12bafe3),
];
let q0 = DoubleDouble::mul_add_f64(
DoubleDouble::from_bit_pair(Q[1]),
z,
f64::from_bits(0x3ff0000000000000),
);
let q1 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[3]),
z,
DoubleDouble::from_bit_pair(Q[2]),
);
let q2 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[5]),
z,
DoubleDouble::from_bit_pair(Q[4]),
);
let q3 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[7]),
z,
DoubleDouble::from_bit_pair(Q[6]),
);
let q4 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[9]),
z,
DoubleDouble::from_bit_pair(Q[8]),
);
let r0 = DoubleDouble::mul_add(z2, q1, q0);
let r1 = DoubleDouble::mul_add(z2, q3, q2);
let s0 = DoubleDouble::mul_add(z4, r1, r0);
let s1 = DoubleDouble::mul_add(z2, DoubleDouble::from_bit_pair(Q[10]), q4);
let den = DoubleDouble::mul_add(z8, s1, s0);
let r = DoubleDouble::div(num, den);
let k = DoubleDouble::quick_mult(r, zeta_sqrt);
f64::copysign(k.to_f64(), x)
}
// branch for |x| > 0.9999 for extreme tail
#[cold]
fn inverf_asympt_long(z: DoubleDouble, zeta_sqrt: DoubleDouble, x: f64) -> f64 {
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
// {err0, approx}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},13,13},WorkingPrecision->90]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const P: [(u64, u64); 14] = [
(0x3c97612f9b24a614, 0x3ff0000ba84cc7a5),
(0xbcee8fe2da463412, 0x40515246546f5d88),
(0x3d2fa4a2b891b526, 0x40956b6837159b11),
(0x3d5d673ffad4f817, 0x40c5a1aa3be58652),
(0x3d8867a1e5506f88, 0x40e65ebb1e1e7c75),
(0xbd9bbc0764ed8f5b, 0x40fd2064a652e5c2),
(0xbda78e569c0d237f, 0x410a385c627c461c),
(0xbdab3123ebc465d7, 0x4110f05ca2b65fe5),
(0x3d960def35955192, 0x4110bb079af2fe08),
(0xbd97904816054836, 0x410911c24610c11c),
(0xbd937745e9192593, 0x40fc603244adca35),
(0xbd65fbc476d63050, 0x40e6399103188c21),
(0xbd61016ef381cce6, 0x40c6482b44995b89),
(0x3c326105c49e5a1a, 0xbfab44bd8b4e3138),
];
let z2 = z * z;
let z4 = z2 * z2;
let z8 = z4 * z4;
let g0 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let g1 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let g2 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let g3 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let g4 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let g5 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let g6 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[13]),
DoubleDouble::from_bit_pair(P[12]),
);
let h0 = DoubleDouble::mul_add(z2, g1, g0);
let h1 = DoubleDouble::mul_add(z2, g3, g2);
let h2 = DoubleDouble::mul_add(z2, g5, g4);
let q0 = DoubleDouble::mul_add(z4, h1, h0);
let q1 = DoubleDouble::mul_add(z4, g6, h2);
let num = DoubleDouble::mul_add(z8, q1, q0);
// See numerator generation above:
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const Q: [(u64, u64); 14] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcfc7b886ee61417, 0x405152838f711f3c),
(0xbd33f933c14e831a, 0x409576cb78cab36e),
(0x3d33fb09e2c4898a, 0x40c5e8a2c7602ced),
(0x3d7be430c664bf7e, 0x40e766fdc8c7638c),
(0x3dac662e74cdfc0e, 0x4100276b5f47b5f1),
(0x3da67d06e82a8495, 0x410f843887f8a24a),
(0x3dbbf2e22fc2550a, 0x4116d04271703e08),
(0xbdb2fb3aed100853, 0x4119aff4ed32b74b),
(0x3dba75e7b7171c3c, 0x4116b5eb8bf386bd),
(0x3dab2d8b8c1937eb, 0x410f71c38e84cb34),
(0xbda4e2e8a50b7370, 0x4100ca04b0f36b94),
(0xbd86ed6df34fdaf9, 0x40e9151ded4cf4b7),
(0x3d6938ea702c0328, 0x40c923ee1ab270c4),
];
let g0 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[1]),
DoubleDouble::from_bit_pair(Q[0]),
);
let g1 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let g2 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let g3 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let g4 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let g5 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let g6 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[13]),
DoubleDouble::from_bit_pair(Q[12]),
);
let h0 = DoubleDouble::mul_add(z2, g1, g0);
let h1 = DoubleDouble::mul_add(z2, g3, g2);
let h2 = DoubleDouble::mul_add(z2, g5, g4);
let q0 = DoubleDouble::mul_add(z4, h1, h0);
let q1 = DoubleDouble::mul_add(z4, g6, h2);
let den = DoubleDouble::mul_add(z8, q1, q0);
let r = DoubleDouble::div(num, den);
let k = DoubleDouble::quick_mult(r, zeta_sqrt);
f64::copysign(k.to_f64(), x)
}
/// Inverse error function
///
/// ulp 0.5
pub fn f_erfinv(x: f64) -> f64 {
let ax = x.to_bits() & 0x7fff_ffff_ffff_ffff;
if ax >= 0x3ff0000000000000u64 || ax <= 0x3cb0000000000000u64 {
// |x| >= 1, |x| == 0, |x| <= f64::EPSILON
if ax == 0 {
// |x| == 0
return 0.;
}
if ax <= 0x3cb0000000000000u64 {
// |x| <= f64::EPSILON
// inverf(x) ~ Sqrt[Pi]x/2+O[x]^3
const SQRT_PI_OVER_2: f64 = f64::from_bits(0x3fec5bf891b4ef6b);
return x * SQRT_PI_OVER_2;
}
// |x| > 1
if ax == 0x3ff0000000000000u64 {
// |x| == 1
return if x.is_sign_negative() {
f64::NEG_INFINITY
} else {
f64::INFINITY
};
}
return f64::NAN; // x == NaN, x = Inf, x > 1
}
let z = f64::from_bits(ax);
if ax <= 0x3f8374bc6a7ef9db {
// 0.0095
// for small |x| using taylor series first 3 terms
// Generated by SageMath:
// from mpmath import mp, erf
//
// mp.prec = 100
//
// def inverf_series(n_terms):
// from mpmath import taylor
// series_erf = taylor(mp.erfinv, 0, n_terms)
// return series_erf
//
// ser = inverf_series(10)
// for i in range(1, len(ser), 2):
// k = ser[i]
// print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
let z2 = DoubleDouble::from_exact_mult(z, z);
let p = f_fmla(
z2.hi,
f64::from_bits(0x3fb62847c47dda48),
f64::from_bits(0x3fc053c2c0ab91c5),
);
let mut r = DoubleDouble::mul_f64_add(
z2,
p,
DoubleDouble::from_bit_pair((0xbc33ea2ef8dde075, 0x3fcdb29fb2fee5e4)),
);
r = DoubleDouble::mul_add(
z2,
r,
DoubleDouble::from_bit_pair((0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b)),
);
// (rh + rl) * z = rh * z + rl*z
let v = DoubleDouble::quick_mult_f64(r, z);
return f64::copysign(v.to_f64(), x);
} else if ax <= 0x3faeb851eb851eb8 {
// 0.06
// for |x| < 0.06 using taylor series first 5 terms
// Generated by SageMath:
// from mpmath import mp, erf
//
// mp.prec = 100
//
// def inverf_series(n_terms):
// from mpmath import taylor
// series_erf = taylor(mp.erfinv, 0, n_terms)
// return series_erf
//
// ser = inverf_series(10)
// for i in range(1, len(ser), 2):
// k = ser[i]
// print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
let z2 = DoubleDouble::from_exact_mult(z, z);
let p = f_polyeval4(
z2.hi,
f64::from_bits(0x3fb62847c47dda48),
f64::from_bits(0x3fb0a13189c6ef7a),
f64::from_bits(0x3faa7c85c89bb08b),
f64::from_bits(0x3fa5eeb1d488e312),
);
let mut r = DoubleDouble::mul_f64_add(
z2,
p,
DoubleDouble::from_bit_pair((0x3c2cec68daff0d80, 0x3fc053c2c0ab91c5)),
);
r = DoubleDouble::mul_add(
z2,
r,
DoubleDouble::from_bit_pair((0xbc33ea2ef8dde075, 0x3fcdb29fb2fee5e4)),
);
r = DoubleDouble::mul_add(
z2,
r,
DoubleDouble::from_bit_pair((0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b)),
);
// (rh + rl) * z = rh * z + rl*z
let v = DoubleDouble::quick_mult_f64(r, z);
return f64::copysign(v.to_f64(), x);
}
if ax <= 0x3fe8000000000000u64 {
// |x| < 0.75
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
const P: [(u64, u64); 5] = [
(0xbc3e06eda42202a0, 0x3f93c2fc5d00e0c8),
(0xbc6eb374406b33b4, 0xbfc76fcfd022e3ff),
(0xbc857822d7ffd282, 0x3fe6f8443546010a),
(0x3c68269c66dfb28a, 0xbff80996754ceb79),
(0x3c543dce8990a9f9, 0x3ffcf778d5ef0504),
];
let x2 = DoubleDouble::from_exact_mult(x, x);
let vz = DoubleDouble::full_add_f64(x2, -0.5625);
let ps_num = f_polyeval5(
vz.hi,
f64::from_bits(0xbff433be821423d0),
f64::from_bits(0x3fdf15f19e9d8da4),
f64::from_bits(0xbfb770b6827e0829),
f64::from_bits(0x3f7a98a2980282bb),
f64::from_bits(0xbf142a246fd2c07c),
);
let mut num = DoubleDouble::mul_f64_add(vz, ps_num, DoubleDouble::from_bit_pair(P[4]));
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[3]));
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[2]));
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[1]));
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[0]));
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
const Q: [(u64, u64); 5] = [
(0xbc36337f24e57cb9, 0x3f92388d5d757e3a),
(0xbc63dfae43d60e0b, 0xbfc6ca7da581358c),
(0xbc77656389bd0e62, 0x3fe7c82ce417b4e0),
(0xbc93679667bef2f0, 0xbffad58651fd1a51),
(0x3ca2c6cb9eb17fb4, 0x4001bdb67e93a242),
];
let ps_den = f_polyeval5(
vz.hi,
f64::from_bits(0xbffbdaeff6fbb81c),
f64::from_bits(0x3fe91b12cf47da3a),
f64::from_bits(0xbfc7c5d0ffb7f1da),
f64::from_bits(0x3f939ada247f7609),
f64::from_bits(0xbf41be65038ccfe6),
);
let mut den = DoubleDouble::mul_f64_add(vz, ps_den, DoubleDouble::from_bit_pair(Q[4]));
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[3]));
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[2]));
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[1]));
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[0]));
let r = DoubleDouble::div(num, den);
let k = DoubleDouble::quick_mult_f64(r, z);
let err = f_fmla(
k.hi,
f64::from_bits(0x3c70000000000000), // 2^-56
f64::from_bits(0x3c40000000000000), // 2^-59
);
let ub = k.hi + (k.lo + err);
let lb = k.hi + (k.lo - err);
if ub == lb {
return f64::copysign(k.to_f64(), x);
}
return inverf_0p06_to_0p75(x);
}
let q = DoubleDouble::from_full_exact_add(1.0, -z);
let mut zeta = fast_log_dd(q);
zeta = DoubleDouble::from_exact_add(zeta.hi, zeta.lo);
zeta = -zeta;
let zeta_sqrt = zeta.fast_sqrt();
let rz = zeta_sqrt.recip();
if z < 0.9999 {
inverf_asympt_small(rz, zeta_sqrt, x)
} else {
inverf_asympt_long(rz, zeta_sqrt, x)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_erfinv` on out-of-domain inputs, on `f64::EPSILON`, and on a
    /// table of reference points that is verified for both signs.
    #[test]
    fn test_erfinv() {
        // Infinities and NaN are outside the domain.
        for outside in [f64::NEG_INFINITY, f64::INFINITY, f64::NAN] {
            assert!(f_erfinv(outside).is_nan());
        }

        assert_eq!(f_erfinv(f64::EPSILON), 1.9678190753608283e-16);

        // (argument, expected value); the mirrored negative pair is checked too.
        const REFERENCE: [(f64, f64); 10] = [
            (0.5435340000000265, 0.5265673336010599),
            (0.001000000000084706, 0.0008862271575416209),
            (0.71, 0.7482049711849852),
            (0.41, 0.381014610957532),
            (0.32, 0.29165547581744206),
            (0.82, 0.9480569762323499),
            (0.05, 0.044340387910005497),
            (0.99, 1.8213863677184494),
            (0.9900000000867389, 1.8213863698392927),
            (0.99999, 3.123413274341571),
        ];
        for (arg, expected) in REFERENCE {
            assert_eq!(f_erfinv(arg), expected);
            assert_eq!(f_erfinv(-arg), -expected);
        }
    }
}

704
vendor/pxfm/src/err/inverfc.rs vendored Normal file
View File

@@ -0,0 +1,704 @@
/*
* // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::logs::{fast_log_d_to_dd, fast_log_dd};
use crate::polyeval::{f_polyeval4, f_polyeval5};
#[cold]
fn inverf_0p06_to_0p75(x: DoubleDouble) -> DoubleDouble {
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
const P: [(u64, u64); 10] = [
(0xbc3e06eda42202a0, 0x3f93c2fc5d00e0c8),
(0xbc6eb374406b33b4, 0xbfc76fcfd022e3ff),
(0xbc857822d7ffd282, 0x3fe6f8443546010a),
(0x3c68269c66dfb28a, 0xbff80996754ceb79),
(0x3c543dce8990a9f9, 0x3ffcf778d5ef0504),
(0xbc72fc55f73765f6, 0xbff433be821423d0),
(0xbc66d05fb37c8592, 0x3fdf15f19e9d8da4),
(0x3c56dfb85e83a2c5, 0xbfb770b6827e0829),
(0x3bff1472ecdfa403, 0x3f7a98a2980282bb),
(0x3baffb33d69d6276, 0xbf142a246fd2c07c),
];
let x2 = DoubleDouble::quick_mult(x, x);
let vz = DoubleDouble::full_add_f64(x2, -0.5625);
let vx2 = vz * vz;
let vx4 = vx2 * vx2;
let vx8 = vx4 * vx4;
let p0 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let p1 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let p2 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let p3 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let p4 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let q0 = DoubleDouble::mul_add(vx2, p1, p0);
let q1 = DoubleDouble::mul_add(vx2, p3, p2);
let r0 = DoubleDouble::mul_add(vx4, q1, q0);
let num = DoubleDouble::mul_add(vx8, p4, r0);
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
const Q: [(u64, u64); 10] = [
(0xbc36337f24e57cb9, 0x3f92388d5d757e3a),
(0xbc63dfae43d60e0b, 0xbfc6ca7da581358c),
(0xbc77656389bd0e62, 0x3fe7c82ce417b4e0),
(0xbc93679667bef2f0, 0xbffad58651fd1a51),
(0x3ca2c6cb9eb17fb4, 0x4001bdb67e93a242),
(0xbc9b58961ba253bc, 0xbffbdaeff6fbb81c),
(0x3c7861f549c6aa61, 0x3fe91b12cf47da3a),
(0xbc696dfd665b2f5e, 0xbfc7c5d0ffb7f1da),
(0x3c1552b0ec0ba7b3, 0x3f939ada247f7609),
(0xbbcaa226fb7b30a8, 0xbf41be65038ccfe6),
];
let p0 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[1]),
DoubleDouble::from_bit_pair(Q[0]),
);
let p1 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let p2 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let p3 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let p4 = DoubleDouble::mul_add(
vz,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let q0 = DoubleDouble::mul_add(vx2, p1, p0);
let q1 = DoubleDouble::mul_add(vx2, p3, p2);
let r0 = DoubleDouble::mul_add(vx4, q1, q0);
let den = DoubleDouble::mul_add(vx8, p4, r0);
let r = DoubleDouble::div(num, den);
DoubleDouble::quick_mult(r, x)
}
#[inline]
fn inverf_asympt_small(z: DoubleDouble, zeta_sqrt: DoubleDouble) -> DoubleDouble {
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
// {err0, approx,err1}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},10,10},WorkingPrecision->90]
// num=Numerator[approx];
// den=Denominator[approx];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const P: [(u64, u64); 11] = [
(0x3c936555853a8b2c, 0x3ff0001df06a2515),
(0x3cea488e802db3c3, 0x404406ba373221da),
(0xbce27d42419754e3, 0x407b0442e38a9597),
(0xbd224a407624cbdf, 0x409c9277e31ef446),
(0x3d4f16ce65d6fea0, 0x40aec3ec005b1d8a),
(0x3d105bc37bc61b58, 0x40b46be8f860f4d9),
(0x3d5ca133dcdecaa0, 0x40b3826e6a32dad7),
(0x3d1d52013ba8aa38, 0x40aae93a603cf3ea),
(0xbd07a75306df0fc3, 0x4098ab8357dc2e51),
(0x3d1bb6770bb7a27e, 0x407ebead00879010),
(0xbbfcbff4a9737936, 0x3f8936117ccbff83),
];
let z2 = DoubleDouble::quick_mult(z, z);
let z4 = DoubleDouble::quick_mult(z2, z2);
let z8 = DoubleDouble::quick_mult(z4, z4);
let q0 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[1]),
z,
DoubleDouble::from_bit_pair(P[0]),
);
let q1 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[3]),
z,
DoubleDouble::from_bit_pair(P[2]),
);
let q2 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[5]),
z,
DoubleDouble::from_bit_pair(P[4]),
);
let q3 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[7]),
z,
DoubleDouble::from_bit_pair(P[6]),
);
let q4 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(P[9]),
z,
DoubleDouble::from_bit_pair(P[8]),
);
let r0 = DoubleDouble::mul_add(z2, q1, q0);
let r1 = DoubleDouble::mul_add(z2, q3, q2);
let s0 = DoubleDouble::mul_add(z4, r1, r0);
let s1 = DoubleDouble::mul_add(z2, DoubleDouble::from_bit_pair(P[10]), q4);
let num = DoubleDouble::mul_add(z8, s1, s0);
// See numerator generation above:
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const Q: [(u64, u64); 11] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc75b1109d4a3262, 0x40440782efaab17f),
(0x3d1f7775b207d84f, 0x407b2da74b0d39f2),
(0xbd3291fdbab49501, 0x409dac8d9e7c90b2),
(0xbd58d8fdd27707a9, 0x40b178dfeffa3192),
(0xbd57fc74ad705ce0, 0x40bad19b686f219f),
(0x3d4075510031f2cd, 0x40be70a598208cea),
(0xbd5442e109152efb, 0x40b9683ef36ae330),
(0x3d5398192933962e, 0x40b04b7c4c3ca8ee),
(0x3d2d04d03598e303, 0x409bd0080799fbf1),
(0x3d2a988eb552ef44, 0x40815a46f12bafe3),
];
let q0 = DoubleDouble::mul_add_f64(
DoubleDouble::from_bit_pair(Q[1]),
z,
f64::from_bits(0x3ff0000000000000),
);
let q1 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[3]),
z,
DoubleDouble::from_bit_pair(Q[2]),
);
let q2 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[5]),
z,
DoubleDouble::from_bit_pair(Q[4]),
);
let q3 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[7]),
z,
DoubleDouble::from_bit_pair(Q[6]),
);
let q4 = DoubleDouble::mul_add(
DoubleDouble::from_bit_pair(Q[9]),
z,
DoubleDouble::from_bit_pair(Q[8]),
);
let r0 = DoubleDouble::mul_add(z2, q1, q0);
let r1 = DoubleDouble::mul_add(z2, q3, q2);
let s0 = DoubleDouble::mul_add(z4, r1, r0);
let s1 = DoubleDouble::mul_add(z2, DoubleDouble::from_bit_pair(Q[10]), q4);
let den = DoubleDouble::mul_add(z8, s1, s0);
let r = DoubleDouble::div(num, den);
DoubleDouble::quick_mult(r, zeta_sqrt)
}
// branch for |x| > 0.9999 for extreme tail
#[cold]
fn inverf_asympt_long(z: DoubleDouble, zeta_sqrt: DoubleDouble) -> DoubleDouble {
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
// {err0, approx}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},13,13},WorkingPrecision->90]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const P: [(u64, u64); 14] = [
(0x3c97612f9b24a614, 0x3ff0000ba84cc7a5),
(0xbcee8fe2da463412, 0x40515246546f5d88),
(0x3d2fa4a2b891b526, 0x40956b6837159b11),
(0x3d5d673ffad4f817, 0x40c5a1aa3be58652),
(0x3d8867a1e5506f88, 0x40e65ebb1e1e7c75),
(0xbd9bbc0764ed8f5b, 0x40fd2064a652e5c2),
(0xbda78e569c0d237f, 0x410a385c627c461c),
(0xbdab3123ebc465d7, 0x4110f05ca2b65fe5),
(0x3d960def35955192, 0x4110bb079af2fe08),
(0xbd97904816054836, 0x410911c24610c11c),
(0xbd937745e9192593, 0x40fc603244adca35),
(0xbd65fbc476d63050, 0x40e6399103188c21),
(0xbd61016ef381cce6, 0x40c6482b44995b89),
(0x3c326105c49e5a1a, 0xbfab44bd8b4e3138),
];
let z2 = z * z;
let z4 = z2 * z2;
let z8 = z4 * z4;
let g0 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[1]),
DoubleDouble::from_bit_pair(P[0]),
);
let g1 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[3]),
DoubleDouble::from_bit_pair(P[2]),
);
let g2 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[5]),
DoubleDouble::from_bit_pair(P[4]),
);
let g3 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[7]),
DoubleDouble::from_bit_pair(P[6]),
);
let g4 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[9]),
DoubleDouble::from_bit_pair(P[8]),
);
let g5 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[11]),
DoubleDouble::from_bit_pair(P[10]),
);
let g6 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(P[13]),
DoubleDouble::from_bit_pair(P[12]),
);
let h0 = DoubleDouble::mul_add(z2, g1, g0);
let h1 = DoubleDouble::mul_add(z2, g3, g2);
let h2 = DoubleDouble::mul_add(z2, g5, g4);
let q0 = DoubleDouble::mul_add(z4, h1, h0);
let q1 = DoubleDouble::mul_add(z4, g6, h2);
let num = DoubleDouble::mul_add(z8, q1, q0);
// See numerator generation above:
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
const Q: [(u64, u64); 14] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbcfc7b886ee61417, 0x405152838f711f3c),
(0xbd33f933c14e831a, 0x409576cb78cab36e),
(0x3d33fb09e2c4898a, 0x40c5e8a2c7602ced),
(0x3d7be430c664bf7e, 0x40e766fdc8c7638c),
(0x3dac662e74cdfc0e, 0x4100276b5f47b5f1),
(0x3da67d06e82a8495, 0x410f843887f8a24a),
(0x3dbbf2e22fc2550a, 0x4116d04271703e08),
(0xbdb2fb3aed100853, 0x4119aff4ed32b74b),
(0x3dba75e7b7171c3c, 0x4116b5eb8bf386bd),
(0x3dab2d8b8c1937eb, 0x410f71c38e84cb34),
(0xbda4e2e8a50b7370, 0x4100ca04b0f36b94),
(0xbd86ed6df34fdaf9, 0x40e9151ded4cf4b7),
(0x3d6938ea702c0328, 0x40c923ee1ab270c4),
];
let g0 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[1]),
DoubleDouble::from_bit_pair(Q[0]),
);
let g1 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[3]),
DoubleDouble::from_bit_pair(Q[2]),
);
let g2 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[5]),
DoubleDouble::from_bit_pair(Q[4]),
);
let g3 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[7]),
DoubleDouble::from_bit_pair(Q[6]),
);
let g4 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[9]),
DoubleDouble::from_bit_pair(Q[8]),
);
let g5 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[11]),
DoubleDouble::from_bit_pair(Q[10]),
);
let g6 = DoubleDouble::mul_add(
z,
DoubleDouble::from_bit_pair(Q[13]),
DoubleDouble::from_bit_pair(Q[12]),
);
let h0 = DoubleDouble::mul_add(z2, g1, g0);
let h1 = DoubleDouble::mul_add(z2, g3, g2);
let h2 = DoubleDouble::mul_add(z2, g5, g4);
let q0 = DoubleDouble::mul_add(z4, h1, h0);
let q1 = DoubleDouble::mul_add(z4, g6, h2);
let den = DoubleDouble::mul_add(z8, q1, q0);
let r = DoubleDouble::div(num, den);
DoubleDouble::quick_mult(r, zeta_sqrt)
}
/// Double-double core used by the complementary inverse error function in this file.
///
/// Despite the name, the approximations below are for the *inverse* error function
/// (the generation scripts use `erfinv` / `InverseErf`). The argument is split by `x.hi`:
/// * `x.hi <= 0.0095` and `x.hi <= 0.06`: odd Taylor polynomial in `x`;
/// * `x.hi <= 0.75`: rational approximant in `x^2 - 0.5625` with a rounding check and
///   a fallback to `inverf_0p06_to_0p75`;
/// * otherwise: tail formulas driven by `zeta = -log(1 - x)`.
///
/// `x` must be positive; callers are responsible for filtering everything else.
#[inline]
fn erf_core(x: DoubleDouble) -> DoubleDouble {
// x is always positive here; anything else must be filtered out before the call
if x.hi <= 0.0095 {
// 0.0095
// For small |x|: odd Taylor series of erfinv. Four coefficients in x^2 are used
// below (two in f64, two in double-double), i.e. the terms x, x^3, x^5, x^7.
// Generated by SageMath:
// from mpmath import mp, erf
//
// mp.prec = 100
//
// def inverf_series(n_terms):
//     from mpmath import taylor
//     series_erf = taylor(mp.erfinv, 0, n_terms)
//     return series_erf
//
// ser = inverf_series(10)
// for i in range(1, len(ser), 2):
//     k = ser[i]
//     print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
let z2 = DoubleDouble::quick_mult(x, x);
// Two highest-order coefficients are combined in plain f64 (only z2.hi is used).
let p = f_fmla(
z2.hi,
f64::from_bits(0x3fb62847c47dda48),
f64::from_bits(0x3fc053c2c0ab91c5),
);
// Remaining coefficients are added in double-double precision.
let mut r = DoubleDouble::mul_f64_add(
z2,
p,
DoubleDouble::from_bit_pair((0xbc33ea2ef8dde075, 0x3fcdb29fb2fee5e4)),
);
r = DoubleDouble::mul_add(
z2,
r,
DoubleDouble::from_bit_pair((0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b)),
);
// Final multiplication by x turns the even polynomial in x^2 into the odd series:
// (rh + rl) * z = rh * z + rl*z
let v = DoubleDouble::quick_mult(r, x);
return v;
} else if x.hi <= 0.06 {
// 0.06
// For |x| <= 0.06: same Taylor series with more terms. Seven coefficients in x^2
// are used below (four in f64 via f_polyeval4, three in double-double).
// Generated by SageMath:
// from mpmath import mp, erf
//
// mp.prec = 100
//
// def inverf_series(n_terms):
//     from mpmath import taylor
//     series_erf = taylor(mp.erfinv, 0, n_terms)
//     return series_erf
//
// ser = inverf_series(10)
// for i in range(1, len(ser), 2):
//     k = ser[i]
//     print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
let z2 = DoubleDouble::quick_mult(x, x);
// High-order part of the series in plain f64 (only z2.hi is used).
let p = f_polyeval4(
z2.hi,
f64::from_bits(0x3fb62847c47dda48),
f64::from_bits(0x3fb0a13189c6ef7a),
f64::from_bits(0x3faa7c85c89bb08b),
f64::from_bits(0x3fa5eeb1d488e312),
);
// Low-order coefficients in double-double precision.
let mut r = DoubleDouble::mul_f64_add(
z2,
p,
DoubleDouble::from_bit_pair((0x3c2cec68daff0d80, 0x3fc053c2c0ab91c5)),
);
r = DoubleDouble::mul_add(
z2,
r,
DoubleDouble::from_bit_pair((0xbc33ea2ef8dde075, 0x3fcdb29fb2fee5e4)),
);
r = DoubleDouble::mul_add(
z2,
r,
DoubleDouble::from_bit_pair((0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b)),
);
// (rh + rl) * z = rh * z + rl*z
let v = DoubleDouble::quick_mult(r, x);
return v;
}
if x.hi <= 0.75 {
// |x| <= 0.75
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
// Numerator coefficients are the expansion around x0 = 0.5625 (in the x^2 variable).
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
const P: [(u64, u64); 5] = [
(0xbc3e06eda42202a0, 0x3f93c2fc5d00e0c8),
(0xbc6eb374406b33b4, 0xbfc76fcfd022e3ff),
(0xbc857822d7ffd282, 0x3fe6f8443546010a),
(0x3c68269c66dfb28a, 0xbff80996754ceb79),
(0x3c543dce8990a9f9, 0x3ffcf778d5ef0504),
];
let x2 = DoubleDouble::quick_mult(x, x);
// Shifted variable of the expansion: vz = x^2 - 0.5625.
let vz = DoubleDouble::full_add_f64(x2, -0.5625);
// The five highest-order numerator coefficients are evaluated in plain f64 ...
let ps_num = f_polyeval5(
vz.hi,
f64::from_bits(0xbff433be821423d0),
f64::from_bits(0x3fdf15f19e9d8da4),
f64::from_bits(0xbfb770b6827e0829),
f64::from_bits(0x3f7a98a2980282bb),
f64::from_bits(0xbf142a246fd2c07c),
);
// ... and the five lowest-order ones (P[4] down to P[0]) in double-double Horner steps.
let mut num = DoubleDouble::mul_f64_add(vz, ps_num, DoubleDouble::from_bit_pair(P[4]));
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[3]));
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[2]));
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[1]));
num = DoubleDouble::mul_add(vz, num, DoubleDouble::from_bit_pair(P[0]));
// Denominator coefficients, expanded around the same point.
// Generated in Wolfram Mathematica:
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[x]/x
// g[x_] =f[Sqrt[x]];
// {err0,approx}=MiniMaxApproximation[g[z],{z,{0.06,0.75},9,9},WorkingPrecision->75, MaxIterations->100]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]];
const Q: [(u64, u64); 5] = [
(0xbc36337f24e57cb9, 0x3f92388d5d757e3a),
(0xbc63dfae43d60e0b, 0xbfc6ca7da581358c),
(0xbc77656389bd0e62, 0x3fe7c82ce417b4e0),
(0xbc93679667bef2f0, 0xbffad58651fd1a51),
(0x3ca2c6cb9eb17fb4, 0x4001bdb67e93a242),
];
// Same split as the numerator: high-order part in f64, low-order part in double-double.
let ps_den = f_polyeval5(
vz.hi,
f64::from_bits(0xbffbdaeff6fbb81c),
f64::from_bits(0x3fe91b12cf47da3a),
f64::from_bits(0xbfc7c5d0ffb7f1da),
f64::from_bits(0x3f939ada247f7609),
f64::from_bits(0xbf41be65038ccfe6),
);
let mut den = DoubleDouble::mul_f64_add(vz, ps_den, DoubleDouble::from_bit_pair(Q[4]));
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[3]));
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[2]));
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[1]));
den = DoubleDouble::mul_add(vz, den, DoubleDouble::from_bit_pair(Q[0]));
// The approximant models erfinv(x)/x, so the ratio is multiplied back by x.
let r = DoubleDouble::div(num, den);
let k = DoubleDouble::quick_mult(r, x);
// Rounding test: an error bound is built from k.hi and the two constants below
// (presumably k.hi * 2^-56 + 2^-59 — confirm against f_fmla). If adding and
// subtracting it collapses to the same f64, the fast result is accepted.
let err = f_fmla(
k.hi,
f64::from_bits(0x3c70000000000000), // 2^-56
f64::from_bits(0x3c40000000000000), // 2^-59
);
let ub = k.hi + (k.lo + err);
let lb = k.hi + (k.lo - err);
if ub == lb {
return k;
}
// Otherwise defer to the dedicated routine for this interval.
return inverf_0p06_to_0p75(x);
}
// Tail: q = 1 - x, zeta = -log(q).
let q = DoubleDouble::full_add_f64(-x, 1.0);
let mut zeta = fast_log_dd(q);
// Renormalize the (hi, lo) pair before negating and taking the square root.
zeta = DoubleDouble::from_exact_add(zeta.hi, zeta.lo);
zeta = -zeta;
let zeta_sqrt = zeta.fast_sqrt();
// The tail approximants take 1/sqrt(zeta) as their variable and sqrt(zeta) as a scale.
let rz = zeta_sqrt.recip();
if x.hi < 0.9999 {
inverf_asympt_small(rz, zeta_sqrt)
} else {
inverf_asympt_long(rz, zeta_sqrt)
}
}
/// Inverse complementary error function for small `x`, returned as a double-double.
///
/// The tail formulas need `-log(1 - (1 - x))`, which is just `-log(x)`, so the
/// logarithm is taken of `x` directly and no subtraction from 1 is performed.
#[cold]
fn inverfc_extra_small(x: f64) -> DoubleDouble {
    let log_x = fast_log_d_to_dd(x);
    // Renormalize the logarithm, then flip its sign to obtain zeta.
    let zeta = -DoubleDouble::from_exact_add(log_x.hi, log_x.lo);
    let zeta_sqrt = zeta.fast_sqrt();
    let inv_zeta_sqrt = zeta_sqrt.recip();
    // Inputs of at least 1e-4 use the shorter tail approximant, smaller ones the long one.
    if x >= 0.0001 {
        inverf_asympt_small(inv_zeta_sqrt, zeta_sqrt)
    } else {
        inverf_asympt_long(inv_zeta_sqrt, zeta_sqrt)
    }
}
/// Complementary inverse error function
///
/// Special values: `±0` gives `+inf`, `1` gives `0`, `2` gives `-inf`;
/// NaN, infinities, negative inputs and inputs above 2 give NaN.
pub fn f_erfcinv(x: f64) -> f64 {
    let bits = x.to_bits();
    // Bit patterns at or above that of 2.0 cover x >= 2, +inf, NaN and every
    // value with the sign bit set; an all-zero pattern is +0.
    if bits >= 0x4000000000000000u64 || bits == 0 {
        return if bits.wrapping_shl(1) == 0 {
            // ±0
            f64::INFINITY
        } else if bits == 0x4000000000000000u64 {
            // exactly 2
            f64::NEG_INFINITY
        } else {
            // NaN, ±inf, x < 0, x > 2
            f64::NAN
        };
    }
    if x == 1. {
        return 0.;
    }
    if x < 0.1 {
        return inverfc_extra_small(x).to_f64();
    }
    // erfcinv(x) = erfinv(1 - x). For x > 1 the argument is reflected to 2 - x and
    // the result negated, so the core routine only ever sees a non-negative value.
    let reflected = x > 1.;
    let folded = if reflected {
        DoubleDouble::from_full_exact_sub(2., x)
    } else {
        DoubleDouble::new(0., x)
    };
    let one_minus = DoubleDouble::full_add_f64(-folded, 1.);
    let core_arg = DoubleDouble::from_exact_add(one_minus.hi, one_minus.lo);
    let sign = if reflected { -1.0 } else { 1.0 };
    erf_core(core_arg).to_f64() * sign
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks reference values across the branches of `f_erfcinv` and the NaN cases.
    #[test]
    fn test_inverfc() {
        // (input, expected result)
        let exact: [(f64, f64); 7] = [
            (0.12, 1.0993909519492193),
            (1.0000000000027623e-13, 5.261512368864527),
            (1.0001200000182189, -0.00010634724760131264),
            (0.7001200000182189, 0.2723481758403576),
            (1.5231200000182189, -0.502985998867995),
            (1.99545434324323243, -2.0064739778442213),
            (1., 0.),
        ];
        for (input, expected) in exact {
            assert_eq!(f_erfcinv(input), expected);
        }
        // Inputs outside [0, 2] and non-finite inputs must produce NaN.
        let invalid: [f64; 5] = [2.05, -0.01, f64::NAN, f64::NEG_INFINITY, f64::INFINITY];
        for input in invalid {
            assert!(f_erfcinv(input).is_nan());
        }
    }
}

80
vendor/pxfm/src/err/inverfcf.rs vendored Normal file
View File

@@ -0,0 +1,80 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::err::inverff::erfinv_core;
/// Complementary inverse error function
///
/// Max ulp 0.5
///
/// Special values: `±0` gives `+inf`, `1` gives `0`, `2` gives `-inf`;
/// NaN, infinities, negative inputs and inputs above 2 give NaN.
pub fn f_erfcinvf(x: f32) -> f32 {
    let ix = x.to_bits();
    let ux = ix.wrapping_shl(1);
    // Bit patterns at or above that of 2.0 cover x >= 2, +inf, NaN and every value
    // with the sign bit set; `ux == 0` additionally catches +0.
    if ix >= 0x4000_0000u32 || ux == 0 {
        if ux == 0 {
            // x == ±0
            return f32::INFINITY;
        }
        if ix == 0x4000_0000u32 {
            // x == 2
            return f32::NEG_INFINITY;
        }
        // NaN, ±inf, x < 0, x > 2: outside the domain [0, 2].
        return f32::NAN;
    }
    if ix == 0x3f80_0000u32 {
        // x == 1
        return 0.;
    }
    // erfcinv(x) = erfinv(1 - x), and erfinv is odd, so the magnitude |1 - x| is
    // evaluated and the sign applied afterwards. Passing the magnitude matters:
    // the tail branch of `erfinv_core` takes sqrt(-log(1 - z)), which is only
    // defined for non-negative z.
    let sign: f32 = if x > 1. { -1.0 } else { 1.0 };
    let magnitude = (1. - x as f64).abs();
    // The bit pattern is only used for branch selection, so the f32 subtraction
    // is accurate enough.
    // NOTE(review): for x below 2^-54 the f64 subtraction rounds to exactly 1.0, so
    // the core evaluates the logarithm of zero; confirm the result for such inputs.
    erfinv_core(magnitude, (1. - x).abs().to_bits(), sign)
}
#[cfg(test)]
mod tests {
    use super::f_erfcinvf;

    /// Checks endpoint values, reference values and NaN cases of `f_erfcinvf`.
    #[test]
    fn m_test() {
        // (input, expected result)
        let exact: [(f32, f32); 6] = [
            (2., f32::NEG_INFINITY),
            (0., f32::INFINITY),
            (0.5, 0.47693628),
            (1.5, -0.47693628),
            (0.002, 2.1851242),
            (1.002, -0.0017724329),
        ];
        for (input, expected) in exact {
            assert_eq!(f_erfcinvf(input), expected);
        }
        // Inputs outside [0, 2] and NaN must produce NaN.
        let invalid: [f32; 3] = [-1., 2.1, f32::NAN];
        for input in invalid {
            assert!(f_erfcinvf(input).is_nan());
        }
    }
}

359
vendor/pxfm/src/err/inverff.rs vendored Normal file
View File

@@ -0,0 +1,359 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::logs::simple_fast_log;
use crate::polyeval::{
f_estrin_polyeval7, f_estrin_polyeval8, f_estrin_polyeval9, f_polyeval3, f_polyeval5,
f_polyeval10, f_polyeval11,
};
/// Shared core of the single-precision inverse error function: evaluates the
/// approximation in f64 and rounds the result to f32.
///
/// * `z` - argument as f64. All branches but the last multiply an even function of
///   `z` by `z`; the last branch takes `sqrt(-log(1 - z))` and therefore needs
///   `0 <= z < 1` to produce a real result.
/// * `ax` - f32 bit pattern used only to select the branch (thresholds annotated
///   below as 0.0095, 0.06, 0.75, 0.84375 and 0.9375).
/// * `sign` - only its sign is used: every branch returns `f32::copysign(value, sign)`.
#[inline]
pub(crate) fn erfinv_core(z: f64, ax: u32, sign: f32) -> f32 {
if ax <= 0x3c1ba5e3u32 {
// 0.0095
// for small |x| using taylor series first 3 terms
let z2 = z * z;
// Generated by SageMath:
// from mpmath import mp, erf
//
// mp.prec = 100
//
// def inverf_series(n_terms):
//     from mpmath import taylor
//     series_erf = taylor(mp.erfinv, 0, n_terms)
//     return series_erf
//
// ser = inverf_series(10)
// for i in range(1, len(ser), 2):
//     k = ser[i]
//     print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
// Even polynomial in z^2, multiplied by z to give the odd series.
let p = f_polyeval3(
z2,
f64::from_bits(0x3fec5bf891b4ef6b),
f64::from_bits(0x3fcdb29fb2fee5e4),
f64::from_bits(0x3fc053c2c0ab91c5),
) * z;
return f32::copysign(p as f32, sign);
} else if ax <= 0x3d75c28fu32 {
// 0.06
// for |x| < 0.06 using taylor series first 5 terms
let z2 = z * z;
// Generated by SageMath:
// from mpmath import mp, erf
//
// mp.prec = 100
//
// def inverf_series(n_terms):
//     from mpmath import taylor
//     series_erf = taylor(mp.erfinv, 0, n_terms)
//     return series_erf
//
// ser = inverf_series(10)
// for i in range(1, len(ser), 2):
//     k = ser[i]
//     print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
// Same series as above with two more coefficients.
let p = f_polyeval5(
z2,
f64::from_bits(0x3fec5bf891b4ef6b),
f64::from_bits(0x3fcdb29fb2fee5e4),
f64::from_bits(0x3fc053c2c0ab91c5),
f64::from_bits(0x3fb62847c47dda48),
f64::from_bits(0x3fb0a13189c6ef7a),
) * z;
return f32::copysign(p as f32, sign);
}
if ax <= 0x3f400000u32 {
// |x| <= 0.75
let z2 = z * z;
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
//
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Sqrt[x]]/Sqrt[x]
// {err0, approx}=MiniMaxApproximation[f[z],{z,{0.06,0.75},8,7},WorkingPrecision->70]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// Shifted variable of the expansion: r = z^2 - 0.5625.
let r = z2 - 0.5625;
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,8}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let p_num = f_estrin_polyeval9(
r,
f64::from_bits(0x3fa329348a73d9d4),
f64::from_bits(0xbfd2cb089b644580),
f64::from_bits(0x3fed229149f732d6),
f64::from_bits(0xbff6a233d2028bff),
f64::from_bits(0x3ff268adbfbb6023),
f64::from_bits(0xbfddac401c7d70f4),
f64::from_bits(0x3fb3b1bd759d5046),
f64::from_bits(0xbf67aeb45bad547e),
f64::from_bits(0xbf01ccc7434d381b),
);
// x0=SetPrecision[0.5625,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,7}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let p_den = f_estrin_polyeval8(
r,
f64::from_bits(0x3fa1aac2ee4b1413),
f64::from_bits(0xbfd279342e281c99),
f64::from_bits(0x3feef89a353c6d1b),
f64::from_bits(0xbffa8f1b7cd6d0a7),
f64::from_bits(0x3ff89ce6289819a1),
f64::from_bits(0xbfe7db5282a4a2e1),
f64::from_bits(0x3fc543f9a928db4a),
f64::from_bits(0xbf888fd2990e88db),
);
// The approximant models erfinv(x)/x, so the ratio is multiplied back by z.
let k = (p_num / p_den) * z;
f32::copysign(k as f32, sign)
} else if ax <= 0x3f580000u32 {
// |x| <= 0.84375
let z2 = z * z;
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
//
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Sqrt[x]]/Sqrt[x]
// {err0, approx}=MiniMaxApproximation[f[z],{z,{0.75,0.84375},6,6},WorkingPrecision->70]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// NOTE(review): the script above requests degree 6/6 while ten coefficients per
// polynomial are used below; the quoted script may be stale.
// Shifted variable of the expansion: r = z^2 - 0.84375.
let r = z2 - 0.84375;
// x0=SetPrecision[0.84375,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let p_num = f_polyeval10(
r,
f64::from_bits(0x3f116d07e62cbb74),
f64::from_bits(0xbf5c38d390052412),
f64::from_bits(0x3f92d6f96f84efe3),
f64::from_bits(0xbfbac9189cae446b),
f64::from_bits(0x3fd5dd124fb25677),
f64::from_bits(0xbfe49845d46b80ab),
f64::from_bits(0x3fe556c4913f60f8),
f64::from_bits(0xbfd59e527704e33b),
f64::from_bits(0x3fb07614a5e6c9f1),
f64::from_bits(0xbf60ce54a2d8a789),
);
// x0=SetPrecision[0.84375,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let p_den = f_polyeval10(
r,
f64::from_bits(0x3f09fbdd1c987d1e),
f64::from_bits(0xbf5602ad17d419f4),
f64::from_bits(0x3f8efe31ea5bc71d),
f64::from_bits(0xbfb77e5f1bd26730),
f64::from_bits(0x3fd4c3f03e4f5478),
f64::from_bits(0xbfe5aa87dfc5e757),
f64::from_bits(0x3fe9c6406f9abc0b),
f64::from_bits(0xbfdff2f008b4db05),
f64::from_bits(0x3fc1123be5319800),
f64::from_bits(0xbf83be49c2d5cb9e),
);
let k = (p_num / p_den) * z;
f32::copysign(k as f32, sign)
} else if ax <= 0x3f700000u32 {
// |x| <= 0.9375
// First step rational approximant is generated, but it's ill-conditioned, thus
// we're using taylor expansion to create Newton form at the point.
//
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Sqrt[x]]/Sqrt[x]
// {err0, approx}=MiniMaxApproximation[f[z],{z,{0.84375,0.9375},10,9},WorkingPrecision->70]
// num=Numerator[approx][[1]];
// den=Denominator[approx][[1]];
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let x2 = z * z;
// Shifted variable of the expansion: r = z^2 - 0.87890625.
let r = x2 - 0.87890625;
// x0=SetPrecision[0.87890625,75];
// NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// NOTE(review): eleven numerator coefficients are used here although the script
// above lists k from 0 to 9; the quoted script may be stale.
let p_num = f_polyeval11(
r,
f64::from_bits(0x3ec70f1cbf8a758b),
f64::from_bits(0xbf1c9dff87b698d0),
f64::from_bits(0x3f5dfe7be00cc21c),
f64::from_bits(0xbf913fd09c5a3682),
f64::from_bits(0x3fb7ab0095693976),
f64::from_bits(0xbfd3b3ca6a3c9919),
f64::from_bits(0x3fe3533be6d1d8c8),
f64::from_bits(0xbfe48208ef308ac7),
f64::from_bits(0x3fd361a82dab69d1),
f64::from_bits(0xbfa2401965a98195),
f64::from_bits(0xbf54ba4d14ca54e3),
);
// x0=SetPrecision[0.87890625,75];
// NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
// coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
let p_den = f_polyeval10(
r,
f64::from_bits(0x3ec0699f391e2327),
f64::from_bits(0xbf151ec184941078),
f64::from_bits(0x3f5717bb379a3c6e),
f64::from_bits(0xbf8beed3755c3484),
f64::from_bits(0x3fb46148b4a431ef),
f64::from_bits(0xbfd25690b7bc76fa),
f64::from_bits(0x3fe3f1b2f4ee0d9d),
f64::from_bits(0xbfe888a7a4511975),
f64::from_bits(0x3fdd84db18f2a240),
f64::from_bits(0xbfb844807521be56),
);
let f = z * (p_num / p_den);
f32::copysign(f as f32, sign)
} else {
// Rational approximation generated by Wolfram Mathematica:
// for inverf(x) = sqrt(-log(1-x))*R(1/sqrt(-log(1-x)))
//
// <<FunctionApproximations`
// ClearAll["Global`*"]
// f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
// {err0, approx,err1}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},7,6},WorkingPrecision->90]
// num=Numerator[approx];
// den=Denominator[approx];
// poly=num;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// poly=den;
// coeffs=CoefficientList[poly,z];
// TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
// zeta = -log(1 - z); this must be non-negative for the square root below,
// which is why z is expected to lie in [0, 1).
let zeta = -simple_fast_log(1. - z);
let zeta_sqrt = zeta.sqrt();
// (1 / zeta) * sqrt(zeta) is 1 / sqrt(zeta), the variable of the approximant R.
let rcp_zeta = (1. / zeta) * zeta_sqrt;
let p_num = f_estrin_polyeval8(
rcp_zeta,
f64::from_bits(0x3ff00072876c578e),
f64::from_bits(0x40314e00c10282da),
f64::from_bits(0x404f4a1412af03f6),
f64::from_bits(0x404c895cc0d9b1b3),
f64::from_bits(0x404545794620bfaf),
f64::from_bits(0x403264d21ea21354),
f64::from_bits(0x3fc5a5141dd19237),
f64::from_bits(0xbf8c2e49707c21ec),
);
let p_den = f_estrin_polyeval7(
rcp_zeta,
f64::from_bits(0x3ff0000000000000),
f64::from_bits(0x403151312c313d77),
f64::from_bits(0x405032345fa3d0cd),
f64::from_bits(0x4053e0a81d4c5f09),
f64::from_bits(0x4054fa20c5e0731c),
f64::from_bits(0x404620d7f94d4804),
f64::from_bits(0x4035d7400867b81f),
);
let r = zeta_sqrt * (p_num / p_den);
f32::copysign(r as f32, sign)
}
}
/// Inverse error function
///
/// Max ulp 0.5
///
/// Special values: `±0` is returned unchanged (the sign of zero is kept),
/// `±1` gives `±inf`, and NaN or any `|x| > 1` gives NaN.
pub fn f_erfinvf(x: f32) -> f32 {
    let ax = x.to_bits() & 0x7fff_ffff;
    if ax >= 0x3f800000u32 || ax <= 0x3400_0000u32 {
        // |x| >= 1, |x| == 0, |x| <= f32::EPSILON
        if ax == 0 {
            // erfinv is odd: return the input itself so that -0.0 stays -0.0,
            // matching the sign-preserving tiny-argument branch below.
            return x;
        }
        if ax <= 0x3400_0000u32 {
            // |x| <= f32::EPSILON
            // inverf(x) ~ Sqrt[Pi]x/2+O[x]^3
            const SQRT_PI_OVER_2: f64 = f64::from_bits(0x3fec5bf891b4ef6b);
            return (x as f64 * SQRT_PI_OVER_2) as f32;
        }
        if ax == 0x3f800000u32 {
            // |x| == 1
            return if x.is_sign_negative() {
                f32::NEG_INFINITY
            } else {
                f32::INFINITY
            };
        }
        // |x| == NaN, |x| == Inf, |x| > 1
        return f32::NAN;
    }
    // Evaluate on the magnitude in f64; the core copies the sign of `x` onto the result.
    let z = f32::from_bits(ax) as f64;
    erfinv_core(z, ax, x)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks domain errors, endpoints and reference values of `f_erfinvf`,
    /// including odd symmetry for the reference points.
    #[test]
    fn f_test_inv_erff() {
        // Non-finite inputs and |x| > 1 are outside the domain.
        let invalid: [f32; 4] = [f32::NEG_INFINITY, f32::INFINITY, -1.1, 1.1];
        for input in invalid {
            assert!(f_erfinvf(input).is_nan());
        }

        assert_eq!(f_erfinvf(f32::EPSILON), 1.05646485e-7);
        assert_eq!(f_erfinvf(-1.), f32::NEG_INFINITY);
        assert_eq!(f_erfinvf(1.), f32::INFINITY);

        // (input, expected) pairs checked for both signs.
        let symmetric: [(f32, f32); 8] = [
            (0.002, 0.0017724558),
            (0.02, 0.017726395),
            (0.05, 0.044340387),
            (0.5, 0.47693628),
            (0.76, 0.8308411),
            (0.92, 1.2379221),
            (0.97, 1.5344859),
            (0.99, 1.8213866),
        ];
        for (input, expected) in symmetric {
            assert_eq!(f_erfinvf(input), expected);
            assert_eq!(f_erfinvf(-input), -expected);
        }

        assert_eq!(f_erfinvf(0.7560265), 0.82385886);
    }
}

56
vendor/pxfm/src/err/mod.rs vendored Normal file
View File

@@ -0,0 +1,56 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![deny(unreachable_pub)]
mod erf;
mod erf_poly;
mod erfc;
mod erfcx;
mod erfcxf;
mod erff;
mod erffc;
mod inverf;
mod inverfc;
mod inverfcf;
mod inverff;
mod rerf;
mod rerf_poly;
mod rerff;
pub use erf::f_erf;
pub use erfc::f_erfc;
pub use erfcx::f_erfcx;
pub use erfcxf::f_erfcxf;
pub use erff::f_erff;
pub use erffc::f_erfcf;
pub use inverf::f_erfinv;
pub use inverfc::f_erfcinv;
pub use inverfcf::f_erfcinvf;
pub use inverff::f_erfinvf;
pub use rerf::f_rerf;
pub use rerff::f_rerff;

233
vendor/pxfm/src/err/rerf.rs vendored Normal file
View File

@@ -0,0 +1,233 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::err::rerf_poly::RERF_HARD;
use crate::polyeval::f_polyeval7;
/// Slow path for tiny arguments of `1/erf(x)`: evaluates the whole polynomial
/// in double-double arithmetic and divides by `x`.
///
/// `z2` is the double-double square computed by the caller.
#[cold]
#[inline(never)]
fn rerf_poly_tiny_hard(x: f64, z2: DoubleDouble) -> f64 {
    // Polynomial for x/erf(x)
    // Generated by Sollya.
    // d = [0, 1/16];
    // f = x/erf(x);
    // Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18|], [|107...|], d);
    // See ./notes/r_erf_tiny_hard.sollya
    const C: [(u64, u64); 10] = [
        (0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b),
        (0xbc6d7696fe4a7cd0, 0x3fd2e7fb0bcdf4f2),
        (0xbc0cb8b926064434, 0x3f842aa561ecc102),
        (0x3c1cd94c2f3e6f09, 0xbf75207c7ef80727),
        (0xbbb35c4effe3c87c, 0x3f2db4a8d7c32472),
        (0x3bbf1d1edd1e109a, 0x3f20faa7a99a4d3d),
        (0xbb9e05d21f4e1755, 0xbef3adb84631c39c),
        (0x3b6ee5dc31565280, 0xbec366647cacdcc9),
        (0x3b3698f8162c5fac, 0x3eaabb9db9f3b048),
        (0xbb026f5401fce891, 0xbe66cd40349520b6),
    ];
    // Horner scheme: start from the two highest coefficients, then fold in
    // C[7] down to C[0].
    let leading = DoubleDouble::mul_add(
        z2,
        DoubleDouble::from_bit_pair(C[9]),
        DoubleDouble::from_bit_pair(C[8]),
    );
    let horner = C[..8].iter().rev().fold(leading, |acc, &coeff| {
        DoubleDouble::mul_add(z2, acc, DoubleDouble::from_bit_pair(coeff))
    });
    // Renormalize before the division by x.
    let normalized = DoubleDouble::from_exact_add(horner.hi, horner.lo);
    DoubleDouble::div_dd_f64(normalized, x).to_f64()
}
/// Fast path for tiny arguments of `1/erf(x)`.
///
/// Evaluates a polynomial for `x/erf(x)` in `z * z` (high-order part in f64,
/// low-order part in double-double), divides by `x`, and accepts the result only
/// if it passes a rounding test; otherwise `rerf_poly_tiny_hard` is used.
#[inline]
fn rerf_poly_tiny(z: f64, x: f64) -> f64 {
    let z2 = DoubleDouble::from_exact_mult(z, z);
    // Polynomial for x/erf(x)
    // Generated by Sollya.
    // d = [0, 1/16];
    // f = x/erf(x);
    // Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18|], [|107, 107, 107, D...|], d);
    // See ./notes/r_erf_tiny.sollya
    let high_order = f_polyeval7(
        z2.hi,
        f64::from_bits(0xbf75207c7ef80727),
        f64::from_bits(0x3f2db4a8d7c36a03),
        f64::from_bits(0x3f20faa7a8db7f27),
        f64::from_bits(0xbef3adae94983bb2),
        f64::from_bits(0xbec3b05fe5c49f32),
        f64::from_bits(0x3ed67902690892be),
        f64::from_bits(0xbf3090033375e5ee),
    );
    // The three lowest coefficients are carried in double-double precision.
    let step = DoubleDouble::quick_mul_f64_add(
        z2,
        high_order,
        DoubleDouble::from_bit_pair((0xbc0cb29fd910c494, 0x3f842aa561ecc102)),
    );
    let step = DoubleDouble::quick_mul_add(
        z2,
        step,
        DoubleDouble::from_bit_pair((0xbc6d7696ff4f712a, 0x3fd2e7fb0bcdf4f2)),
    );
    let step = DoubleDouble::quick_mul_add(
        z2,
        step,
        DoubleDouble::from_bit_pair((0xbc8618f13eb7ca11, 0x3fec5bf891b4ef6b)),
    );
    let poly = DoubleDouble::from_exact_add(step.hi, step.lo);
    let quotient = DoubleDouble::div_dd_f64(poly, x);
    // Rounding test: the error bound is built by `f_fmla` from `quotient.hi` and the
    // two constants (presumably quotient.hi * 2^-62 + 2^-70).
    let err = f_fmla(
        quotient.hi,
        f64::from_bits(0x3c10000000000000), // 2^-62
        f64::from_bits(0x3b90000000000000), // 2^-70
    );
    let upper = quotient.hi + (quotient.lo + err);
    let lower = quotient.hi + (quotient.lo - err);
    if upper == lower {
        // Both ends of the error interval round to the same f64.
        quotient.to_f64()
    } else {
        rerf_poly_tiny_hard(x, z2)
    }
}
/// Evaluates 1/erf(x) from row `idx` of `RERF_HARD`, entirely in double-double arithmetic.
///
/// The row's coefficients (indices 10 down to 0) are combined by Horner's scheme in `z2`,
/// the result is renormalised, divided by `x` and rounded to `f64`.
#[inline]
fn rerf_poly_hard(x: f64, z2: DoubleDouble, idx: usize) -> f64 {
    let row = &RERF_HARD[idx];
    // Seed the recurrence with the two highest coefficients.
    let mut acc = DoubleDouble::mul_add(
        z2,
        DoubleDouble::from_bit_pair(row[10]),
        DoubleDouble::from_bit_pair(row[9]),
    );
    // Remaining coefficients, index 8 down to index 0.
    for &pair in row[..9].iter().rev() {
        acc = DoubleDouble::mul_add(z2, acc, DoubleDouble::from_bit_pair(pair));
    }
    let normalized = DoubleDouble::from_exact_add(acc.hi, acc.lo);
    DoubleDouble::div_dd_f64(normalized, x).to_f64()
}
/// Computes 1/erf(x)
///
/// Special values handled here: NaN propagates, `±inf` gives `±1`, and `±0` (as well as
/// inputs so small that the result overflows) gives an infinity with the sign of `x`.
///
/// Max ulp 0.5001
pub fn f_rerf(x: f64) -> f64 {
    // z = |x|, ux = bit pattern of |x|
    let z = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
    let ux = z.to_bits();
    /* 1/erf(x) rounds to +/-1 for RNDN for |x| > 0x4017afb48dc96626 */
    if ux > 0x4017afb48dc96626 {
        let os = f64::copysign(1.0, x);
        const MASK: u64 = 0x7ff0000000000000u64;
        if ux > MASK {
            return x + x; /* NaN */
        }
        if ux == MASK {
            return os; /* +/-Inf */
        }
        // os - 2^-54 * os, which is os again under round-to-nearest.
        return f_fmla(-f64::from_bits(0x3c90000000000000), os, os);
    }
    /* now |x| <= 0x4017afb48dc96626 */
    if z < f64::from_bits(0x3c20000000000000) {
        // |x| < 0.0000000000000000004336808689942018 (2^-61)
        // At or below this bit pattern the result is an infinity carrying the sign of x.
        // The pattern of |x| = 0 is 0, so +0 and -0 are covered by the same test.
        if ux <= 0x38b7f12369dedu64 {
            return f64::copysign(f64::INFINITY, x);
        }
        /* double-double approximation of sqrt(pi)/2 to nearest */
        const SQRT_PI_OVER_2: DoubleDouble = DoubleDouble::new(
            f64::from_bits(0xbc8618f13eb7ca89),
            f64::from_bits(0x3fec5bf891b4ef6b),
        );
        /* tiny x is Taylor Series: 1/erf(x) ~ sqrt(pi)/(2 * x) + O(x), where the ratio of the
        O(x) term to the main term is in x^2/3, thus less than 2^-123 */
        /* scale x by 2^106 to get out of the subnormal range */
        let sx = x * f64::from_bits(0x4690000000000000);
        let mut prod = DoubleDouble::div_dd_f64(SQRT_PI_OVER_2, sx);
        // The division was done by 2^106 * x, so multiply by 2^106 to undo the scaling.
        prod = DoubleDouble::quick_mult_f64(prod, f64::from_bits(0x4690000000000000));
        return prod.to_f64();
    }
    if ux < 0x3fb0000000000000u64 {
        // |x| < 1/16
        return rerf_poly_tiny(z, x);
    }
    // Adding 4 to the exponent field multiplies |x| by 16; truncating gives the table index.
    const SIXTEEN: u64 = 4 << 52;
    // SAFETY: here 2^-4 <= |x| <= 0x4017afb48dc96626 (about 5.92), so 16 * |x| is finite,
    // positive and far below `usize::MAX`, as `to_int_unchecked` requires.
    let idx = unsafe { f64::from_bits(ux.wrapping_add(SIXTEEN)).to_int_unchecked::<usize>() };
    let z2 = DoubleDouble::from_exact_mult(z, z);
    rerf_poly_hard(x, z2, idx)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks of `f_rerf` against exact expected outputs, plus NaN propagation.
    #[test]
    fn test_erf() {
        let cases: [(f64, f64); 8] = [
            (65., 1.0),
            (3., 1.0000220909849995),
            (-3., -1.0000220909849995),
            (-0.03723630312089732, -23.811078627277197),
            (0.0000000000000000002336808689942018, 3.7924667486354975e18),
            (2.000225067138672, 1.004695025872889),
            (0., f64::INFINITY),
            (-0., f64::NEG_INFINITY),
        ];
        for (input, expected) in cases {
            assert_eq!(f_rerf(input), expected);
        }
        assert!(f_rerf(f64::NAN).is_nan());
    }
}

1331
vendor/pxfm/src/err/rerf_poly.rs vendored Normal file

File diff suppressed because it is too large Load Diff

468
vendor/pxfm/src/err/rerff.rs vendored Normal file
View File

@@ -0,0 +1,468 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
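// Polynomials approximating x/erf(x) on [k/8, (k + 1)/8) for k = 0..31, generated by Sollya and SageMath.
// Row k of COEFFS holds the coefficients of x^0, x^2, ..., x^14 as f64 bit patterns.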
// ```text
// def build_sollya_script(idx):
// return f"""
// d = [{idx}/8, {idx + 1}/8];
//
// f = x/erf(x);
// Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14|], [|D...|], d);
//
// for i from 0 to degree(Q) by 2 do {{
// write(coeff(Q, i)) >> "coefficients.txt";
// write("\\n") >> "coefficients.txt";
// }};
// """
//
// def load_coefficients(filename):
// with open(filename, "r") as f:
// return [RealField(500)(line.strip()) for line in f if line.strip()]
//
// def call_sollya_on_interval(idx):
// sollya_script = build_sollya_script(idx)
// with open("tmp_interval.sollya", "w") as f:
// f.write(sollya_script)
// import subprocess
// if os.path.exists("coefficients.txt"):
// os.remove("coefficients.txt")
// try:
// result = subprocess.run(
// ["sollya", "tmp_interval.sollya"],
// check=True,
// capture_output=True,
// text=True
// )
// except subprocess.CalledProcessError as e:
// return
//
// def print_coeffs(poly):
// print("[")
// for i in range(len(poly)):
// coeff = poly[i]
// print(double_to_hex(coeff), ",")
// print("],")
//
// print(f"static COEFFS: [[u64; 8]; 32] = [")
// for i in range(0, 32):
// call_sollya_on_interval(i)
// coeffs = load_coefficients(f"coefficients.txt")
// print_coeffs(coeffs)
// print("];")
// ```
static COEFFS: [[u64; 8]; 32] = [
[
0x3fec5bf891b4ef6b,
0x3fd2e7fb0bcdee7f,
0x3f842aa5641a200a,
0xbf752081ae81d16e,
0x3f2e1a191fb85592,
0xbf203a20ad500043,
0x3f861a864f719e76,
0xbfc79f68bad20bd1,
],
[
0x3fec5bf891b4ef6b,
0x3fd2e7fb0bcdf45b,
0x3f842aa561f35512,
0xbf75207c8167ac1d,
0x3f2db4b119b4ce20,
0x3f20fa28737c4219,
0xbef38e74cca2219a,
0xbec5d70713fc621e,
],
[
0x3fec5bf891b4ef30,
0x3fd2e7fb0bce1c0f,
0x3f842aa56138541f,
0xbf75207c6197eb7c,
0x3f2db4799120e074,
0x3f20fc28d915a6e9,
0xbef3ea5b479dc053,
0xbebbffe6df8ec372,
],
[
0x3fec5bf891b4bf18,
0x3fd2e7fb0bde166f,
0x3f842aa53c721766,
0xbf7520796733bbec,
0x3f2db21eebf4144f,
0x3f210545cc78d0f0,
0xbef48ad7e4aa2d10,
0xbeb24a043ad31907,
],
[
0x3fec5bf891ab16e9,
0x3fd2e7fb0dc7b919,
0x3f842aa29d8381e7,
0xbf7520592585d601,
0x3f2da30df1566e43,
0x3f212780ff325aa6,
0xbef5e98ea9819e42,
0xbe9849d52099dcb9,
],
[
0x3fec5bf890ddfa8d,
0x3fd2e7fb28aab312,
0x3f842a8a461f0eb7,
0xbf751f93b2d27114,
0x3f2d66789eed5f95,
0x3f21818ff1832f50,
0xbef84264724049ef,
0x3e9df12b02e82a5a,
],
[
0x3fec5bf887f64fa4,
0x3fd2e7fbfcc05f75,
0x3f842a02323e2099,
0xbf751c86d291ced6,
0x3f2cbd5653cde433,
0x3f223299b32b8583,
0xbefb7fc6e286cd94,
0x3eb49676cb3da393,
],
[
0x3fec5bf84f8e2488,
0x3fd2e7ffe83d2974,
0x3f842821c5cc659c,
0xbf7514805a6196e3,
0x3f2b723680f64bb5,
0x3f233416dcfcd366,
0xbefefe55300afaa7,
0x3ebf0c475fb71e7a,
],
[
0x3fec5bf7999e6afe,
0x3fd2e809c6d4caa7,
0x3f84247256be4a56,
0xbf750838db0c0cf5,
0x3f29e7e867267388,
0x3f24226adee5ce74,
0xbf00c0830af2bf01,
0x3ec26fb6b18e628b,
],
[
0x3fec5bf801fc5ad5,
0x3fd2e80618e8941e,
0x3f84254c04b0b234,
0xbf7509d7cf351201,
0x3f2a01829944820c,
0x3f241d7bb0c7e2de,
0xbf00c2d844916d26,
0x3ec2817d39abc26b,
],
[
0x3fec5c0938a12f13,
0x3fd2e7706c510d79,
0x3f8448392db86aae,
0xbf75526e9c6046f0,
0x3f2facd0bc0e7862,
0x3f21fc4093e1e6b7,
0xbefdf54af68ba968,
0x3ebfe348fc246c15,
],
[
0x3fec5c6dcdadc5d8,
0x3fd2e495072afff3,
0x3f84d6f390564d4d,
0xbf764a7e85749c85,
0x3f37effb62caee80,
0x3f19cb39bc236ae6,
0xbef6d7035785e8f3,
0x3eb755aa2e58fc52,
],
[
0x3fec5dd74381acff,
0x3fd2dbe68140f116,
0x3f86459e1acfda0f,
0xbf7865203923a03d,
0x3f43665053a48049,
0x3f0409e353b761ea,
0xbeeb0b00f567c9f8,
0x3eabc33000611b25,
],
[
0x3fec6175431226d1,
0x3fd2c8dcbb0babcc,
0x3f88f5bfd61e5d2e,
0xbf7bc60de8dff620,
0x3f4d9b7076c7767c,
0xbf0106584fac3547,
0xbed0a56cd1030deb,
0x3e970ee11e7beb48,
],
[
0x3fec68445d99a8e9,
0x3fd2a9d608dbfea2,
0x3f8cc072ddf22cb6,
0xbf7fe5f2efdc5f5c,
0x3f5431d1deff38bc,
0xbf197220e4a1dda8,
0x3ec9e2469e6c1c67,
0x3e4be72535d53d7b,
],
[
0x3fec713c415bf088,
0x3fd28610e83aa38c,
0x3f9049ee1942f46b,
0xbf81c513d165d6fd,
0x3f585bc13e0fcaba,
0xbf22715362e30768,
0x3ede6bfa3c69e8e3,
0xbe852cd85f8dea5b,
],
[
0x3fec770e08b47107,
0x3fd2716324b22047,
0x3f91460d403e6b9c,
0xbf829ab46375f10d,
0x3f5a0e7f00c76fb5,
0xbf2484890f2d7eeb,
0x3ee207b21bbd8496,
0xbe8bbee036671b6a,
],
[
0x3fec6f4a2d01088d,
0x3fd288e494bc89b7,
0x3f905203788a2821,
0xbf81eab2727ce365,
0x3f58ddba75a3c100,
0xbf2347c9a317a175,
0x3ee099c93ce5f44f,
0xbe88e9f9c064f833,
],
[
0x3fec4c9bbce50c7d,
0x3fd2e8175b0e1837,
0x3f89a2d1518c7a4c,
0xbf7f3fa91859127e,
0x3f55431c495b1077,
0xbf1fc1af665bb1f8,
0x3eda0f1d735195cb,
0xbe827b8d6fa224ed,
],
[
0x3fec03cce39d7213,
0x3fd39c2316e290bf,
0x3f7b674438899313,
0xbf783644c88c71fb,
0x3f5047a3da485180,
0xbf1748b54f823d57,
0x3ed20c86d3302f22,
0xbe77f94cafe045a8,
],
[
0x3feb913f0adf6c4b,
0x3fd49c4cedae09fc,
0xbf4a6dea9778f474,
0xbf7006dc4e6c8125,
0x3f461483c254fa5f,
0xbf0e75052760bf18,
0x3ec65425869bc096,
0xbe6bc2df9fbc0f82,
],
[
0x3feafbeda3b7d400,
0x3fd5cb900ee1fb5e,
0xbf8228d16e87de3d,
0xbf6011d44e155bf5,
0x3f3993b736442257,
0xbf017c7ee5efa6ad,
0x3eb886e337d2e3c2,
0xbe5cba4b79e90043,
],
[
0x3fea54849d309eba,
0x3fd701afa55e3d21,
0xbf90c72bb2e2799f,
0xbf33c92573294e34,
0x3f265284f7a6d53a,
0xbef09f09298ed1e8,
0x3ea7153a46cb2e27,
0xbe49ef6ec79265fd,
],
[
0x3fe9b128df667870,
0x3fd816d295a867cb,
0xbf9713f11ea84a26,
0x3f4edcbdd63903bb,
0x3ef44f54fc7a6024,
0xbed45da547d2fcb8,
0x3e9049754d57a9a7,
0xbe32aba05ca26a69,
],
[
0x3fe927f49edf4ace,
0x3fd8ecd207c6a7d1,
0xbf9b8cd215124008,
0x3f5cbab209dd389d,
0xbf12a8920ea6230f,
0x3eb442dfce60b0e2,
0x3e52494e415c7728,
0xbe09a1b1bbb9cee4,
],
[
0x3fe8ca3d7437d06f,
0x3fd973c08b6d33fb,
0xbf9e272ca1fccc06,
0x3f61efd00e2016b6,
0xbf1e6dab18e9d45a,
0x3ed0b446e3469be1,
0xbe7503c584488bed,
0x3e069968660290a4,
],
[
0x3fe8a1f4b154f663,
0x3fd9a9a8b81692d4,
0xbf9f1e9312dd4501,
0x3f632b4d20599404,
0xbf2119c1b5e43b24,
0x3ed42b9874284d56,
0xbe7c17cc1eef4b9d,
0x3e117f0a9057a689,
],
[
0x3fe8b15bfcf78f33,
0x3fd99720c884ab33,
0xbf9ed2265979f5a6,
0x3f62d3c30432692b,
0xbf20a17346c37362,
0x3ed36538f2d21c31,
0xbe7aac6bb10f8b90,
0x3e1061d3a1737044,
],
[
0x3fe8f479e98cb825,
0x3fd94ab3f8d0c80c,
0xbf9da7afe85abf94,
0x3f618fe28f71a3d4,
0xbf1df723b2a63e38,
0x3ed0d190252a7f7c,
0xbe7631fdd49272b0,
0x3e0a17567cab4a94,
],
[
0x3fe9636d647b61c0,
0x3fd8d4aaba0e0212,
0xbf9bf904574e56ea,
0x3f5fb68684d8555d,
0xbf19d06f9cf17bbf,
0x3ecb92b9f0b8acf3,
0xbe7145bde0c499ae,
0x3e033cf1cb08ce4c,
],
[
0x3fe9f4c3301b6d33,
0x3fd844100b4598b3,
0xbf9a0b94e19be990,
0x3f5c0ed55c70532f,
0xbf15a786c9e62b23,
0x3ec5e3f05a04f5c6,
0xbe69ea9db2e37883,
0x3dfb3e5ad2cd0fb2,
],
[
0x3fea9f469c75536c,
0x3fd7a51b3d9eda10,
0xbf980f63a2cb486c,
0x3f5887f72a9f07e0,
0xbf11e4d454f2f994,
0x3ec113d0aed8bdef,
0xbe6311f84083acf4,
0x3df2e4dc2e50e3fa,
],
];
/// Computes 1/erf(x)
///
/// `±0` maps to the infinity of the same sign, NaN is returned unchanged and `±inf`
/// maps to `±1`.
///
/// Max ulp 0.5
pub fn f_rerff(x: f32) -> f32 {
    let abs_bits = x.to_bits() & 0x7fff_ffffu32;
    if x == 0. {
        return f32::copysign(f32::INFINITY, x);
    }
    if abs_bits >= 0x4080_0000u32 {
        // |x| >= 4, including infinities and NaN.
        if abs_bits > 0x7f80_0000 {
            return x; // NaN
        }
        let negative = x.is_sign_negative();
        let unit: f32 = if negative { -1.0 } else { 1.0 };
        if abs_bits == 0x7f80_0000 {
            return unit; // +/-Inf
        }
        // +/-2^-25 with the sign opposite to `unit`.
        let nudge = f32::from_bits(if negative { 0x33000000 } else { 0xb3000000 });
        return unit + nudge;
    }
    // Polynomial approximation see [COEFFS] for generation:
    // 1/erf(x) ~ (c0 + c1 * x^2 + c2 * x^4 + ... + c7 * x^14) / x
    let xd = f64::from(x);
    let x2 = xd * xd;
    // Adding 3 to the exponent field scales a normal |x| by 8; truncation selects the row.
    const EIGHT: u32 = 3 << 23;
    let idx = unsafe { f32::from_bits(abs_bits.wrapping_add(EIGHT)).to_int_unchecked::<usize>() };
    let row = &COEFFS[idx];
    let coeff = |i: usize| f64::from_bits(row[i]);
    // Pair up the coefficients, then combine the pairs with x^4 and x^8.
    let pair0 = f_fmla(x2, coeff(1), coeff(0));
    let pair1 = f_fmla(x2, coeff(3), coeff(2));
    let pair2 = f_fmla(x2, coeff(5), coeff(4));
    let pair3 = f_fmla(x2, coeff(7), coeff(6));
    let x4 = x2 * x2;
    let x8 = x4 * x4;
    let low_half = f_fmla(x4, pair1, pair0);
    let high_half = f_fmla(x4, pair3, pair2);
    (f_fmla(x8, high_half, low_half) / xd) as f32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// NaN propagation followed by exact-value spot checks of `f_rerff`.
    #[test]
    fn f_erff_test() {
        assert!(f_rerff(f32::NAN).is_nan());
        let cases: [(f32, f32); 5] = [
            (0.0, f32::INFINITY),
            (-0.0, f32::NEG_INFINITY),
            (0.015255669, 58.096153),
            (1.0, 1.1866608),
            (0.5, 1.9212301),
        ];
        for (input, expected) in cases {
            assert_eq!(f_rerff(input), expected);
        }
    }
}

89
vendor/pxfm/src/exponents/auxiliary.rs vendored Normal file
View File

@@ -0,0 +1,89 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::dyadic_float::{DyadicSign, f64_from_parts};
/// Multiplies `d` by 2^`i`, handling exponents outside the normal range by scaling in
/// several steps.
///
/// At most three multiplications are performed: up to two pre-scaling steps when `i` is
/// below `exp_min` or above `exp_max`, then one final multiplication by 2^n with the
/// remaining, clamped exponent `n`.
///
/// NOTE(review): the comments below read `f64_from_parts` as taking
/// (sign, biased exponent, mantissa); confirm against its definition.
#[inline]
pub(crate) fn ldexp(d: f64, i: i32) -> f64 {
let mut n = i;
let exp_max = 1023;
let exp_min = -1022;
// Exponent bias of f64: 2^(11 - 1) - 1 = 1023.
const EXP_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
// 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64)
let f_exp_max = f64_from_parts(DyadicSign::Pos, EXP_BIAS << 1, 0);
// 2 ^ Emin, minimum positive normal with null significand (0x1p-1022 for f64)
let f_exp_min = f64_from_parts(DyadicSign::Pos, 1, 0);
let mut x = d;
if n < exp_min {
// 2 ^ 52, multiplier to normalize subnormals (0x1p52 for f64, from the exponent
// field 52 + EXP_BIAS used here)
let f_pow_subnorm = f64_from_parts(DyadicSign::Pos, 52 + EXP_BIAS, 0);
// Each pre-scaling step multiplies by `mul` and raises `n` by `add` = 1022 - 52.
let mul = f_exp_min * f_pow_subnorm;
let add = -exp_min - 52i32;
// Worst case negative `n`: `x` is the maximum positive value, the result is `F::MIN`.
// This must be reachable by three scaling multiplications (two here and one final).
debug_assert!(-exp_min + 52i32 + exp_max <= add * 2 + -exp_min);
x *= mul;
n += add;
if n < exp_min {
x *= mul;
n += add;
if n < exp_min {
// Still below range after two steps: clamp the exponent for the final multiply.
n = exp_min;
}
}
} else if n > exp_max {
// Mirror image for large exponents: scale by 2^Emax up to twice, then clamp.
x *= f_exp_max;
n -= exp_max;
if n > exp_max {
x *= f_exp_max;
n -= exp_max;
if n > exp_max {
n = exp_max;
}
}
}
// Here exp_min <= n <= exp_max, so EXP_BIAS + n lies in 1..=2046.
let scale = f64_from_parts(DyadicSign::Pos, (EXP_BIAS as i32 + n) as u64, 0);
x * scale
}
/// Scales `d` by 2^`i` by adding `i` straight into the exponent field of its bit pattern.
///
/// This is a plain wrapping integer addition on the raw bits: there is no handling of
/// exponent overflow or underflow, nor of zero, subnormal, infinite or NaN inputs.
#[inline]
pub(crate) fn fast_ldexp(d: f64, i: i32) -> f64 {
    // `i as u64` sign-extends, so a negative `i` subtracts from the exponent after wrapping.
    let exponent_delta = (i as u64) << 52;
    f64::from_bits(d.to_bits().wrapping_add(exponent_delta))
}

411
vendor/pxfm/src/exponents/exp.rs vendored Normal file
View File

@@ -0,0 +1,411 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::{f_fmla, fmla, pow2i, rintk};
use crate::double_double::DoubleDouble;
use crate::exponents::auxiliary::fast_ldexp;
use crate::round::RoundFinite;
/// Approximates `e^d` in a `const` context.
///
/// This is a simplified evaluation intended to give a good approximation where the
/// runtime routines cannot be used. The function itself performs no clamping for very
/// large or very small `d`.
#[inline]
pub const fn exp(d: f64) -> f64 {
    // ln(2) split into an upper and a lower part, and its reciprocal.
    const LN2_UPPER: f64 = 0.693_147_180_559_662_956_511_601_805_686_950_683_593_75;
    const LN2_LOWER: f64 = 0.282_352_905_630_315_771_225_884_481_750_134_360_255_254_120_68_e-12;
    const INV_LN2: f64 =
        1.442_695_040_888_963_407_359_924_681_001_892_137_426_645_954_152_985_934_135_449_406_931;
    // Coefficients in r^2 of the polynomial for u = r*(exp(r)+1)/(exp(r)-1), lowest order first.
    const POLY: [f64; 6] = [
        2f64,
        0.16666666666666674f64,
        -0.0027777777777777614f64,
        6.613756613755705e-5f64,
        -1.6534391534392554e-6f64,
        4.17535139757361979584e-8f64,
    ];

    // Argument reduction: k = rint(d / ln 2), reduced = d - k * ln 2 (two-step subtraction).
    let k = rintk(d * INV_LN2);
    let reduced = fmla(k, -LN2_LOWER, fmla(k, -LN2_UPPER, d));
    let reduced_sq = reduced * reduced;

    // Horner evaluation in reduced^2.
    let mut w = POLY[5];
    w = fmla(w, reduced_sq, POLY[4]);
    w = fmla(w, reduced_sq, POLY[3]);
    w = fmla(w, reduced_sq, POLY[2]);
    w = fmla(w, reduced_sq, POLY[1]);
    w = fmla(w, reduced_sq, POLY[0]);

    // exp(reduced) = 1 + 2 * reduced / (u - reduced), then scale by 2^k.
    let exp_reduced = 1f64 + 2f64 * reduced / (w - reduced);
    exp_reduced * pow2i(k as i32)
}
/// Coarse reduction table for the exponential: entry `i` approximates 2^(i/64), i = 0..63.
///
/// Each entry is a pair of f64 bit patterns that callers pass to
/// `DoubleDouble::from_bit_pair`; judging by the magnitudes the first word is the low
/// (correction) part and the second the high part.
/// `f_exp` indexes this table with bits 6..11 of the rounded, scaled argument.
pub(crate) static EXP_REDUCE_T0: [(u64, u64); 64] = [
(0x0000000000000000, 0x3ff0000000000000),
(0xbc719083535b085e, 0x3ff02c9a3e778061),
(0x3c8d73e2a475b466, 0x3ff059b0d3158574),
(0x3c6186be4bb28500, 0x3ff0874518759bc8),
(0x3c98a62e4adc610a, 0x3ff0b5586cf9890f),
(0x3c403a1727c57b52, 0x3ff0e3ec32d3d1a2),
(0xbc96c51039449b3a, 0x3ff11301d0125b51),
(0xbc932fbf9af1369e, 0x3ff1429aaea92de0),
(0xbc819041b9d78a76, 0x3ff172b83c7d517b),
(0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75),
(0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
(0x3c8dc775814a8494, 0x3ff2063b88628cd6),
(0x3c99b07eb6c70572, 0x3ff2387a6e756238),
(0x3c82bd339940e9da, 0x3ff26b4565e27cdd),
(0x3c8612e8afad1256, 0x3ff29e9df51fdee1),
(0x3c90024754db41d4, 0x3ff2d285a6e4030b),
(0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
(0x3c932721843659a6, 0x3ff33c08b26416ff),
(0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
(0xbc75e436d661f5e2, 0x3ff3a7db34e59ff7),
(0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
(0xbc5ef3691c309278, 0x3ff4160a21f72e2a),
(0x3c489b7a04ef80d0, 0x3ff44e086061892d),
(0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0),
(0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
(0xbc94b309d25957e4, 0x3ff4f9b2769d2ca7),
(0xbc807abe1db13cac, 0x3ff5342b569d4f82),
(0x3c99bb2c011d93ac, 0x3ff56f4736b527da),
(0x3c96324c054647ac, 0x3ff5ab07dd485429),
(0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148),
(0xbc9383c17e40b496, 0x3ff6247eb03a5585),
(0xbc9bb60987591c34, 0x3ff6623882552225),
(0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
(0xbc6bbe3a683c88aa, 0x3ff6dfb23c651a2f),
(0xbc816e4786887a9a, 0x3ff71f75e8ec5f74),
(0xbc90245957316dd4, 0x3ff75feb564267c9),
(0xbc841577ee049930, 0x3ff7a11473eb0187),
(0x3c705d02ba15797e, 0x3ff7e2f336cf4e62),
(0xbc9d4c1dd41532d8, 0x3ff82589994cce13),
(0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed),
(0x3c96e9f156864b26, 0x3ff8ace5422aa0db),
(0x3c85cc13a2e3976c, 0x3ff8f1ae99157736),
(0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5),
(0xbc9d185b7c1b85d0, 0x3ff97d829fde4e50),
(0x3c7c7c46b071f2be, 0x3ff9c49182a3f090),
(0xbc9359495d1cd532, 0x3ffa0c667b5de565),
(0xbc9d2f6edb8d41e2, 0x3ffa5503b23e255d),
(0x3c90fac90ef7fd32, 0x3ffa9e6b5579fdbf),
(0x3c97a1cd345dcc82, 0x3ffae89f995ad3ad),
(0xbc62805e3084d708, 0x3ffb33a2b84f15fb),
(0xbc75584f7e54ac3a, 0x3ffb7f76f2fb5e47),
(0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2),
(0x3c811065895048de, 0x3ffc199bdd85529c),
(0x3c92884dff483cac, 0x3ffc67f12e57d14b),
(0x3c7503cbd1e949dc, 0x3ffcb720dcef9069),
(0xbc9cbc3743797a9c, 0x3ffd072d4a07897c),
(0x3c82ed02d75b3706, 0x3ffd5818dcfba487),
(0x3c9c2300696db532, 0x3ffda9e603db3285),
(0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f),
(0x3c839e8980a9cc90, 0x3ffe502ee78b3ff6),
(0xbc9e9c23179c2894, 0x3ffea4afa2a490da),
(0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27),
(0x3c99d3e12dd8a18a, 0x3fff50765b6e4540),
(0x3c874853f3a5931e, 0x3fffa7c1819e90d8),
];
/// Fine reduction table for the exponential: entry `i` approximates 2^(i/4096), i = 0..63.
///
/// Same layout as `EXP_REDUCE_T0`: pairs of f64 bit patterns consumed through
/// `DoubleDouble::from_bit_pair`, apparently low word first and high word second.
/// `f_exp` indexes this table with the lowest 6 bits of the rounded, scaled argument.
pub(crate) static EXP_REDUCE_T1: [(u64, u64); 64] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x3c9ae8e38c59c72a, 0x3ff000b175effdc7),
(0xbc57b5d0d58ea8f4, 0x3ff00162f3904052),
(0x3c94115cb6b16a8e, 0x3ff0021478e11ce6),
(0xbc8d7c96f201bb2e, 0x3ff002c605e2e8cf),
(0x3c984711d4c35ea0, 0x3ff003779a95f959),
(0xbc80484245243778, 0x3ff0042936faa3d8),
(0xbc94b237da2025fa, 0x3ff004dadb113da0),
(0xbc75e00e62d6b30e, 0x3ff0058c86da1c0a),
(0x3c9a1d6cedbb9480, 0x3ff0063e3a559473),
(0xbc94acf197a00142, 0x3ff006eff583fc3d),
(0xbc6eaf2ea42391a6, 0x3ff007a1b865a8ca),
(0x3c7da93f90835f76, 0x3ff0085382faef83),
(0xbc86a79084ab093c, 0x3ff00905554425d4),
(0x3c986364f8fbe8f8, 0x3ff009b72f41a12b),
(0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd),
(0xbc84f6b2a7609f72, 0x3ff00b1afa5abcbf),
(0xbc7e1a258ea8f71a, 0x3ff00bcceb7707ec),
(0x3c74362ca5bc26f2, 0x3ff00c7ee448ee02),
(0x3c9095a56c919d02, 0x3ff00d30e4d0c483),
(0xbc6406ac4e81a646, 0x3ff00de2ed0ee0f5),
(0x3c9b5a6902767e08, 0x3ff00e94fd0398e0),
(0xbc991b2060859320, 0x3ff00f4714af41d3),
(0x3c8427068ab22306, 0x3ff00ff93412315c),
(0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11),
(0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b),
(0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63),
(0xbc734104ee7edae8, 0x3ff012c1fecd613b),
(0xbc72b6aeb6176892, 0x3ff0137444c9b5b5),
(0x3c7a8cd33b8a1bb2, 0x3ff01426927f5278),
(0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f),
(0x3c857ba2dc7e0c72, 0x3ff0158b4517bb88),
(0x3c9b61299ab8cdb8, 0x3ff0163da9fb3335),
(0xbc990565902c5f44, 0x3ff016f0169949ed),
(0x3c870fc41c5c2d54, 0x3ff017a28af25567),
(0x3c94b9a6e145d76c, 0x3ff018550706ab62),
(0xbc7008eff5142bfa, 0x3ff019078ad6a19f),
(0xbc977669f033c7de, 0x3ff019ba16628de2),
(0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3),
(0x3c9371231477ece6, 0x3ff01b1f44af9f9e),
(0x3c75e7626621eb5a, 0x3ff01bd1e77170b4),
(0xbc9bc72b100828a4, 0x3ff01c8491f08f08),
(0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070),
(0x3c816996709da2e2, 0x3ff01de9fe280ac8),
(0xbc8c11f5239bf536, 0x3ff01e9cbfe113ef),
(0x3c8e1d4eb5edc6b4, 0x3ff01f4f8958c1c6),
(0xbc9afb99946ee3f0, 0x3ff020025a8f6a35),
(0xbc98f06d8a148a32, 0x3ff020b533856324),
(0xbc82bf310fc54eb6, 0x3ff02168143b0281),
(0xbc9c95a035eb4176, 0x3ff0221afcb09e3e),
(0xbc9491793e46834c, 0x3ff022cdece68c4f),
(0xbc73e8d0d9c49090, 0x3ff02380e4dd22ad),
(0xbc9314aa16278aa4, 0x3ff02433e494b755),
(0x3c848daf888e9650, 0x3ff024e6ec0da046),
(0x3c856dc8046821f4, 0x3ff02599fb483385),
(0x3c945b42356b9d46, 0x3ff0264d1244c719),
(0xbc7082ef51b61d7e, 0x3ff027003103b10e),
(0x3c72106ed0920a34, 0x3ff027b357854772),
(0xbc9fd4cf26ea5d0e, 0x3ff0286685c9e059),
(0xbc909f8775e78084, 0x3ff02919bbd1d1d8),
(0x3c564cbba902ca28, 0x3ff029ccf99d720a),
(0x3c94383ef231d206, 0x3ff02a803f2d170d),
(0x3c94a47a505b3a46, 0x3ff02b338c811703),
(0x3c9e471202234680, 0x3ff02be6e199c811),
];
/// Sets the exponent field of a binary64 number to 0 (subnormal range).
///
/// Only the 52 fraction bits of `x` are kept; the sign bit is cleared along with the
/// exponent, so the result is always a non-negative subnormal or zero.
#[inline]
pub(crate) fn to_denormal(x: f64) -> f64 {
    const FRACTION_MASK: u64 = 0x000fffffffffffff;
    f64::from_bits(x.to_bits() & FRACTION_MASK)
}
/// Evaluates, in double-double arithmetic, the degree-6 polynomial
/// `1 + C[1]*z + C[2]*z^2 + ... + C[6]*z^6` by Horner's scheme.
///
/// The constant term is supplied as the plain `f64` value 1.0 in the final step.
#[inline]
fn exp_poly_dd(z: DoubleDouble) -> DoubleDouble {
    const C: [(u64, u64); 7] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x39c712f72ecec2cf, 0x3fe0000000000000),
        (0x3c65555555554d07, 0x3fc5555555555555),
        (0x3c455194d28275da, 0x3fa5555555555555),
        (0x3c012faa0e1c0f7b, 0x3f81111111111111),
        (0xbbf4ba45ab25d2a3, 0x3f56c16c16da6973),
        (0xbbc9091d845ecd36, 0x3f2a01a019eb7f31),
    ];
    // Seed with the two highest coefficients.
    let mut acc = DoubleDouble::quick_mul_add(
        DoubleDouble::from_bit_pair(C[6]),
        z,
        DoubleDouble::from_bit_pair(C[5]),
    );
    // Coefficients 4 down to 1.
    for &pair in C[1..5].iter().rev() {
        acc = DoubleDouble::quick_mul_add(acc, z, DoubleDouble::from_bit_pair(pair));
    }
    DoubleDouble::quick_mul_add_f64(acc, z, f64::from_bits(0x3ff0000000000000))
}
/// Slow, more accurate path of `f_exp`, taken when the fast path's rounding test fails.
///
/// The arguments are values `f_exp` has already computed: `x` is the original input,
/// `t` the rounded scaled argument, `tz` the double-double product of the two reduction
/// table entries and `ie` the binary exponent of the result (`jt >> 12` in `f_exp`).
#[cold]
fn as_exp_accurate(x: f64, t: f64, tz: DoubleDouble, ie: i64) -> f64 {
let mut ix = x.to_bits();
// Biased exponent below 0x3c9, i.e. |x| < 2^-54: 1 + x is returned directly.
if ((ix >> 52) & 0x7ff) < 0x3c9 {
return 1. + x;
}
/* Use Cody-Waite argument reduction: since |x| < 745, we have |t| < 2^23,
thus since l2h is exactly representable on 29 bits, l2h*t is exact. */
const L2: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3d0718432a1b0e26),
f64::from_bits(0x3f262e42ff000000),
);
// Third, smallest word of the reduction constant.
const L2LL: f64 = f64::from_bits(0x3999ff0342542fc3);
// dz = x - L2.hi * t, plus the (L2.lo, L2LL) * t correction, kept as a double-double.
let dx = f_fmla(-L2.hi, t, x);
let dx_dd = DoubleDouble::quick_mult_f64(DoubleDouble::new(L2LL, L2.lo), t);
let dz = DoubleDouble::full_add_f64(dx_dd, dx);
// f = dz * P(dz), where P is the polynomial evaluated by exp_poly_dd.
let mut f = exp_poly_dd(dz);
f = DoubleDouble::quick_mult(dz, f);
if ix > 0xc086232bdd7abcd2u64 {
// x < -708.396
// Result in the subnormal range: form tz + tz * f, add the power of two whose
// exponent field is 1 - ie, then keep only the fraction bits of the rounded sum.
ix = 1i64.wrapping_sub(ie).wrapping_shl(52) as u64;
f = DoubleDouble::quick_mult(f, tz);
f = DoubleDouble::add(tz, f);
let new_f = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi);
f.lo += new_f.lo;
f.hi = to_denormal(f.hi + f.lo);
} else {
if tz.hi == 1.0 {
// Table product is exactly 1 in its high word: add it to f with exact sums.
let fhe = DoubleDouble::from_exact_add(tz.hi, f.hi);
let fhl = DoubleDouble::from_exact_add(fhe.lo, f.lo);
f.hi = fhe.hi;
f.lo = fhl.hi;
ix = f.lo.to_bits();
// If the fraction bits of f.lo are all zero, move f.lo by one unit in its bit
// pattern: +1 when f.lo and the residual fhl.lo have the same sign, -1 otherwise.
// NOTE(review): presumably this keeps the final rounding away from an exact tie.
if (ix & 0x000fffffffffffff) == 0 {
let v = fhl.lo.to_bits();
let d: i64 = (((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64).wrapping_shl(1)
as i64)
.wrapping_add(1);
ix = ix.wrapping_add(d as u64);
f.lo = f64::from_bits(ix);
}
} else {
// General case: tz + tz * f.
f = DoubleDouble::quick_mult(f, tz);
f = DoubleDouble::add(tz, f);
}
// Round the pair to one f64, then apply the binary exponent on the bit pattern.
f = DoubleDouble::from_exact_add(f.hi, f.lo);
f.hi = fast_ldexp(f.hi, ie as i32);
}
f.hi
}
/// Computes the exponential function e^x.
///
/// Special cases handled explicitly: NaN propagates, `+inf` returns `+inf`, `-inf`
/// returns 0, large positive inputs overflow and very negative inputs underflow.
/// Otherwise a fast double-double evaluation is tried first; if its rounding test
/// fails, `as_exp_accurate` recomputes the result.
///
/// Max found ULP 0.5
pub fn f_exp(x: f64) -> f64 {
let mut ix = x.to_bits();
// aix: bit pattern of |x|
let aix = ix & 0x7fffffffffffffff;
// exp(x) rounds to 1 to nearest for |x| <= 5.55112e-17
if aix <= 0x3c90000000000000u64 {
// |x| <= 5.55112e-17
return 1.0 + x;
}
if aix >= 0x40862e42fefa39f0u64 {
// |x| >= 709.783
if aix > 0x7ff0000000000000u64 {
return x + x;
} // nan
if aix == 0x7ff0000000000000u64 {
// |x| = inf
return if (ix >> 63) != 0 {
0.0 // x = -inf
} else {
x // x = inf
};
}
if (ix >> 63) == 0 {
// x >= 709.783
// Overflow: square a huge finite value; black_box hides the constant from the optimizer.
let z = std::hint::black_box(f64::from_bits(0x7fe0000000000000));
return z * z;
}
if aix >= 0x40874910d52d3052u64 {
// x <= -745.133
// Underflow: product of two small positive constants.
return f64::from_bits(0x18000000000000) * f64::from_bits(0x3c80000000000000);
}
// Negative x with 709.783 <= |x| < 745.133 falls through to the general path.
}
// Scale factor for the reduction (approximately 2^12 / ln 2).
const S: f64 = f64::from_bits(0x40b71547652b82fe);
let t = (x * S).round_finite();
let jt: i64 = unsafe {
t.to_int_unchecked::<i64>() // this is already finite here
};
// Split jt: low 6 bits index EXP_REDUCE_T1, the next 6 bits index EXP_REDUCE_T0,
// the remaining high bits form the binary exponent.
let i0: i64 = (jt >> 6) & 0x3f;
let i1 = jt & 0x3f;
let ie: i64 = jt >> 12;
let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
// tz: double-double product of the two table entries.
let tz = DoubleDouble::quick_mult(t0, t1);
const L2: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3d0718432a1b0e26),
f64::from_bits(0x3f262e42ff000000),
);
/* Use Cody-Waite argument reduction: since |x| < 745, we have |t| < 2^23,
thus since l2h is exactly representable on 29 bits, l2h*t is exact. */
let dx = f_fmla(L2.lo, t, f_fmla(-L2.hi, t, x));
let dx2 = dx * dx;
// Degree-3 polynomial p(dx) = CH[0] + CH[1]*dx + CH[2]*dx^2 + CH[3]*dx^3.
const CH: [u64; 4] = [
0x3ff0000000000000,
0x3fe0000000000000,
0x3fc55555557e54ff,
0x3fa55555553a12f4,
];
let pw0 = f_fmla(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
let pw1 = f_fmla(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
let p = f_fmla(dx2, pw0, pw1);
// f = tz.hi plus the correction tz.hi * dx * p + tz.lo.
let mut f = DoubleDouble::new(f_fmla(tz.hi * dx, p, tz.lo), tz.hi);
// Error bound used by the rounding test below.
const EPS: f64 = f64::from_bits(0x3c0833beace2b6fe);
if ix > 0xc086232bdd7abcd2u64 {
// subnormal case: x < -708.396
// Add the power of two with exponent field 1 - ie to f; after the rounding test
// only the fraction bits of the sum are kept as the subnormal result.
ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
let sums = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi);
f.hi = sums.hi;
f.lo += sums.lo;
let ub = f.hi + (f.lo + EPS);
let lb = f.hi + (f.lo - EPS);
// The bounds round differently: use the accurate path.
if ub != lb {
return as_exp_accurate(x, t, tz, ie);
}
f.hi = to_denormal(lb);
} else {
let ub = f.hi + (f.lo + EPS);
let lb = f.hi + (f.lo - EPS);
// The bounds round differently: use the accurate path.
if ub != lb {
return as_exp_accurate(x, t, tz, ie);
}
// Apply the binary exponent directly on the bit pattern.
f.hi = fast_ldexp(lb, ie as i32);
}
f.hi
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The const-context `exp` is only an approximation, so it is checked to within 1e-8.
    #[test]
    fn exp_test() {
        let at_zero = exp(0f64);
        assert!((at_zero - 1f64).abs() < 1e-8, "Invalid result {}", at_zero);

        let at_five = exp(5f64);
        assert!(
            (at_five - 148.4131591025766034211155800405522796f64).abs() < 1e-8,
            "Invalid result {}",
            at_five
        );
    }

    /// `f_exp` is checked for exact outputs, including the infinite and NaN inputs.
    #[test]
    fn f_exp_test() {
        let cases: [(f64, f64); 5] = [
            (0.000000014901161193847656, 1.0000000149011614),
            (0., 1.),
            (5f64, 148.4131591025766034211155800405522796f64),
            (f64::INFINITY, f64::INFINITY),
            (f64::NEG_INFINITY, 0.),
        ];
        for (input, expected) in cases {
            assert_eq!(f_exp(input), expected);
        }
        assert!(f_exp(f64::NAN).is_nan());
    }
}

238
vendor/pxfm/src/exponents/exp10.rs vendored Normal file
View File

@@ -0,0 +1,238 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::exponents::auxiliary::fast_ldexp;
use crate::exponents::exp::{EXP_REDUCE_T0, EXP_REDUCE_T1, to_denormal};
use crate::round_ties_even::RoundTiesEven;
use std::hint::black_box;
/// Evaluates a degree-5 polynomial in double-double arithmetic at `z` using
/// Horner's scheme.
///
/// The coefficients are stored as `(low, high)` bit pairs and decoded with
/// `DoubleDouble::from_bit_pair`; the leading coefficient `C[0]` is
/// approximately `ln(10)`. The caller multiplies the returned value by `z`
/// again, so this presumably approximates `(10^z - 1) / z` on the reduced
/// argument range.
#[inline]
fn exp10_poly_dd(z: DoubleDouble) -> DoubleDouble {
    const C: [(u64, u64); 6] = [
        (0xbcaf48ad494ea102, 0x40026bb1bbb55516),
        (0xbcae2bfab318d399, 0x40053524c73cea69),
        (0x3ca81f50779e162b, 0x4000470591de2ca4),
        (0x3c931a5cc5d3d313, 0x3ff2bd7609fd98c4),
        (0x3c8910de8c68a0c2, 0x3fe1429ffd336aa3),
        (0xbc605e703d496537, 0x3fca7ed7086882b4),
    ];
    // Start from the highest-order coefficient and fold the remaining ones
    // in from C[4] down to C[0]: acc <- acc * z + C[i].
    C[..5]
        .iter()
        .rev()
        .fold(DoubleDouble::from_bit_pair(C[5]), |acc, &coeff| {
            DoubleDouble::quick_mul_add(acc, z, DoubleDouble::from_bit_pair(coeff))
        })
}
/// Slow, higher-precision evaluation of `10^x`, used by `f_exp10` when its
/// fast path cannot decide the rounding.
///
/// The argument is reduced with the same `2^12 * log2(10)` scaling and the
/// same `EXP_REDUCE_T0` / `EXP_REDUCE_T1` tables as the fast path, but the
/// reduced argument and the polynomial are carried in double-double
/// arithmetic. Marked `#[cold]` because it is only expected to run rarely.
#[cold]
fn as_exp10_accurate(x: f64) -> f64 {
    // Three-piece split of the reduction constant (assumed to be
    // log10(2) / 2^12 - TODO confirm against the reference implementation).
    const L0: f64 = f64::from_bits(0x3f13441350800000);
    const L1: f64 = f64::from_bits(0xbd1f79fef311f12b);
    const L2: f64 = f64::from_bits(0xb9aac0b7c917826b);

    let k = (f64::from_bits(0x40ca934f0979a371) * x).round_ties_even_finite();
    // SAFETY: `k` has just been rounded to an integer, and `f_exp10` only
    // dispatches here after rejecting NaN, infinities and out-of-range `x`,
    // so the value is finite and fits in an `i64`.
    let jt: i64 = unsafe { k.to_int_unchecked::<i64>() };

    // jt = ie * 4096 + high_idx * 64 + low_idx
    let low_idx = (jt & 0x3f) as usize;
    let high_idx = ((jt >> 6) & 0x3f) as usize;
    let ie = jt >> 12;

    // Table-driven factor for the fractional part of the exponent.
    let dt = DoubleDouble::quick_mult(
        DoubleDouble::from_bit_pair(EXP_REDUCE_T0[high_idx]),
        DoubleDouble::from_bit_pair(EXP_REDUCE_T1[low_idx]),
    );

    // Reduced argument dz = x - k * (L0 + L1 + L2), kept as a double-double.
    let head = x - L0 * k;
    let tail = DoubleDouble::quick_mult_f64(DoubleDouble::new(L2, L1), k);
    let dz = DoubleDouble::full_add_f64(tail, head);

    // f = dz * P(dz)
    let mut f = DoubleDouble::quick_mult(dz, exp10_poly_dd(dz));

    if x.to_bits() >= 0xc0733a7146f72a42u64 {
        // Subnormal result: add a power of two chosen from `ie` so that the
        // sum is rounded at the subnormal bit position, then let
        // `to_denormal` turn the aligned value into the final result.
        let shift = (1u64.wrapping_sub(ie as u64)) << 52;
        f = DoubleDouble::quick_mult(f, dt);
        f = DoubleDouble::add(dt, f);
        let zt = DoubleDouble::from_exact_add(f64::from_bits(shift), f.hi);
        f.hi = zt.hi;
        f.lo += zt.lo;
        return to_denormal(f.to_f64());
    }

    let mantissa = if (jt & 0xfff) != 0 {
        // General case: dt + dt * f, renormalised, high word only.
        f = DoubleDouble::quick_mult(f, dt);
        f = DoubleDouble::add(dt, f);
        DoubleDouble::from_exact_add(f.hi, f.lo).hi
    } else {
        // Both table indices are zero; the correction is added to `dt.hi`
        // directly and the rounding of the final sum is handled by hand.
        f = DoubleDouble::from_exact_add(f.hi, f.lo);
        let zt = DoubleDouble::from_exact_add(dt.hi, f.hi);
        f.hi = zt.lo;
        f = DoubleDouble::from_exact_add(f.hi, f.lo);
        let mut bits = f.hi.to_bits();
        if bits.wrapping_shl(12) == 0 {
            // The residual's mantissa field is all zeros: nudge it using the
            // relative signs of the high and low residual words
            // (presumably to break a tie in the right direction).
            let lo_bits = f.lo.to_bits();
            let sfh: i64 = ((bits as i64) >> 63) ^ ((lo_bits as i64) >> 63);
            bits = bits.wrapping_add(((1i64 << 51) ^ sfh) as u64);
        }
        zt.hi + f64::from_bits(bits)
    };
    fast_ldexp(mantissa, ie as i32)
}
/// Computes exp10
///
/// Max found ULP 0.5
///
/// Special cases:
/// * NaN is propagated (`x + x`).
/// * `+inf` returns `+inf`, `-inf` returns `0`.
/// * `x > 308.255` overflows to `+inf`; `x < -323.607` underflows to `0`.
/// * Non-negative integers up to 22 are computed by repeated multiplication
///   by 10, so the result is exact.
/// * `|x| <= 2.41082e-17` returns `1.0 + x`.
///
/// All other inputs go through a table-driven fast path with a rounding
/// test; when the test is inconclusive the computation is redone by
/// `as_exp10_accurate`.
pub fn f_exp10(x: f64) -> f64 {
    let ix = x.to_bits();
    let aix = ix & 0x7fff_ffff_ffff_ffff;
    if aix > 0x40734413509f79feu64 {
        // |x| > 0x40734413509f79fe
        if aix > 0x7ff0000000000000u64 {
            return x + x;
        } // nan
        if aix == 0x7ff0000000000000u64 {
            return if (ix >> 63) != 0 { 0.0 } else { x };
        }
        if (ix >> 63) == 0 {
            return f64::from_bits(0x7fe0000000000000) * 2.0; // x > 308.255
        }
        if aix > 0x407439b746e36b52u64 {
            // x < -323.607
            // `black_box` keeps the product from being folded at compile
            // time, so the multiplication is actually performed at runtime.
            return black_box(f64::from_bits(0x0018000000000000))
                * black_box(f64::from_bits(0x3c80000000000000));
        }
    }
    // check x integer to avoid a spurious inexact exception
    if ix.wrapping_shl(16) == 0 && (aix >> 48) <= 0x4036 {
        let kx = x.round_ties_even_finite();
        if kx == x {
            let k = kx as i64;
            if k >= 0 {
                // k <= 22 here, and every power of ten up to 10^22 is
                // exactly representable, so each product is exact.
                let mut r = 1.0;
                for _ in 0..k {
                    r *= 10.0;
                }
                return r;
            }
        }
    }
    /* avoid spurious underflow: for |x| <= 2.41082e-17
    exp10(x) rounds to 1 to nearest */
    if aix <= 0x3c7bcb7b1526e50eu64 {
        return 1.0 + x; // |x| <= 2.41082e-17
    }
    let t = (f64::from_bits(0x40ca934f0979a371) * x).round_ties_even_finite();
    // SAFETY: NaN, infinities and |x| > 323.607 have all returned above, so
    // `t` is a finite, already-integral value far inside the `i64` range;
    // this is just a conversion.
    let jt: i64 = unsafe { t.to_int_unchecked::<i64>() };
    // jt = ie * 4096 + i0 * 64 + i1
    let i1 = jt & 0x3f;
    let i0 = (jt >> 6) & 0x3f;
    let ie = jt >> 12;
    let t00 = EXP_REDUCE_T0[i0 as usize];
    let t01 = EXP_REDUCE_T1[i1 as usize];
    let t0 = DoubleDouble::from_bit_pair(t00);
    let t1 = DoubleDouble::from_bit_pair(t01);
    let tz = DoubleDouble::quick_mult(t0, t1);
    const L0: f64 = f64::from_bits(0x3f13441350800000);
    const L1: f64 = f64::from_bits(0x3d1f79fef311f12b);
    // Two-step reduction: dx = x - t * (L0 + L1).
    let dx = f_fmla(-L1, t, f_fmla(-L0, t, x));
    let dx2 = dx * dx;
    const CH: [u64; 4] = [
        0x40026bb1bbb55516,
        0x40053524c73cea69,
        0x4000470591fd74e1,
        0x3ff2bd760a1f32a5,
    ];
    // p = CH[0] + CH[1]*dx + dx^2 * (CH[2] + CH[3]*dx)
    let p0 = f_fmla(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
    let p1 = f_fmla(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
    let p = f_fmla(dx2, p1, p0);
    let mut fh = tz.hi;
    let fx = tz.hi * dx;
    let mut fl = f_fmla(fx, p, tz.lo);
    const EPS: f64 = 1.63e-19;
    if ix < 0xc0733a7146f72a42u64 {
        // x > -307.653
        // x > -0x1.33a7146f72a42p+8
        let ub = fh + (fl + EPS);
        let lb = fh + (fl - EPS);
        if lb != ub {
            return as_exp10_accurate(x);
        }
        fh = fast_ldexp(fh + fl, ie as i32);
    } else {
        // x <= -307.653: exp10(x) < 2^-1022
        // Add a power of two chosen from `ie` so that the sum is rounded at
        // the subnormal bit position. The shifted high part MUST replace
        // `fh`: both the rounding test and `to_denormal` operate on the
        // aligned value, not on the original mantissa.
        let shift = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
        let aligned = DoubleDouble::from_exact_add(f64::from_bits(shift), fh);
        fh = aligned.hi;
        fl += aligned.lo;
        let ub = fh + (fl + EPS);
        let lb = fh + (fl - EPS);
        if lb != ub {
            return as_exp10_accurate(x);
        }
        fh = to_denormal(fh + fl);
    }
    fh
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_exp10f() {
        assert_eq!(f_exp10(-3.370739843267434), 0.00042585343701025656);
        assert_eq!(f_exp10(1.), 10.0);
        assert_eq!(f_exp10(2.), 100.0);
        assert_eq!(f_exp10(3.), 1000.0);
        assert_eq!(f_exp10(4.), 10000.0);
        assert_eq!(f_exp10(5.), 100000.0);
        assert_eq!(f_exp10(6.), 1000000.0);
        assert_eq!(f_exp10(7.), 10000000.0);
        assert_eq!(f_exp10(f64::INFINITY), f64::INFINITY);
        assert_eq!(f_exp10(f64::NEG_INFINITY), 0.);
        assert!(f_exp10(f64::NAN).is_nan());
    }
    #[test]
    fn test_exp10_subnormal_range() {
        // 10^-310.5 = sqrt(10) * 1e-311 ~ 3.1623e-311, a subnormal result
        // that exercises the x <= -307.653 branch.
        let r = f_exp10(-310.5);
        assert!(r > 3.16e-311 && r < 3.17e-311, "Invalid result {}", r);
    }
}

Some files were not shown because too many files have changed in this diff Show More