Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions


@@ -0,0 +1 @@
{"files":{"Cargo.toml":"8b29e52e034917845d1d20301c6f8b2c21cd59a4eaae6d530d6097e5822c59f9","LICENSE-APACHE":"ed7961ae78415cd92470464f12cf846d0bf73fcb8c880970d5be3b0e591713e8","LICENSE-MIT":"eaaf6f770f4e5e07ee723197abbae724b7e2152be53cc2105cc618087de7113d","README.md":"2f17925d3dbdeb6404aa5214c650916761f7c015e496581b9dc48377f0385e26","src/lib.rs":"7d4983173c4f65fb99f651a4a32d07e0232e9760e830e3b03c307f80f1c3d866","src/soft_f32/add.rs":"6a1dd0c34614f08ea5ea2080eb418411bb7b2e32425621f2ff1a9b0712989f88","src/soft_f32/cmp.rs":"6b6cb4ad27a6f57a12ce1019fba56bb1369731d84a5ef90b0c97c9f984329a8c","src/soft_f32/const_impl_trait.rs":"ddfe5bd5c4a7cc90c701805846a3990ed288b23357a92458ac2a668e86f960e4","src/soft_f32/copysign.rs":"e4310a05d82ccd64d47106fe2f6082cc137af66f75d6a1432222ecc75dabf8cb","src/soft_f32/cos.rs":"759278573251d0336f0feef20b7cf4416a39a55eb505070e4b9536450e95d1fb","src/soft_f32/div.rs":"c7cc636fc9385716093feb2f11e758b95ff508a8b0ded1b631e41484ab9fd11e","src/soft_f32/floor.rs":"b3f8fe1fc5dd933a943a923de7a32a1551b168d6ba7c7366e05f95eda3cd17e3","src/soft_f32/helpers/k_cosf.rs":"68103e3ec418131191b6229844a3df867db7d4b6bb8038d0d51b0f23caf677c9","src/soft_f32/helpers/k_sinf.rs":"6bf06380ea03516a4eff0d9c4a7b2e35549890a954635de7fe3928fa15bc7de0","src/soft_f32/helpers/mod.rs":"539b5ef3a102ec8521253942606ad19413846c701537723a943a57a888b51f98","src/soft_f32/helpers/rem_pio2f.rs":"23df431316bf70904c147f8077417a5eb05b28c5dcdf5f95aa1137c022216d0e","src/soft_f32/impl_trait.rs":"cb5a7941a4a334f96af2bf110a9d98da7714e3f91da85463486441e70ac61162","src/soft_f32/mod.rs":"a4e23a0780e2ac1ea0cf5798fc1911f580ce3b2daeb062a931225ce214e7a4f2","src/soft_f32/mul.rs":"5105e74500998ca8f98569457644ff71ea47fadb173141ec42242db58c10321b","src/soft_f32/pow.rs":"98330d6dca4f2da2e563e64a1ef5df259e96c1e1ba136caefdd18d5bcf620c64","src/soft_f32/round.rs":"71ce0a44256fb8ee85f79e7a22ce05e764df41677f1ddf2ea525e58baf6272ec","src/soft_f32/sin.rs":"43fcd1b09588fbdf1c993a7fe2b2400b01388e0da5b2fb8a8a6c7b26d4b0dc27","src/soft_f32/sqrt.rs":"b2b48001c03c3aa8da36142ccd469130130d3caceb8f5db3f5db216de4dfcb40","src/soft_f32/trunc.rs":"228dc1253fccf3b0b5ce284eb8f2b313f8d428750102c9a839a80e6e77bf7c43","src/soft_f64/add.rs":"aa4014fbf46ddfcdcefec83c38eace9875b9f3c6e200ad76d5259a59e69e6c55","src/soft_f64/cmp.rs":"bb7bb39eb390d1e2cc5511a04ce2980b44b872dc5e5d13d89e6de80417092879","src/soft_f64/const_impl_trait.rs":"69cc20e81950ab7317ed3862559a8e976b29ef419790c43c0b28ff3a11bba892","src/soft_f64/copysign.rs":"66de6694c6c7eb105f324f36690529ca7c4af3711ad58c98dc6bc9bc4a5c670a","src/soft_f64/cos.rs":"9a4fd04ae87044255a013a86a68e8ec485e7368370c55618d7b49000b81261c5","src/soft_f64/div.rs":"30decf7e28999028929afdf8a64d95906cc7dc9de77fb7b7914e5365daeacc15","src/soft_f64/floor.rs":"cc448f66045c3ec305c88e34491c6223ddfa9274363232139095b95f9d637d09","src/soft_f64/helpers/cmp.rs":"32460142a6eadc9292e3848d030d37495c9eeec2e4ae596c2fcb2bdbf3d495ce","src/soft_f64/helpers/k_cos.rs":"704b9c6ac2626ba20753759b1eb0505e341aedd8f78d2fc0b2eed3010dbcf8dc","src/soft_f64/helpers/k_sin.rs":"7fde68b4384fd1a081c867bbc3d4562222b496a20f334031270926d7430a551b","src/soft_f64/helpers/mod.rs":"31d77b5dde18a8a41ed3280e165b29c5cbfb0bb54627428170eaf1ff1fd0a26c","src/soft_f64/helpers/rem_pio2.rs":"434482facfbf9da484d594a2537537f01c1f4ae36434953f4541a285bd995d93","src/soft_f64/helpers/rem_pio2_large.rs":"e6caafc05e7def0a3bf1beaee4615a83d94a29794eb419ef8de97808380d68a9","src/soft_f64/helpers/scalbn.rs":"10e909382b85fe5d04df1ea5eac9d5f34126cc23e712f794844ba76ab01d3dcd","src/soft_f64/impl_trait.rs":
"1bc35ab88d6ccfc4b261527a151866fc5e30f9512e24eb188505a131d61b192e","src/soft_f64/mod.rs":"8fe506108771cd1afb20f86a0ac735ab7c47406d6db96d8c446b1f28e473238a","src/soft_f64/mul.rs":"86e84b69ccb5165d04399eb6d028cf3430b82db62e257d07bcd756629d3825e4","src/soft_f64/pow.rs":"aaead621890d7e18f516b0442dd32b77c52b4c3a7ac909fba0c6db28008cb016","src/soft_f64/round.rs":"f7289d66c6f6586aa77b6e08c464b073ca55cedbd55f15b777da930a03d4ae0e","src/soft_f64/sin.rs":"ad5f8c1156e8a61cee67691ec833d7f55e012d771c5f406b55a3f0b243b96813","src/soft_f64/sqrt.rs":"a94f30bee2a07cc746b012c65ff0e1db2e65d90b6c914e61a9eafc672fc22872","src/soft_f64/trunc.rs":"a3b5bca15416d128052f7759cdf29825f58e242757d67320d70150ce3fa8ecf5"},"package":"87ca1caa64ef4ed453e68bb3db612e51cf1b2f5b871337f0fcab1c8f87cc3dff"}

vendor/const_soft_float/Cargo.toml

@@ -0,0 +1,40 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2021"
name = "const_soft_float"
version = "0.1.4"
description = "Const Soft Float Point"
readme = "README.md"
keywords = [
"const",
"soft",
"float-point",
]
categories = ["no-std"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/823984418/const_soft_float"
[profile.test]
opt-level = 3
lto = "fat"
debug-assertions = true
[dependencies]
[dev-dependencies.cfg-if]
version = "1"
[features]
const_mut_refs = []
const_trait_impl = []
no_std = []

vendor/const_soft_float/LICENSE-APACHE

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2023 Kirk Nickish and https://github.com/823984418
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

vendor/const_soft_float/LICENSE-MIT

@@ -0,0 +1,25 @@
Copyright (c) 2023 Kirk Nickish and https://github.com/823984418
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

vendor/const_soft_float/README.md

@@ -0,0 +1,58 @@
# Rust float-point in constant context
Floating-point code is from `compiler_builtins = "0.1.94"` and `libm = "0.2.6"`, and has been rewritten for use in a constant context.
Fuzzing of all operations is performed against the relevant reference code to ensure correctness of the port, but please open an issue if there is any inconsistent behavior.
Exported Soft Float Types:
* `SoftF32`
* `SoftF64`
Features:
* `no_std` - Enabled by default
* `const_trait_impl` - For const operator implementations
* `const_mut_refs` - For const operator with assignment implementations
On `stable`:
```
const fn const_f32_add(a: f32, b: f32) -> f32 {
SoftF32(a).add(SoftF32(b)).to_f32()
}
```
On `nightly` with `const_trait_impl` usage:
```
const fn const_f32_add(a: f32, b: f32) -> f32 {
(SoftF32(a) + SoftF32(b)).to_f32()
}
```
On `nightly` with `const_mut_refs` usage:
```
const fn const_f32_add(a: f32, b: f32) -> f32 {
let mut x = SoftF32(a);
x += SoftF32(b);
x.to_f32()
}
```
<br>
Implemented `const` Functions:
- `from_(f32/f64)`
- `to_(f32/f64)`
- `from_bits`
- `to_bits`
- `add`
- `sub`
- `mul`
- `div`
- `cmp`
- `neg`
- `sqrt`
- `powi`
- `copysign`
- `trunc`
- `round`
- `floor`
- `sin`
- `cos`
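For example, the listed functions compose in an ordinary `const fn` on stable Rust; a minimal sketch (the `hypot` helper below is illustrative, not part of the crate) that evaluates a hypotenuse at compile time:
```
use const_soft_float::soft_f32::SoftF32;

// sqrt(a*a + b*b), computed entirely with the const soft-float routines.
const fn hypot(a: f32, b: f32) -> f32 {
    SoftF32(a)
        .mul(SoftF32(a))
        .add(SoftF32(b).mul(SoftF32(b)))
        .sqrt()
        .to_f32()
}

const HYP: f32 = hypot(3.0, 4.0); // == 5.0
```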

vendor/const_soft_float/src/lib.rs

@@ -0,0 +1,159 @@
//! # Rust float-point in constant context
//!
//! Features:
//! * `no_std`
//! * `const_trait_impl`
//! * `const_mut_refs`
//!
//! Works on `stable`:
//! ```
//! # use const_soft_float::soft_f32::SoftF32;
//! const fn const_f32_add(a: f32, b: f32) -> f32 {
//! SoftF32(a).add(SoftF32(b)).to_f32()
//! }
//! ```
//!
//!
//! with `const_trait_impl` usage (requires `nightly`):
//! ```
//! # cfg_if::cfg_if! {
//! # if #[cfg(nightly)] {
//! # #![feature(const_trait_impl)]
//! # use const_soft_float::soft_f32::SoftF32;
//! const fn const_f32_add(a: f32, b: f32) -> f32 {
//! (SoftF32(a) + SoftF32(b)).to_f32()
//! }
//! # }
//! # }
//! ```
//!
//! with `const_mut_refs` usage (requires `nightly`):
//! ```
//! # cfg_if::cfg_if! {
//! # if #[cfg(nightly)] {
//! # #![feature(const_trait_impl)]
//! # #![feature(const_mut_refs)]
//! # use const_soft_float::soft_f32::SoftF32;
//! const fn const_f32_add(a: f32, b: f32) -> f32 {
//! let mut x = SoftF32(a);
//! x += SoftF32(b);
//! x.to_f32()
//! }
//! # }
//! # }
//! ```
//!
//!
#![cfg_attr(feature = "no_std", no_std)]
#![cfg_attr(feature = "const_trait_impl", feature(const_trait_impl))]
#![cfg_attr(feature = "const_mut_refs", feature(const_mut_refs))]
pub mod soft_f32;
pub mod soft_f64;
const fn abs_diff(a: i32, b: i32) -> u32 {
a.wrapping_sub(b).wrapping_abs() as u32
}
#[cfg(test)]
mod tests {
use crate::soft_f32::SoftF32;
use crate::soft_f64::SoftF64;
const RANGE: core::ops::Range<i32> = -1000..1000;
const F32_FACTOR: f32 = 10.0;
const F64_FACTOR: f64 = 1000.0;
#[test]
fn f32_add() {
for a in RANGE {
let a = a as f32 * F32_FACTOR;
for b in RANGE {
let b = b as f32 * F32_FACTOR;
assert_eq!(SoftF32(a).add(SoftF32(b)).0, a + b);
}
}
}
#[test]
fn f32_sub() {
for a in RANGE {
let a = a as f32 * F32_FACTOR;
for b in RANGE {
let b = b as f32 * F32_FACTOR;
assert_eq!(SoftF32(a).sub(SoftF32(b)).0, a - b);
}
}
}
#[test]
fn f32_mul() {
for a in RANGE {
let a = a as f32 * F32_FACTOR;
for b in RANGE {
let b = b as f32 * F32_FACTOR;
assert_eq!(SoftF32(a).mul(SoftF32(b)).0, a * b);
}
}
}
#[test]
fn f32_div() {
for a in RANGE {
let a = a as f32 * F32_FACTOR;
for b in RANGE {
let b = b as f32 * F32_FACTOR;
let x = SoftF32(a).div(SoftF32(b)).0;
let y = a / b;
assert!(x == y || x.is_nan() && y.is_nan())
}
}
}
#[test]
fn f64_add() {
for a in RANGE {
let a = a as f64 * F64_FACTOR;
for b in RANGE {
let b = b as f64 * F64_FACTOR;
assert_eq!(SoftF64(a).add(SoftF64(b)).0, a + b);
}
}
}
#[test]
fn f64_sub() {
for a in RANGE {
let a = a as f64 * F64_FACTOR;
for b in RANGE {
let b = b as f64 * F64_FACTOR;
assert_eq!(SoftF64(a).sub(SoftF64(b)).0, a - b);
}
}
}
#[test]
fn f64_mul() {
for a in RANGE {
let a = a as f64 * F64_FACTOR;
for b in RANGE {
let b = b as f64 * F64_FACTOR;
assert_eq!(SoftF64(a).mul(SoftF64(b)).0, a * b);
}
}
}
#[test]
fn f64_div() {
for a in RANGE {
let a = a as f64 * F64_FACTOR;
for b in RANGE {
let b = b as f64 * F64_FACTOR;
let x = SoftF64(a).div(SoftF64(b)).0;
let y = a / b;
assert!(x == y || x.is_nan() && y.is_nan())
}
}
}
}


@@ -0,0 +1,193 @@
use crate::soft_f32::SoftF32;
type F = SoftF32;
type FInt = u32;
pub(crate) const fn add(a: F, b: F) -> F {
let one: FInt = 1;
let zero: FInt = 0;
let bits = F::BITS as FInt;
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
let mut a_rep = a.repr();
let mut b_rep = b.repr();
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// Detect if a or b is zero, infinity, or NaN.
if a_abs.wrapping_sub(one) >= inf_rep - one || b_abs.wrapping_sub(one) >= inf_rep - one {
// NaN + anything = qNaN
if a_abs > inf_rep {
return F::from_repr(a_abs | quiet_bit);
}
// anything + NaN = qNaN
if b_abs > inf_rep {
return F::from_repr(b_abs | quiet_bit);
}
if a_abs == inf_rep {
// +/-infinity + -/+infinity = qNaN
if (a.repr() ^ b.repr()) == sign_bit {
return F::from_repr(qnan_rep);
} else {
// +/-infinity + anything remaining = +/- infinity
return a;
}
}
// anything remaining + +/-infinity = +/-infinity
if b_abs == inf_rep {
return b;
}
// zero + anything = anything
if a_abs == 0 {
// but we need to get the sign right for zero + zero
if b_abs == 0 {
return F::from_repr(a.repr() & b.repr());
} else {
return b;
}
}
// anything + zero = anything
if b_abs == 0 {
return a;
}
}
// Swap a and b if necessary so that a has the larger absolute value.
if b_abs > a_abs {
// Don't use mem::swap because it may generate references to memcpy in unoptimized code.
let tmp = a_rep;
a_rep = b_rep;
b_rep = tmp;
}
// Extract the exponent and significand from the (possibly swapped) a and b.
let mut a_exponent: i32 = ((a_rep & exponent_mask) >> significand_bits) as _;
let mut b_exponent: i32 = ((b_rep & exponent_mask) >> significand_bits) as _;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
// normalize any denormals, and adjust the exponent accordingly.
if a_exponent == 0 {
let (exponent, significand) = F::normalize(a_significand);
a_exponent = exponent;
a_significand = significand;
}
if b_exponent == 0 {
let (exponent, significand) = F::normalize(b_significand);
b_exponent = exponent;
b_significand = significand;
}
// The sign of the result is the sign of the larger operand, a. If they
// have opposite signs, we are performing a subtraction; otherwise addition.
let result_sign = a_rep & sign_bit;
let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero;
// Shift the significands to give us round, guard and sticky, and or in the
// implicit significand bit. (If we fell through from the denormal path it
// was already set by normalize(), but setting it twice won't hurt
// anything.)
a_significand = (a_significand | implicit_bit) << 3;
b_significand = (b_significand | implicit_bit) << 3;
// Shift the significand of b by the difference in exponents, with a sticky
// bottom bit to get rounding correct.
let align = a_exponent.wrapping_sub(b_exponent) as _;
if align != 0 {
if align < bits {
let sticky = (b_significand << bits.wrapping_sub(align) != 0) as FInt;
b_significand = (b_significand >> align) | sticky;
} else {
b_significand = one; // sticky; b is known to be non-zero.
}
}
if subtraction {
a_significand = a_significand.wrapping_sub(b_significand);
// If a == -b, return +zero.
if a_significand == 0 {
return F::from_repr(0);
}
// If partial cancellation occurred, we need to left-shift the result
// and adjust the exponent:
if a_significand < implicit_bit << 3 {
let shift =
a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32;
a_significand <<= shift;
a_exponent -= shift;
}
} else {
// addition
a_significand += b_significand;
// If the addition carried up, we need to right-shift the result and
// adjust the exponent:
if a_significand & implicit_bit << 4 != 0 {
let sticky = (a_significand & one != 0) as FInt;
a_significand = a_significand >> 1 | sticky;
a_exponent += 1;
}
}
// If we have overflowed the type, return +/- infinity:
if a_exponent >= max_exponent as i32 {
return F::from_repr(inf_rep | result_sign);
}
if a_exponent <= 0 {
// Result is denormal before rounding; the exponent is zero and we
// need to shift the significand.
let shift = (1 - a_exponent) as _;
let sticky = ((a_significand << bits.wrapping_sub(shift)) != 0) as FInt;
a_significand = a_significand >> shift | sticky;
a_exponent = 0;
}
// Low three bits are round, guard, and sticky.
let a_significand_i32: i32 = a_significand as _;
let round_guard_sticky: i32 = a_significand_i32 & 0x7;
// Shift the significand into place, and mask off the implicit bit.
let mut result = a_significand >> 3 & significand_mask;
// Insert the exponent and sign.
result |= (a_exponent as FInt) << significand_bits;
result |= result_sign;
// Final rounding. The result may overflow to infinity, but that is the
// correct result in that case.
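// Round to nearest, ties to even: with (round, guard, sticky) in the low three
// bits, a value > 0b100 means the discarded part exceeds half an ULP, so round up;
// exactly 0b100 is a tie, and adding `result & one` rounds up only when the kept
// significand is odd, leaving an even result.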
if round_guard_sticky > 0x4 {
result += one;
}
if round_guard_sticky == 0x4 {
result += result & one;
}
F::from_repr(result)
}
#[cfg(test)]
mod test {
use crate::soft_f32::SoftF32;
#[test]
fn sanity_check() {
assert_eq!(SoftF32(1.0).add(SoftF32(1.0)).0, 2.0)
}
}


@@ -0,0 +1,63 @@
use crate::soft_f32::SoftF32;
use core::cmp::Ordering;
type F = SoftF32;
type FInt = u32;
type FSignedInt = i32;
const UNORDERED: Option<Ordering> = None;
const EQUAL: Option<Ordering> = Some(Ordering::Equal);
const GREATER: Option<Ordering> = Some(Ordering::Greater);
const LESS: Option<Ordering> = Some(Ordering::Less);
pub(crate) const fn cmp(a: F, b: F) -> Option<Ordering> {
let one: FInt = 1;
let zero: FInt = 0;
let szero: FSignedInt = 0;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let a_rep = a.repr();
let b_rep = b.repr();
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// If either a or b is NaN, they are unordered.
if a_abs > inf_rep || b_abs > inf_rep {
return UNORDERED;
}
// If a and b are both zeros, they are equal.
if a_abs | b_abs == zero {
return EQUAL;
}
let a_srep = a.signed_repr();
let b_srep = b.signed_repr();
// If at least one of a and b is positive, we get the same result comparing
// a and b as signed integers as we would with a floating-point compare.
if a_srep & b_srep >= szero {
if a_srep < b_srep {
LESS
} else if a_srep == b_srep {
EQUAL
} else {
GREATER
}
// Otherwise, both are negative, so we need to flip the sense of the
// comparison to get the correct result. (This assumes a twos- or ones-
// complement integer representation; if integers are represented in a
// sign-magnitude representation, then this flip is incorrect).
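// Example: (-1.0f32).to_bits() as i32 == -1_082_130_432 while
// (-2.0f32).to_bits() as i32 == -1_073_741_824, so -2.0 has the larger signed
// representation even though it is the smaller float, hence the reversed tests below.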
} else if a_srep > b_srep {
LESS
} else if a_srep == b_srep {
EQUAL
} else {
GREATER
}
}


@@ -0,0 +1,84 @@
use crate::soft_f32::SoftF32;
type F = SoftF32;
impl const From<f32> for F {
fn from(value: f32) -> Self {
F::from_f32(value)
}
}
impl const PartialEq<Self> for F {
fn eq(&self, other: &Self) -> bool {
match self.cmp(*other) {
Some(core::cmp::Ordering::Equal) => true,
_ => false,
}
}
}
impl const PartialOrd for F {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
self.cmp(*other)
}
}
impl const core::ops::Add for F {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
F::add(self, rhs)
}
}
impl const core::ops::Sub for F {
type Output = Self;
fn sub(self, rhs: Self) -> Self::Output {
F::sub(self, rhs)
}
}
impl const core::ops::Mul for F {
type Output = Self;
fn mul(self, rhs: Self) -> Self::Output {
F::mul(self, rhs)
}
}
impl const core::ops::Div for F {
type Output = Self;
fn div(self, rhs: Self) -> Self::Output {
F::div(self, rhs)
}
}
#[cfg(feature = "const_mut_refs")]
impl const core::ops::AddAssign for F {
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
#[cfg(not(feature = "const_mut_refs"))]
impl core::ops::AddAssign for F {
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
#[cfg(feature = "const_mut_refs")]
impl const core::ops::SubAssign for F {
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}
#[cfg(not(feature = "const_mut_refs"))]
impl core::ops::SubAssign for F {
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}


@@ -0,0 +1,23 @@
use super::SoftF32;
/// Sign of Y, magnitude of X (SoftF32)
///
/// Constructs a number with the magnitude (absolute value) of its
/// first argument, `x`, and the sign of its second argument, `y`.
pub(crate) const fn copysign(x: SoftF32, y: SoftF32) -> SoftF32 {
let mut ux = x.to_bits();
let uy = y.to_bits();
ux &= 0x7fffffff;
ux |= uy & 0x80000000;
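// e.g. x = 1.0 (bits 0x3f80_0000), y = -0.0 (bits 0x8000_0000): the magnitude
// bits of x are kept and the sign bit of y is copied in, giving 0xbf80_0000 == -1.0,
// as exercised by the test below.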
SoftF32::from_bits(ux)
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn sanity_check() {
assert_eq!(SoftF32(1.0).copysign(SoftF32(-0.0)).0, -1.0)
}
}


@@ -0,0 +1,96 @@
/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
use core::f64::consts::FRAC_PI_2;
use crate::soft_f64::SoftF64;
use super::{
helpers::{k_cosf, k_sinf, rem_pio2f},
SoftF32,
};
/* Small multiples of pi/2 rounded to double precision. */
const C1_PIO2: SoftF64 = SoftF64(1.).mul(SoftF64(FRAC_PI_2)); /* 0x3FF921FB, 0x54442D18 */
const C2_PIO2: SoftF64 = SoftF64(2.).mul(SoftF64(FRAC_PI_2)); /* 0x400921FB, 0x54442D18 */
const C3_PIO2: SoftF64 = SoftF64(3.).mul(SoftF64(FRAC_PI_2)); /* 0x4012D97C, 0x7F3321D2 */
const C4_PIO2: SoftF64 = SoftF64(4.).mul(SoftF64(FRAC_PI_2)); /* 0x401921FB, 0x54442D18 */
pub const fn cos(x: SoftF32) -> SoftF32 {
let x64 = SoftF64(x.0 as f64);
let x1p120 = SoftF32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
let mut ix = x.to_bits();
let sign = (ix >> 31) != 0;
ix &= 0x7fffffff;
if ix <= 0x3f490fda {
/* |x| ~<= pi/4 */
if ix < 0x39800000 {
/* |x| < 2**-12 */
/* raise inexact if x != 0 */
let _ = x.add(x1p120);
return SoftF32(1.0);
}
return k_cosf(x64);
}
if ix <= 0x407b53d1 {
/* |x| ~<= 5*pi/4 */
if ix > 0x4016cbe3 {
/* |x| ~> 3*pi/4 */
return k_cosf(if sign {
x64.add(C2_PIO2)
} else {
x64.sub(C2_PIO2)
})
.neg();
} else if sign {
return k_sinf(x64.add(C1_PIO2));
} else {
return k_sinf(C1_PIO2.sub(x64));
}
}
if ix <= 0x40e231d5 {
/* |x| ~<= 9*pi/4 */
if ix > 0x40afeddf {
/* |x| ~> 7*pi/4 */
return k_cosf(if sign {
x64.add(C4_PIO2)
} else {
x64.sub(C4_PIO2)
});
} else if sign {
return k_sinf(x64.neg().sub(C3_PIO2));
} else {
return k_sinf(x64.sub(C3_PIO2));
}
}
/* cos(Inf or NaN) is NaN */
if ix >= 0x7f800000 {
return x.sub(x);
}
/* general argument reduction needed */
let (n, y) = rem_pio2f(x);
match n & 3 {
0 => k_cosf(y),
1 => k_sinf(y.neg()),
2 => k_cosf(y).neg(),
_ => k_sinf(y),
}
}


@@ -0,0 +1,444 @@
use crate::soft_f32::{u32_widen_mul, SoftF32};
type F = SoftF32;
type FInt = u32;
pub(crate) const fn div(a: F, b: F) -> F {
const NUMBER_OF_HALF_ITERATIONS: usize = 0;
const NUMBER_OF_FULL_ITERATIONS: usize = 3;
const USE_NATIVE_FULL_ITERATIONS: bool = true;
let one = 1;
let zero = 0;
let hw = F::BITS / 2;
let lo_mask = u32::MAX >> hw;
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let exponent_bias = F::EXPONENT_BIAS;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
#[inline(always)]
const fn negate_u32(a: u32) -> u32 {
(<i32>::wrapping_neg(a as i32)) as u32
}
let a_rep = a.repr();
let b_rep = b.repr();
let a_exponent = (a_rep >> significand_bits) & max_exponent;
let b_exponent = (b_rep >> significand_bits) & max_exponent;
let quotient_sign = (a_rep ^ b_rep) & sign_bit;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
let mut scale = 0;
// Detect if a or b is zero, denormal, infinity, or NaN.
if a_exponent.wrapping_sub(one) >= (max_exponent - 1)
|| b_exponent.wrapping_sub(one) >= (max_exponent - 1)
{
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// NaN / anything = qNaN
if a_abs > inf_rep {
return F::from_repr(a_rep | quiet_bit);
}
// anything / NaN = qNaN
if b_abs > inf_rep {
return F::from_repr(b_rep | quiet_bit);
}
if a_abs == inf_rep {
if b_abs == inf_rep {
// infinity / infinity = NaN
return F::from_repr(qnan_rep);
} else {
// infinity / anything else = +/- infinity
return F::from_repr(a_abs | quotient_sign);
}
}
// anything else / infinity = +/- 0
if b_abs == inf_rep {
return F::from_repr(quotient_sign);
}
if a_abs == zero {
if b_abs == zero {
// zero / zero = NaN
return F::from_repr(qnan_rep);
} else {
// zero / anything else = +/- zero
return F::from_repr(quotient_sign);
}
}
// anything else / zero = +/- infinity
if b_abs == zero {
return F::from_repr(inf_rep | quotient_sign);
}
// one or both of a or b is denormal, the other (if applicable) is a
// normal number. Renormalize one or both of a and b, and set scale to
// include the necessary exponent adjustment.
if a_abs < implicit_bit {
let (exponent, significand) = F::normalize(a_significand);
scale += exponent;
a_significand = significand;
}
if b_abs < implicit_bit {
let (exponent, significand) = F::normalize(b_significand);
scale -= exponent;
b_significand = significand;
}
}
// Set the implicit significand bit. If we fell through from the
// denormal path it was already set by normalize( ), but setting it twice
// won't hurt anything.
a_significand |= implicit_bit;
b_significand |= implicit_bit;
let written_exponent: i32 = (a_exponent
.wrapping_sub(b_exponent)
.wrapping_add(scale as u32))
.wrapping_add(exponent_bias) as i32;
let b_uq1 = b_significand << (F::BITS - significand_bits - 1);
// Align the significand of b as a UQ1.(n-1) fixed-point number in the range
// [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax
// polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2.
// The max error for this approximation is achieved at endpoints, so
// abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289...,
// which is about 4.5 bits.
// The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571...
// Then, refine the reciprocal estimate using a quadratically converging
// Newton-Raphson iteration:
// x_{n+1} = x_n * (2 - x_n * b)
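// For example, with b = 1.5: x0 = 3/4 + 1/sqrt(2) - b/2 ~= 0.7071, then
// x1 = x0 * (2 - 1.5 * x0) ~= 0.6642, already close to 1/b ~= 0.6667; each further
// step roughly doubles the number of correct bits.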
//
// Let b be the original divisor considered "in infinite precision" and
// obtained from IEEE754 representation of function argument (with the
// implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in
// UQ1.(W-1).
//
// Let b_hw be an infinitely precise number obtained from the highest (HW-1)
// bits of divisor significand (with the implicit bit set). Corresponds to
// half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated**
// version of b_UQ1.
//
// Let e_n := x_n - 1/b_hw
// E_n := x_n - 1/b
// abs(E_n) <= abs(e_n) + (1/b_hw - 1/b)
// = abs(e_n) + (b - b_hw) / (b*b_hw)
// <= abs(e_n) + 2 * 2^-HW
// rep_t-sized iterations may be slower than the corresponding half-width
// variant depending on the hardware and whether single/double/quad precision
// is selected.
// NB: Using half-width iterations increases computation errors due to
// rounding, so error estimations have to be computed taking the selected
// mode into account!
#[allow(clippy::absurd_extreme_comparisons)]
let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
// Starting with (n-1) half-width iterations
let b_uq1_hw: u16 = (b_significand >> (significand_bits + 1 - hw)) as u16;
// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
// with W0 being either 16 or 32 and W0 <= HW.
// That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
// HW is at least 32. Shifting into the highest bits if needed.
let c_hw = (0x7504_u32 as u16).wrapping_shl(hw.wrapping_sub(32));
// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
// so x0 fits to UQ0.HW without wrapping.
let x_uq0_hw: u16 = {
let mut x_uq0_hw: u16 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
// An e_0 error is comprised of errors due to
// * x0 being an inherently imprecise first approximation of 1/b_hw
// * C_hw being some (irrational) number **truncated** to W0 bits
// Please note that e_0 is calculated against the infinitely precise
// reciprocal of b_hw (that is, **truncated** version of b).
//
// e_0 <= 3/4 - 1/sqrt(2) + 2^-W0
// By construction, 1 <= b < 2
// f(x) = x * (2 - b*x) = 2*x - b*x^2
// f'(x) = 2 * (1 - b*x)
//
// On the [0, 1] interval, f(0) = 0,
// then it increases until f(1/b) = 1 / b, maximum on (0, 1),
// then it decreases to f(1) = 2 - b
//
// Let g(x) = x - f(x) = b*x^2 - x.
// On (0, 1/b), g(x) < 0 <=> f(x) > x
// On (1/b, 1], g(x) > 0 <=> f(x) < x
//
// For half-width iterations, b_hw is used instead of b.
#[allow(clippy::reversed_empty_ranges)]
let mut idx = 0;
while idx < NUMBER_OF_HALF_ITERATIONS {
// corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp
// of corr_UQ1_hw.
// "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1).
// On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
// expected to be strictly positive because b_UQ1_hw has its highest bit set
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
let corr_uq1_hw: u16 = 0_u32
.wrapping_sub((x_uq0_hw as u32).wrapping_mul(b_uq1_hw as u32) >> hw)
as u16;
// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
// obtaining an UQ1.(HW-1) number and proving its highest bit could be
// considered to be 0 to be able to represent it in UQ0.HW.
// From the above analysis of f(x), if corr_UQ1_hw would be represented
// without any intermediate loss of precision (that is, in twice_rep_t)
// x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly
// less otherwise. On the other hand, to obtain [1.]000..., one has to pass
// 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due
// to 1.0 being not representable as UQ0.HW).
// The fact corr_UQ1_hw was virtually round up (due to result of
// multiplication being **first** truncated, then negated - to improve
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
x_uq0_hw = ((x_uq0_hw as u32).wrapping_mul(corr_uq1_hw as u32) >> (hw - 1)) as u16;
// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
// representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
// any number of iterations, so just subtract 2 from the reciprocal
// approximation after last iteration.
// In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW:
// corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1
// = 1 - e_n * b_hw + 2*eps1
// x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2
// = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2
// = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2
// e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2
// = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw
// \------ >0 -------/ \-- >0 ---/
// abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U)
idx += 1;
}
// For initial half-width iterations, U = 2^-HW
// Let abs(e_n) <= u_n * U,
// then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U)
// u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2)
// Account for possible overflow (see above). For an overflow to occur for the
// first time, for "ideal" corr_UQ1_hw (that is, without intermediate
// truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum
// value representable in UQ0.HW or less by 1. This means that 1/b_hw must not
// be below that value (see g(x) above), so it is safe to decrement just
// once after the final iteration. On the other hand, an effective value of
// divisor changes after this point (from b_hw to b), so adjust here.
x_uq0_hw.wrapping_sub(1_u16)
};
// Error estimations for full-precision iterations are calculated just
// as above, but with U := 2^-W and taking extra decrementing into account.
// We need at least one such iteration.
// Simulating operations on a twice_rep_t to perform a single final full-width
// iteration. Using ad-hoc multiplication implementations to take advantage
// of particular structure of operands.
let blo: u32 = b_uq1 & lo_mask;
// x_UQ0 = x_UQ0_hw * 2^HW - 1
// x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
//
// <--- higher half ---><--- lower half --->
// [x_UQ0_hw * b_UQ1_hw]
// + [ x_UQ0_hw * blo ]
// - [ b_UQ1 ]
// = [ result ][.... discarded ...]
let corr_uq1 = negate_u32(
(x_uq0_hw as u32) * (b_uq1_hw as u32) + (((x_uq0_hw as u32) * (blo)) >> hw) - 1,
); // account for *possible* carry
let lo_corr = corr_uq1 & lo_mask;
let hi_corr = corr_uq1 >> hw;
// x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
let mut x_uq0 = (((x_uq0_hw as u32) * hi_corr) << 1)
.wrapping_add(((x_uq0_hw as u32) * lo_corr) >> (hw - 1))
.wrapping_sub(2);
// 1 to account for the highest bit of corr_UQ1 can be 1
// 1 to account for possible carry
// Just like the case of half-width iterations but with possibility
// of overflowing by one extra Ulp of x_UQ0.
x_uq0 -= one;
// ... and then traditional fixup by 2 should work
// On error estimation:
// abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW
// + (2^-HW + 2^-W))
// abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW
// Then like for the half-width iterations:
// With 0 <= eps1, eps2 < 2^-W
// E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b
// abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ]
// abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ]
x_uq0
} else {
// C is (3/4 + 1/sqrt(2)) - 1 truncated to 32 fractional bits as UQ0.n
let c = 0x7504F333_u32 << (F::BITS - 32);
let x_uq0 = c.wrapping_sub(b_uq1);
// E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-32
x_uq0
};
let mut x_uq0 = if USE_NATIVE_FULL_ITERATIONS {
let mut idx = 0;
while idx < NUMBER_OF_FULL_ITERATIONS {
let corr_uq1: u32 = 0_u64
.wrapping_sub(((x_uq0 as u64).wrapping_mul(b_uq1 as u64)).wrapping_shr(F::BITS))
as u32;
x_uq0 = (((x_uq0 as u64) * (corr_uq1 as u64)) >> (F::BITS - 1)) as u32;
idx += 1;
}
x_uq0
} else {
// not using native full iterations
x_uq0
};
// Finally, account for possible overflow, as explained above.
x_uq0 = x_uq0.wrapping_sub(2);
// u_n for different precisions (with N-1 half-width iterations):
// W0 is the precision of C
// u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW
// Estimated with bc:
// define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; }
// define half2(un) { return 2.0 * un / 2.0^hw + 2.0; }
// define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; }
// define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; }
// | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1)
// u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797
// u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440
// u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317
// u_3 | < 7.31 | | < 7.31 | < 27054456580
// u_4 | | | | < 80.4
// Final (U_N) | same as u_3 | < 72 | < 218 | < 13920
// Add 2 to U_N due to final decrement.
let reciprocal_precision: FInt = 10;
// Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W
let x_uq0 = x_uq0 - reciprocal_precision;
// Now 1/b - (2*P) * 2^-W < x < 1/b
// FIXME Is x_UQ0 still >= 0.5?
let mut quotient: FInt = u32_widen_mul(x_uq0, a_significand << 1).1;
// Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).
// quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
// adjust it to be in [1.0, 2.0) as UQ1.SB.
let (mut residual, written_exponent) = if quotient < (implicit_bit << 1) {
// Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB,
// effectively doubling its value as well as its error estimation.
let residual_lo = (a_significand << (significand_bits + 1))
.wrapping_sub(quotient.wrapping_mul(b_significand));
a_significand <<= 1;
(residual_lo, written_exponent.wrapping_sub(1))
} else {
// Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it
// to UQ1.SB by right shifting by 1. Least significant bit is omitted.
quotient >>= 1;
let residual_lo =
(a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand));
(residual_lo, written_exponent)
};
//drop mutability
let quotient = quotient;
// NB: residualLo is calculated above for the normal result case.
// It is re-computed on denormal path that is expected to be not so
// performance-sensitive.
// Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB
// Each NextAfter() increments the floating point value by at least 2^-SB
// (more, if exponent was incremented).
// Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint):
// q
// | | * | | | | |
// <---> 2^t
// | | | | | * | |
// q
// To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB.
// (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB
// (8*P) * 2^-W < 0.5 * 2^-SB
// P < 2^(W-4-SB)
// Generally, for at most R NextAfter() to be enough,
// P < (2*R - 1) * 2^(W-4-SB)
// For f32 (0+3): 10 < 32 (OK)
// For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required
// For f64: 220 < 256 (OK)
// For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required)
// If we have overflowed the exponent, return infinity
if written_exponent >= max_exponent as i32 {
return F::from_repr(inf_rep | quotient_sign);
}
// Now, quotient <= the correctly-rounded result
// and may need taking NextAfter() up to 3 times (see error estimates above)
// r = a - b * q
let abs_result = if written_exponent > 0 {
let mut ret = quotient & significand_mask;
ret |= ((written_exponent as u32) << significand_bits) as u32;
residual <<= 1;
ret
} else {
if (significand_bits as i32 + written_exponent) < 0 {
return F::from_repr(quotient_sign);
}
let ret = quotient.wrapping_shr(negate_u32(written_exponent as u32) + 1);
residual = (a_significand
.wrapping_shl(significand_bits.wrapping_add(written_exponent as u32))
as u32)
.wrapping_sub((ret.wrapping_mul(b_significand)) << 1);
ret
};
// Round
let abs_result = {
residual += abs_result & one; // tie to even
// The above line conditionally turns the below LT comparison into LTE
if residual > b_significand {
abs_result + one
} else {
abs_result
}
};
F::from_repr(abs_result | quotient_sign)
}
#[cfg(test)]
mod test {
use crate::soft_f32::SoftF32;
#[test]
fn sanity_check() {
assert_eq!(SoftF32(10.0).div(SoftF32(5.0)).0, 2.0)
}
}


@@ -0,0 +1,57 @@
use super::SoftF32;
/// Floor (SoftF32)
///
/// Finds the nearest integer less than or equal to `x`.
pub const fn floor(x: SoftF32) -> SoftF32 {
let mut ui = x.to_bits();
let e = (((ui >> 23) as i32) & 0xff) - 0x7f;
if e >= 23 {
return x;
}
if e >= 0 {
let m: u32 = 0x007fffff >> e;
if (ui & m) == 0 {
return x;
}
// force_eval!(x + SoftF32::from_bits(0x7b800000));
if ui >> 31 != 0 {
ui += m;
}
ui &= !m;
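// e.g. x = 2.9 (unbiased exponent 1): m masks the 22 fraction bits below the
// ones place, and clearing them (sign bit 0, so no `ui += m`) yields 2.0.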
} else {
// force_eval!(x + SoftF32::from_bits(0x7b800000));
if ui >> 31 == 0 {
ui = 0;
} else if ui << 1 != 0 {
return SoftF32(-1.0);
}
}
SoftF32::from_bits(ui)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn sanity_check() {
assert_eq!(floor(SoftF32(0.5)).0, 0.0);
assert_eq!(floor(SoftF32(1.1)).0, 1.0);
assert_eq!(floor(SoftF32(2.9)).0, 2.0);
}
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor
#[test]
fn spec_tests() {
// Not Asserted: that the current rounding mode has no effect.
assert!(floor(SoftF32(f32::NAN)).0.is_nan());
for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY]
.iter()
.copied()
{
assert_eq!(SoftF32(f).floor().0, f);
}
}
}


@@ -0,0 +1,31 @@
/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Debugged and optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
use crate::{soft_f32::SoftF32, soft_f64::SoftF64};
/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */
const C0: SoftF64 = SoftF64(-0.499999997251031003120); /* -0x1ffffffd0c5e81.0p-54 */
const C1: SoftF64 = SoftF64(0.0416666233237390631894); /* 0x155553e1053a42.0p-57 */
const C2: SoftF64 = SoftF64(-0.00138867637746099294692); /* -0x16c087e80f1e27.0p-62 */
const C3: SoftF64 = SoftF64(0.0000243904487962774090654); /* 0x199342e0ee5069.0p-68 */
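// With z = x*x and w = z*z, the kernel below evaluates the polynomial
// 1 + C0*z + C1*z^2 + C2*z^3 + C3*z^4 (degree 8 in x) in double precision
// and truncates the result to f32.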
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub(crate) const fn k_cosf(x: SoftF64) -> SoftF32 {
let z = x.mul(x);
let w = z.mul(z);
let r = C2.add(z.mul(C3));
SoftF32((((SoftF64(1.0).add(z.mul(C0))).add(w.mul(C1))).add((w.mul(z)).mul(r))).0 as f32)
}


@@ -0,0 +1,14 @@
use crate::{soft_f32::SoftF32, soft_f64::SoftF64};
/// https://github.com/rust-lang/libm/blob/4c8a973741c014b11ce7f1477693a3e5d4ef9609/src/math/k_sinf.rs
pub(crate) const fn k_sinf(x: SoftF64) -> SoftF32 {
const S1: SoftF64 = SoftF64(-0.166666666416265235595); /* -0x15555554cbac77.0p-55 */
const S2: SoftF64 = SoftF64(0.0083333293858894631756); /* 0x111110896efbb2.0p-59 */
const S3: SoftF64 = SoftF64(-0.000198393348360966317347); /* -0x1a00f9e2cae774.0p-65 */
const S4: SoftF64 = SoftF64(0.0000027183114939898219064); /* 0x16cd878c3b46a7.0p-71 */
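// With z = x*x, w = z*z and s = z*x, the expression below evaluates the odd
// polynomial x + S1*x^3 + S2*x^5 + S3*x^7 + S4*x^9 in double precision and
// truncates the result to f32.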
let z = x.mul(x);
let w = z.mul(z);
let r = S3.add(z.mul(S4));
let s = z.mul(x);
SoftF32((x.add(s.mul(S1.add(z.mul(S2))))).add(s.mul(w).mul(r)).0 as f32)
}


@@ -0,0 +1,8 @@
mod k_cosf;
mod k_sinf;
mod rem_pio2f;
pub(crate) use k_cosf::k_cosf;
pub(crate) use k_sinf::k_sinf;
pub(crate) use rem_pio2f::rem_pio2f;


@@ -0,0 +1,65 @@
/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2f.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Debugged and optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
use crate::{
soft_f32::SoftF32,
soft_f64::{helpers::rem_pio2_large, SoftF64},
};
const TOINT: SoftF64 = SoftF64(1.5).div(SoftF64(f64::EPSILON));
/// 53 bits of 2/pi
const INV_PIO2: SoftF64 = SoftF64(6.36619772367581382433e-01); /* 0x3FE45F30, 0x6DC9C883 */
/// first 25 bits of pi/2
const PIO2_1: SoftF64 = SoftF64(1.57079631090164184570e+00); /* 0x3FF921FB, 0x50000000 */
/// pi/2 - pio2_1
const PIO2_1T: SoftF64 = SoftF64(1.58932547735281966916e-08); /* 0x3E5110b4, 0x611A6263 */
/// Return the remainder of x rem pi/2 in *y
///
/// use double precision for everything except passing x
/// use __rem_pio2_large() for large x
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub(crate) const fn rem_pio2f(x: SoftF32) -> (i32, SoftF64) {
let x64 = SoftF64(x.0 as f64);
let mut tx: [SoftF64; 1] = [SoftF64(0.0)];
let ty: [SoftF64; 1] = [SoftF64(0.0)];
let ix = x.to_bits() & 0x7fffffff;
/* 25+53 bit pi is good enough for medium size */
if ix < 0x4dc90fdb {
/* |x| ~< 2^28*(pi/2), medium size */
/* Use a specialized rint() to get fn. Assume round-to-nearest. */
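/* Adding TOINT = 1.5 * 2^52 lands the sum in [2^52, 2^53), where the f64 spacing
is 1.0, so the fractional part of x/(pi/2) is rounded away (to nearest);
subtracting TOINT back recovers that nearest integer as f_n. */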
let tmp = x64.mul(INV_PIO2).add(TOINT);
let f_n = tmp.sub(TOINT);
return (f_n.0 as i32, x64.sub(f_n.mul(PIO2_1)).sub(f_n.mul(PIO2_1T)));
}
if ix >= 0x7f800000 {
/* x is inf or NaN */
return (0, x64.sub(x64));
}
/* scale x into [2^23, 2^24-1] */
let sign = (x.to_bits() >> 31) != 0;
let e0 = ((ix >> 23) - (0x7f + 23)) as i32; /* e0 = ilogb(|x|)-23, positive */
tx[0] = SoftF64(SoftF32::from_bits(ix - (e0 << 23) as u32).0 as f64);
let (n, ty) = rem_pio2_large(&tx, &ty, e0, 0);
if sign {
return (-n, ty[0].neg());
}
(n, ty[0])
}


@@ -0,0 +1,68 @@
use crate::soft_f32::SoftF32;
type F = SoftF32;
impl From<f32> for F {
fn from(value: f32) -> Self {
F::from_f32(value)
}
}
impl PartialEq<Self> for F {
fn eq(&self, other: &Self) -> bool {
match self.cmp(*other) {
Some(core::cmp::Ordering::Equal) => true,
_ => false,
}
}
}
impl PartialOrd for F {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
self.cmp(*other)
}
}
impl core::ops::Add for F {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
F::add(self, rhs)
}
}
impl core::ops::Sub for F {
type Output = Self;
fn sub(self, rhs: Self) -> Self::Output {
F::sub(self, rhs)
}
}
impl core::ops::Mul for F {
type Output = Self;
fn mul(self, rhs: Self) -> Self::Output {
F::mul(self, rhs)
}
}
impl core::ops::Div for F {
type Output = Self;
fn div(self, rhs: Self) -> Self::Output {
F::div(self, rhs)
}
}
impl core::ops::AddAssign for F {
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
impl core::ops::SubAssign for F {
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}


@@ -0,0 +1,174 @@
mod helpers;
pub mod add;
pub mod cmp;
pub mod copysign;
pub mod cos;
pub mod div;
pub mod floor;
pub mod mul;
pub mod pow;
pub mod round;
pub mod sin;
pub mod sqrt;
pub mod trunc;
#[cfg(feature = "const_trait_impl")]
pub mod const_impl_trait;
#[cfg(feature = "const_trait_impl")]
pub use const_impl_trait as impl_trait;
#[cfg(not(feature = "const_trait_impl"))]
pub mod impl_trait;
#[derive(Default, Copy, Clone)]
#[repr(transparent)]
pub struct SoftF32(pub f32);
impl SoftF32 {
pub const fn from_f32(a: f32) -> Self {
Self(a)
}
pub const fn to_f32(self) -> f32 {
self.0
}
pub const fn from_bits(a: u32) -> Self {
Self(unsafe { core::mem::transmute(a) })
}
pub const fn to_bits(self) -> u32 {
unsafe { core::mem::transmute(self.0) }
}
pub const fn add(self, rhs: Self) -> Self {
add::add(self, rhs)
}
pub const fn mul(self, rhs: Self) -> Self {
mul::mul(self, rhs)
}
pub const fn div(self, rhs: Self) -> Self {
div::div(self, rhs)
}
pub const fn cmp(self, rhs: Self) -> Option<core::cmp::Ordering> {
cmp::cmp(self, rhs)
}
pub const fn neg(self) -> Self {
Self::from_repr(self.repr() ^ Self::SIGN_MASK)
}
pub const fn sub(self, rhs: Self) -> Self {
self.add(rhs.neg())
}
pub const fn sqrt(self) -> Self {
sqrt::sqrtf(self)
}
pub const fn powi(self, n: i32) -> Self {
pow::pow(self, n)
}
pub const fn copysign(self, other: Self) -> Self {
copysign::copysign(self, other)
}
pub const fn trunc(self) -> Self {
trunc::trunc(self)
}
pub const fn round(self) -> Self {
round::round(self)
}
pub const fn floor(self) -> Self {
floor::floor(self)
}
pub const fn sin(self) -> Self {
sin::sinf(self)
}
pub const fn cos(self) -> Self {
cos::cos(self)
}
}
type SelfInt = u32;
type SelfSignedInt = i32;
type SelfExpInt = i16;
#[allow(unused)]
impl SoftF32 {
const ZERO: Self = Self(0.0);
const ONE: Self = Self(1.0);
const BITS: u32 = 32;
const SIGNIFICAND_BITS: u32 = 23;
const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1;
const SIGN_MASK: SelfInt = 1 << (Self::BITS - 1);
const SIGNIFICAND_MASK: SelfInt = (1 << Self::SIGNIFICAND_BITS) - 1;
const IMPLICIT_BIT: SelfInt = 1 << Self::SIGNIFICAND_BITS;
const EXPONENT_MASK: SelfInt = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK);
const fn repr(self) -> SelfInt {
self.to_bits()
}
const fn signed_repr(self) -> SelfSignedInt {
self.to_bits() as SelfSignedInt
}
const fn sign(self) -> bool {
self.signed_repr() < 0
}
const fn exp(self) -> SelfExpInt {
((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as SelfExpInt
}
const fn frac(self) -> SelfInt {
self.to_bits() & Self::SIGNIFICAND_MASK
}
const fn imp_frac(self) -> SelfInt {
self.frac() | Self::IMPLICIT_BIT
}
const fn from_repr(a: SelfInt) -> Self {
Self::from_bits(a)
}
const fn from_parts(sign: bool, exponent: SelfInt, significand: SelfInt) -> Self {
Self::from_repr(
((sign as SelfInt) << (Self::BITS - 1))
| ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK)
| (significand & Self::SIGNIFICAND_MASK),
)
}
const fn normalize(significand: SelfInt) -> (i32, SelfInt) {
let shift = significand
.leading_zeros()
.wrapping_sub((1u32 << Self::SIGNIFICAND_BITS).leading_zeros());
(
1i32.wrapping_sub(shift as i32),
significand << shift as SelfInt,
)
}
const fn is_subnormal(self) -> bool {
(self.repr() & Self::EXPONENT_MASK) == 0
}
}
const fn u64_lo(x: u64) -> u32 {
x as u32
}
const fn u64_hi(x: u64) -> u32 {
(x >> 32) as u32
}
const fn u32_widen_mul(a: u32, b: u32) -> (u32, u32) {
let x = u64::wrapping_mul(a as _, b as _);
(u64_lo(x), u64_hi(x))
}
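A brief sketch (assumption, not vendored code) of the crate's headline use case: because the methods above are `const fn`, SoftF32 arithmetic can be evaluated entirely at compile time. The module and constant names are illustrative.

#[cfg(test)]
mod const_eval_sketch {
    use crate::soft_f32::SoftF32;

    // Computed by the compiler during const evaluation, not at run time.
    const SQRT_TWO: f32 = SoftF32(2.0).sqrt().to_f32();

    #[test]
    fn const_value_is_reasonable() {
        assert!(SQRT_TWO > 1.414213 && SQRT_TWO < 1.414214);
    }
}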

View File

@@ -0,0 +1,194 @@
use crate::soft_f32::{u32_widen_mul, SoftF32};
type F = SoftF32;
type FInt = u32;
const fn widen_mul(a: FInt, b: FInt) -> (FInt, FInt) {
u32_widen_mul(a, b)
}
pub(crate) const fn mul(a: F, b: F) -> F {
let one: FInt = 1;
let zero: FInt = 0;
let bits = F::BITS;
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let exponent_bias = F::EXPONENT_BIAS;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
let exponent_bits = F::EXPONENT_BITS;
let a_rep = a.repr();
let b_rep = b.repr();
let a_exponent = (a_rep >> significand_bits) & max_exponent as FInt;
let b_exponent = (b_rep >> significand_bits) & max_exponent as FInt;
let product_sign = (a_rep ^ b_rep) & sign_bit;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
let mut scale = 0;
// Detect if a or b is zero, denormal, infinity, or NaN.
if a_exponent.wrapping_sub(one) >= (max_exponent - 1) as FInt
|| b_exponent.wrapping_sub(one) >= (max_exponent - 1) as FInt
{
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// NaN * anything = qNaN
if a_abs > inf_rep {
return F::from_repr(a_rep | quiet_bit);
}
// anything * NaN = qNaN
if b_abs > inf_rep {
return F::from_repr(b_rep | quiet_bit);
}
if a_abs == inf_rep {
if b_abs != zero {
// infinity * non-zero = +/- infinity
return F::from_repr(a_abs | product_sign);
} else {
// infinity * zero = NaN
return F::from_repr(qnan_rep);
}
}
if b_abs == inf_rep {
if a_abs != zero {
// infinity * non-zero = +/- infinity
return F::from_repr(b_abs | product_sign);
} else {
// infinity * zero = NaN
return F::from_repr(qnan_rep);
}
}
// zero * anything = +/- zero
if a_abs == zero {
return F::from_repr(product_sign);
}
// anything * zero = +/- zero
if b_abs == zero {
return F::from_repr(product_sign);
}
// one or both of a or b is denormal, the other (if applicable) is a
// normal number. Renormalize one or both of a and b, and set scale to
// include the necessary exponent adjustment.
if a_abs < implicit_bit {
let (exponent, significand) = F::normalize(a_significand);
scale += exponent;
a_significand = significand;
}
if b_abs < implicit_bit {
let (exponent, significand) = F::normalize(b_significand);
scale += exponent;
b_significand = significand;
}
}
// Or in the implicit significand bit. (If we fell through from the
// denormal path it was already set by normalize( ), but setting it twice
// won't hurt anything.)
a_significand |= implicit_bit;
b_significand |= implicit_bit;
// Get the significand of a*b. Before multiplying the significands, shift
// one of them left to left-align it in the field. Thus, the product will
// have (exponentBits + 2) integral digits, all but two of which must be
// zero. Normalizing this result is just a conditional left-shift by one
// and bumping the exponent accordingly.
let (mut product_low, mut product_high) =
widen_mul(a_significand, b_significand << exponent_bits);
let a_exponent_i32: i32 = a_exponent as _;
let b_exponent_i32: i32 = b_exponent as _;
let mut product_exponent: i32 = a_exponent_i32
.wrapping_add(b_exponent_i32)
.wrapping_add(scale)
.wrapping_sub(exponent_bias as i32);
// Normalize the significand, adjust exponent if needed.
if (product_high & implicit_bit) != zero {
product_exponent = product_exponent.wrapping_add(1);
} else {
product_high = (product_high << 1) | (product_low >> (bits - 1));
product_low <<= 1;
}
// If we have overflowed the type, return +/- infinity.
if product_exponent >= max_exponent as i32 {
return F::from_repr(inf_rep | product_sign);
}
if product_exponent <= 0 {
// Result is denormal before rounding
//
// If the result is so small that it just underflows to zero, return
// a zero of the appropriate sign. Mathematically there is no need to
// handle this case separately, but we make it a special case to
// simplify the shift logic.
let shift = one.wrapping_sub(product_exponent as FInt) as u32;
if shift >= bits {
return F::from_repr(product_sign);
}
// Otherwise, shift the significand of the result so that the round
// bit is the high bit of productLo.
if shift < bits {
let sticky = product_low << (bits - shift);
product_low = product_high << (bits - shift) | product_low >> shift | sticky;
product_high >>= shift;
} else if shift < (2 * bits) {
let sticky = product_high << (2 * bits - shift) | product_low;
product_low = product_high >> (shift - bits) | sticky;
product_high = zero;
} else {
product_high = zero;
}
} else {
// Result is normal before rounding; insert the exponent.
product_high &= significand_mask;
product_high |= (product_exponent as FInt) << significand_bits;
}
// Insert the sign of the result:
product_high |= product_sign;
// Final rounding. The final result may overflow to infinity, or underflow
// to zero, but those are the correct results in those cases. We use the
// default IEEE-754 round-to-nearest, ties-to-even rounding mode.
if product_low > sign_bit {
product_high += one;
}
if product_low == sign_bit {
product_high += product_high & one;
}
F::from_repr(product_high)
}
#[cfg(test)]
mod test {
use crate::soft_f32::SoftF32;
#[test]
fn sanity_check() {
assert_eq!(SoftF32(2.0).mul(SoftF32(2.0)).0, 4.0)
}
}
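An illustrative check (hypothetical, not part of the vendored sources) of the alignment trick described in the comments above: pre-shifting one 24-bit significand by EXPONENT_BITS (8 for f32) leaves the product's integer part in the top two bits of the high 32-bit word, so normalization is at most one left shift.

#[cfg(test)]
mod alignment_sketch {
    #[test]
    fn product_high_word_has_two_integer_bits() {
        let one_sig: u32 = 1 << 23;                        // significand of 1.0
        let one_and_half_sig: u32 = (1 << 23) | (1 << 22); // significand of 1.5

        // 1.0 * 1.5 = 1.5: integer part 0b01, implicit bit clear, so the
        // conditional one-bit normalization shift is still needed.
        let p = (one_sig as u64) * ((one_and_half_sig << 8) as u64);
        assert_eq!(((p >> 32) as u32) >> 22, 0b01);

        // 1.5 * 1.5 = 2.25: integer part 0b10, implicit bit set, so the
        // exponent is bumped instead.
        let q = (one_and_half_sig as u64) * ((one_and_half_sig << 8) as u64);
        assert_eq!(((q >> 32) as u32) >> 22, 0b10);
    }
}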

View File

@@ -0,0 +1,37 @@
use crate::abs_diff;
use crate::soft_f32::SoftF32;
type F = SoftF32;
pub(crate) const fn pow(a: F, b: i32) -> F {
let mut a = a;
let recip = b < 0;
let mut pow = abs_diff(b, 0);
let mut mul = F::ONE;
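// Exponentiation by squaring: scan the exponent bit by bit, folding the
// current power of `a` into the accumulator whenever the low bit is set,
// then squaring `a` for the next bit.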
loop {
if (pow & 1) != 0 {
mul = mul.mul(a);
}
pow >>= 1;
if pow == 0 {
break;
}
a = a.mul(a);
}
if recip {
F::ONE.div(mul)
} else {
mul
}
}
#[cfg(test)]
mod test {
use crate::soft_f32::SoftF32;
#[test]
fn sanity_check() {
assert_eq!(SoftF32(2.0).powi(2).0, 4.0)
}
}

View File

@@ -0,0 +1,31 @@
use super::SoftF32;
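// Round half away from zero via trunc: add copysign(0.5 - 2^-25, x), i.e. the
// largest f32 strictly below 0.5 with the sign of x, then truncate. The small
// bias keeps inputs just below 0.5, whose sum with an exact 0.5 would round up
// to 1.0, from being rounded away from zero.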
pub(crate) const fn round(x: SoftF32) -> SoftF32 {
SoftF32::trunc(x.add(SoftF32::copysign(
SoftF32(0.5).sub(SoftF32(0.25).mul(SoftF32(f32::EPSILON))),
x,
)))
}
#[cfg(test)]
mod tests {
use super::SoftF32;
#[test]
fn negative_zero() {
assert_eq!(
SoftF32::round(SoftF32(-0.0)).to_bits(),
SoftF32(-0.0).to_bits()
);
}
#[test]
fn sanity_check() {
assert_eq!((SoftF32(-1.0)).round().0, -1.0);
assert_eq!((SoftF32(2.8)).round().0, 3.0);
assert_eq!((SoftF32(-0.5)).round().0, -1.0);
assert_eq!((SoftF32(0.5)).round().0, 1.0);
assert_eq!((SoftF32(-1.5)).round().0, -2.0);
assert_eq!((SoftF32(1.5)).round().0, 2.0);
}
}

View File

@@ -0,0 +1,115 @@
/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
* Optimized by Bruce D. Evans.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
use core::f64::consts::FRAC_PI_2;
use crate::soft_f64::SoftF64;
use super::{
helpers::{k_cosf, k_sinf, rem_pio2f},
SoftF32,
};
/* Small multiples of pi/2 rounded to double precision. */
const S1_PIO2: SoftF64 = SoftF64(1.).mul(SoftF64(FRAC_PI_2)); /* 0x3FF921FB, 0x54442D18 */
const S2_PIO2: SoftF64 = SoftF64(2.).mul(SoftF64(FRAC_PI_2)); /* 0x400921FB, 0x54442D18 */
const S3_PIO2: SoftF64 = SoftF64(3.).mul(SoftF64(FRAC_PI_2)); /* 0x4012D97C, 0x7F3321D2 */
const S4_PIO2: SoftF64 = SoftF64(4.).mul(SoftF64(FRAC_PI_2)); /* 0x401921FB, 0x54442D18 */
pub const fn sinf(x: SoftF32) -> SoftF32 {
let x64 = SoftF64(x.0 as f64);
let x1p120 = SoftF32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
let mut ix = x.to_bits();
let sign = (ix >> 31) != 0;
ix &= 0x7fffffff;
if ix <= 0x3f490fda {
/* |x| ~<= pi/4 */
if ix < 0x39800000 {
/* |x| < 2**-12 */
/* raise inexact if x!=0 and underflow if subnormal */
if ix < 0x00800000 {
let _ = x.div(x1p120);
} else {
let _ = x.add(x1p120);
};
return x;
}
return k_sinf(x64);
}
if ix <= 0x407b53d1 {
/* |x| ~<= 5*pi/4 */
if ix <= 0x4016cbe3 {
/* |x| ~<= 3pi/4 */
if sign {
return k_cosf(x64.add(S1_PIO2)).neg();
} else {
return k_cosf(x64.sub(S1_PIO2));
}
}
return k_sinf(if sign {
x64.add(S2_PIO2).neg()
} else {
x64.sub(S2_PIO2).neg()
});
}
if ix <= 0x40e231d5 {
/* |x| ~<= 9*pi/4 */
if ix <= 0x40afeddf {
/* |x| ~<= 7*pi/4 */
if sign {
return k_cosf(x64.add(S3_PIO2));
} else {
return k_cosf(x64.sub(S3_PIO2)).neg();
}
}
return k_sinf(if sign {
x64.add(S4_PIO2)
} else {
x64.sub(S4_PIO2)
});
}
/* sin(Inf or NaN) is NaN */
if ix >= 0x7f800000 {
return x.sub(x);
}
/* general argument reduction needed */
let (n, y) = rem_pio2f(x);
match n & 3 {
0 => k_sinf(y),
1 => k_cosf(y),
2 => k_sinf(y.neg()),
_ => k_cosf(y).neg(),
}
}
#[cfg(test)]
mod test {
use core::f32::consts::{FRAC_2_PI, FRAC_PI_2, FRAC_PI_3, PI};
use super::*;
#[test]
fn test_basic() {
for val in [0.0, FRAC_PI_3, FRAC_PI_2, PI, FRAC_2_PI] {
assert_eq!(SoftF32(val).sin().to_f32(), val.sin())
}
}
}

View File

@@ -0,0 +1,137 @@
/* origin: Rust libm https://github.com/rust-lang/libm/blob/4c8a973741c014b11ce7f1477693a3e5d4ef9609/src/math/sqrtf.rs */
/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
use crate::soft_f32::SoftF32;
use core::cmp::Ordering;
pub(crate) const fn sqrtf(x: SoftF32) -> SoftF32 {
const TINY: SoftF32 = SoftF32(1.0e-30);
let mut z: SoftF32;
let sign: i32 = 0x80000000_u32 as i32;
let mut ix: i32;
let mut s: i32;
let mut q: i32;
let mut m: i32;
let mut t: i32;
let mut i: i32;
let mut r: u32;
ix = x.to_bits() as i32;
/* take care of Inf and NaN */
if (ix as u32 & 0x7f800000) == 0x7f800000 {
return x.mul(x).add(x); /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
}
/* take care of zero */
if ix <= 0 {
if (ix & !sign) == 0 {
return x; /* sqrt(+-0) = +-0 */
}
if ix < 0 {
return (x.sub(x)).div(x.sub(x)); /* sqrt(-ve) = sNaN */
}
}
/* normalize x */
m = ix >> 23;
if m == 0 {
/* subnormal x */
i = 0;
while ix & 0x00800000 == 0 {
ix <<= 1;
i = i + 1;
}
m -= i - 1;
}
m -= 127; /* unbias exponent */
ix = (ix & 0x007fffff) | 0x00800000;
if m & 1 == 1 {
/* odd m, double x to make it even */
ix += ix;
}
m >>= 1; /* m = [m/2] */
/* generate sqrt(x) bit by bit */
ix += ix;
q = 0;
s = 0;
r = 0x01000000; /* r = moving bit from right to left */
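// Digit-by-digit square root: q accumulates the result bits, s tracks 2*q, and
// r is the bit currently on trial; a trial bit is accepted when s + r still
// fits in the (repeatedly doubled) remainder ix.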
while r != 0 {
t = s + r as i32;
if t <= ix {
s = t + r as i32;
ix -= t;
q += r as i32;
}
ix += ix;
r >>= 1;
}
/* use floating add to find out rounding direction */
if ix != 0 {
z = SoftF32(1.0).sub(TINY); /* raise inexact flag */
if ge(z, 1.0) {
z = SoftF32(1.0).add(TINY);
if gt(z, 1.0) {
q += 2;
} else {
q += q & 1;
}
}
}
ix = (q >> 1) + 0x3f000000;
ix += m << 23;
SoftF32::from_bits(ix as u32)
}
const fn gt(l: SoftF32, r: f32) -> bool {
if let Some(ord) = l.cmp(SoftF32(r)) {
match ord {
Ordering::Greater => true,
_ => false,
}
} else {
panic!("Failed to compare values");
}
}
const fn ge(l: SoftF32, r: f32) -> bool {
if let Some(ord) = l.cmp(SoftF32(r)) {
match ord {
Ordering::Less => false,
_ => true,
}
} else {
panic!("Failed to compare values");
}
}
#[cfg(test)]
mod tests {
use super::*;
use core::f32::*;
#[test]
fn sanity_check() {
assert_eq!(sqrtf(SoftF32(100.0)).0, 10.0);
assert_eq!(sqrtf(SoftF32(4.0)).0, 2.0);
}
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
#[test]
fn spec_tests() {
// Not Asserted: FE_INVALID exception is raised if argument is negative.
assert!(sqrtf(SoftF32(-1.0)).0.is_nan());
assert!(sqrtf(SoftF32(NAN)).0.is_nan());
for f in [0.0, -0.0, INFINITY].iter().copied() {
assert_eq!(sqrtf(SoftF32(f)).0, f);
}
}
}

View File

@@ -0,0 +1,30 @@
use super::SoftF32;
pub(crate) const fn trunc(x: SoftF32) -> SoftF32 {
let mut i: u32 = x.to_bits();
let mut e: i32 = (i >> 23 & 0xff) as i32 - 0x7f + 9;
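// e = 9 + unbiased exponent: the number of leading bits (sign, exponent and
// integer significand bits) to preserve; the mask m below clears the remaining
// fractional significand bits.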
let m: u32;
if e >= 23 + 9 {
return x;
}
if e < 9 {
e = 1;
}
m = -1i32 as u32 >> e;
if (i & m) == 0 {
return x;
}
i &= !m;
SoftF32::from_bits(i)
}
#[cfg(test)]
mod tests {
use crate::soft_f32::SoftF32;
#[test]
fn sanity_check() {
assert_eq!(super::trunc(SoftF32(1.1)).0, 1.0);
}
}

View File

@@ -0,0 +1,193 @@
use crate::soft_f64::SoftF64;
type F = SoftF64;
type FInt = u64;
pub(crate) const fn add(a: F, b: F) -> F {
let one: FInt = 1;
let zero: FInt = 0;
let bits = F::BITS as FInt;
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
let mut a_rep = a.repr();
let mut b_rep = b.repr();
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// Detect if a or b is zero, infinity, or NaN.
if a_abs.wrapping_sub(one) >= inf_rep - one || b_abs.wrapping_sub(one) >= inf_rep - one {
// NaN + anything = qNaN
if a_abs > inf_rep {
return F::from_repr(a_abs | quiet_bit);
}
// anything + NaN = qNaN
if b_abs > inf_rep {
return F::from_repr(b_abs | quiet_bit);
}
if a_abs == inf_rep {
// +/-infinity + -/+infinity = qNaN
if (a.repr() ^ b.repr()) == sign_bit {
return F::from_repr(qnan_rep);
} else {
// +/-infinity + anything remaining = +/- infinity
return a;
}
}
// anything remaining + +/-infinity = +/-infinity
if b_abs == inf_rep {
return b;
}
// zero + anything = anything
if a_abs == 0 {
// but we need to get the sign right for zero + zero
if b_abs == 0 {
return F::from_repr(a.repr() & b.repr());
} else {
return b;
}
}
// anything + zero = anything
if b_abs == 0 {
return a;
}
}
// Swap a and b if necessary so that a has the larger absolute value.
if b_abs > a_abs {
// Don't use mem::swap because it may generate references to memcpy in unoptimized code.
let tmp = a_rep;
a_rep = b_rep;
b_rep = tmp;
}
// Extract the exponent and significand from the (possibly swapped) a and b.
let mut a_exponent: i32 = ((a_rep & exponent_mask) >> significand_bits) as _;
let mut b_exponent: i32 = ((b_rep & exponent_mask) >> significand_bits) as _;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
// normalize any denormals, and adjust the exponent accordingly.
if a_exponent == 0 {
let (exponent, significand) = F::normalize(a_significand);
a_exponent = exponent;
a_significand = significand;
}
if b_exponent == 0 {
let (exponent, significand) = F::normalize(b_significand);
b_exponent = exponent;
b_significand = significand;
}
// The sign of the result is the sign of the larger operand, a. If they
// have opposite signs, we are performing a subtraction; otherwise addition.
let result_sign = a_rep & sign_bit;
let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero;
// Shift the significands to give us round, guard and sticky, and or in the
// implicit significand bit. (If we fell through from the denormal path it
// was already set by normalize(), but setting it twice won't hurt
// anything.)
a_significand = (a_significand | implicit_bit) << 3;
b_significand = (b_significand | implicit_bit) << 3;
// Shift the significand of b by the difference in exponents, with a sticky
// bottom bit to get rounding correct.
let align = a_exponent.wrapping_sub(b_exponent) as _;
if align != 0 {
if align < bits {
let sticky = (b_significand << bits.wrapping_sub(align) != 0) as FInt;
b_significand = (b_significand >> align) | sticky;
} else {
b_significand = one; // sticky; b is known to be non-zero.
}
}
if subtraction {
a_significand = a_significand.wrapping_sub(b_significand);
// If a == -b, return +zero.
if a_significand == 0 {
return F::from_repr(0);
}
// If partial cancellation occurred, we need to left-shift the result
// and adjust the exponent:
if a_significand < implicit_bit << 3 {
let shift =
a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32;
a_significand <<= shift;
a_exponent -= shift;
}
} else {
// addition
a_significand += b_significand;
// If the addition carried up, we need to right-shift the result and
// adjust the exponent:
if a_significand & implicit_bit << 4 != 0 {
let sticky = (a_significand & one != 0) as FInt;
a_significand = a_significand >> 1 | sticky;
a_exponent += 1;
}
}
// If we have overflowed the type, return +/- infinity:
if a_exponent >= max_exponent as i32 {
return F::from_repr(inf_rep | result_sign);
}
if a_exponent <= 0 {
// Result is denormal before rounding; the exponent is zero and we
// need to shift the significand.
let shift = (1 - a_exponent) as _;
let sticky = ((a_significand << bits.wrapping_sub(shift)) != 0) as FInt;
a_significand = a_significand >> shift | sticky;
a_exponent = 0;
}
// Low three bits are round, guard, and sticky.
let a_significand_i32: i32 = a_significand as _;
let round_guard_sticky: i32 = a_significand_i32 & 0x7;
// Shift the significand into place, and mask off the implicit bit.
let mut result = a_significand >> 3 & significand_mask;
// Insert the exponent and sign.
result |= (a_exponent as FInt) << significand_bits;
result |= result_sign;
// Final rounding. The result may overflow to infinity, but that is the
// correct result in that case.
if round_guard_sticky > 0x4 {
result += one;
}
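// Discarded part is exactly half an ulp: round to even by incrementing only
// when the retained result is currently odd.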
if round_guard_sticky == 0x4 {
result += result & one;
}
F::from_repr(result)
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn sanity_check() {
assert_eq!(SoftF64(1.0).add(SoftF64(1.0)).0, 2.0)
}
}

View File

@@ -0,0 +1,63 @@
use crate::soft_f64::SoftF64;
use core::cmp::Ordering;
type F = SoftF64;
type FInt = u64;
type FSignedInt = i64;
const UNORDERED: Option<Ordering> = None;
const EQUAL: Option<Ordering> = Some(Ordering::Equal);
const GREATER: Option<Ordering> = Some(Ordering::Greater);
const LESS: Option<Ordering> = Some(Ordering::Less);
pub(crate) const fn cmp(a: F, b: F) -> Option<Ordering> {
let one: FInt = 1;
let zero: FInt = 0;
let szero: FSignedInt = 0;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let a_rep = a.repr();
let b_rep = b.repr();
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// If either a or b is NaN, they are unordered.
if a_abs > inf_rep || b_abs > inf_rep {
return UNORDERED;
}
// If a and b are both zeros, they are equal.
if a_abs | b_abs == zero {
return EQUAL;
}
let a_srep = a.signed_repr();
let b_srep = b.signed_repr();
// If at least one of a and b is positive, we get the same result comparing
// a and b as signed integers as we would with a floating-point compare.
if a_srep & b_srep >= szero {
if a_srep < b_srep {
LESS
} else if a_srep == b_srep {
EQUAL
} else {
GREATER
}
// Otherwise, both are negative, so we need to flip the sense of the
// comparison to get the correct result. (This assumes a twos- or ones-
// complement integer representation; if integers are represented in a
// sign-magnitude representation, then this flip is incorrect).
} else if a_srep > b_srep {
LESS
} else if a_srep == b_srep {
EQUAL
} else {
GREATER
}
}
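A short sketch (assumption, not vendored code) of the property the signed-integer comparison above relies on: for non-negative IEEE-754 values, ordering of the raw bit patterns matches numeric ordering.

#[cfg(test)]
mod ordering_sketch {
    #[test]
    fn bit_order_matches_value_order_for_non_negative() {
        let (a, b) = (1.5f64, 2.25f64);
        assert!(a < b);
        // Same ordering when the bit patterns are compared as signed integers.
        assert!((a.to_bits() as i64) < (b.to_bits() as i64));
    }
}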

View File

@@ -0,0 +1,84 @@
use crate::soft_f64::SoftF64;
type F = SoftF64;
impl const From<f64> for F {
fn from(value: f64) -> Self {
F::from_f64(value)
}
}
impl const PartialEq<Self> for F {
fn eq(&self, other: &Self) -> bool {
match self.cmp(*other) {
Some(core::cmp::Ordering::Equal) => true,
_ => false,
}
}
}
impl const PartialOrd for F {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
self.cmp(*other)
}
}
impl const core::ops::Add for F {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
F::add(self, rhs)
}
}
impl const core::ops::Sub for F {
type Output = Self;
fn sub(self, rhs: Self) -> Self::Output {
F::sub(self, rhs)
}
}
impl const core::ops::Mul for F {
type Output = Self;
fn mul(self, rhs: Self) -> Self::Output {
F::mul(self, rhs)
}
}
impl const core::ops::Div for F {
type Output = Self;
fn div(self, rhs: Self) -> Self::Output {
F::div(self, rhs)
}
}
#[cfg(feature = "const_mut_refs")]
impl const core::ops::AddAssign for F {
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
#[cfg(not(feature = "const_mut_refs"))]
impl core::ops::AddAssign for F {
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
#[cfg(feature = "const_mut_refs")]
impl const core::ops::SubAssign for F {
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}
#[cfg(not(feature = "const_mut_refs"))]
impl core::ops::SubAssign for F {
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}

View File

@@ -0,0 +1,13 @@
use super::SoftF64;
/// Sign of Y, magnitude of X (f64)
///
/// Constructs a number with the magnitude (absolute value) of its
/// first argument, `x`, and the sign of its second argument, `y`.
pub(crate) const fn copysign(x: SoftF64, y: SoftF64) -> SoftF64 {
let mut ux = x.to_bits();
let uy = y.to_bits();
ux &= (!0) >> 1;
ux |= uy & (1 << 63);
SoftF64::from_bits(ux)
}

View File

@@ -0,0 +1,86 @@
// origin: FreeBSD /usr/src/lib/msun/src/s_cos.c,
// https://github.com/rust-lang/libm/blob/4c8a973741c014b11ce7f1477693a3e5d4ef9609/src/math/cos.rs
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunPro, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
use super::{
helpers::{k_cos, k_sin, rem_pio2},
SoftF64,
};
// cos(x)
// Return cosine function of x.
//
// kernel function:
// k_sin ... sine function on [-pi/4,pi/4]
// k_cos ... cosine function on [-pi/4,pi/4]
// rem_pio2 ... argument reduction routine
//
// Method.
// Let S,C and T denote the sin, cos and tan respectively on
// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
// in [-pi/4 , +pi/4], and let n = k mod 4.
// We have
//
// n sin(x) cos(x) tan(x)
// ----------------------------------------------------------
// 0 S C T
// 1 C -S -1/T
// 2 -S -C T
// 3 -C S -1/T
// ----------------------------------------------------------
//
// Special cases:
// Let trig be any of sin, cos, or tan.
// trig(+-INF) is NaN, with signals;
// trig(NaN) is that NaN;
//
// Accuracy:
// TRIG(x) returns trig(x) nearly rounded
//
pub(crate) const fn cos(x: SoftF64) -> SoftF64 {
let ix = (SoftF64::to_bits(x) >> 32) as u32 & 0x7fffffff;
/* |x| ~< pi/4 */
if ix <= 0x3fe921fb {
if ix < 0x3e46a09e {
/* if x < 2**-27 * sqrt(2) */
/* raise inexact if x != 0 */
if x.0 as i32 == 0 {
return SoftF64::ONE;
}
}
return k_cos(x, SoftF64::ZERO);
}
/* cos(Inf or NaN) is NaN */
if ix >= 0x7ff00000 {
return x.sub(x);
}
/* argument reduction needed */
let (n, y0, y1) = rem_pio2(x);
match n & 3 {
0 => k_cos(y0, y1),
1 => k_sin(y0, y1, 1).neg(),
2 => k_cos(y0, y1).neg(),
_ => k_sin(y0, y1, 1),
}
}
#[cfg(test)]
mod test {
use crate::soft_f64::SoftF64;
#[test]
fn test_large_neg() {
assert_eq!(SoftF64(-1647101.0).cos().to_f64(), (-1647101.0_f64).cos())
}
}
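A hedged sketch (not vendored code) of the quadrant identities behind the n-mod-4 dispatch above, checked with native f64 arithmetic.

#[cfg(test)]
mod quadrant_sketch {
    #[test]
    fn quadrant_identities_hold() {
        let y = 0.3f64;
        let pi_2 = core::f64::consts::FRAC_PI_2;
        assert!(((y + 1.0 * pi_2).cos() + y.sin()).abs() < 1e-12); // n = 1: cos -> -sin
        assert!(((y + 2.0 * pi_2).cos() + y.cos()).abs() < 1e-12); // n = 2: cos -> -cos
        assert!(((y + 3.0 * pi_2).cos() - y.sin()).abs() < 1e-12); // n = 3: cos ->  sin
    }
}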

View File

@@ -0,0 +1,439 @@
use crate::soft_f64::{u64_widen_mul, SoftF64};
type F = SoftF64;
type FInt = u64;
pub(crate) const fn div(a: F, b: F) -> F {
const NUMBER_OF_HALF_ITERATIONS: usize = 3;
const NUMBER_OF_FULL_ITERATIONS: usize = 1;
const USE_NATIVE_FULL_ITERATIONS: bool = false;
let one = 1;
let zero = 0;
let hw = F::BITS / 2;
let lo_mask = u64::MAX >> hw;
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let exponent_bias = F::EXPONENT_BIAS;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let sign_bit = F::SIGN_MASK;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
#[inline(always)]
const fn negate_u64(a: u64) -> u64 {
(<i64>::wrapping_neg(a as i64)) as u64
}
let a_rep = a.repr();
let b_rep = b.repr();
let a_exponent = (a_rep >> significand_bits) & max_exponent as u64;
let b_exponent = (b_rep >> significand_bits) & max_exponent as u64;
let quotient_sign = (a_rep ^ b_rep) & sign_bit;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
let mut scale = 0;
// Detect if a or b is zero, denormal, infinity, or NaN.
if a_exponent.wrapping_sub(one) >= (max_exponent - 1) as u64
|| b_exponent.wrapping_sub(one) >= (max_exponent - 1) as u64
{
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// NaN / anything = qNaN
if a_abs > inf_rep {
return F::from_repr(a_rep | quiet_bit);
}
// anything / NaN = qNaN
if b_abs > inf_rep {
return F::from_repr(b_rep | quiet_bit);
}
if a_abs == inf_rep {
if b_abs == inf_rep {
// infinity / infinity = NaN
return F::from_repr(qnan_rep);
} else {
// infinity / anything else = +/- infinity
return F::from_repr(a_abs | quotient_sign);
}
}
// anything else / infinity = +/- 0
if b_abs == inf_rep {
return F::from_repr(quotient_sign);
}
if a_abs == zero {
if b_abs == zero {
// zero / zero = NaN
return F::from_repr(qnan_rep);
} else {
// zero / anything else = +/- zero
return F::from_repr(quotient_sign);
}
}
// anything else / zero = +/- infinity
if b_abs == zero {
return F::from_repr(inf_rep | quotient_sign);
}
// one or both of a or b is denormal, the other (if applicable) is a
// normal number. Renormalize one or both of a and b, and set scale to
// include the necessary exponent adjustment.
if a_abs < implicit_bit {
let (exponent, significand) = F::normalize(a_significand);
scale += exponent;
a_significand = significand;
}
if b_abs < implicit_bit {
let (exponent, significand) = F::normalize(b_significand);
scale -= exponent;
b_significand = significand;
}
}
// Set the implicit significand bit. If we fell through from the
// denormal path it was already set by normalize( ), but setting it twice
// won't hurt anything.
a_significand |= implicit_bit;
b_significand |= implicit_bit;
let written_exponent: i64 = a_exponent
.wrapping_sub(b_exponent)
.wrapping_add(scale as u64)
.wrapping_add(exponent_bias as u64) as i64;
let b_uq1 = b_significand << (F::BITS - significand_bits - 1);
// Align the significand of b as a UQ1.(n-1) fixed-point number in the range
// [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax
// polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2.
// The max error for this approximation is achieved at endpoints, so
// abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289...,
// which is about 4.5 bits.
// The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571...
// Then, refine the reciprocal estimate using a quadratically converging
// Newton-Raphson iteration:
// x_{n+1} = x_n * (2 - x_n * b)
//
// Let b be the original divisor considered "in infinite precision" and
// obtained from IEEE754 representation of function argument (with the
// implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in
// UQ1.(W-1).
//
// Let b_hw be an infinitely precise number obtained from the highest (HW-1)
// bits of divisor significand (with the implicit bit set). Corresponds to
// half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated**
// version of b_UQ1.
//
// Let e_n := x_n - 1/b_hw
// E_n := x_n - 1/b
// abs(E_n) <= abs(e_n) + (1/b_hw - 1/b)
// = abs(e_n) + (b - b_hw) / (b*b_hw)
// <= abs(e_n) + 2 * 2^-HW
// rep_t-sized iterations may be slower than the corresponding half-width
// variant depending on the hardware and whether single/double/quad precision
// is selected.
// NB: Using half-width iterations increases computation errors due to
// rounding, so error estimations have to be computed taking the selected
// mode into account!
let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
// Starting with (n-1) half-width iterations
let b_uq1_hw: u32 = (b_significand >> (significand_bits + 1 - hw)) as u32;
// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
// with W0 being either 16 or 32 and W0 <= HW.
// That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
// HW is at least 32. Shifting into the highest bits if needed.
let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32));
// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
// so x0 fits to UQ0.HW without wrapping.
let x_uq0_hw: u32 = {
let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
// dbg!(x_uq0_hw);
// An e_0 error is comprised of errors due to
// * x0 being an inherently imprecise first approximation of 1/b_hw
// * C_hw being some (irrational) number **truncated** to W0 bits
// Please note that e_0 is calculated against the infinitely precise
// reciprocal of b_hw (that is, **truncated** version of b).
//
// e_0 <= 3/4 - 1/sqrt(2) + 2^-W0
// By construction, 1 <= b < 2
// f(x) = x * (2 - b*x) = 2*x - b*x^2
// f'(x) = 2 * (1 - b*x)
//
// On the [0, 1] interval, f(0) = 0,
// then it increases until f(1/b) = 1 / b, maximum on (0, 1),
// then it decreases to f(1) = 2 - b
//
// Let g(x) = x - f(x) = b*x^2 - x.
// On (0, 1/b), g(x) < 0 <=> f(x) > x
// On (1/b, 1], g(x) > 0 <=> f(x) < x
//
// For half-width iterations, b_hw is used instead of b.
let mut idx = 0;
while idx < NUMBER_OF_HALF_ITERATIONS {
// corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp
// of corr_UQ1_hw.
// "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1).
// On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
// expected to be strictly positive because b_UQ1_hw has its highest bit set
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
let corr_uq1_hw: u32 = 0_u64
.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw)
as u32;
// dbg!(corr_uq1_hw);
// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
// obtaining an UQ1.(HW-1) number and proving its highest bit could be
// considered to be 0 to be able to represent it in UQ0.HW.
// From the above analysis of f(x), if corr_UQ1_hw would be represented
// without any intermediate loss of precision (that is, in twice_rep_t)
// x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly
// less otherwise. On the other hand, to obtain [1.]000..., one has to pass
// 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due
// to 1.0 being not representable as UQ0.HW).
// The fact corr_UQ1_hw was virtually round up (due to result of
// multiplication being **first** truncated, then negated - to improve
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32;
// dbg!(x_uq0_hw);
// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
// representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
// any number of iterations, so just subtract 2 from the reciprocal
// approximation after last iteration.
// In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW:
// corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1
// = 1 - e_n * b_hw + 2*eps1
// x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2
// = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2
// = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2
// e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2
// = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw
// \------ >0 -------/ \-- >0 ---/
// abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U)
idx += 1;
}
// For initial half-width iterations, U = 2^-HW
// Let abs(e_n) <= u_n * U,
// then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U)
// u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2)
// Account for possible overflow (see above). For an overflow to occur for the
// first time, for "ideal" corr_UQ1_hw (that is, without intermediate
// truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum
// value representable in UQ0.HW or less by 1. This means that 1/b_hw have to
// be not below that value (see g(x) above), so it is safe to decrement just
// once after the final iteration. On the other hand, an effective value of
// divisor changes after this point (from b_hw to b), so adjust here.
x_uq0_hw.wrapping_sub(1_u32)
};
// Error estimations for full-precision iterations are calculated just
// as above, but with U := 2^-W and taking extra decrementing into account.
// We need at least one such iteration.
// Simulating operations on a twice_rep_t to perform a single final full-width
// iteration. Using ad-hoc multiplication implementations to take advantage
// of particular structure of operands.
let blo: u64 = b_uq1 & lo_mask;
// x_UQ0 = x_UQ0_hw * 2^HW - 1
// x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
//
// <--- higher half ---><--- lower half --->
// [x_UQ0_hw * b_UQ1_hw]
// + [ x_UQ0_hw * blo ]
// - [ b_UQ1 ]
// = [ result ][.... discarded ...]
let corr_uq1 = negate_u64(
(x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1,
); // account for *possible* carry
let lo_corr = corr_uq1 & lo_mask;
let hi_corr = corr_uq1 >> hw;
// x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
let mut x_uq0: FInt = (((x_uq0_hw as u64) * hi_corr) << 1)
.wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1))
.wrapping_sub(2); // 1 to account for the highest bit of corr_UQ1 can be 1
// 1 to account for possible carry
// Just like the case of half-width iterations but with possibility
// of overflowing by one extra Ulp of x_UQ0.
x_uq0 -= one;
// ... and then traditional fixup by 2 should work
// On error estimation:
// abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW
// + (2^-HW + 2^-W))
// abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW
// Then like for the half-width iterations:
// With 0 <= eps1, eps2 < 2^-W
// E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b
// abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ]
// abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ]
x_uq0
} else {
// C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
let c: FInt = 0x7504F333 << (F::BITS - 32);
let x_uq0: FInt = c.wrapping_sub(b_uq1);
// E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
x_uq0
};
let mut x_uq0 = if USE_NATIVE_FULL_ITERATIONS {
let mut idx = 0;
while idx < NUMBER_OF_FULL_ITERATIONS {
let corr_uq1: u64 = 0_u64.wrapping_sub((x_uq0 * b_uq1) >> F::BITS);
x_uq0 = (((x_uq0 as u128) * (corr_uq1 as u128)) >> (F::BITS - 1)) as u64;
idx += 1;
}
x_uq0
} else {
// not using native full iterations
x_uq0
};
// Finally, account for possible overflow, as explained above.
x_uq0 = x_uq0.wrapping_sub(2);
// u_n for different precisions (with N-1 half-width iterations):
// W0 is the precision of C
// u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW
// Estimated with bc:
// define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; }
// define half2(un) { return 2.0 * un / 2.0^hw + 2.0; }
// define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; }
// define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; }
// | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1)
// u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797
// u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440
// u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317
// u_3 | < 7.31 | | < 7.31 | < 27054456580
// u_4 | | | | < 80.4
// Final (U_N) | same as u_3 | < 72 | < 218 | < 13920
// Add 2 to U_N due to final decrement.
let reciprocal_precision: FInt = 220;
// Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W
let x_uq0 = x_uq0 - reciprocal_precision;
// Now 1/b - (2*P) * 2^-W < x < 1/b
// FIXME Is x_UQ0 still >= 0.5?
let mut quotient: FInt = u64_widen_mul(x_uq0, a_significand << 1).1;
// Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).
// quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
// adjust it to be in [1.0, 2.0) as UQ1.SB.
let (mut residual, written_exponent) = if quotient < (implicit_bit << 1) {
// Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB,
// effectively doubling its value as well as its error estimation.
let residual_lo = (a_significand << (significand_bits + 1))
.wrapping_sub(quotient.wrapping_mul(b_significand));
a_significand <<= 1;
(residual_lo, written_exponent.wrapping_sub(1))
} else {
// Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it
// to UQ1.SB by right shifting by 1. Least significant bit is omitted.
quotient >>= 1;
let residual_lo =
(a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand));
(residual_lo, written_exponent)
};
// drop mutability
let quotient = quotient;
// NB: residualLo is calculated above for the normal result case.
// It is re-computed on denormal path that is expected to be not so
// performance-sensitive.
// Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB
// Each NextAfter() increments the floating point value by at least 2^-SB
// (more, if exponent was incremented).
// Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint):
// q
// | | * | | | | |
// <---> 2^t
// | | | | | * | |
// q
// To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB.
// (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB
// (8*P) * 2^-W < 0.5 * 2^-SB
// P < 2^(W-4-SB)
// Generally, for at most R NextAfter() to be enough,
// P < (2*R - 1) * 2^(W-4-SB)
// For f32 (0+3): 10 < 32 (OK)
// For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required
// For f64: 220 < 256 (OK)
// For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required)
// If we have overflowed the exponent, return infinity
if written_exponent >= max_exponent as i64 {
return F::from_repr(inf_rep | quotient_sign);
}
// Now, quotient <= the correctly-rounded result
// and may need taking NextAfter() up to 3 times (see error estimates above)
// r = a - b * q
let abs_result = if written_exponent > 0 {
let mut ret = quotient & significand_mask;
ret |= (written_exponent as u64) << significand_bits;
residual <<= 1;
ret
} else {
if (significand_bits as i64 + written_exponent) < 0 {
return F::from_repr(quotient_sign);
}
let ret = quotient.wrapping_shr((negate_u64(written_exponent as u64) + 1) as u32);
residual = a_significand
.wrapping_shl(significand_bits.wrapping_add(written_exponent as u32))
.wrapping_sub(((ret).wrapping_mul(b_significand)) << 1);
ret
};
// Round
let abs_result = {
residual += abs_result & one; // tie to even
// The line above conditionally turns the LT comparison below into LTE.
if residual > b_significand {
abs_result + one
} else {
abs_result
}
};
F::from_repr(abs_result | quotient_sign)
}
#[cfg(test)]
mod test {
use super::SoftF64;
#[test]
fn sanity_check() {
assert_eq!(SoftF64(10.0).div(SoftF64(5.0)).0, 2.0)
}
}
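A minimal sketch (assumption, not vendored code) of the Newton-Raphson reciprocal refinement used above, x_{n+1} = x_n * (2 - b * x_n), demonstrated with plain f64 arithmetic and the same minimax starting guess 3/4 + 1/sqrt(2) - b/2.

#[cfg(test)]
mod newton_raphson_sketch {
    #[test]
    fn reciprocal_iteration_converges() {
        let b = 1.75f64; // divisor significand, in [1, 2)
        let mut x = 0.75 + 1.0 / core::f64::consts::SQRT_2 - b / 2.0;
        for _ in 0..4 {
            // Each step roughly squares the error (quadratic convergence).
            x = x * (2.0 - b * x);
        }
        assert!((x * b - 1.0).abs() < 1e-12);
    }
}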

View File

@@ -0,0 +1,61 @@
use super::{
helpers::{eq, gt},
SoftF64,
};
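// TOINT = 1/EPSILON = 2^52; adding and then subtracting it forces a value with
// |x| < 2^52 to an integer, since every f64 at or above 2^52 is already an
// integer.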
const TOINT: SoftF64 = SoftF64(1.0).div(SoftF64(f64::EPSILON));
/// Floor (f64)
///
/// Finds the nearest integer less than or equal to `x`.
pub const fn floor(x: SoftF64) -> SoftF64 {
let ui = x.to_bits();
let e = ((ui >> 52) & 0x7ff) as i32;
if (e >= 0x3ff + 52) || eq(x, SoftF64::ZERO) {
return x;
}
/* y = int(x) - x, where int(x) is an integer neighbor of x */
let y = if (ui >> 63) != 0 {
x.sub(TOINT).add(TOINT).sub(x)
} else {
x.add(TOINT).sub(TOINT).sub(x)
};
/* special case because of non-nearest rounding modes */
if e < 0x3ff {
return if (ui >> 63) != 0 {
SoftF64(-1.0)
} else {
SoftF64::ZERO
};
}
if gt(y, SoftF64::ZERO) {
x.add(y).sub(SoftF64::ONE)
} else {
x.add(y)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn sanity_check() {
assert_eq!(floor(SoftF64(1.1)).0, 1.0);
assert_eq!(floor(SoftF64(2.9)).0, 2.0);
}
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor
#[test]
fn spec_tests() {
// Not Asserted: that the current rounding mode has no effect.
assert!(floor(SoftF64(f64::NAN)).0.is_nan());
for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY]
.iter()
.copied()
{
assert_eq!(floor(SoftF64(f)).0, f);
}
}
}

View File

@@ -0,0 +1,36 @@
use core::cmp::Ordering;
use crate::soft_f64::SoftF64;
pub(crate) const fn eq(l: SoftF64, r: SoftF64) -> bool {
if let Some(ord) = l.cmp(r) {
match ord {
Ordering::Equal => true,
_ => false,
}
} else {
panic!("Failed to compare values");
}
}
pub(crate) const fn gt(l: SoftF64, r: SoftF64) -> bool {
if let Some(ord) = l.cmp(r) {
match ord {
Ordering::Greater => true,
_ => false,
}
} else {
panic!("Failed to compare values");
}
}
pub(crate) const fn ge(l: SoftF64, r: SoftF64) -> bool {
if let Some(ord) = l.cmp(r) {
match ord {
Ordering::Less => false,
_ => true,
}
} else {
panic!("Failed to compare values");
}
}

View File

@@ -0,0 +1,66 @@
// origin: FreeBSD /usr/src/lib/msun/src/k_cos.c,
// https://github.com/rust-lang/libm/blob/4c8a973741c014b11ce7f1477693a3e5d4ef9609/src/math/k_cos.rs
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunSoft, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
use crate::soft_f64::SoftF64;
const C1: SoftF64 = SoftF64(4.16666666666666019037e-02); /* 0x3FA55555, 0x5555554C */
const C2: SoftF64 = SoftF64(-1.38888888888741095749e-03); /* 0xBF56C16C, 0x16C15177 */
const C3: SoftF64 = SoftF64(2.48015872894767294178e-05); /* 0x3EFA01A0, 0x19CB1590 */
const C4: SoftF64 = SoftF64(-2.75573143513906633035e-07); /* 0xBE927E4F, 0x809C52AD */
const C5: SoftF64 = SoftF64(2.08757232129817482790e-09); /* 0x3E21EE9E, 0xBDB4B1C4 */
const C6: SoftF64 = SoftF64(-1.13596475577881948265e-11); /* 0xBDA8FAE9, 0xBE8838D4 */
// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
// Input x is assumed to be bounded by ~pi/4 in magnitude.
// Input y is the tail of x.
//
// Algorithm
// 1. Since cos(-x) = cos(x), we need only to consider positive x.
// 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
// 3. cos(x) is approximated by a polynomial of degree 14 on
// [0,pi/4]
// cos(x) ~ 1 - x*x/2 + C1*x^4 + ... + C6*x^14
// where the remez error is
//
// |cos(x) - (1 - .5*x^2 + C1*x^4 + C2*x^6 + C3*x^8 + C4*x^10 + C5*x^12 + C6*x^14)| <= 2^-58
//
// 4. let r = C1*x^4 + C2*x^6 + C3*x^8 + C4*x^10 + C5*x^12 + C6*x^14, then
// cos(x) ~ 1 - x*x/2 + r
// since cos(x+y) ~ cos(x) - sin(x)*y
// ~ cos(x) - x*y,
// a correction term is necessary in cos(x) and hence
// cos(x+y) = 1 - (x*x/2 - (r - x*y))
// For better accuracy, rearrange to
// cos(x+y) ~ w + (tmp + (r-x*y))
// where w = 1 - x*x/2 and tmp is a tiny correction term
// (1 - x*x/2 == w + tmp exactly in infinite precision).
// The exactness of w + tmp in infinite precision depends on w
// and tmp having the same precision as x. If they have extra
// precision due to compiler bugs, then the extra precision is
// only good provided it is retained in all terms of the final
// expression for cos(). Retention happens in all cases tested
// under FreeBSD, so don't pessimize things by forcibly clipping
// any extra precision in w.
pub(crate) const fn k_cos(x: SoftF64, y: SoftF64) -> SoftF64 {
let z = x.mul(x);
let w = z.mul(z);
let r = z
.mul(C1.add(z.mul(C2.add(z.mul(C3)))))
.add(w.mul(w.mul(C4.add(z.mul(C5.add(z.mul(C6)))))));
let hz = SoftF64(0.5).mul(z);
let w = SoftF64::ZERO.sub(hz);
w.add(((SoftF64::ONE.sub(w)).sub(hz)).add(z.mul(r).sub(x.mul(y))))
}

View File

@@ -0,0 +1,62 @@
// origin: FreeBSD /usr/src/lib/msun/src/k_sin.c,
// https://github.com/rust-lang/libm/blob/4c8a973741c014b11ce7f1477693a3e5d4ef9609/src/math/k_sin.rs
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunSoft, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
use crate::soft_f64::SoftF64;
const S1: SoftF64 = SoftF64(-1.66666666666666324348e-01); /* 0xBFC55555, 0x55555549 */
const S2: SoftF64 = SoftF64(8.33333333332248946124e-03); /* 0x3F811111, 0x1110F8A6 */
const S3: SoftF64 = SoftF64(-1.98412698298579493134e-04); /* 0xBF2A01A0, 0x19C161D5 */
const S4: SoftF64 = SoftF64(2.75573137070700676789e-06); /* 0x3EC71DE3, 0x57B1FE7D */
const S5: SoftF64 = SoftF64(-2.50507602534068634195e-08); /* 0xBE5AE5E6, 0x8A2B9CEB */
const S6: SoftF64 = SoftF64(1.58969099521155010221e-10); /* 0x3DE5D93A, 0x5ACFD57C */
// kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
// Input x is assumed to be bounded by ~pi/4 in magnitude.
// Input y is the tail of x.
// Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
//
// Algorithm
// 1. Since sin(-x) = -sin(x), we need only to consider positive x.
// 2. Callers must return sin(-0) = -0 without calling here since our
// odd polynomial is not evaluated in a way that preserves -0.
// Callers may do the optimization sin(x) ~ x for tiny x.
// 3. sin(x) is approximated by a polynomial of degree 13 on
// [0,pi/4]
// sin(x) ~ x + S1*x^3 + ... + S6*x^13
// where
//
// |sin(x)/x - (1 + S1*x^2 + S2*x^4 + S3*x^6 + S4*x^8 + S5*x^10 + S6*x^12)| <= 2^-58
//
// 4. sin(x+y) = sin(x) + sin'(x')*y
// ~ sin(x) + (1-x*x/2)*y
// For better accuracy, let
// r = x^3 * (S2 + x^2*(S3 + x^2*(S4 + x^2*(S5 + x^2*S6))))
// then
// sin(x) = x + (S1*x^3 + (x^3*(r - y/2) + y))
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub(crate) const fn k_sin(x: SoftF64, y: SoftF64, iy: i32) -> SoftF64 {
let z = x.mul(x);
let w = z.mul(z);
let r = S2
.add(z.mul(S3.add(z.mul(S4))))
.add(z.mul(w.mul(S5.add(z.mul(S6)))));
let v = z.mul(x);
if iy == 0 {
x.add(v.mul(S1.add(z.mul(r))))
} else {
x.sub((z.mul(SoftF64(0.5).mul(y).sub(v.mul(r))).sub(y)).sub(v.mul(S1)))
}
}

View File

@@ -0,0 +1,13 @@
mod cmp;
mod k_cos;
mod k_sin;
mod rem_pio2;
mod rem_pio2_large;
mod scalbn;
pub(crate) use cmp::{eq, ge, gt};
pub(crate) use k_cos::k_cos;
pub(crate) use k_sin::k_sin;
pub(crate) use rem_pio2::rem_pio2;
pub(crate) use rem_pio2_large::rem_pio2_large;
pub(crate) use scalbn::scalbn;

View File

@@ -0,0 +1,241 @@
use crate::soft_f64::{helpers::eq, SoftF64};
// origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunPro, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
//
// Optimized by Bruce D. Evans. */
use super::rem_pio2_large;
// #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
// #define EPS DBL_EPSILON
const EPS: SoftF64 = SoftF64(2.2204460492503131e-16);
// #elif FLT_EVAL_METHOD==2
// #define EPS LDBL_EPSILON
// #endif
// TODO: Support FLT_EVAL_METHOD?
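// TO_INT = 1.5/EPS = 1.5 * 2^52; adding and then subtracting it acts as a
// branch-free rint() for computing f_n below (assuming round-to-nearest).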
const TO_INT: SoftF64 = SoftF64(1.5).div(EPS);
/// 53 bits of 2/pi
const INV_PIO2: SoftF64 = SoftF64(6.36619772367581382433e-01); /* 0x3FE45F30, 0x6DC9C883 */
/// first 33 bits of pi/2
const PIO2_1: SoftF64 = SoftF64(1.57079632673412561417e+00); /* 0x3FF921FB, 0x54400000 */
/// pi/2 - PIO2_1
const PIO2_1T: SoftF64 = SoftF64(6.07710050650619224932e-11); /* 0x3DD0B461, 0x1A626331 */
/// second 33 bits of pi/2
const PIO2_2: SoftF64 = SoftF64(6.07710050630396597660e-11); /* 0x3DD0B461, 0x1A600000 */
/// pi/2 - (PIO2_1+PIO2_2)
const PIO2_2T: SoftF64 = SoftF64(2.02226624879595063154e-21); /* 0x3BA3198A, 0x2E037073 */
/// third 33 bits of pi/2
const PIO2_3: SoftF64 = SoftF64(2.02226624871116645580e-21); /* 0x3BA3198A, 0x2E000000 */
/// pi/2 - (PIO2_1+PIO2_2+PIO2_3)
const PIO2_3T: SoftF64 = SoftF64(8.47842766036889956997e-32); /* 0x397B839A, 0x252049C1 */
// return the remainder of x rem pi/2 in y[0]+y[1]
// use rem_pio2_large() for large x
//
// caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
pub(crate) const fn rem_pio2(x: SoftF64) -> (i32, SoftF64, SoftF64) {
let x1p24 = SoftF64::from_bits(0x4170000000000000);
let sign = (SoftF64::to_bits(x) >> 63) as i32;
let ix = (SoftF64::to_bits(x) >> 32) as u32 & 0x7fffffff;
const fn medium(x: SoftF64, ix: u32) -> (i32, SoftF64, SoftF64) {
/* rint(x/(pi/2)), Assume round-to-nearest. */
let tmp = SoftF64(x.0 as f64).mul(INV_PIO2).add(TO_INT);
// force rounding of tmp to its storage format on x87 to avoid
// excess precision issues.
let f_n = tmp.sub(TO_INT);
let n = f_n.0 as i32;
let mut r = x.sub(f_n.mul(PIO2_1));
let mut w = f_n.mul(PIO2_1T); /* 1st round, good to 85 bits */
let mut y0 = r.sub(w);
let ui = SoftF64::to_bits(y0);
let ey = (ui >> 52) as i32 & 0x7ff;
let ex = (ix >> 20) as i32;
if ex - ey > 16 {
/* 2nd round, good to 118 bits */
let t = r;
w = f_n.mul(PIO2_2);
r = t.sub(w);
w = f_n.mul(PIO2_2T).sub((t.sub(r)).sub(w));
y0 = r.sub(w);
let ey = (SoftF64::to_bits(y0) >> 52) as i32 & 0x7ff;
if ex - ey > 49 {
/* 3rd round, good to 151 bits, covers all cases */
let t = r;
w = f_n.mul(PIO2_3);
r = t.sub(w);
w = f_n.mul(PIO2_3T).sub((t.sub(r)).sub(w));
y0 = r.sub(w);
}
}
let y1 = (r.sub(y0)).sub(w);
(n, y0, y1)
}
if ix <= 0x400f6a7a {
/* |x| ~<= 5pi/4 */
if (ix & 0xfffff) == 0x921fb {
/* |x| ~= pi/2 or 2pi/2 */
return medium(x, ix); /* cancellation -- use medium case */
}
if ix <= 0x4002d97c {
/* |x| ~<= 3pi/4 */
if sign == 0 {
let z = x.sub(PIO2_1); /* one round good to 85 bits */
let y0 = z.sub(PIO2_1T);
let y1 = (z.sub(y0)).sub(PIO2_1T);
return (1, y0, y1);
} else {
let z = x.add(PIO2_1);
let y0 = z.add(PIO2_1T);
let y1 = (z.sub(y0)).add(PIO2_1T);
return (-1, y0, y1);
}
} else if sign == 0 {
let z = x.sub(SoftF64(2.0).mul(PIO2_1));
let y0 = z.sub(SoftF64(2.0).mul(PIO2_1T));
let y1 = (z.sub(y0)).sub(SoftF64(2.0).mul(PIO2_1T));
return (2, y0, y1);
} else {
let z = x.add(SoftF64(2.0).mul(PIO2_1));
let y0 = z.add(SoftF64(2.0).mul(PIO2_1T));
let y1 = (z.sub(y0)).add(SoftF64(2.0).mul(PIO2_1T));
return (-2, y0, y1);
}
}
if ix <= 0x401c463b {
/* |x| ~<= 9pi/4 */
if ix <= 0x4015fdbc {
/* |x| ~<= 7pi/4 */
if ix == 0x4012d97c {
/* |x| ~= 3pi/2 */
return medium(x, ix);
}
if sign == 0 {
let z = x.sub(SoftF64(3.0).mul(PIO2_1));
let y0 = z.sub(SoftF64(3.0).mul(PIO2_1T));
let y1 = (z.sub(y0)).sub(SoftF64(3.0).mul(PIO2_1T));
return (3, y0, y1);
} else {
let z = x.add(SoftF64(3.0).mul(PIO2_1));
let y0 = z.add(SoftF64(3.0).mul(PIO2_1T));
let y1 = (z.sub(y0)).add(SoftF64(3.0).mul(PIO2_1T));
return (-3, y0, y1);
}
} else {
if ix == 0x401921fb {
/* |x| ~= 4pi/2 */
return medium(x, ix);
}
if sign == 0 {
let z = x.sub(SoftF64(4.0).mul(PIO2_1));
let y0 = z.sub(SoftF64(4.0).mul(PIO2_1T));
let y1 = (z.sub(y0)).sub(SoftF64(4.0).mul(PIO2_1T));
return (4, y0, y1);
} else {
let z = x.add(SoftF64(4.0).mul(PIO2_1));
let y0 = z.add(SoftF64(4.0).mul(PIO2_1T));
let y1 = (z.sub(y0)).add(SoftF64(4.0).mul(PIO2_1T));
return (-4, y0, y1);
}
}
}
if ix < 0x413921fb {
/* |x| ~< 2^20*(pi/2), medium size */
return medium(x, ix);
}
/*
* all other (large) arguments
*/
if ix >= 0x7ff00000 {
/* x is inf or NaN */
let y0 = x.sub(x);
let y1 = y0;
return (0, y0, y1);
}
/* set z = scalbn(|x|,-ilogb(x)+23) */
let mut ui = SoftF64::to_bits(x);
ui &= (!1) >> 12;
ui |= (0x3ff + 23) << 52;
let mut z = SoftF64::from_bits(ui);
let mut tx = [SoftF64::ZERO; 3];
{
let mut i = 0;
while i < 2 {
tx[i] = SoftF64(z.0 as i32 as f64);
z = (z.sub(tx[i])).mul(x1p24);
i += 1;
}
}
tx[2] = z;
/* skip zero terms, first term is non-zero */
let mut i = 2;
while i != 0 && eq(tx[i], SoftF64::ZERO) {
i -= 1;
}
let ty = [SoftF64::ZERO; 3];
let (n, ty) = match i {
2 => rem_pio2_large(&tx, &ty, ((ix as i32) >> 20) - (0x3ff + 23), 1),
1 => rem_pio2_large(&[tx[0], tx[1]], &ty, ((ix as i32) >> 20) - (0x3ff + 23), 1),
0 => rem_pio2_large(&[tx[0]], &ty, ((ix as i32) >> 20) - (0x3ff + 23), 1),
_ => panic!(),
};
if sign != 0 {
return (-n, ty[0].neg(), ty[1].neg());
}
(n, ty[0], ty[1])
}
#[cfg(test)]
mod tests {
use super::{rem_pio2, SoftF64};
#[test]
fn test_near_pi() {
let arg = SoftF64(3.141592025756836);
let (a, b, c) = rem_pio2(arg);
assert_eq!(
(a, b.0, c.0),
(2, -6.278329573009626e-7, -2.1125998133974653e-23)
);
let arg = SoftF64(3.141592033207416);
let (a, b, c) = rem_pio2(arg);
assert_eq!(
(a, b.0, c.0),
(2, -6.20382377148128e-7, -2.1125998133974653e-23)
);
let arg = SoftF64(3.141592144966125);
let (a, b, c) = rem_pio2(arg);
assert_eq!(
(a, b.0, c.0),
(2, -5.086236681942706e-7, -2.1125998133974653e-23)
);
let arg = SoftF64(3.141592979431152);
let (a, b, c) = rem_pio2(arg);
assert_eq!(
(a, b.0, c.0),
(2, 3.2584135866119817e-7, -2.1125998133974653e-23)
);
}
#[test]
fn test_overflow_b9b847() {
let _ = rem_pio2(SoftF64(-3054214.5490637687));
}
#[test]
fn test_overflow_4747b9() {
let _ = rem_pio2(SoftF64(917340800458.2274));
}
}

View File

@@ -0,0 +1,534 @@
#![allow(unused_unsafe)]
/* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
use crate::soft_f64::SoftF64;
use super::{eq, ge, scalbn};
// initial value for jk
const INIT_JK: [usize; 4] = [3, 4, 4, 6];
// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
//
// integer array, contains the (24*i)-th to (24*i+23)-th
// bit of 2/pi after binary point. The corresponding
// floating value is
//
// ipio2[i] * 2^(-24(i+1)).
//
// NB: This table must have at least (e0-3)/24 + jk terms.
// For quad precision (e0 <= 16360, jk = 6), this is 686.
#[cfg(any(target_pointer_width = "32", target_pointer_width = "16"))]
const IPIO2: [i32; 66] = [
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163,
0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C,
0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292,
0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
0x73A8C9, 0x60E27B, 0xC08C6B,
];
#[cfg(target_pointer_width = "64")]
const IPIO2: [i32; 690] = [
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163,
0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C,
0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292,
0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
0x73A8C9, 0x60E27B, 0xC08C6B, 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6,
0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, 0xDE4F98, 0x327DBB, 0xC33D26,
0xEF6B1E, 0x5EF89F, 0x3A1F35, 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30,
0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, 0x467D86, 0x2D71E3, 0x9AC69B,
0x006233, 0x7CD2B4, 0x97A7B4, 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770,
0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, 0xCB2324, 0x778AD6, 0x23545A,
0xB91F00, 0x1B0AF1, 0xDFCE19, 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522,
0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, 0xDE3B58, 0x929BDE, 0x2822D2,
0xE88628, 0x4D58E2, 0x32CAC6, 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E,
0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, 0xD36710, 0xD8DDAA, 0x425FAE,
0xCE616A, 0xA4280A, 0xB499D3, 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF,
0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, 0x36D9CA, 0xD2A828, 0x8D61C2,
0x77C912, 0x142604, 0x9B4612, 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929,
0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, 0xC3E7B3, 0x28F8C7, 0x940593,
0x3E71C1, 0xB3092E, 0xF3450B, 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C,
0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, 0x9794E8, 0x84E6E2, 0x973199,
0x6BED88, 0x365F5F, 0x0EFDBB, 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC,
0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, 0x90AA47, 0x02E774, 0x24D6BD,
0xA67DF7, 0x72486E, 0xEF169F, 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5,
0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, 0x10D86D, 0x324832, 0x754C5B,
0xD4714E, 0x6E5445, 0xC1090B, 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA,
0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, 0x6AE290, 0x89D988, 0x50722C,
0xBEA404, 0x940777, 0x7030F3, 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3,
0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, 0x3BDF08, 0x2B3715, 0xA0805C,
0x93805A, 0x921110, 0xD8E80F, 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61,
0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, 0xAA140A, 0x2F2689, 0x768364,
0x333B09, 0x1A940E, 0xAA3A51, 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0,
0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, 0x5BC3D8, 0xC492F5, 0x4BADC6,
0xA5CA4E, 0xCD37A7, 0x36A9E6, 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC,
0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, 0x306529, 0xBF5657, 0x3AFF47,
0xB9F96A, 0xF3BE75, 0xDF9328, 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D,
0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, 0xA8654F, 0xA5C1D2, 0x0F3F0B,
0xCD785B, 0x76F923, 0x048B7B, 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4,
0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, 0xDA4886, 0xA05DF7, 0xF480C6,
0x2FF0AC, 0x9AECDD, 0xBC5C3F, 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD,
0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, 0x2A1216, 0x2DB7DC, 0xFDE5FA,
0xFEDB89, 0xFDBE89, 0x6C76E4, 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761,
0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, 0x48D784, 0x16DF30, 0x432DC7,
0x356125, 0xCE70C9, 0xB8CB30, 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262,
0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, 0xC4F133, 0x5F6E13, 0xE4305D,
0xA92E85, 0xC3B21D, 0x3632A1, 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C,
0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, 0xCBDA11, 0xD0BE7D, 0xC1DB9B,
0xBD17AB, 0x81A2CA, 0x5C6A08, 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196,
0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, 0x4F6A68, 0xA82A4A, 0x5AC44F,
0xBCF82D, 0x985AD7, 0x95C7F4, 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC,
0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, 0xD0C0B2, 0x485551, 0x0EFB1E,
0xC37295, 0x3B06A3, 0x3540C0, 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C,
0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, 0x3C3ABA, 0x461846, 0x5F7555,
0xF5BDD2, 0xC6926E, 0x5D2EAC, 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22,
0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, 0x745D7C, 0xB2AD6B, 0x9D6ECD,
0x7B723E, 0x6A11C6, 0xA9CFF7, 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5,
0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, 0xBEFDFD, 0xEF4556, 0x367ED9,
0x13D9EC, 0xB9BA8B, 0xFC97C4, 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF,
0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, 0x9C2A3E, 0xCC5F11, 0x4A0BFD,
0xFBF4E1, 0x6D3B8E, 0x2C86E2, 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138,
0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, 0xCC2254, 0xDC552A, 0xD6C6C0,
0x96190B, 0xB8701A, 0x649569, 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34,
0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, 0x9B5861, 0xBC57E1, 0xC68351,
0x103ED8, 0x4871DD, 0xDD1C2D, 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F,
0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, 0x382682, 0x9BE7CA, 0xA40D51,
0xB13399, 0x0ED7A9, 0x480569, 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B,
0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, 0x5FD45E, 0xA4677B, 0x7AACBA,
0xA2F655, 0x23882B, 0x55BA41, 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49,
0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, 0xAE5ADB, 0x86C547, 0x624385,
0x3B8621, 0x94792C, 0x876110, 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8,
0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, 0xB1933D, 0x0B7CBD, 0xDC51A4,
0x63DD27, 0xDDE169, 0x19949A, 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270,
0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, 0x4D7E6F, 0x5119A5, 0xABF9B5,
0xD6DF82, 0x61DD96, 0x023616, 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B,
0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0,
];
const PIO2: [SoftF64; 8] = [
SoftF64(1.57079625129699707031e+00), /* 0x3FF921FB, 0x40000000 */
SoftF64(7.54978941586159635335e-08), /* 0x3E74442D, 0x00000000 */
SoftF64(5.39030252995776476554e-15), /* 0x3CF84698, 0x80000000 */
SoftF64(3.28200341580791294123e-22), /* 0x3B78CC51, 0x60000000 */
SoftF64(1.27065575308067607349e-29), /* 0x39F01B83, 0x80000000 */
SoftF64(1.22933308981111328932e-36), /* 0x387A2520, 0x40000000 */
SoftF64(2.73370053816464559624e-44), /* 0x36E38222, 0x80000000 */
SoftF64(2.16741683877804819444e-51), /* 0x3569F31D, 0x00000000 */
];
//
// Input parameters:
// x[] The input value (must be positive) is broken into nx
// pieces of 24-bit integers in double precision format.
// x[i] will be the i-th 24 bit of x. The scaled exponent
// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
// matches x up to 24 bits).
//
// Example of breaking a double positive z into x[0]+x[1]+x[2]:
// e0 = ilogb(z)-23
// z = scalbn(z,-e0)
// for i = 0,1,2
// x[i] = floor(z)
// z = (z-x[i])*2**24
//
// y[] output result in an array of double precision numbers.
// The dimension of y[] is:
// 24-bit precision 1
// 53-bit precision 2
// 64-bit precision 2
// 113-bit precision 3
// The actual value is the sum of them. Thus for 113-bit
// precision, one may have to do something like:
//
// long double t,w,r_head, r_tail;
// t = (long double)y[2] + (long double)y[1];
// w = (long double)y[0];
// r_head = t+w;
// r_tail = w - (r_head - t);
//
// e0 The exponent of x[0]. Must be <= 16360 or you need to
// expand the ipio2 table.
//
// prec an integer indicating the precision:
// 0 24 bits (single)
// 1 53 bits (double)
// 2 64 bits (extended)
// 3 113 bits (quad)
//
// Here is the description of some local variables:
//
// jk jk+1 is the initial number of terms of ipio2[] needed
// in the computation. The minimum and recommended value
// for jk is 3,4,4,6 for single, double, extended, and quad.
// jk+1 must be 2 larger than you might expect so that our
// recomputation test works. (Up to 24 bits in the integer
// part (the 24 bits of it that we compute) and 23 bits in
// the fraction part may be lost to cancelation before we
// recompute.)
//
// jz local integer variable indicating the number of
// terms of ipio2[] used.
//
// jx nx - 1
//
// jv index for pointing to the suitable ipio2[] for the
// computation. In general, we want
// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
// is an integer. Thus
// e0-3-24*jv >= 0 or (e0-3)/24 >= jv
// Hence jv = max(0,(e0-3)/24).
//
// jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
//
// q[] double array with integral value, representing the
// 24-bits chunk of the product of x and 2/pi.
//
// q0 the corresponding exponent of q[0]. Note that the
// exponent for q[i] would be q0-24*i.
//
// PIo2[] double precision array, obtained by cutting pi/2
// into 24 bits chunks.
//
// f[] ipio2[] in floating point
//
// iq[] integer array by breaking up q[] in 24-bits chunk.
//
// fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
//
// ih integer. If >0 it indicates q[] is >= 0.5, hence
// it also indicates the *sign* of the result.
/// Return the last three digits of N with y = x - N*pi/2
/// so that |y| < pi/2.
///
/// The method is to compute the integer (mod 8) and fraction parts of
/// (2/pi)*x without doing the full multiplication. In general we
/// skip the parts of the product that are known to be a huge integer
/// (more accurately, = 0 mod 8). Thus the number of operations is
/// independent of the exponent of the input.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub(crate) const fn rem_pio2_large<const Y: usize>(
x: &[SoftF64],
y: &[SoftF64; Y],
e0: i32,
prec: usize,
) -> (i32, [SoftF64; Y]) {
let mut y: [SoftF64; Y] = *y;
let x1p24 = SoftF64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24
let x1p_24 = SoftF64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24)
let nx = x.len();
let mut fw: SoftF64;
let mut n: i32;
let mut ih: i32;
let mut z: SoftF64;
let mut f: [SoftF64; 20] = [SoftF64(0.); 20];
let mut fq: [SoftF64; 20] = [SoftF64(0.); 20];
let mut q: [SoftF64; 20] = [SoftF64(0.); 20];
let mut iq: [i32; 20] = [0; 20];
/* initialize jk*/
let jk = INIT_JK[prec];
let jp = jk;
/* determine jx,jv,q0, note that 3>q0 */
let jx = nx - 1;
let mut jv = (e0 - 3) / 24;
if jv < 0 {
jv = 0;
}
let mut q0 = e0 - 24 * (jv + 1);
let jv = jv as usize;
/* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
let mut j = (jv as i32) - (jx as i32);
let m = jx + jk;
{
let mut i = 0;
while i <= m {
if j < 0 {
f[i] = SoftF64::ZERO;
} else {
f[i] = SoftF64(IPIO2[j as usize] as f64);
}
j += 1;
i += 1;
}
}
/* compute q[0],q[1],...q[jk] */
{
let mut i = 0;
while i <= jk {
fw = SoftF64::ZERO;
let mut j = 0;
while j <= jx {
fw = fw.add(x[j].mul(f[jx + i - j]));
j += 1;
}
q[i] = fw;
i += 1;
}
}
let mut jz = jk;
'recompute: loop {
/* distill q[] into iq[] reversingly */
let mut i = 0i32;
z = q[jz];
{
let mut j = jz;
while j >= 1 {
fw = SoftF64((x1p_24.mul(z)).0 as i32 as f64);
iq[i as usize] = z.sub(x1p24.mul(fw)).0 as i32;
z = q[j - 1].add(fw);
i += 1;
j -= 1;
}
}
/* compute n */
z = scalbn(z, q0); /* actual value of z */
z = z.sub(SoftF64(8.0).mul(z.mul(SoftF64(0.125)).floor())); /* trim off integer >= 8 */
n = z.0 as i32;
z = z.sub(SoftF64(n as f64));
ih = 0;
if q0 > 0 {
/* need iq[jz-1] to determine n */
i = iq[jz - 1] >> (24 - q0);
n += i;
iq[jz - 1] = iq[jz - 1] - (i << (24 - q0));
ih = iq[jz - 1] >> (23 - q0);
} else if q0 == 0 {
ih = iq[jz - 1] >> 23;
} else if ge(z, SoftF64(0.5)) {
ih = 2;
}
if ih > 0 {
/* q > 0.5 */
n += 1;
let mut carry = 0i32;
{
let mut i = 0;
while i < jz {
/* compute 1-q */
let j = iq[i];
if carry == 0 {
if j != 0 {
carry = 1;
iq[i] = 0x1000000 - j;
}
} else {
iq[i] = 0xffffff - j;
}
i += 1;
}
}
if q0 > 0 {
/* rare case: chance is 1 in 12 */
match q0 {
1 => {
iq[jz - 1] &= 0x7fffff;
}
2 => {
iq[jz - 1] &= 0x3fffff;
}
_ => {}
}
}
if ih == 2 {
z = SoftF64::ONE.sub(z);
if carry != 0 {
z = z.sub(scalbn(SoftF64::ONE, q0));
}
}
}
/* check if recomputation is needed */
if eq(z, SoftF64::ZERO) {
let mut j = 0;
{
let mut i = jz - 1;
while i >= jk {
j |= iq[i];
i -= 1;
}
}
if j == 0 {
/* need recomputation */
let mut k = 1;
while iq[jk - k] == 0 {
k += 1; /* k = no. of terms needed */
}
{
let mut i = jz + 1;
while i <= jz + k {
/* add q[jz+1] to q[jz+k] */
f[jx + i] = SoftF64(IPIO2[jv + i] as f64);
fw = SoftF64::ZERO;
{
let mut j = 0;
while j <= jx {
fw = fw.add(x[j].mul(f[jx + i - j]));
j += 1;
}
}
q[i] = fw;
i += 1;
}
}
jz += k;
continue 'recompute;
}
}
break;
}
/* chop off zero terms */
if eq(z, SoftF64::ZERO) {
jz -= 1;
q0 -= 24;
while iq[jz] == 0 {
jz -= 1;
q0 -= 24;
}
} else {
/* break z into 24-bit if necessary */
z = scalbn(z, -q0);
if ge(z, x1p24) {
fw = SoftF64(x1p_24.mul(z).0 as i32 as f64);
iq[jz] = z.sub(x1p24.mul(fw)).0 as i32;
jz += 1;
q0 += 24;
iq[jz] = fw.0 as i32;
} else {
iq[jz] = z.0 as i32;
}
}
/* convert integer "bit" chunk to floating-point value */
fw = scalbn(SoftF64::ONE, q0);
{
let mut i = jz;
while i != usize::MAX {
q[i] = fw.mul(SoftF64(iq[i] as f64));
fw = fw.mul(x1p_24);
i = i.wrapping_sub(1);
}
}
/* compute PIo2[0,...,jp]*q[jz,...,0] */
{
let mut i = jz;
while i != usize::MAX {
fw = SoftF64::ZERO;
let mut k = 0;
while (k <= jp) && (k <= jz - i) {
fw = fw.add(PIO2[k].mul(q[i + k]));
k += 1;
}
fq[jz - i] = fw;
i = i.wrapping_sub(1);
}
}
/* compress fq[] into y[] */
match prec {
0 => {
fw = SoftF64::ZERO;
{
let mut i = jz;
while i != usize::MAX {
fw = fw.add(fq[i]);
i = i.wrapping_sub(1);
}
}
y[0] = if ih == 0 { fw } else { fw.neg() };
}
1 | 2 => {
fw = SoftF64::ZERO;
{
let mut i = jz;
while i != usize::MAX {
fw = fw.add(fq[i]);
i = i.wrapping_sub(1);
}
}
// TODO: drop excess precision here once double_t is used
fw = SoftF64(fw.0 as f64);
y[0] = if ih == 0 { fw } else { fw.neg() };
fw = fq[0].sub(fw);
{
let mut i = 1;
while i <= jz {
fw = fw.add(fq[i]);
i += 1;
}
}
y[1] = if ih == 0 { fw } else { fw.neg() };
}
3 => {
/* painful */
{
let mut i = jz;
while i >= 1 {
fw = fq[i - 1].add(fq[i]);
fq[i] = fq[i].add(fq[i - 1].sub(fw));
fq[i - 1] = fw;
i -= 1;
}
}
{
let mut i = jz;
while i >= 2 {
fw = fq[i - 1].add(fq[i]);
fq[i] = fq[i].add(fq[i - 1].sub(fw));
fq[i - 1] = fw;
i -= 1;
}
}
fw = SoftF64::ZERO;
let mut i = jz;
while i >= 2 {
fw = fw.add(fq[i]);
i -= 1;
}
if ih == 0 {
y[0] = fq[0];
y[1] = fq[1];
y[2] = fw;
} else {
y[0] = fq[0].neg();
y[1] = fq[1].neg();
y[2] = fw.neg();
}
}
_ => unreachable!(),
};
(n & 7, y)
}
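// Hedged sketch (illustrative, not part of the upstream sources): the 24-bit
// input decomposition described in the header comment, carried out with
// SoftF64's const ops; the value 33554435.0 (= 2^25 + 3) and e0 = 2 are
// arbitrary example choices.
#[cfg(test)]
mod decomposition_sketch {
use super::*;
#[test]
fn chunks_recombine_to_the_original_value() {
let z = SoftF64(33554435.0); // ilogb(z) = 25, so e0 = 25 - 23 = 2
let e0 = 2;
let mut s = scalbn(z, -e0); // now in [2^23, 2^24)
let mut x = [SoftF64(0.0); 3];
let mut i = 0;
while i < 3 {
x[i] = s.floor();
s = s.sub(x[i]).mul(SoftF64(16777216.0)); // * 2^24
i += 1;
}
// x[0]*2^e0 + x[1]*2^(e0-24) + x[2]*2^(e0-48) gives back z exactly
let rebuilt = scalbn(x[0], e0)
.add(scalbn(x[1], e0 - 24))
.add(scalbn(x[2], e0 - 48));
assert_eq!(rebuilt.to_bits(), z.to_bits());
}
}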

View File

@@ -0,0 +1,34 @@
use crate::soft_f64::SoftF64;
pub(crate) const fn scalbn(x: SoftF64, mut n: i32) -> SoftF64 {
let x1p1023 = SoftF64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023
let x1p53 = SoftF64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53
let x1p_1022 = SoftF64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022)
let mut y = x;
if n > 1023 {
y = y.mul(x1p1023);
n -= 1023;
if n > 1023 {
y = y.mul(x1p1023);
n -= 1023;
if n > 1023 {
n = 1023;
}
}
} else if n < -1022 {
/* make sure final n < -53 to avoid double
rounding in the subnormal range */
y = y.mul(x1p_1022.mul(x1p53));
n += 1022 - 53;
if n < -1022 {
y = y.mul(x1p_1022.mul(x1p53));
n += 1022 - 53;
if n < -1022 {
n = -1022;
}
}
}
y.mul(SoftF64::from_bits(((0x3ff + n) as u64) << 52))
}
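// Hedged sketch (illustrative, not part of the upstream sources): scalbn(x, n)
// computes x * 2^n using const ops only; the values below are arbitrary.
#[cfg(test)]
mod scalbn_sketch {
use super::*;
#[test]
fn scales_by_powers_of_two() {
assert_eq!(scalbn(SoftF64(1.5), 4).0, 24.0);
assert_eq!(scalbn(SoftF64(24.0), -4).0, 1.5);
}
}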

View File

@@ -0,0 +1,68 @@
use crate::soft_f64::SoftF64;
type F = SoftF64;
impl From<f64> for F {
fn from(value: f64) -> Self {
F::from_f64(value)
}
}
impl PartialEq<Self> for F {
fn eq(&self, other: &Self) -> bool {
match self.cmp(*other) {
Some(core::cmp::Ordering::Equal) => true,
_ => false,
}
}
}
impl PartialOrd for F {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
self.cmp(*other)
}
}
impl core::ops::Add for F {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
F::add(self, rhs)
}
}
impl core::ops::Sub for F {
type Output = Self;
fn sub(self, rhs: Self) -> Self::Output {
F::sub(self, rhs)
}
}
impl core::ops::Mul for F {
type Output = Self;
fn mul(self, rhs: Self) -> Self::Output {
F::mul(self, rhs)
}
}
impl core::ops::Div for F {
type Output = Self;
fn div(self, rhs: Self) -> Self::Output {
F::div(self, rhs)
}
}
impl core::ops::AddAssign for F {
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
impl core::ops::SubAssign for F {
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}
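// Hedged usage sketch (illustrative, not part of the upstream sources): with
// the impls above, SoftF64 values compose with ordinary operator syntax; the
// numbers are arbitrary.
#[cfg(test)]
mod operator_sketch {
use super::*;
#[test]
fn operators_delegate_to_the_soft_float_ops() {
let mut x = F::from(1.5);
x += F::from(2.5); // AddAssign -> SoftF64::add
assert!(x == F::from(4.0)); // PartialEq goes through SoftF64::cmp
assert!(F::from(1.0) < F::from(2.0)); // PartialOrd likewise
assert!((x * F::from(2.0) - F::from(3.0)) / F::from(5.0) == F::from(1.0));
}
}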

View File

@@ -0,0 +1,178 @@
pub(crate) mod helpers;
pub mod add;
pub mod cmp;
pub mod copysign;
pub mod cos;
pub mod div;
pub mod floor;
pub mod mul;
pub mod pow;
pub mod round;
pub mod sin;
pub mod sqrt;
pub mod trunc;
#[cfg(feature = "const_trait_impl")]
pub mod const_impl_trait;
#[cfg(feature = "const_trait_impl")]
pub use const_impl_trait as impl_trait;
#[cfg(not(feature = "const_trait_impl"))]
pub mod impl_trait;
#[derive(Default, Copy, Clone)]
#[repr(transparent)]
pub struct SoftF64(pub f64);
impl SoftF64 {
pub const fn from_f64(a: f64) -> Self {
Self(a)
}
pub const fn to_f64(self) -> f64 {
self.0
}
pub const fn from_bits(a: u64) -> Self {
Self(unsafe { core::mem::transmute(a) })
}
pub const fn to_bits(self) -> u64 {
unsafe { core::mem::transmute(self.0) }
}
pub const fn add(self, rhs: Self) -> Self {
add::add(self, rhs)
}
pub const fn mul(self, rhs: Self) -> Self {
mul::mul(self, rhs)
}
pub const fn div(self, rhs: Self) -> Self {
div::div(self, rhs)
}
pub const fn cmp(self, rhs: Self) -> Option<core::cmp::Ordering> {
cmp::cmp(self, rhs)
}
pub const fn neg(self) -> Self {
Self::from_repr(self.repr() ^ Self::SIGN_MASK)
}
pub const fn sub(self, rhs: Self) -> Self {
self.add(rhs.neg())
}
pub const fn sqrt(self) -> Self {
sqrt::sqrt(self)
}
pub const fn powi(self, n: i32) -> Self {
pow::pow(self, n)
}
pub const fn copysign(self, other: Self) -> Self {
copysign::copysign(self, other)
}
pub const fn trunc(self) -> Self {
trunc::trunc(self)
}
pub const fn round(self) -> Self {
round::round(self)
}
pub const fn floor(self) -> Self {
floor::floor(self)
}
pub const fn sin(self) -> Self {
sin::sin(self)
}
pub const fn cos(self) -> Self {
cos::cos(self)
}
}
type SelfInt = u64;
type SelfSignedInt = i64;
type SelfExpInt = i16;
#[allow(unused)]
impl SoftF64 {
const ZERO: Self = Self(0.0);
const ONE: Self = Self(1.0);
const BITS: u32 = 64;
const SIGNIFICAND_BITS: u32 = 52;
const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1;
const SIGN_MASK: SelfInt = 1 << (Self::BITS - 1);
const SIGNIFICAND_MASK: SelfInt = (1 << Self::SIGNIFICAND_BITS) - 1;
const IMPLICIT_BIT: SelfInt = 1 << Self::SIGNIFICAND_BITS;
const EXPONENT_MASK: SelfInt = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK);
const fn repr(self) -> SelfInt {
self.to_bits()
}
const fn signed_repr(self) -> SelfSignedInt {
self.to_bits() as SelfSignedInt
}
const fn sign(self) -> bool {
self.signed_repr() < 0
}
const fn exp(self) -> SelfExpInt {
((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as SelfExpInt
}
const fn frac(self) -> SelfInt {
self.to_bits() & Self::SIGNIFICAND_MASK
}
const fn imp_frac(self) -> SelfInt {
self.frac() | Self::IMPLICIT_BIT
}
const fn from_repr(a: SelfInt) -> Self {
Self::from_bits(a)
}
const fn from_parts(sign: bool, exponent: SelfInt, significand: SelfInt) -> Self {
Self::from_repr(
((sign as SelfInt) << (Self::BITS - 1))
| ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK)
| (significand & Self::SIGNIFICAND_MASK),
)
}
const fn normalize(significand: SelfInt) -> (i32, SelfInt) {
let shift = significand
.leading_zeros()
.wrapping_sub((1u64 << Self::SIGNIFICAND_BITS).leading_zeros());
(
1i32.wrapping_sub(shift as i32),
significand << shift as SelfInt,
)
}
const fn is_subnormal(self) -> bool {
(self.repr() & Self::EXPONENT_MASK) == 0
}
const fn scalbn(self, n: i32) -> SoftF64 {
helpers::scalbn(self, n)
}
}
const fn u128_lo(x: u128) -> u64 {
x as u64
}
const fn u128_hi(x: u128) -> u64 {
(x >> 64) as u64
}
const fn u64_widen_mul(a: u64, b: u64) -> (u64, u64) {
let x = u128::wrapping_mul(a as _, b as _);
(u128_lo(x), u128_hi(x))
}
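// Hedged usage sketch (illustrative, not part of the upstream sources): since
// every operation above is a `const fn`, a SoftF64 expression can be folded at
// compile time; 3-4-5 is an arbitrary exact example.
#[cfg(test)]
mod const_eval_sketch {
use super::SoftF64;
#[test]
fn const_hypotenuse() {
const HYP: SoftF64 = SoftF64(3.0)
.mul(SoftF64(3.0))
.add(SoftF64(4.0).mul(SoftF64(4.0)))
.sqrt();
assert_eq!(HYP.to_f64(), 5.0);
}
}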

View File

@@ -0,0 +1,194 @@
use crate::soft_f64::{u64_widen_mul, SoftF64};
type F = SoftF64;
type FInt = u64;
const fn widen_mul(a: FInt, b: FInt) -> (FInt, FInt) {
u64_widen_mul(a, b)
}
pub(crate) const fn mul(a: F, b: F) -> F {
let one: FInt = 1;
let zero: FInt = 0;
let bits = F::BITS;
let significand_bits = F::SIGNIFICAND_BITS;
let max_exponent = F::EXPONENT_MAX;
let exponent_bias = F::EXPONENT_BIAS;
let implicit_bit = F::IMPLICIT_BIT;
let significand_mask = F::SIGNIFICAND_MASK;
let sign_bit = F::SIGN_MASK as FInt;
let abs_mask = sign_bit - one;
let exponent_mask = F::EXPONENT_MASK;
let inf_rep = exponent_mask;
let quiet_bit = implicit_bit >> 1;
let qnan_rep = exponent_mask | quiet_bit;
let exponent_bits = F::EXPONENT_BITS;
let a_rep = a.repr();
let b_rep = b.repr();
let a_exponent = (a_rep >> significand_bits) & max_exponent as FInt;
let b_exponent = (b_rep >> significand_bits) & max_exponent as FInt;
let product_sign = (a_rep ^ b_rep) & sign_bit;
let mut a_significand = a_rep & significand_mask;
let mut b_significand = b_rep & significand_mask;
let mut scale = 0;
// Detect if a or b is zero, denormal, infinity, or NaN.
if a_exponent.wrapping_sub(one) >= (max_exponent - 1) as FInt
|| b_exponent.wrapping_sub(one) >= (max_exponent - 1) as FInt
{
let a_abs = a_rep & abs_mask;
let b_abs = b_rep & abs_mask;
// NaN * anything = qNaN
if a_abs > inf_rep {
return F::from_repr(a_rep | quiet_bit);
}
// anything * NaN = qNaN
if b_abs > inf_rep {
return F::from_repr(b_rep | quiet_bit);
}
if a_abs == inf_rep {
if b_abs != zero {
// infinity * non-zero = +/- infinity
return F::from_repr(a_abs | product_sign);
} else {
// infinity * zero = NaN
return F::from_repr(qnan_rep);
}
}
if b_abs == inf_rep {
if a_abs != zero {
// infinity * non-zero = +/- infinity
return F::from_repr(b_abs | product_sign);
} else {
// infinity * zero = NaN
return F::from_repr(qnan_rep);
}
}
// zero * anything = +/- zero
if a_abs == zero {
return F::from_repr(product_sign);
}
// anything * zero = +/- zero
if b_abs == zero {
return F::from_repr(product_sign);
}
// one or both of a or b is denormal, the other (if applicable) is a
// normal number. Renormalize one or both of a and b, and set scale to
// include the necessary exponent adjustment.
if a_abs < implicit_bit {
let (exponent, significand) = F::normalize(a_significand);
scale += exponent;
a_significand = significand;
}
if b_abs < implicit_bit {
let (exponent, significand) = F::normalize(b_significand);
scale += exponent;
b_significand = significand;
}
}
// Or in the implicit significand bit. (If we fell through from the
// denormal path it was already set by normalize( ), but setting it twice
// won't hurt anything.)
a_significand |= implicit_bit;
b_significand |= implicit_bit;
// Get the significand of a*b. Before multiplying the significands, shift
// one of them left to left-align it in the field. Thus, the product will
// have (exponentBits + 2) integral digits, all but two of which must be
// zero. Normalizing this result is just a conditional left-shift by one
// and bumping the exponent accordingly.
let (mut product_low, mut product_high) =
widen_mul(a_significand, b_significand << exponent_bits);
let a_exponent_i32: i32 = a_exponent as _;
let b_exponent_i32: i32 = b_exponent as _;
let mut product_exponent: i32 = a_exponent_i32
.wrapping_add(b_exponent_i32)
.wrapping_add(scale)
.wrapping_sub(exponent_bias as i32);
// Normalize the significand, adjust exponent if needed.
if (product_high & implicit_bit) != zero {
product_exponent = product_exponent.wrapping_add(1);
} else {
product_high = (product_high << 1) | (product_low >> (bits - 1));
product_low <<= 1;
}
// If we have overflowed the type, return +/- infinity.
if product_exponent >= max_exponent as i32 {
return F::from_repr(inf_rep | product_sign);
}
if product_exponent <= 0 {
// Result is denormal before rounding
//
// If the result is so small that it just underflows to zero, return
// a zero of the appropriate sign. Mathematically there is no need to
// handle this case separately, but we make it a special case to
// simplify the shift logic.
let shift = one.wrapping_sub(product_exponent as FInt) as u32;
if shift >= bits {
return F::from_repr(product_sign);
}
// Otherwise, shift the significand of the result so that the round
// bit is the high bit of productLo.
if shift < bits {
let sticky = product_low << (bits - shift);
product_low = product_high << (bits - shift) | product_low >> shift | sticky;
product_high >>= shift;
} else if shift < (2 * bits) {
let sticky = product_high << (2 * bits - shift) | product_low;
product_low = product_high >> (shift - bits) | sticky;
product_high = zero;
} else {
product_high = zero;
}
} else {
// Result is normal before rounding; insert the exponent.
product_high &= significand_mask;
product_high |= (product_exponent as FInt) << significand_bits;
}
// Insert the sign of the result:
product_high |= product_sign;
// Final rounding. The final result may overflow to infinity, or underflow
// to zero, but those are the correct results in those cases. We use the
// default IEEE-754 round-to-nearest, ties-to-even rounding mode.
if product_low > sign_bit {
product_high += one;
}
if product_low == sign_bit {
product_high += product_high & one;
}
F::from_repr(product_high)
}
#[cfg(test)]
mod test {
use crate::soft_f64::SoftF64;
#[test]
fn sanity_check() {
assert_eq!(SoftF64(2.0).mul(SoftF64(2.0)).0, 4.0)
}
}
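// Hedged sketch (illustrative, not part of the upstream sources): the special
// cases spelled out in the comments above, checked directly.
#[cfg(test)]
mod special_cases_sketch {
use crate::soft_f64::SoftF64;
#[test]
fn special_values() {
// infinity * zero = NaN
assert!(SoftF64(f64::INFINITY).mul(SoftF64(0.0)).0.is_nan());
// zero * anything = +/- zero, carrying the product sign
assert_eq!(SoftF64(-0.0).mul(SoftF64(2.0)).to_bits(), (-0.0f64).to_bits());
// infinity * non-zero = +/- infinity
assert_eq!(SoftF64(f64::INFINITY).mul(SoftF64(-2.0)).0, f64::NEG_INFINITY);
}
}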

View File

@@ -0,0 +1,37 @@
use crate::abs_diff;
use crate::soft_f64::SoftF64;
type F = SoftF64;
pub(crate) const fn pow(a: F, b: i32) -> F {
let mut a = a;
let recip = b < 0;
let mut pow = abs_diff(b, 0);
let mut mul = F::ONE;
loop {
if (pow & 1) != 0 {
mul = mul.mul(a);
}
pow >>= 1;
if pow == 0 {
break;
}
a = a.mul(a);
}
if recip {
F::ONE.div(mul)
} else {
mul
}
}
#[cfg(test)]
mod test {
use crate::soft_f64::SoftF64;
#[test]
fn sanity_check() {
assert_eq!(SoftF64(2.0).powi(2).0, 4.0)
}
}
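// Hedged sketch (illustrative, not part of the upstream sources): the
// square-and-multiply loop above also covers the reciprocal and zero-exponent
// paths; the bases are arbitrary.
#[cfg(test)]
mod exponent_paths_sketch {
use crate::soft_f64::SoftF64;
#[test]
fn reciprocal_and_zero_exponents() {
assert_eq!(SoftF64(2.0).powi(-2).0, 0.25); // b < 0 takes the `recip` branch
assert_eq!(SoftF64(123.0).powi(0).0, 1.0); // pow == 0 leaves `mul` at ONE
}
}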

View File

@@ -0,0 +1,31 @@
use super::SoftF64;
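// The offset 0.5 - 0.25*EPSILON (= 0.5 - 2^-54) is the largest double below
// 0.5: adding it with x's sign and truncating rounds halfway cases away from
// zero, while keeping values just under 0.5 from being dragged up to 1.0 by
// rounding inside the addition itself.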
pub(crate) const fn round(x: SoftF64) -> SoftF64 {
SoftF64::trunc(x.add(SoftF64::copysign(
SoftF64(0.5).sub(SoftF64(0.25).mul(SoftF64(f64::EPSILON))),
x,
)))
}
#[cfg(test)]
mod tests {
use super::SoftF64;
#[test]
fn negative_zero() {
assert_eq!(
SoftF64::round(SoftF64(-0.0)).to_bits(),
SoftF64(-0.0).to_bits()
);
}
#[test]
fn sanity_check() {
assert_eq!((SoftF64(-1.0)).round().0, -1.0);
assert_eq!((SoftF64(2.8)).round().0, 3.0);
assert_eq!((SoftF64(-0.5)).round().0, -1.0);
assert_eq!((SoftF64(0.5)).round().0, 1.0);
assert_eq!((SoftF64(-1.5)).round().0, -2.0);
assert_eq!((SoftF64(1.5)).round().0, 2.0);
}
}

View File

@@ -0,0 +1,98 @@
// origin: FreeBSD /usr/src/lib/msun/src/s_sin.c, https://github.com/rust-lang/libm/blob/4c8a973741c014b11ce7f1477693a3e5d4ef9609/src/math/sin.rs */
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunPro, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
use super::{
helpers::{k_cos, k_sin, rem_pio2},
SoftF64,
};
// sin(x)
// Return sine function of x.
//
// kernel function:
// k_sin ... sine function on [-pi/4,pi/4]
// k_cos ... cosine function on [-pi/4,pi/4]
// rem_pio2 ... argument reduction routine
//
// Method.
// Let S,C and T denote the sin, cos and tan respectively on
// [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
// in [-pi/4 , +pi/4], and let n = k mod 4.
// We have
//
// n sin(x) cos(x) tan(x)
// ----------------------------------------------------------
// 0 S C T
// 1 C -S -1/T
// 2 -S -C T
// 3 -C S -1/T
// ----------------------------------------------------------
//
// Special cases:
// Let trig be any of sin, cos, or tan.
// trig(+-INF) is NaN, with signals;
// trig(NaN) is that NaN;
//
// Accuracy:
// TRIG(x) returns trig(x) nearly rounded
pub(crate) const fn sin(x: SoftF64) -> SoftF64 {
let x1p120 = SoftF64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
/* High word of x. */
let ix = (SoftF64::to_bits(x) >> 32) as u32 & 0x7fffffff;
/* |x| ~< pi/4 */
if ix <= 0x3fe921fb {
if ix < 0x3e500000 {
/* |x| < 2**-26 */
/* raise inexact if x != 0 and underflow if subnormal*/
if ix < 0x00100000 {
x.div(x1p120);
} else {
x.add(x1p120);
}
return x;
}
return k_sin(x, SoftF64::ZERO, 0);
}
/* sin(Inf or NaN) is NaN */
if ix >= 0x7ff00000 {
return x.sub(x);
}
/* argument reduction needed */
let (n, y0, y1) = rem_pio2(x);
match n & 3 {
0 => k_sin(y0, y1, 1),
1 => k_cos(y0, y1),
2 => k_sin(y0, y1, 1).neg(),
_ => k_cos(y0, y1).neg(),
}
}
#[cfg(test)]
mod test {
use crate::soft_f64::SoftF64;
#[test]
fn test_near_pi() {
let x = SoftF64::from_bits(0x400921fb000FD5DD); // 3.141592026217707
let sx = SoftF64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7
let result = x.sin().0;
assert_eq!(result, sx.0);
}
#[test]
fn test_large_neg() {
assert_eq!(SoftF64(-1647101.0).sin().to_f64(), (-1647101.0_f64).sin())
}
}
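// Hedged sketch (illustrative, not part of the upstream sources): 2.0 is above
// pi/4, so the reduction path runs with n = 1 and, per the quadrant table in
// the header comment, the cosine kernel produces sin(2.0); agreement with the
// host libm to within a couple of ulps is assumed here.
#[cfg(test)]
mod reduction_sketch {
use crate::soft_f64::SoftF64;
#[test]
fn quadrant_one_uses_the_cosine_kernel() {
let soft = SoftF64(2.0).sin().to_f64();
assert!((soft - 2.0_f64.sin()).abs() <= 4.0 * f64::EPSILON);
}
}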

View File

@@ -0,0 +1,239 @@
/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
/* sqrt(x)
* Return correctly rounded sqrt.
* ------------------------------------------
* | Use the hardware sqrt if you have one |
* ------------------------------------------
* Method:
* Bit by bit method using integer arithmetic. (Slow, but portable)
* 1. Normalization
* Scale x to y in [1,4) with even powers of 2:
* find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
* sqrt(x) = 2^k * sqrt(y)
* 2. Bit by bit computation
* Let q_i = sqrt(y) truncated to i bits after the binary point (q_0 = 1),
* s_i = 2*q_i, and y_i = 2^(i+1) * ( y - q_i^2 ). (1)
*
* To compute q_(i+1) from q_i, one checks whether
*
* (q_i + 2^-(i+1))^2 <= y. (2)
*
* If (2) is false, then q_(i+1) = q_i; otherwise q_(i+1) = q_i + 2^-(i+1).
*
* With some algebraic manipulation, it is not difficult to see
* that (2) is equivalent to
*
* s_i + 2^-(i+1) <= y_i (3)
*
* The advantage of (3) is that s_i and y_i can be computed by
* the following recurrence formula:
* if (3) is false
*
* s_(i+1) = s_i, y_(i+1) = y_i; (4)
*
* otherwise,
*
* s_(i+1) = s_i + 2^-i, y_(i+1) = y_i - s_i - 2^-(i+1) (5)
*
* One may easily use induction to prove (4) and (5).
* Note. Since the left hand side of (3) contains only i+2 bits,
* it is not necessary to do a full (53-bit) comparison
* in (3).
* 3. Final rounding
* After generating the 53 bits result, we compute one more bit.
* Together with the remainder, we can decide whether the
* result is exact, bigger than 1/2ulp, or less than 1/2ulp
* (it will never equal to 1/2ulp).
* The rounding mode can be detected by checking whether
* huge + tiny is equal to huge, and whether huge - tiny is
* equal to huge for some floating point number "huge" and "tiny".
*
* Special cases:
* sqrt(+-0) = +-0 ... exact
* sqrt(inf) = inf
* sqrt(-ve) = NaN ... with invalid signal
* sqrt(NaN) = NaN ... with invalid signal for signaling NaN
*/
use crate::soft_f64::{
helpers::{ge, gt},
SoftF64,
};
type F = SoftF64;
pub(crate) const fn sqrt(x: F) -> F {
const TINY: F = SoftF64(1.0e-300);
let mut z: F;
let sign: u32 = 0x80000000;
let mut ix0: i32;
let mut s0: i32;
let mut q: i32;
let mut m: i32;
let mut t: i32;
let mut i: i32;
let mut r: u32;
let mut t1: u32;
let mut s1: u32;
let mut ix1: u32;
let mut q1: u32;
ix0 = (x.to_bits() >> 32) as i32;
ix1 = x.to_bits() as u32;
/* take care of Inf and NaN */
if (ix0 & 0x7ff00000) == 0x7ff00000 {
return x.mul(x).add(x); /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
}
/* take care of zero */
if ix0 <= 0 {
if ((ix0 & !(sign as i32)) | ix1 as i32) == 0 {
return x; /* sqrt(+-0) = +-0 */
}
if ix0 < 0 {
return (x.sub(x)).div(x.sub(x)); /* sqrt(-ve) = sNaN */
}
}
/* normalize x */
m = ix0 >> 20;
if m == 0 {
/* subnormal x */
while ix0 == 0 {
m -= 21;
ix0 |= (ix1 >> 11) as i32;
ix1 <<= 21;
}
i = 0;
while (ix0 & 0x00100000) == 0 {
i += 1;
ix0 <<= 1;
}
m -= i - 1;
ix0 |= (ix1 as usize >> (32 - i) as usize) as i32;
ix1 = ix1 << i as usize;
}
m -= 1023; /* unbias exponent */
ix0 = (ix0 & 0x000fffff) | 0x00100000;
if (m & 1) == 1 {
/* odd m, double x to make it even */
ix0 += ix0 + ((ix1 & sign) >> 31) as i32;
ix1 = ix1.wrapping_add(ix1);
}
m >>= 1; /* m = [m/2] */
/* generate sqrt(x) bit by bit */
ix0 += ix0 + ((ix1 & sign) >> 31) as i32;
ix1 = ix1.wrapping_add(ix1);
q = 0; /* [q,q1] = sqrt(x) */
q1 = 0;
s0 = 0;
s1 = 0;
r = 0x00200000; /* r = moving bit from right to left */
while r != 0 {
t = s0 + r as i32;
if t <= ix0 {
s0 = t + r as i32;
ix0 -= t;
q += r as i32;
}
ix0 += ix0 + ((ix1 & sign) >> 31) as i32;
ix1 = ix1.wrapping_add(ix1);
r >>= 1;
}
r = sign;
while r != 0 {
t1 = s1.wrapping_add(r);
t = s0;
if t < ix0 || (t == ix0 && t1 <= ix1) {
s1 = t1.wrapping_add(r);
if (t1 & sign) == sign && (s1 & sign) == 0 {
s0 += 1;
}
ix0 -= t;
if ix1 < t1 {
ix0 -= 1;
}
ix1 = ix1.wrapping_sub(t1);
q1 += r;
}
ix0 += ix0 + ((ix1 & sign) >> 31) as i32;
ix1 = ix1.wrapping_add(ix1);
r >>= 1;
}
/* use floating add to find out rounding direction */
if (ix0 as u32 | ix1) != 0 {
z = SoftF64(1.0).sub(TINY); /* raise inexact flag */
if ge(z, SoftF64::ONE) {
z = SoftF64::ONE.add(TINY);
if q1 == 0xffffffff {
q1 = 0;
q += 1;
} else if gt(z, SoftF64::ONE) {
if q1 == 0xfffffffe {
q += 1;
}
q1 = q1.wrapping_add(2);
} else {
q1 += q1 & 1;
}
}
}
ix0 = (q >> 1) + 0x3fe00000;
ix1 = q1 >> 1;
if (q & 1) == 1 {
ix1 |= sign;
}
ix0 += m << 20;
SoftF64::from_bits((ix0 as u64) << 32 | ix1 as u64)
}
#[cfg(test)]
mod tests {
use super::*;
// use core::f64::*;
#[test]
fn sanity_check() {
const SQRT_100: SoftF64 = sqrt(SoftF64(100.0));
assert_eq!(SQRT_100.0, 10.0);
const SQRT_4: SoftF64 = sqrt(SoftF64(4.0));
assert_eq!(SQRT_4.0, 2.0);
}
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
#[test]
fn spec_tests() {
// Not Asserted: FE_INVALID exception is raised if argument is negative.
assert!(sqrt(SoftF64(-1.0)).0.is_nan());
assert!(sqrt(SoftF64(f64::NAN)).0.is_nan());
for f in [0.0, -0.0, f64::INFINITY].iter().copied() {
assert_eq!(sqrt(SoftF64(f)).0, f);
}
}
}
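// Hedged sketch (illustrative, not part of the upstream sources): the header
// comment promises a correctly rounded result, so a non-perfect square is
// expected to match the (also correctly rounded) hardware sqrt bit for bit.
#[cfg(test)]
mod correctly_rounded_sketch {
use super::*;
#[test]
fn matches_hardware_for_two() {
assert_eq!(sqrt(SoftF64(2.0)).to_bits(), 2.0_f64.sqrt().to_bits());
}
}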

View File

@@ -0,0 +1,29 @@
use super::SoftF64;
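// With e = unbiased exponent + 12, the top e bits of the representation hold
// the sign, the 11 exponent bits, and the integer part of the mantissa;
// clearing the remaining low bits (mask m) drops the fraction, which is
// truncation toward zero. e >= 64 means x is already integral (or non-finite),
// and e < 12 means |x| < 1, so only the sign bit survives and the result is
// +/-0.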
pub(crate) const fn trunc(x: SoftF64) -> SoftF64 {
let mut i: u64 = x.to_bits();
let mut e: i64 = (i >> 52 & 0x7ff) as i64 - 0x3ff + 12;
if e >= 52 + 12 {
return x;
}
if e < 12 {
e = 1;
}
let m = -1i64 as u64 >> e;
if (i & m) == 0 {
return x;
}
i &= !m;
SoftF64::from_bits(i)
}
#[cfg(test)]
mod tests {
use crate::soft_f64::SoftF64;
#[test]
fn sanity_check() {
assert_eq!(super::trunc(SoftF64(1.1)).0, 1.0);
}
}
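// Hedged sketch (illustrative, not part of the upstream sources): one case for
// each branch of the exponent test above.
#[cfg(test)]
mod exponent_ranges_sketch {
use super::*;
#[test]
fn covers_all_three_exponent_ranges() {
assert_eq!(trunc(SoftF64(-1.9)).0, -1.0); // fraction bits masked off
assert_eq!(trunc(SoftF64(0.7)).0, 0.0); // |x| < 1: only the sign bit survives
assert_eq!(trunc(SoftF64(1.0e20)).0, 1.0e20); // already integral, returned unchanged
}
}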