Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

vendor/portable-atomic/src/cfgs.rs (vendored, new file, 589 lines)

@@ -0,0 +1,589 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
#![allow(missing_docs)]
#[cfg(not(all(
portable_atomic_no_atomic_load_store,
not(any(
target_arch = "avr",
target_arch = "msp430",
target_arch = "riscv32",
target_arch = "riscv64",
feature = "critical-section",
)),
)))]
#[macro_use]
mod atomic_8_16_macros {
#[macro_export]
macro_rules! cfg_has_atomic_8 {
($($tt:tt)*) => {
$($tt)*
};
}
#[macro_export]
macro_rules! cfg_no_atomic_8 {
($($tt:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_has_atomic_16 {
($($tt:tt)*) => {
$($tt)*
};
}
#[macro_export]
macro_rules! cfg_no_atomic_16 {
($($tt:tt)*) => {};
}
}
#[cfg(all(
portable_atomic_no_atomic_load_store,
not(any(
target_arch = "avr",
target_arch = "msp430",
target_arch = "riscv32",
target_arch = "riscv64",
feature = "critical-section",
)),
))]
#[macro_use]
mod atomic_8_16_macros {
#[macro_export]
macro_rules! cfg_has_atomic_8 {
($($tt:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_no_atomic_8 {
($($tt:tt)*) => {
$($tt)*
};
}
#[macro_export]
macro_rules! cfg_has_atomic_16 {
($($tt:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_no_atomic_16 {
($($tt:tt)*) => {
$($tt)*
};
}
}
#[cfg(all(
any(not(target_pointer_width = "16"), feature = "fallback"),
not(all(
portable_atomic_no_atomic_load_store,
not(any(
target_arch = "avr",
target_arch = "msp430",
target_arch = "riscv32",
target_arch = "riscv64",
feature = "critical-section",
)),
)),
))]
#[macro_use]
mod atomic_32_macros {
#[macro_export]
macro_rules! cfg_has_atomic_32 {
($($tt:tt)*) => {
$($tt)*
};
}
#[macro_export]
macro_rules! cfg_no_atomic_32 {
($($tt:tt)*) => {};
}
}
#[cfg(not(all(
any(not(target_pointer_width = "16"), feature = "fallback"),
not(all(
portable_atomic_no_atomic_load_store,
not(any(
target_arch = "avr",
target_arch = "msp430",
target_arch = "riscv32",
target_arch = "riscv64",
feature = "critical-section",
)),
)),
)))]
#[macro_use]
mod atomic_32_macros {
#[macro_export]
macro_rules! cfg_has_atomic_32 {
($($tt:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_no_atomic_32 {
($($tt:tt)*) => {
$($tt)*
};
}
}
#[cfg_attr(
portable_atomic_no_cfg_target_has_atomic,
cfg(any(
all(
feature = "fallback",
any(
not(portable_atomic_no_atomic_cas),
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
),
),
not(portable_atomic_no_atomic_64),
not(any(target_pointer_width = "16", target_pointer_width = "32")),
all(
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
),
))
)]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(any(
all(
feature = "fallback",
any(
target_has_atomic = "ptr",
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
),
),
target_has_atomic = "64",
not(any(target_pointer_width = "16", target_pointer_width = "32")),
all(
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
),
))
)]
#[macro_use]
mod atomic_64_macros {
#[macro_export]
macro_rules! cfg_has_atomic_64 {
($($tt:tt)*) => {
$($tt)*
};
}
#[macro_export]
macro_rules! cfg_no_atomic_64 {
($($tt:tt)*) => {};
}
}
#[cfg_attr(
portable_atomic_no_cfg_target_has_atomic,
cfg(not(any(
all(
feature = "fallback",
any(
not(portable_atomic_no_atomic_cas),
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
),
),
not(portable_atomic_no_atomic_64),
not(any(target_pointer_width = "16", target_pointer_width = "32")),
all(
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
),
)))
)]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(not(any(
all(
feature = "fallback",
any(
target_has_atomic = "ptr",
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
),
),
target_has_atomic = "64",
not(any(target_pointer_width = "16", target_pointer_width = "32")),
all(
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
),
)))
)]
#[macro_use]
mod atomic_64_macros {
#[macro_export]
macro_rules! cfg_has_atomic_64 {
($($tt:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_no_atomic_64 {
($($tt:tt)*) => {
$($tt)*
};
}
}
#[cfg_attr(
not(feature = "fallback"),
cfg(any(
all(
target_arch = "aarch64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
),
all(
target_arch = "arm64ec",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
not(portable_atomic_no_asm),
),
all(
target_arch = "x86_64",
not(all(
any(miri, portable_atomic_sanitize_thread),
portable_atomic_no_cmpxchg16b_intrinsic,
)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
),
all(
target_arch = "riscv64",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
),
all(
target_arch = "powerpc64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
portable_atomic_unstable_asm_experimental_arch,
any(
target_feature = "quadword-atomics",
portable_atomic_target_feature = "quadword-atomics",
),
),
all(
target_arch = "s390x",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
not(portable_atomic_no_asm),
),
))
)]
#[cfg_attr(
all(feature = "fallback", portable_atomic_no_cfg_target_has_atomic),
cfg(any(
not(portable_atomic_no_atomic_cas),
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
))
)]
#[cfg_attr(
all(feature = "fallback", not(portable_atomic_no_cfg_target_has_atomic)),
cfg(any(
target_has_atomic = "ptr",
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
))
)]
#[macro_use]
mod atomic_128_macros {
#[macro_export]
macro_rules! cfg_has_atomic_128 {
($($tt:tt)*) => {
$($tt)*
};
}
#[macro_export]
macro_rules! cfg_no_atomic_128 {
($($tt:tt)*) => {};
}
}
#[cfg_attr(
not(feature = "fallback"),
cfg(not(any(
all(
target_arch = "aarch64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
),
all(
target_arch = "arm64ec",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
not(portable_atomic_no_asm),
),
all(
target_arch = "x86_64",
not(all(
any(miri, portable_atomic_sanitize_thread),
portable_atomic_no_cmpxchg16b_intrinsic,
)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
),
all(
target_arch = "riscv64",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
),
all(
target_arch = "powerpc64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
portable_atomic_unstable_asm_experimental_arch,
any(
target_feature = "quadword-atomics",
portable_atomic_target_feature = "quadword-atomics",
),
),
all(
target_arch = "s390x",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
not(portable_atomic_no_asm),
),
)))
)]
#[cfg_attr(
all(feature = "fallback", portable_atomic_no_cfg_target_has_atomic),
cfg(not(any(
not(portable_atomic_no_atomic_cas),
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
)))
)]
#[cfg_attr(
all(feature = "fallback", not(portable_atomic_no_cfg_target_has_atomic)),
cfg(not(any(
target_has_atomic = "ptr",
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
)))
)]
#[macro_use]
mod atomic_128_macros {
#[macro_export]
macro_rules! cfg_has_atomic_128 {
($($tt:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_no_atomic_128 {
($($tt:tt)*) => {
$($tt)*
};
}
}
#[cfg_attr(
portable_atomic_no_cfg_target_has_atomic,
cfg(any(
not(portable_atomic_no_atomic_cas),
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
))
)]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(any(
target_has_atomic = "ptr",
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
))
)]
#[macro_use]
mod atomic_cas_macros {
#[macro_export]
macro_rules! cfg_has_atomic_cas {
($($tt:tt)*) => {
$($tt)*
};
}
#[macro_export]
macro_rules! cfg_no_atomic_cas {
($($tt:tt)*) => {};
}
// private
macro_rules! cfg_has_atomic_cas_or_amo32 {
($($tt:tt)*) => {
$($tt)*
};
}
macro_rules! cfg_has_atomic_cas_or_amo8 {
($($tt:tt)*) => {
$($tt)*
};
}
}
#[cfg_attr(
portable_atomic_no_cfg_target_has_atomic,
cfg(not(any(
not(portable_atomic_no_atomic_cas),
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
)))
)]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(not(any(
target_has_atomic = "ptr",
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
)))
)]
#[macro_use]
mod atomic_cas_macros {
#[macro_export]
macro_rules! cfg_has_atomic_cas {
($($tt:tt)*) => {};
}
#[macro_export]
macro_rules! cfg_no_atomic_cas {
($($tt:tt)*) => {
$($tt)*
};
}
// private
#[cfg_attr(
any(target_arch = "riscv32", target_arch = "riscv64"),
cfg(not(any(target_feature = "zaamo", portable_atomic_target_feature = "zaamo")))
)]
macro_rules! cfg_has_atomic_cas_or_amo32 {
($($tt:tt)*) => {};
}
#[cfg_attr(
any(target_arch = "riscv32", target_arch = "riscv64"),
cfg(not(any(target_feature = "zaamo", portable_atomic_target_feature = "zaamo")))
)]
macro_rules! cfg_no_atomic_cas_or_amo32 {
($($tt:tt)*) => {
$($tt)*
};
}
#[cfg(all(
any(target_arch = "riscv32", target_arch = "riscv64"),
any(target_feature = "zaamo", portable_atomic_target_feature = "zaamo"),
))]
macro_rules! cfg_has_atomic_cas_or_amo32 {
($($tt:tt)*) => {
$($tt)*
};
}
#[cfg(all(
any(target_arch = "riscv32", target_arch = "riscv64"),
any(target_feature = "zaamo", portable_atomic_target_feature = "zaamo"),
))]
macro_rules! cfg_no_atomic_cas_or_amo32 {
($($tt:tt)*) => {};
}
#[cfg_attr(
any(target_arch = "riscv32", target_arch = "riscv64"),
cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))
)]
#[allow(unused_macros)]
macro_rules! cfg_has_atomic_cas_or_amo8 {
($($tt:tt)*) => {};
}
#[cfg_attr(
any(target_arch = "riscv32", target_arch = "riscv64"),
cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))
)]
#[cfg_attr(target_arch = "bpf", allow(unused_macros))]
macro_rules! cfg_no_atomic_cas_or_amo8 {
($($tt:tt)*) => {
$($tt)*
};
}
#[cfg(all(
any(target_arch = "riscv32", target_arch = "riscv64"),
any(target_feature = "zabha", portable_atomic_target_feature = "zabha"),
))]
macro_rules! cfg_has_atomic_cas_or_amo8 {
($($tt:tt)*) => {
$($tt)*
};
}
#[cfg(all(
any(target_arch = "riscv32", target_arch = "riscv64"),
any(target_feature = "zabha", portable_atomic_target_feature = "zabha"),
))]
macro_rules! cfg_no_atomic_cas_or_amo8 {
($($tt:tt)*) => {};
}
}
// Check that all cfg_ macros work.
mod check {
crate::cfg_has_atomic_8! { type _Atomic8 = (); }
crate::cfg_no_atomic_8! { type _Atomic8 = (); }
crate::cfg_has_atomic_16! { type _Atomic16 = (); }
crate::cfg_no_atomic_16! { type _Atomic16 = (); }
crate::cfg_has_atomic_32! { type _Atomic32 = (); }
crate::cfg_no_atomic_32! { type _Atomic32 = (); }
crate::cfg_has_atomic_64! { type _Atomic64 = (); }
crate::cfg_no_atomic_64! { type _Atomic64 = (); }
crate::cfg_has_atomic_128! { type _Atomic128 = (); }
crate::cfg_no_atomic_128! { type _Atomic128 = (); }
crate::cfg_has_atomic_ptr! { type _AtomicPtr = (); }
crate::cfg_no_atomic_ptr! { type _AtomicPtr = (); }
crate::cfg_has_atomic_cas! { type __AtomicPtr = (); }
crate::cfg_no_atomic_cas! { type __AtomicPtr = (); }
#[allow(unused_imports)]
use self::{
__AtomicPtr as _, _Atomic8 as _, _Atomic16 as _, _Atomic32 as _, _Atomic64 as _,
_Atomic128 as _, _AtomicPtr as _,
};
}
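
A minimal sketch of how downstream code might use the `cfg_has_atomic_64!`/`cfg_no_atomic_64!` macros exported above; the counter below is hypothetical and not part of the vendored crate.

```rust
// Hypothetical downstream use of the exported cfg macros: the first block is
// compiled only when 64-bit atomics are available on the target, the second
// only when they are not.
portable_atomic::cfg_has_atomic_64! {
    static COUNTER: portable_atomic::AtomicU64 = portable_atomic::AtomicU64::new(0);

    fn bump() -> u64 {
        COUNTER.fetch_add(1, portable_atomic::Ordering::Relaxed)
    }
}
portable_atomic::cfg_no_atomic_64! {
    compile_error!("this example assumes a target with 64-bit atomics");
}
```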

vendor/portable-atomic/src/gen/build.rs (vendored, new file, 76 lines)

@@ -0,0 +1,76 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
// This file is @generated by no_atomic.sh.
// It is not intended for manual editing.
// Note: This is the list as of nightly-2022-02-10. We don't refer to this in
// nightly-2022-02-11+ because feature(cfg_target_has_atomic) stabilized.
#[rustfmt::skip]
pub(crate) static NO_ATOMIC_CAS: &[&str] = &[
"avr-unknown-gnu-atmega328",
"bpfeb-unknown-none",
"bpfel-unknown-none",
"msp430-none-elf",
"riscv32i-unknown-none-elf",
"riscv32imc-unknown-none-elf",
"thumbv4t-none-eabi",
"thumbv6m-none-eabi",
];
// Note: This is the list as of nightly-2022-02-10. We don't refer to this in
// nightly-2022-02-11+ because feature(cfg_target_has_atomic) stabilized.
#[rustfmt::skip]
pub(crate) static NO_ATOMIC_64: &[&str] = &[
"arm-linux-androideabi",
"armebv7r-none-eabi",
"armebv7r-none-eabihf",
"armv4t-unknown-linux-gnueabi",
"armv5te-unknown-linux-gnueabi",
"armv5te-unknown-linux-musleabi",
"armv5te-unknown-linux-uclibceabi",
"armv6k-nintendo-3ds",
"armv7r-none-eabi",
"armv7r-none-eabihf",
"avr-unknown-gnu-atmega328",
"hexagon-unknown-linux-musl",
"m68k-unknown-linux-gnu",
"mips-unknown-linux-gnu",
"mips-unknown-linux-musl",
"mips-unknown-linux-uclibc",
"mipsel-sony-psp",
"mipsel-unknown-linux-gnu",
"mipsel-unknown-linux-musl",
"mipsel-unknown-linux-uclibc",
"mipsel-unknown-none",
"mipsisa32r6-unknown-linux-gnu",
"mipsisa32r6el-unknown-linux-gnu",
"msp430-none-elf",
"powerpc-unknown-freebsd",
"powerpc-unknown-linux-gnu",
"powerpc-unknown-linux-gnuspe",
"powerpc-unknown-linux-musl",
"powerpc-unknown-netbsd",
"powerpc-unknown-openbsd",
"powerpc-wrs-vxworks",
"powerpc-wrs-vxworks-spe",
"riscv32gc-unknown-linux-gnu",
"riscv32gc-unknown-linux-musl",
"riscv32i-unknown-none-elf",
"riscv32imac-unknown-none-elf",
"riscv32imc-esp-espidf",
"riscv32imc-unknown-none-elf",
"thumbv4t-none-eabi",
"thumbv6m-none-eabi",
"thumbv7em-none-eabi",
"thumbv7em-none-eabihf",
"thumbv7m-none-eabi",
"thumbv8m.base-none-eabi",
"thumbv8m.main-none-eabi",
"thumbv8m.main-none-eabihf",
];
#[rustfmt::skip]
pub(crate) static NO_ATOMIC: &[&str] = &[
"bpfeb-unknown-none",
"bpfel-unknown-none",
"mipsel-sony-psx",
];
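
A hedged sketch of how a build script might consume these generated lists on old toolchains; the function below is illustrative rather than the crate's actual build.rs logic, though the emitted cfg names do appear in the vendored sources above.

```rust
// Illustrative only: compare the build target against the generated lists and
// emit cfgs that library code can test with #[cfg(...)] on toolchains predating
// the stabilization of cfg(target_has_atomic).
fn emit_no_atomic_cfgs(target: &str) {
    if NO_ATOMIC.contains(&target) {
        println!("cargo:rustc-cfg=portable_atomic_no_atomic_load_store");
    }
    if NO_ATOMIC_CAS.contains(&target) {
        println!("cargo:rustc-cfg=portable_atomic_no_atomic_cas");
    }
    if NO_ATOMIC_64.contains(&target) {
        println!("cargo:rustc-cfg=portable_atomic_no_atomic_64");
    }
}
```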

vendor/portable-atomic/src/gen/utils.rs (vendored, new file, 153 lines)

@@ -0,0 +1,153 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
// This file is @generated by target_spec.sh.
// It is not intended for manual editing.
#![allow(unused_macros)]
// On AArch64, the base register of memory-related instructions must be 64-bit.
// Passing a 32-bit value to `in(reg)` on AArch64 results in the upper bits
// having an undefined value, but to work correctly with ILP32 ABI, the upper
// bits must be zero, which is handled here by casting to u64. Another way to
// handle this is to pass it as a pointer and clear the upper bits inside asm,
// but that is easier to overlook than a cast, whose omission can be caught by
// the asm_sub_register lint.
// See also https://github.com/ARM-software/abi-aa/blob/2024Q3/aapcs64/aapcs64.rst#pointers
//
// Except for x86_64, which can use 32-bit registers in the destination operand
// (on x86_64, we use the ptr_modifier macro to handle this), we need to do the
// same for ILP32 ABI on other 64-bit architectures. (At least, as far as I can
// see from the assembly generated by LLVM, this is also required for MIPS64 N32
// ABI. I don't know about the RISC-V RV64ILP32* ABI, but in any case, this
// should be a safe default for such ABIs).
//
// Known architectures that have such ABI are x86_64 (X32), AArch64 (ILP32),
// mips64 (N32), and riscv64 (RV64ILP32*). (As of 2025-01-23, only the former
// two are supported by rustc.) However, we list all known 64-bit architectures
// because similar ABIs may exist, or may be added in the future, for other architectures.
#[cfg(all(
target_pointer_width = "32",
any(
target_arch = "aarch64",
target_arch = "amdgpu",
target_arch = "arm64ec",
target_arch = "bpf",
target_arch = "loongarch64",
target_arch = "mips64",
target_arch = "mips64r6",
target_arch = "nvptx64",
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "s390x",
target_arch = "sparc64",
target_arch = "wasm64",
target_arch = "x86_64",
),
))]
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
macro_rules! ptr_reg {
($ptr:ident) => {{
let _: *const _ = $ptr; // ensure $ptr is a pointer (*mut _ or *const _)
#[cfg(not(portable_atomic_no_asm_maybe_uninit))]
#[allow(clippy::ptr_as_ptr)]
{
// If we cast to u64 here, the provenance will be lost,
// so we convert to MaybeUninit<u64> via zero extend helper.
crate::utils::zero_extend64_ptr($ptr as *mut ())
}
#[cfg(portable_atomic_no_asm_maybe_uninit)]
{
// Use cast on old rustc because it does not support MaybeUninit
// registers. This is still permissive-provenance compatible and
// is sound.
$ptr as u64
}
}};
}
#[cfg(not(all(
target_pointer_width = "32",
any(
target_arch = "aarch64",
target_arch = "amdgpu",
target_arch = "arm64ec",
target_arch = "bpf",
target_arch = "loongarch64",
target_arch = "mips64",
target_arch = "mips64r6",
target_arch = "nvptx64",
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "s390x",
target_arch = "sparc64",
target_arch = "wasm64",
target_arch = "x86_64",
),
)))]
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
macro_rules! ptr_reg {
($ptr:ident) => {{
let _: *const _ = $ptr; // ensure $ptr is a pointer (*mut _ or *const _)
$ptr // cast is unnecessary here.
}};
}
// Some 64-bit architectures have ABIs with a 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, MIPS64 N32 ABI). On those targets, AtomicU64 is available
// and fast, so use it to implement normal sequence lock.
//
// See ptr_reg macro for the reason why all known 64-bit architectures are listed.
#[cfg(any(
not(any(target_pointer_width = "16", target_pointer_width = "32")), // i.e., 64-bit or greater
target_arch = "aarch64",
target_arch = "amdgpu",
target_arch = "arm64ec",
target_arch = "bpf",
target_arch = "loongarch64",
target_arch = "mips64",
target_arch = "mips64r6",
target_arch = "nvptx64",
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "s390x",
target_arch = "sparc64",
target_arch = "wasm64",
target_arch = "x86_64",
))]
#[macro_use]
mod fast_atomic_64_macros {
macro_rules! cfg_has_fast_atomic_64 {
($($tt:tt)*) => {
$($tt)*
};
}
macro_rules! cfg_no_fast_atomic_64 {
($($tt:tt)*) => {};
}
}
#[cfg(not(any(
not(any(target_pointer_width = "16", target_pointer_width = "32")), // i.e., 64-bit or greater
target_arch = "aarch64",
target_arch = "amdgpu",
target_arch = "arm64ec",
target_arch = "bpf",
target_arch = "loongarch64",
target_arch = "mips64",
target_arch = "mips64r6",
target_arch = "nvptx64",
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "s390x",
target_arch = "sparc64",
target_arch = "wasm64",
target_arch = "x86_64",
)))]
#[macro_use]
mod fast_atomic_64_macros {
macro_rules! cfg_has_fast_atomic_64 {
($($tt:tt)*) => {};
}
macro_rules! cfg_no_fast_atomic_64 {
($($tt:tt)*) => {
$($tt)*
};
}
}
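
For illustration, a crate-internal sketch of how the `cfg_has_fast_atomic_64!`/`cfg_no_fast_atomic_64!` macros above might select a sequence-lock counter width; the `SeqLockState` alias is hypothetical.

```rust
// Illustrative only: use a 64-bit counter where native 64-bit atomics are known
// to be fast, and fall back to a 32-bit counter otherwise. Only one of the two
// blocks is expanded on any given target.
cfg_has_fast_atomic_64! {
    type SeqLockState = core::sync::atomic::AtomicU64;
}
cfg_no_fast_atomic_64! {
    type SeqLockState = core::sync::atomic::AtomicU32;
}
```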


@@ -0,0 +1,39 @@
# 128-bit atomic implementations on 64-bit architectures
(See the [`atomic64` module](../atomic64) for 64-bit atomic implementations on 32-bit architectures.)
## 128-bit atomic instructions
Here is the table of targets that support 128-bit atomics and the instructions used:
| target_arch | load | store | CAS | RMW | note |
| ----------- | ---- | ----- | --- | --- | ---- |
| x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b | Requires `cmpxchg16b` target feature (enabled by default on Apple, Windows (except Windows 7, since Rust 1.78), and Fuchsia (since Rust 1.87) targets). vmovdqa requires Intel, AMD, or Zhaoxin CPU with AVX. <br> Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only. <br> Requires rustc 1.59+ |
| aarch64/arm64ec | ldxp/stxp or casp or ldp/ldiapp | ldxp/stxp or casp or stp/stilp/swpp | ldxp/stxp or casp | ldxp/stxp or casp/swpp/ldclrp/ldsetp | casp requires `lse` target feature, ldp/stp requires `lse2` target feature, ldiapp/stilp requires `lse2` and `rcpc3` target features, swpp/ldclrp/ldsetp requires `lse128` target feature. <br> Both compile-time and run-time detection are supported. <br> Requires rustc 1.59+ (aarch64) / 1.84+ (arm64ec) |
| riscv64 | amocas.q | amocas.q | amocas.q | amocas.q | Requires `zacas` target feature. Both compile-time and run-time detection are supported. <br> Requires rustc 1.59+ |
| powerpc64 | lq | stq | lqarx/stqcx. | lqarx/stqcx. | Requires `quadword-atomics` target feature (enabled by default on powerpc64le). Both compile-time and run-time detection are supported. <br> Requires nightly |
| s390x | lpq | stpq | cdsg | cdsg | Requires rustc 1.84+ |
| loongarch64 | sc.q | sc.q | sc.q | sc.q | Unimplemented. Requires `scq` target feature. |
| mips64r6 | lldp | lldp/scdp | lldp/scdp | lldp/scdp | Unimplemented (unsupported in LLVM). Requires Release 6 Paired LL/SC family of instructions |
| nvptx64 | ld.b128 | st.b128 | atom.cas.b128 | atom.exch.b128/atom.cas.b128 | Unimplemented. Requires `ptx83` and `sm_90`. |
On compiler versions or platforms where these are not supported, the fallback implementation is used.
See [aarch64.rs](aarch64.rs) module-level comments for more details on the instructions used on AArch64.
## Comparison with core::intrinsics::atomic_\* (core::sync::atomic::Atomic{I,U}128)
This directory has target-specific implementations with inline assembly ([x86_64.rs](x86_64.rs), [aarch64.rs](aarch64.rs), [riscv64.rs](riscv64.rs), [powerpc64.rs](powerpc64.rs), [s390x.rs](s390x.rs)) and an implementation without inline assembly ([intrinsics.rs](intrinsics.rs)). The latter currently always needs nightly compilers and is only used for Miri and ThreadSanitizer, which do not support inline assembly.
Implementations with inline assembly generate assembly almost equivalent to `core::intrinsics::atomic_*` (used in `core::sync::atomic::Atomic{I,U}128`) for many operations, but for some operations one or the other may generate more efficient code. For example:
- On x86_64 and AArch64, implementation with inline assembly contains additional optimizations (e.g., [#16](https://github.com/taiki-e/portable-atomic/pull/16), [#126](https://github.com/taiki-e/portable-atomic/pull/126)) and is much faster for some operations.
- On AArch64, implementation with inline assembly supports outline-atomics on more operating systems, and may be faster in environments where outline-atomics can improve performance.
- On powerpc64, LLVM does not support generating some 128-bit atomic operations (see [intrinsics.rs](intrinsics.rs) module-level comments), and we use CAS loop to implement them, so implementation with inline assembly may be faster for those operations.
- In implementations without inline assembly, the compiler may reuse condition flags that have changed as a result of the operation, or use immediate values instead of registers, depending on the situation.
As 128-bit atomics-related APIs stabilize in the standard library, implementations with inline assembly are planned to be updated to get the benefits of both.
## Run-time CPU feature detection
See the [`detect` module's readme](../detect/README.md) for run-time CPU feature detection.
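
For orientation, a small usage sketch of the public 128-bit atomic types that these backends power (standard portable-atomic API; the values are arbitrary, and the default `fallback` feature is assumed).

```rust
use portable_atomic::{AtomicU128, Ordering};

fn demo() {
    let a = AtomicU128::new(1);
    // Reports whether the selected backend is lock-free on this target;
    // otherwise the sequence-lock or critical-section fallback is in use.
    println!("lock-free: {}", AtomicU128::is_lock_free());
    a.fetch_add(2, Ordering::SeqCst);
    let _ = a.compare_exchange(3, 10, Ordering::AcqRel, Ordering::Acquire);
    assert_eq!(a.load(Ordering::SeqCst), 10);
}
```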

(File diff suppressed because it is too large.)

@@ -0,0 +1,506 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
128-bit atomic implementation without inline assembly.
Adapted from https://github.com/rust-lang/rust/blob/1.84.0/library/core/src/sync/atomic.rs.
Note: This module is currently only enabled on Miri and ThreadSanitizer which
do not support inline assembly.
This uses `core::arch::x86_64::cmpxchg16b` on x86_64 and
`core::intrinsics::atomic_*` on aarch64, powerpc64, and s390x.
See README.md of this directory for performance comparison with the
implementation with inline assembly.
Note:
- This currently needs Rust 1.70 on x86_64, otherwise nightly compilers.
- On powerpc64, this requires LLVM 15+ and quadword-atomics target feature:
https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445
- On s390x, old LLVM (pre-18) generates libcalls for operations other than load/store/cmpxchg:
https://github.com/llvm/llvm-project/commit/c568927f3e2e7d9804ea74ecbf11c16c014ddcbc
- On aarch64 big-endian, LLVM (as of 17) generates broken code. (wrong result in stress test)
(on cfg(miri)/cfg(sanitize) it may be fine though)
- On powerpc64, LLVM (as of 17) doesn't support 128-bit atomic min/max:
https://github.com/llvm/llvm-project/issues/68390
- On powerpc64le, LLVM (as of 17) generates broken code. (wrong result from fetch_add)
- On riscv64, LLVM does not automatically use 128-bit atomic instructions even if zacas feature is
enabled, because doing it changes the ABI. (If the ability to do that is provided by LLVM in the
future, it should probably be controlled by another ABI feature similar to forced-atomics.)
*/
include!("macros.rs");
#[allow(dead_code)] // we only use compare_exchange.
#[cfg(target_arch = "x86_64")]
#[cfg(not(target_feature = "cmpxchg16b"))]
#[path = "../fallback/outline_atomics.rs"]
mod fallback;
#[cfg(target_arch = "x86_64")]
#[cfg(not(target_feature = "cmpxchg16b"))]
#[path = "../detect/x86_64.rs"]
mod detect;
#[cfg(not(target_arch = "x86_64"))]
use core::intrinsics;
use core::sync::atomic::Ordering::{self, AcqRel, Acquire, Relaxed, Release, SeqCst};
#[cfg(target_arch = "x86_64")]
#[inline]
fn strongest_failure_ordering(order: Ordering) -> Ordering {
match order {
Release | Relaxed => Relaxed,
SeqCst => SeqCst,
Acquire | AcqRel => Acquire,
_ => unreachable!(),
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
#[cfg(target_arch = "x86_64")]
// SAFETY: the caller must uphold the safety contract.
unsafe {
let fail_order = strongest_failure_ordering(order);
match atomic_compare_exchange(src, 0, 0, order, fail_order) {
Ok(v) | Err(v) => v,
}
}
#[cfg(not(target_arch = "x86_64"))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Acquire => intrinsics::atomic_load_acquire(src),
Relaxed => intrinsics::atomic_load_relaxed(src),
SeqCst => intrinsics::atomic_load_seqcst(src),
_ => unreachable!(),
}
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
#[cfg(target_arch = "x86_64")]
// SAFETY: the caller must uphold the safety contract.
unsafe {
atomic_swap(dst, val, order);
}
#[cfg(not(target_arch = "x86_64"))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Release => intrinsics::atomic_store_release(dst, val),
Relaxed => intrinsics::atomic_store_relaxed(dst, val),
SeqCst => intrinsics::atomic_store_seqcst(dst, val),
_ => unreachable!(),
}
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_compare_exchange(
dst: *mut u128,
old: u128,
new: u128,
success: Ordering,
failure: Ordering,
) -> Result<u128, u128> {
#[cfg(target_arch = "x86_64")]
let (val, ok) = {
#[target_feature(enable = "cmpxchg16b")]
#[cfg_attr(target_feature = "cmpxchg16b", inline)]
#[cfg_attr(not(target_feature = "cmpxchg16b"), inline(never))]
unsafe fn cmpxchg16b(
dst: *mut u128,
old: u128,
new: u128,
success: Ordering,
failure: Ordering,
) -> (u128, bool) {
debug_assert!(dst as usize % 16 == 0);
#[cfg(not(target_feature = "cmpxchg16b"))]
{
debug_assert!(detect::detect().cmpxchg16b());
}
// SAFETY: the caller must guarantee that `dst` is valid for both writes and
// reads, 16-byte aligned (required by CMPXCHG16B), that there are no
// concurrent non-atomic operations, and that the CPU supports CMPXCHG16B.
let prev = unsafe { core::arch::x86_64::cmpxchg16b(dst, old, new, success, failure) };
(prev, prev == old)
}
#[cfg(target_feature = "cmpxchg16b")]
// SAFETY: the caller must guarantee that `dst` is valid for both writes and
// reads, 16-byte aligned, that there are no concurrent non-atomic operations,
// and cfg guarantees that CMPXCHG16B is available at compile-time.
unsafe {
cmpxchg16b(dst, old, new, success, failure)
}
#[cfg(not(target_feature = "cmpxchg16b"))]
// SAFETY: the caller must guarantee that `dst` is valid for both writes and
// reads, 16-byte aligned, and that there are no different kinds of concurrent accesses.
unsafe {
ifunc!(unsafe fn(
dst: *mut u128, old: u128, new: u128, success: Ordering, failure: Ordering
) -> (u128, bool) {
if detect::detect().cmpxchg16b() {
cmpxchg16b
} else {
fallback::atomic_compare_exchange
}
})
}
};
#[cfg(not(target_arch = "x86_64"))]
// SAFETY: the caller must uphold the safety contract.
let (val, ok) = unsafe {
match (success, failure) {
(Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed_relaxed(dst, old, new),
(Relaxed, Acquire) => intrinsics::atomic_cxchg_relaxed_acquire(dst, old, new),
(Relaxed, SeqCst) => intrinsics::atomic_cxchg_relaxed_seqcst(dst, old, new),
(Acquire, Relaxed) => intrinsics::atomic_cxchg_acquire_relaxed(dst, old, new),
(Acquire, Acquire) => intrinsics::atomic_cxchg_acquire_acquire(dst, old, new),
(Acquire, SeqCst) => intrinsics::atomic_cxchg_acquire_seqcst(dst, old, new),
(Release, Relaxed) => intrinsics::atomic_cxchg_release_relaxed(dst, old, new),
(Release, Acquire) => intrinsics::atomic_cxchg_release_acquire(dst, old, new),
(Release, SeqCst) => intrinsics::atomic_cxchg_release_seqcst(dst, old, new),
(AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_relaxed(dst, old, new),
(AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel_acquire(dst, old, new),
(AcqRel, SeqCst) => intrinsics::atomic_cxchg_acqrel_seqcst(dst, old, new),
(SeqCst, Relaxed) => intrinsics::atomic_cxchg_seqcst_relaxed(dst, old, new),
(SeqCst, Acquire) => intrinsics::atomic_cxchg_seqcst_acquire(dst, old, new),
(SeqCst, SeqCst) => intrinsics::atomic_cxchg_seqcst_seqcst(dst, old, new),
_ => unreachable!(),
}
};
if ok { Ok(val) } else { Err(val) }
}
#[cfg(target_arch = "x86_64")]
use self::atomic_compare_exchange as atomic_compare_exchange_weak;
#[cfg(not(target_arch = "x86_64"))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_compare_exchange_weak(
dst: *mut u128,
old: u128,
new: u128,
success: Ordering,
failure: Ordering,
) -> Result<u128, u128> {
// SAFETY: the caller must uphold the safety contract.
let (val, ok) = unsafe {
match (success, failure) {
(Relaxed, Relaxed) => intrinsics::atomic_cxchgweak_relaxed_relaxed(dst, old, new),
(Relaxed, Acquire) => intrinsics::atomic_cxchgweak_relaxed_acquire(dst, old, new),
(Relaxed, SeqCst) => intrinsics::atomic_cxchgweak_relaxed_seqcst(dst, old, new),
(Acquire, Relaxed) => intrinsics::atomic_cxchgweak_acquire_relaxed(dst, old, new),
(Acquire, Acquire) => intrinsics::atomic_cxchgweak_acquire_acquire(dst, old, new),
(Acquire, SeqCst) => intrinsics::atomic_cxchgweak_acquire_seqcst(dst, old, new),
(Release, Relaxed) => intrinsics::atomic_cxchgweak_release_relaxed(dst, old, new),
(Release, Acquire) => intrinsics::atomic_cxchgweak_release_acquire(dst, old, new),
(Release, SeqCst) => intrinsics::atomic_cxchgweak_release_seqcst(dst, old, new),
(AcqRel, Relaxed) => intrinsics::atomic_cxchgweak_acqrel_relaxed(dst, old, new),
(AcqRel, Acquire) => intrinsics::atomic_cxchgweak_acqrel_acquire(dst, old, new),
(AcqRel, SeqCst) => intrinsics::atomic_cxchgweak_acqrel_seqcst(dst, old, new),
(SeqCst, Relaxed) => intrinsics::atomic_cxchgweak_seqcst_relaxed(dst, old, new),
(SeqCst, Acquire) => intrinsics::atomic_cxchgweak_seqcst_acquire(dst, old, new),
(SeqCst, SeqCst) => intrinsics::atomic_cxchgweak_seqcst_seqcst(dst, old, new),
_ => unreachable!(),
}
};
if ok { Ok(val) } else { Err(val) }
}
#[inline(always)]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_update<F>(dst: *mut u128, order: Ordering, mut f: F) -> u128
where
F: FnMut(u128) -> u128,
{
// SAFETY: the caller must uphold the safety contract.
unsafe {
// This is a private function and all instances of `f` only operate on the value
// loaded, so there is no need to synchronize the first load/failed CAS.
let mut prev = atomic_load(dst, Ordering::Relaxed);
loop {
let next = f(prev);
match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) {
Ok(x) => return x,
Err(x) => prev = x,
}
}
}
}
// On x86_64, we use core::arch::x86_64::cmpxchg16b instead of core::intrinsics.
// - On s390x, old LLVM (pre-18) generates libcalls for operations other than load/store/cmpxchg (see also module-level comment).
#[cfg(any(target_arch = "x86_64", all(target_arch = "s390x", portable_atomic_pre_llvm_18)))]
atomic_rmw_by_atomic_update!();
// On powerpc64, LLVM doesn't support 128-bit atomic min/max (see also module-level comment).
#[cfg(target_arch = "powerpc64")]
atomic_rmw_by_atomic_update!(cmp);
#[cfg(not(any(target_arch = "x86_64", all(target_arch = "s390x", portable_atomic_pre_llvm_18))))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Acquire => intrinsics::atomic_xchg_acquire(dst, val),
Release => intrinsics::atomic_xchg_release(dst, val),
AcqRel => intrinsics::atomic_xchg_acqrel(dst, val),
Relaxed => intrinsics::atomic_xchg_relaxed(dst, val),
SeqCst => intrinsics::atomic_xchg_seqcst(dst, val),
_ => unreachable!(),
}
}
}
#[cfg(not(any(target_arch = "x86_64", all(target_arch = "s390x", portable_atomic_pre_llvm_18))))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_add(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Acquire => intrinsics::atomic_xadd_acquire(dst, val),
Release => intrinsics::atomic_xadd_release(dst, val),
AcqRel => intrinsics::atomic_xadd_acqrel(dst, val),
Relaxed => intrinsics::atomic_xadd_relaxed(dst, val),
SeqCst => intrinsics::atomic_xadd_seqcst(dst, val),
_ => unreachable!(),
}
}
}
#[cfg(not(any(target_arch = "x86_64", all(target_arch = "s390x", portable_atomic_pre_llvm_18))))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Acquire => intrinsics::atomic_xsub_acquire(dst, val),
Release => intrinsics::atomic_xsub_release(dst, val),
AcqRel => intrinsics::atomic_xsub_acqrel(dst, val),
Relaxed => intrinsics::atomic_xsub_relaxed(dst, val),
SeqCst => intrinsics::atomic_xsub_seqcst(dst, val),
_ => unreachable!(),
}
}
}
#[cfg(not(any(target_arch = "x86_64", all(target_arch = "s390x", portable_atomic_pre_llvm_18))))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Acquire => intrinsics::atomic_and_acquire(dst, val),
Release => intrinsics::atomic_and_release(dst, val),
AcqRel => intrinsics::atomic_and_acqrel(dst, val),
Relaxed => intrinsics::atomic_and_relaxed(dst, val),
SeqCst => intrinsics::atomic_and_seqcst(dst, val),
_ => unreachable!(),
}
}
}
#[cfg(not(any(target_arch = "x86_64", all(target_arch = "s390x", portable_atomic_pre_llvm_18))))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Acquire => intrinsics::atomic_nand_acquire(dst, val),
Release => intrinsics::atomic_nand_release(dst, val),
AcqRel => intrinsics::atomic_nand_acqrel(dst, val),
Relaxed => intrinsics::atomic_nand_relaxed(dst, val),
SeqCst => intrinsics::atomic_nand_seqcst(dst, val),
_ => unreachable!(),
}
}
}
#[cfg(not(any(target_arch = "x86_64", all(target_arch = "s390x", portable_atomic_pre_llvm_18))))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Acquire => intrinsics::atomic_or_acquire(dst, val),
Release => intrinsics::atomic_or_release(dst, val),
AcqRel => intrinsics::atomic_or_acqrel(dst, val),
Relaxed => intrinsics::atomic_or_relaxed(dst, val),
SeqCst => intrinsics::atomic_or_seqcst(dst, val),
_ => unreachable!(),
}
}
}
#[cfg(not(any(target_arch = "x86_64", all(target_arch = "s390x", portable_atomic_pre_llvm_18))))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Acquire => intrinsics::atomic_xor_acquire(dst, val),
Release => intrinsics::atomic_xor_release(dst, val),
AcqRel => intrinsics::atomic_xor_acqrel(dst, val),
Relaxed => intrinsics::atomic_xor_relaxed(dst, val),
SeqCst => intrinsics::atomic_xor_seqcst(dst, val),
_ => unreachable!(),
}
}
}
#[cfg(not(any(
target_arch = "x86_64",
target_arch = "powerpc64",
all(target_arch = "s390x", portable_atomic_pre_llvm_18),
)))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_max(dst: *mut u128, val: u128, order: Ordering) -> i128 {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Acquire => intrinsics::atomic_max_acquire(dst.cast::<i128>(), val as i128),
Release => intrinsics::atomic_max_release(dst.cast::<i128>(), val as i128),
AcqRel => intrinsics::atomic_max_acqrel(dst.cast::<i128>(), val as i128),
Relaxed => intrinsics::atomic_max_relaxed(dst.cast::<i128>(), val as i128),
SeqCst => intrinsics::atomic_max_seqcst(dst.cast::<i128>(), val as i128),
_ => unreachable!(),
}
}
}
#[cfg(not(any(
target_arch = "x86_64",
target_arch = "powerpc64",
all(target_arch = "s390x", portable_atomic_pre_llvm_18),
)))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_min(dst: *mut u128, val: u128, order: Ordering) -> i128 {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Acquire => intrinsics::atomic_min_acquire(dst.cast::<i128>(), val as i128),
Release => intrinsics::atomic_min_release(dst.cast::<i128>(), val as i128),
AcqRel => intrinsics::atomic_min_acqrel(dst.cast::<i128>(), val as i128),
Relaxed => intrinsics::atomic_min_relaxed(dst.cast::<i128>(), val as i128),
SeqCst => intrinsics::atomic_min_seqcst(dst.cast::<i128>(), val as i128),
_ => unreachable!(),
}
}
}
#[cfg(not(any(
target_arch = "x86_64",
target_arch = "powerpc64",
all(target_arch = "s390x", portable_atomic_pre_llvm_18),
)))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_umax(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Acquire => intrinsics::atomic_umax_acquire(dst, val),
Release => intrinsics::atomic_umax_release(dst, val),
AcqRel => intrinsics::atomic_umax_acqrel(dst, val),
Relaxed => intrinsics::atomic_umax_relaxed(dst, val),
SeqCst => intrinsics::atomic_umax_seqcst(dst, val),
_ => unreachable!(),
}
}
}
#[cfg(not(any(
target_arch = "x86_64",
target_arch = "powerpc64",
all(target_arch = "s390x", portable_atomic_pre_llvm_18),
)))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_umin(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
match order {
Acquire => intrinsics::atomic_umin_acquire(dst, val),
Release => intrinsics::atomic_umin_release(dst, val),
AcqRel => intrinsics::atomic_umin_acqrel(dst, val),
Relaxed => intrinsics::atomic_umin_relaxed(dst, val),
SeqCst => intrinsics::atomic_umin_seqcst(dst, val),
_ => unreachable!(),
}
}
}
#[cfg(not(any(target_arch = "x86_64", all(target_arch = "s390x", portable_atomic_pre_llvm_18))))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_not(dst: *mut u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_xor(dst, !0, order) }
}
#[cfg(not(any(target_arch = "x86_64", all(target_arch = "s390x", portable_atomic_pre_llvm_18))))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_neg(dst: *mut u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_update(dst, order, u128::wrapping_neg) }
}
#[cfg(not(target_arch = "x86_64"))]
#[inline]
const fn is_lock_free() -> bool {
IS_ALWAYS_LOCK_FREE
}
#[cfg(not(target_arch = "x86_64"))]
const IS_ALWAYS_LOCK_FREE: bool = true;
#[cfg(target_arch = "x86_64")]
#[inline]
fn is_lock_free() -> bool {
#[cfg(target_feature = "cmpxchg16b")]
{
// CMPXCHG16B is available at compile-time.
true
}
#[cfg(not(target_feature = "cmpxchg16b"))]
{
detect::detect().cmpxchg16b()
}
}
#[cfg(target_arch = "x86_64")]
const IS_ALWAYS_LOCK_FREE: bool = cfg!(target_feature = "cmpxchg16b");
atomic128!(AtomicI128, i128, atomic_max, atomic_min);
atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);
#[cfg(test)]
mod tests {
use super::*;
test_atomic_int!(i128);
test_atomic_int!(u128);
// load/store/swap implementation is not affected by signedness, so it is
// enough to test only unsigned types.
stress_test!(u128);
}
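
The `atomic_update` helper above is the standard compare-exchange retry loop; the self-contained sketch below shows the same pattern on a plain `AtomicU64` from std (the function name is illustrative).

```rust
use std::sync::atomic::{AtomicU64, Ordering};

// Same shape as atomic_update: load once with Relaxed (no synchronization is
// needed for the initial read), then retry compare_exchange_weak with the
// caller's ordering until it succeeds.
fn update<F: FnMut(u64) -> u64>(a: &AtomicU64, order: Ordering, mut f: F) -> u64 {
    let mut prev = a.load(Ordering::Relaxed);
    loop {
        let next = f(prev);
        match a.compare_exchange_weak(prev, next, order, Ordering::Relaxed) {
            Ok(x) => return x,
            Err(x) => prev = x,
        }
    }
}
```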


@@ -0,0 +1,349 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
macro_rules! atomic128 {
($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => {
#[repr(C, align(16))]
pub(crate) struct $atomic_type {
v: core::cell::UnsafeCell<$int_type>,
}
// Send is implicitly implemented.
// SAFETY: any data races are prevented by atomic intrinsics.
unsafe impl Sync for $atomic_type {}
impl_default_no_fetch_ops!($atomic_type, $int_type);
impl_default_bit_opts!($atomic_type, $int_type);
impl $atomic_type {
#[inline]
pub(crate) const fn new(v: $int_type) -> Self {
Self { v: core::cell::UnsafeCell::new(v) }
}
#[inline]
pub(crate) fn is_lock_free() -> bool {
is_lock_free()
}
pub(crate) const IS_ALWAYS_LOCK_FREE: bool = IS_ALWAYS_LOCK_FREE;
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn load(&self, order: Ordering) -> $int_type {
crate::utils::assert_load_ordering(order);
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
atomic_load(self.v.get().cast::<u128>(), order) as $int_type
}
}
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn store(&self, val: $int_type, order: Ordering) {
crate::utils::assert_store_ordering(order);
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
atomic_store(self.v.get().cast::<u128>(), val as u128, order)
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn swap(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
atomic_swap(self.v.get().cast::<u128>(), val as u128, order) as $int_type
}
}
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn compare_exchange(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
crate::utils::assert_compare_exchange_ordering(success, failure);
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
match atomic_compare_exchange(
self.v.get().cast::<u128>(),
current as u128,
new as u128,
success,
failure,
) {
Ok(v) => Ok(v as $int_type),
Err(v) => Err(v as $int_type),
}
}
}
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn compare_exchange_weak(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
crate::utils::assert_compare_exchange_ordering(success, failure);
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
match atomic_compare_exchange_weak(
self.v.get().cast::<u128>(),
current as u128,
new as u128,
success,
failure,
) {
Ok(v) => Ok(v as $int_type),
Err(v) => Err(v as $int_type),
}
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_add(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
atomic_add(self.v.get().cast::<u128>(), val as u128, order) as $int_type
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_sub(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
atomic_sub(self.v.get().cast::<u128>(), val as u128, order) as $int_type
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
atomic_and(self.v.get().cast::<u128>(), val as u128, order) as $int_type
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_nand(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
atomic_nand(self.v.get().cast::<u128>(), val as u128, order) as $int_type
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
atomic_or(self.v.get().cast::<u128>(), val as u128, order) as $int_type
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
atomic_xor(self.v.get().cast::<u128>(), val as u128, order) as $int_type
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
$atomic_max(self.v.get().cast::<u128>(), val as u128, order) as $int_type
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
$atomic_min(self.v.get().cast::<u128>(), val as u128, order) as $int_type
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
atomic_not(self.v.get().cast::<u128>(), order) as $int_type
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn not(&self, order: Ordering) {
self.fetch_not(order);
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_neg(&self, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
atomic_neg(self.v.get().cast::<u128>(), order) as $int_type
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn neg(&self, order: Ordering) {
self.fetch_neg(order);
}
#[inline]
pub(crate) const fn as_ptr(&self) -> *mut $int_type {
self.v.get()
}
}
};
}
#[cfg(any(target_arch = "powerpc64", target_arch = "s390x", target_arch = "x86_64"))]
#[allow(unused_macros)] // also used by intrinsics.rs
macro_rules! atomic_rmw_by_atomic_update {
() => {
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_update(dst, order, |_| val) }
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_add(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_update(dst, order, |x| x.wrapping_add(val)) }
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_update(dst, order, |x| x.wrapping_sub(val)) }
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_update(dst, order, |x| x & val) }
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_update(dst, order, |x| !(x & val)) }
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_update(dst, order, |x| x | val) }
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_update(dst, order, |x| x ^ val) }
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_not(dst: *mut u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_update(dst, order, |x| !x) }
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_neg(dst: *mut u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_update(dst, order, u128::wrapping_neg) }
}
atomic_rmw_by_atomic_update!(cmp);
};
(cmp) => {
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_max(dst: *mut u128, val: u128, order: Ordering) -> u128 {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: the caller must uphold the safety contract.
unsafe {
atomic_update(dst, order, |x| core::cmp::max(x as i128, val as i128) as u128)
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_umax(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_update(dst, order, |x| core::cmp::max(x, val)) }
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_min(dst: *mut u128, val: u128, order: Ordering) -> u128 {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: the caller must uphold the safety contract.
unsafe {
atomic_update(dst, order, |x| core::cmp::min(x as i128, val as i128) as u128)
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn atomic_umin(dst: *mut u128, val: u128, order: Ordering) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe { atomic_update(dst, order, |x| core::cmp::min(x, val)) }
}
};
}


@@ -0,0 +1,131 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
128-bit atomic implementations on 64-bit architectures
See README.md for details.
*/
// AArch64
#[cfg(any(
all(
target_arch = "aarch64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
),
all(
target_arch = "arm64ec",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
not(portable_atomic_no_asm),
),
))]
// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly.
#[cfg_attr(any(miri, portable_atomic_sanitize_thread), path = "intrinsics.rs")]
pub(super) mod aarch64;
// powerpc64
#[cfg(all(
target_arch = "powerpc64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
portable_atomic_unstable_asm_experimental_arch,
any(
target_feature = "quadword-atomics",
portable_atomic_target_feature = "quadword-atomics",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
any(
all(
target_os = "linux",
any(
all(
target_env = "gnu",
any(target_endian = "little", not(target_feature = "crt-static")),
),
all(
target_env = "musl",
any(not(target_feature = "crt-static"), feature = "std"),
),
target_env = "ohos",
all(target_env = "uclibc", not(target_feature = "crt-static")),
portable_atomic_outline_atomics,
),
),
target_os = "android",
all(
target_os = "freebsd",
any(
target_endian = "little",
not(target_feature = "crt-static"),
portable_atomic_outline_atomics,
),
),
target_os = "openbsd",
all(
target_os = "aix",
not(portable_atomic_pre_llvm_20),
any(test, portable_atomic_outline_atomics), // TODO(aix): currently disabled by default
),
),
not(any(miri, portable_atomic_sanitize_thread)),
),
),
))]
// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly.
#[cfg_attr(any(miri, portable_atomic_sanitize_thread), path = "intrinsics.rs")]
pub(super) mod powerpc64;
// riscv64
#[cfg(all(
target_arch = "riscv64",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(
target_feature = "zacas",
portable_atomic_target_feature = "zacas",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
any(target_os = "linux", target_os = "android"),
),
),
))]
pub(super) mod riscv64;
// s390x
#[cfg(all(
target_arch = "s390x",
not(all(any(miri, portable_atomic_sanitize_thread), not(portable_atomic_atomic_intrinsics))),
not(portable_atomic_no_asm),
))]
// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly.
#[cfg_attr(any(miri, portable_atomic_sanitize_thread), path = "intrinsics.rs")]
pub(super) mod s390x;
// x86_64
#[cfg(all(
target_arch = "x86_64",
not(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_no_cmpxchg16b_intrinsic)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(
target_feature = "cmpxchg16b",
portable_atomic_target_feature = "cmpxchg16b",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
not(any(target_env = "sgx", miri)),
),
),
))]
// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly.
#[cfg_attr(any(miri, portable_atomic_sanitize_thread), path = "intrinsics.rs")]
pub(super) mod x86_64;

File diff suppressed because it is too large


@@ -0,0 +1,624 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
128-bit atomic implementation on riscv64.
This architecture provides the following 128-bit atomic instructions:
- amocas.q: CAS (Zacas extension)
See "Atomic operation overview by architecture" in atomic-maybe-uninit for a more comprehensive and
detailed description of the atomic and synchronization instructions in this architecture:
https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md#risc-v
Note: On Miri and ThreadSanitizer, which do not support inline assembly, we don't use
this module and use the fallback implementation instead.
Refs:
- RISC-V Instruction Set Manual
"Zacas" Extension for Atomic Compare-and-Swap (CAS) Instructions
https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/zacas.adoc
- RISC-V Atomics ABI Specification
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/draft-20240829-13bfa9f54634cb60d86b9b333e109f077805b4b3/riscv-atomic.adoc
Generated asm:
- riscv64gc (+zacas) https://godbolt.org/z/c59a9fs63
*/
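// The operations in this module ultimately back the crate's public 128-bit atomic
// types. A minimal caller-side sketch (hypothetical usage, not part of this file;
// it assumes the crate's public `AtomicU128` and `Ordering` re-exports):
//
//     use portable_atomic::{AtomicU128, Ordering};
//     let a = AtomicU128::new(1);
//     // on this target, lowered to amocas.q (or the fallback when zacas is unavailable)
//     a.fetch_add(2, Ordering::SeqCst);
//     assert_eq!(a.load(Ordering::SeqCst), 3);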
include!("macros.rs");
#[cfg(not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas")))]
#[path = "../fallback/outline_atomics.rs"]
mod fallback;
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(any(test, not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas"))))]
#[cfg(any(target_os = "linux", target_os = "android"))]
#[path = "../detect/riscv_linux.rs"]
mod detect;
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::sync::atomic::Ordering;
use crate::utils::{Pair, U128};
macro_rules! debug_assert_zacas {
() => {
#[cfg(not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas")))]
{
debug_assert!(detect::detect().zacas());
}
};
}
// The `.option arch, +zacas` directive requires LLVM 20, so we use the .4byte directive for old LLVM.
// Note that the `.insn <value>` directive requires LLVM 19.
// https://github.com/llvm/llvm-project/commit/2a086dce691e3cc34a2fc27f4fb255bb2cbbfac9
// https://github.com/riscv-non-isa/riscv-asm-manual/blob/ad0de8c004e29c9a7ac33cfd054f4d4f9392f2fb/src/asm-manual.adoc#arch
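// For example, with insn_order = "8" (the Relaxed case in atomic_rmw_amocas_order_insn!
// below), the `.4byte` load below becomes `.4byte 0x28c5462f`, which (per the comment on
// that line) encodes `amocas.q a2, a2, (a0)` with neither aq nor rl set; the other digits
// ("c", "a", "e") differ only in the nibble holding the aq/rl bits, giving the
// .aq/.rl/.aqrl variants of the same instruction.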
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! start_zacas {
() => {
".option push\n.option arch, +zacas"
};
}
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! end_zacas {
() => {
".option pop"
};
}
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! atomic_rmw_amocas_order {
($op:ident, $order:ident) => {
atomic_rmw_amocas_order!($op, $order, failure = $order)
};
($op:ident, $order:ident, failure = $failure:ident) => {
match $order {
Ordering::Relaxed => $op!("", ""),
Ordering::Acquire => $op!("", ".aq"),
Ordering::Release => $op!("", ".rl"),
Ordering::AcqRel => $op!("", ".aqrl"),
Ordering::SeqCst if $failure == Ordering::SeqCst => $op!("fence rw,rw", ".aqrl"),
Ordering::SeqCst => $op!("", ".aqrl"),
_ => unreachable!(),
}
};
}
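// For example, `atomic_rmw_amocas_order!(load, order)` expands to `load!("", ".aq")`
// when `order` is `Ordering::Acquire`, and to `load!("fence rw,rw", ".aqrl")` for
// `Ordering::SeqCst` (when the failure ordering is also SeqCst).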
#[cfg(portable_atomic_pre_llvm_20)]
macro_rules! atomic_rmw_amocas_order_insn {
($op:ident, $order:ident) => {
atomic_rmw_amocas_order_insn!($op, $order, failure = $order)
};
($op:ident, $order:ident, failure = $failure:ident) => {
match $order {
Ordering::Relaxed => $op!("", "8"),
Ordering::Acquire => $op!("", "c"),
Ordering::Release => $op!("", "a"),
Ordering::AcqRel => $op!("", "e"),
Ordering::SeqCst if $failure == Ordering::SeqCst => $op!("fence rw,rw", "e"),
Ordering::SeqCst => $op!("", "e"),
_ => unreachable!(),
}
};
}
// If zacas is available at compile-time, we can always use atomic_load_zacas.
#[cfg(any(target_feature = "zacas", portable_atomic_target_feature = "zacas"))]
use self::atomic_load_zacas as atomic_load;
// Otherwise, we need to do run-time detection and can use atomic_load_zacas only if zacas is available.
#[cfg(not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas")))]
#[inline]
unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
fn_alias! {
// inline(never) is just a hint and not strictly necessary because we use the
// ifunc helper macro, but it is kept for clarity.
#[inline(never)]
unsafe fn(src: *mut u128) -> u128;
atomic_load_zacas_relaxed = atomic_load_zacas(Ordering::Relaxed);
atomic_load_zacas_acquire = atomic_load_zacas(Ordering::Acquire);
atomic_load_zacas_seqcst = atomic_load_zacas(Ordering::SeqCst);
}
// SAFETY: the caller must uphold the safety contract.
// we only call atomic_load_zacas if zacas is available.
unsafe {
match order {
Ordering::Relaxed => {
ifunc!(unsafe fn(src: *mut u128) -> u128 {
if detect::detect().zacas() {
atomic_load_zacas_relaxed
} else {
fallback::atomic_load_non_seqcst
}
})
}
Ordering::Acquire => {
ifunc!(unsafe fn(src: *mut u128) -> u128 {
if detect::detect().zacas() {
atomic_load_zacas_acquire
} else {
fallback::atomic_load_non_seqcst
}
})
}
Ordering::SeqCst => {
ifunc!(unsafe fn(src: *mut u128) -> u128 {
if detect::detect().zacas() {
atomic_load_zacas_seqcst
} else {
fallback::atomic_load_seqcst
}
})
}
_ => unreachable!(),
}
}
}
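// Sketch of what the run-time dispatch above selects (illustrative only; the
// `ifunc!` helper defined elsewhere in this crate is also expected to cache the
// chosen function pointer so that detection is not repeated on every call):
//
//     let f: unsafe fn(*mut u128) -> u128 = if detect::detect().zacas() {
//         atomic_load_zacas_seqcst // generated by fn_alias! above
//     } else {
//         fallback::atomic_load_seqcst
//     };
//     unsafe { f(src) }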
#[inline]
unsafe fn atomic_load_zacas(src: *mut u128, order: Ordering) -> u128 {
debug_assert!(src as usize % 16 == 0);
debug_assert_zacas!();
let (out_lo, out_hi);
// SAFETY: the caller must uphold the safety contract.
unsafe {
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! load {
($fence:tt, $asm_order:tt) => {
asm!(
start_zacas!(),
$fence, // fence
concat!("amocas.q", $asm_order, " a2, a2, 0({src})"), // atomic { if *dst == a2:a3 { *dst = a2:a3 } else { a2:a3 = *dst } }
end_zacas!(),
src = in(reg) ptr_reg!(src),
inout("a2") 0_u64 => out_lo,
inout("a3") 0_u64 => out_hi,
options(nostack, preserves_flags),
)
};
}
#[cfg(not(portable_atomic_pre_llvm_20))]
atomic_rmw_amocas_order!(load, order);
#[cfg(portable_atomic_pre_llvm_20)]
macro_rules! load {
($fence:tt, $insn_order:tt) => {
asm!(
$fence, // fence
// amocas.q{,.aq,.rl,.aqrl} a2, a2, (a0) // atomic { if *a0 == a2:a3 { *a0 = a2:a3 } else { a2:a3 = *a0 } }
concat!(".4byte 0x2", $insn_order, "c5462f"),
in("a0") ptr_reg!(src),
inout("a2") 0_u64 => out_lo,
inout("a3") 0_u64 => out_hi,
options(nostack, preserves_flags),
)
};
}
#[cfg(portable_atomic_pre_llvm_20)]
atomic_rmw_amocas_order_insn!(load, order);
U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
}
}
#[inline]
unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
// SAFETY: the caller must uphold the safety contract.
unsafe {
atomic_swap(dst, val, order);
}
}
#[inline]
unsafe fn atomic_compare_exchange(
dst: *mut u128,
old: u128,
new: u128,
success: Ordering,
failure: Ordering,
) -> Result<u128, u128> {
#[cfg(any(target_feature = "zacas", portable_atomic_target_feature = "zacas"))]
// SAFETY: the caller must uphold the safety contract.
// cfg guarantees that zacas instructions are available at compile-time.
let (prev, ok) = unsafe { atomic_compare_exchange_zacas(dst, old, new, success, failure) };
#[cfg(not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas")))]
let (prev, ok) = {
fn_alias! {
// inline(never) is just a hint and not strictly necessary because we use the
// ifunc helper macro, but it is kept for clarity.
#[inline(never)]
unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool);
zacas_relaxed_fn = atomic_compare_exchange_zacas(Ordering::Relaxed, Ordering::Relaxed);
zacas_acquire_fn = atomic_compare_exchange_zacas(Ordering::Acquire, Ordering::Acquire);
zacas_release_fn = atomic_compare_exchange_zacas(Ordering::Release, Ordering::Relaxed);
zacas_acqrel_fn = atomic_compare_exchange_zacas(Ordering::AcqRel, Ordering::Acquire);
zacas_seqcst_fn = atomic_compare_exchange_zacas(Ordering::SeqCst, Ordering::SeqCst);
}
let order = crate::utils::upgrade_success_ordering(success, failure);
// SAFETY: the caller must uphold the safety contract.
// we only call atomic_compare_exchange_zacas if zacas is available.
unsafe {
match order {
Ordering::Relaxed => {
ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
if detect::detect().zacas() {
zacas_relaxed_fn
} else {
fallback::atomic_compare_exchange_non_seqcst
}
})
}
Ordering::Acquire => {
ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
if detect::detect().zacas() {
zacas_acquire_fn
} else {
fallback::atomic_compare_exchange_non_seqcst
}
})
}
Ordering::Release => {
ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
if detect::detect().zacas() {
zacas_release_fn
} else {
fallback::atomic_compare_exchange_non_seqcst
}
})
}
Ordering::AcqRel => {
ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
if detect::detect().zacas() {
zacas_acqrel_fn
} else {
fallback::atomic_compare_exchange_non_seqcst
}
})
}
Ordering::SeqCst => {
ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
if detect::detect().zacas() {
zacas_seqcst_fn
} else {
fallback::atomic_compare_exchange_seqcst
}
})
}
_ => unreachable!(),
}
}
};
if ok { Ok(prev) } else { Err(prev) }
}
#[inline]
unsafe fn atomic_compare_exchange_zacas(
dst: *mut u128,
old: u128,
new: u128,
success: Ordering,
failure: Ordering,
) -> (u128, bool) {
debug_assert!(dst as usize % 16 == 0);
debug_assert_zacas!();
let order = crate::utils::upgrade_success_ordering(success, failure);
let old = U128 { whole: old };
let new = U128 { whole: new };
let (prev_lo, prev_hi);
// SAFETY: the caller must uphold the safety contract.
unsafe {
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! cmpxchg {
($fence:tt, $asm_order:tt) => {
asm!(
start_zacas!(),
$fence, // fence
concat!("amocas.q", $asm_order, " a4, a2, 0({dst})"), // atomic { if *dst == a4:a5 { *dst = a2:a3 } else { a4:a5 = *dst } }
end_zacas!(),
dst = in(reg) ptr_reg!(dst),
// must be allocated to even/odd register pair
inout("a4") old.pair.lo => prev_lo,
inout("a5") old.pair.hi => prev_hi,
// must be allocated to even/odd register pair
in("a2") new.pair.lo,
in("a3") new.pair.hi,
options(nostack, preserves_flags),
)
};
}
#[cfg(not(portable_atomic_pre_llvm_20))]
atomic_rmw_amocas_order!(cmpxchg, order, failure = failure);
#[cfg(portable_atomic_pre_llvm_20)]
macro_rules! cmpxchg {
($fence:tt, $insn_order:tt) => {
asm!(
$fence, // fence
// amocas.q{,.aq,.rl,.aqrl} a4, a2, (a0) // atomic { if *a0 == a4:a5 { *a0 = a2:a3 } else { a4:a5 = *a0 } }
concat!(".4byte 0x2", $insn_order, "c5472f"),
in("a0") ptr_reg!(dst),
// must be allocated to even/odd register pair
inout("a4") old.pair.lo => prev_lo,
inout("a5") old.pair.hi => prev_hi,
// must be allocated to even/odd register pair
in("a2") new.pair.lo,
in("a3") new.pair.hi,
options(nostack, preserves_flags),
)
};
}
#[cfg(portable_atomic_pre_llvm_20)]
atomic_rmw_amocas_order_insn!(cmpxchg, order, failure = failure);
let prev = U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole;
(prev, prev == old.whole)
}
}
// amocas is always strong.
use self::atomic_compare_exchange as atomic_compare_exchange_weak;
// 128-bit atomic load by two 64-bit atomic loads.
#[inline]
unsafe fn byte_wise_atomic_load(src: *const u128) -> u128 {
let (out_lo, out_hi);
// SAFETY: the caller must uphold the safety contract.
unsafe {
asm!(
"ld {out_lo}, ({src})", // atomic { out_lo = *src }
"ld {out_hi}, 8({src})", // atomic { out_hi = *src.byte_add(8) }
src = in(reg) ptr_reg!(src),
out_lo = out(reg) out_lo,
out_hi = out(reg) out_hi,
options(pure, nostack, preserves_flags, readonly),
);
U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
}
}
macro_rules! select_atomic_rmw {
(
unsafe fn $name:ident(dst: *mut u128 $(, $($arg:tt)*)?) $(-> $ret_ty:ty)? {
|$zacas_fn_binding:ident| $($zacas_fn_body:tt)*
}
zacas = $zacas_fn:ident;
non_seqcst_fallback = $non_seqcst_fallback_fn:ident;
seqcst_fallback = $seqcst_fallback_fn:ident;
) => {
#[inline]
unsafe fn $zacas_fn(dst: *mut u128 $(, $($arg)*)?, order: Ordering) $(-> $ret_ty)? {
// SAFETY: the caller must uphold the safety contract.
unsafe {
// These are not single-copy atomic reads, but that is ok because the subsequent
// CAS will check for consistency.
//
// Note that the C++20 memory model does not allow mixed-sized atomic access,
// so we must use inline assembly to implement byte_wise_atomic_load.
// (i.e., a byte-wise atomic load based on the standard library's atomic types
// cannot be used here).
let mut prev = byte_wise_atomic_load(dst);
loop {
let next = {
let $zacas_fn_binding = prev;
$($zacas_fn_body)*
};
match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) {
Ok(x) => return x,
Err(x) => prev = x,
}
}
}
}
// If zacas is available at compile-time, we can always use zacas_fn.
#[cfg(any(target_feature = "zacas", portable_atomic_target_feature = "zacas"))]
use self::$zacas_fn as $name;
// Otherwise, we need to do run-time detection and can use zacas_fn only if zacas is available.
#[cfg(not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas")))]
#[inline]
unsafe fn $name(dst: *mut u128 $(, $($arg)*)?, order: Ordering) $(-> $ret_ty)? {
fn_alias! {
// inline(never) is just a hint and not strictly necessary because we use the
// ifunc helper macro, but it is kept for clarity.
#[inline(never)]
unsafe fn(dst: *mut u128 $(, $($arg)*)?) $(-> $ret_ty)?;
zacas_relaxed_fn = $zacas_fn(Ordering::Relaxed);
zacas_acquire_fn = $zacas_fn(Ordering::Acquire);
zacas_release_fn = $zacas_fn(Ordering::Release);
zacas_acqrel_fn = $zacas_fn(Ordering::AcqRel);
zacas_seqcst_fn = $zacas_fn(Ordering::SeqCst);
}
// SAFETY: the caller must uphold the safety contract.
// we only call zacas_fn if zacas is available.
unsafe {
match order {
Ordering::Relaxed => {
ifunc!(unsafe fn(dst: *mut u128 $(, $($arg)*)?) $(-> $ret_ty)? {
if detect::detect().zacas() {
zacas_relaxed_fn
} else {
fallback::$non_seqcst_fallback_fn
}
})
}
Ordering::Acquire => {
ifunc!(unsafe fn(dst: *mut u128 $(, $($arg)*)?) $(-> $ret_ty)? {
if detect::detect().zacas() {
zacas_acquire_fn
} else {
fallback::$non_seqcst_fallback_fn
}
})
}
Ordering::Release => {
ifunc!(unsafe fn(dst: *mut u128 $(, $($arg)*)?) $(-> $ret_ty)? {
if detect::detect().zacas() {
zacas_release_fn
} else {
fallback::$non_seqcst_fallback_fn
}
})
}
Ordering::AcqRel => {
ifunc!(unsafe fn(dst: *mut u128 $(, $($arg)*)?) $(-> $ret_ty)? {
if detect::detect().zacas() {
zacas_acqrel_fn
} else {
fallback::$non_seqcst_fallback_fn
}
})
}
Ordering::SeqCst => {
ifunc!(unsafe fn(dst: *mut u128 $(, $($arg)*)?) $(-> $ret_ty)? {
if detect::detect().zacas() {
zacas_seqcst_fn
} else {
fallback::$seqcst_fallback_fn
}
})
}
_ => unreachable!(),
}
}
}
};
}
select_atomic_rmw! {
unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128 {
|_x| val
}
zacas = atomic_swap_zacas;
non_seqcst_fallback = atomic_swap_non_seqcst;
seqcst_fallback = atomic_swap_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128 {
|x| x.wrapping_add(val)
}
zacas = atomic_add_zacas;
non_seqcst_fallback = atomic_add_non_seqcst;
seqcst_fallback = atomic_add_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128 {
|x| x.wrapping_sub(val)
}
zacas = atomic_sub_zacas;
non_seqcst_fallback = atomic_sub_non_seqcst;
seqcst_fallback = atomic_sub_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128 {
|x| x & val
}
zacas = atomic_and_zacas;
non_seqcst_fallback = atomic_and_non_seqcst;
seqcst_fallback = atomic_and_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128 {
|x| !(x & val)
}
zacas = atomic_nand_zacas;
non_seqcst_fallback = atomic_nand_non_seqcst;
seqcst_fallback = atomic_nand_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128 {
|x| x | val
}
zacas = atomic_or_zacas;
non_seqcst_fallback = atomic_or_non_seqcst;
seqcst_fallback = atomic_or_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128 {
|x| x ^ val
}
zacas = atomic_xor_zacas;
non_seqcst_fallback = atomic_xor_non_seqcst;
seqcst_fallback = atomic_xor_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128 {
|x| {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
{ core::cmp::max(x as i128, val as i128) as u128 }
}
}
zacas = atomic_max_zacas;
non_seqcst_fallback = atomic_max_non_seqcst;
seqcst_fallback = atomic_max_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128 {
|x| core::cmp::max(x, val)
}
zacas = atomic_umax_zacas;
non_seqcst_fallback = atomic_umax_non_seqcst;
seqcst_fallback = atomic_umax_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128 {
|x| {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
{ core::cmp::min(x as i128, val as i128) as u128 }
}
}
zacas = atomic_min_zacas;
non_seqcst_fallback = atomic_min_non_seqcst;
seqcst_fallback = atomic_min_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128 {
|x| core::cmp::min(x, val)
}
zacas = atomic_umin_zacas;
non_seqcst_fallback = atomic_umin_non_seqcst;
seqcst_fallback = atomic_umin_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_not(dst: *mut u128) -> u128 {
|x| !x
}
zacas = atomic_not_zacas;
non_seqcst_fallback = atomic_not_non_seqcst;
seqcst_fallback = atomic_not_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_neg(dst: *mut u128) -> u128 {
|x| x.wrapping_neg()
}
zacas = atomic_neg_zacas;
non_seqcst_fallback = atomic_neg_non_seqcst;
seqcst_fallback = atomic_neg_seqcst;
}
#[inline]
fn is_lock_free() -> bool {
#[cfg(any(target_feature = "zacas", portable_atomic_target_feature = "zacas"))]
{
// zacas is available at compile-time.
true
}
#[cfg(not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas")))]
{
detect::detect().zacas()
}
}
const IS_ALWAYS_LOCK_FREE: bool =
cfg!(any(target_feature = "zacas", portable_atomic_target_feature = "zacas"));
atomic128!(AtomicI128, i128, atomic_max, atomic_min);
atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);
#[allow(clippy::undocumented_unsafe_blocks, clippy::wildcard_imports)]
#[cfg(test)]
mod tests {
use super::*;
test_atomic_int!(i128);
test_atomic_int!(u128);
// load/store/swap implementation is not affected by signedness, so it is
// enough to test only unsigned types.
stress_test!(u128);
}


@@ -0,0 +1,526 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
128-bit atomic implementation on s390x.
This architecture provides the following 128-bit atomic instructions:
- LPQ/STPQ: load/store (arch1 or later, i.e., baseline)
- CDSG: CAS (arch1 or later, i.e., baseline)
See "Atomic operation overview by architecture" in atomic-maybe-uninit for a more comprehensive and
detailed description of the atomic and synchronization instructions in this architecture:
https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md#s390x
LLVM's minimal supported architecture level is arch8 (z10):
https://github.com/llvm/llvm-project/blob/llvmorg-20.1.0/llvm/lib/Target/SystemZ/SystemZProcessors.td#L16-L17
This does not appear to have changed since the current s390x backend was added in LLVM 3.3:
https://github.com/llvm/llvm-project/commit/5f613dfd1f7edb0ae95d521b7107b582d9df5103#diff-cbaef692b3958312e80fd5507a7e2aff071f1acb086f10e8a96bc06a7bb289db
Note: On Miri and ThreadSanitizer, which do not support inline assembly, we don't use
this module and use intrinsics.rs instead.
Refs:
- z/Architecture Principles of Operation, Fourteenth Edition (SA22-7832-13)
https://publibfp.dhe.ibm.com/epubs/pdf/a227832d.pdf
- atomic-maybe-uninit
https://github.com/taiki-e/atomic-maybe-uninit
Generated asm:
- s390x https://godbolt.org/z/oPxYYEvPG
- s390x (z196) https://godbolt.org/z/M69KrKT7Y
- s390x (z15,-vector) https://godbolt.org/z/Wec8b3ada
- s390x (z15) https://godbolt.org/z/KxWcrbfYh
*/
include!("macros.rs");
use core::{arch::asm, sync::atomic::Ordering};
use crate::utils::{Pair, U128};
// bcr 14,0 requires fast-BCR-serialization facility added in arch9 (z196).
#[cfg(any(
target_feature = "fast-serialization",
portable_atomic_target_feature = "fast-serialization",
))]
macro_rules! serialization {
() => {
"bcr 14, 0"
};
}
#[cfg(not(any(
target_feature = "fast-serialization",
portable_atomic_target_feature = "fast-serialization",
)))]
macro_rules! serialization {
() => {
"bcr 15, 0"
};
}
// Use distinct operands on z196 or later, otherwise split to lgr and $op.
#[cfg(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops"))]
macro_rules! distinct_op {
($op:tt, $a0:tt, $a1:tt, $a2:tt) => {
concat!($op, "k ", $a0, ", ", $a1, ", ", $a2)
};
}
#[cfg(not(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops")))]
macro_rules! distinct_op {
($op:tt, $a0:tt, $a1:tt, $a2:tt) => {
concat!("lgr ", $a0, ", ", $a1, "\n", $op, " ", $a0, ", ", $a2)
};
}
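// For example, `distinct_op!("algr", "%r13", "%r1", "{val_lo}")` expands to
// "algrk %r13, %r1, {val_lo}" when the distinct-operands facility is enabled,
// and to "lgr %r13, %r1\nalgr %r13, {val_lo}" otherwise.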
// Use selgr$cond on z15 or later, otherwise split to locgr$cond and $op.
#[cfg(any(
target_feature = "miscellaneous-extensions-3",
portable_atomic_target_feature = "miscellaneous-extensions-3",
))]
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
macro_rules! select_op {
($cond:tt, $a0:tt, $a1:tt, $a2:tt) => {
concat!("selgr", $cond, " ", $a0, ", ", $a1, ", ", $a2)
};
}
#[cfg(not(any(
target_feature = "miscellaneous-extensions-3",
portable_atomic_target_feature = "miscellaneous-extensions-3",
)))]
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
macro_rules! select_op {
($cond:tt, $a0:tt, $a1:tt, $a2:tt) => {
concat!("lgr ", $a0, ", ", $a2, "\n", "locgr", $cond, " ", $a0, ", ", $a1)
};
}
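// For example, `select_op!("h", "%r12", "%r1", "{val_lo}")` expands to
// "selgrh %r12, %r1, {val_lo}" on z15 or later, and to
// "lgr %r12, {val_lo}\nlocgrh %r12, %r1" when only load-store-on-cond is available.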
// Extracts and checks condition code.
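// A sketch of why this works, assuming standard IPM/CDSG condition-code semantics:
// IPM inserts the condition code into bit positions 34-35 of the register
// (z/Architecture bit numbering) and zeroes bits 32-33, i.e. the low 32-bit word
// holds cc << 28 with its top two bits clear. Subtracting 0x1000_0000 (268435456)
// therefore borrows into bit 31 exactly when cc == 0, so this returns true iff
// cc == 0, which for CDSG means the compare-and-swap succeeded.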
#[inline]
fn extract_cc(r: i64) -> bool {
r.wrapping_add(-268435456) & (1 << 31) != 0
}
#[inline]
unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 {
debug_assert!(src as usize % 16 == 0);
let (out_hi, out_lo);
// SAFETY: the caller must uphold the safety contract.
unsafe {
// atomic load is always SeqCst.
asm!(
"lpq %r0, 0({src})", // atomic { r0:r1 = *src }
src = in(reg) ptr_reg!(src),
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
out("r0") out_hi,
out("r1") out_lo,
options(nostack, preserves_flags),
);
U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
}
}
#[inline]
unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
debug_assert!(dst as usize % 16 == 0);
let val = U128 { whole: val };
// SAFETY: the caller must uphold the safety contract.
unsafe {
macro_rules! atomic_store {
($acquire:expr) => {
asm!(
"stpq %r0, 0({dst})", // atomic { *dst = r0:r1 }
$acquire, // fence
dst = in(reg) ptr_reg!(dst),
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
in("r0") val.pair.hi,
in("r1") val.pair.lo,
options(nostack, preserves_flags),
)
};
}
match order {
// Relaxed and Release stores are equivalent.
Ordering::Relaxed | Ordering::Release => atomic_store!(""),
Ordering::SeqCst => atomic_store!(serialization!()),
_ => unreachable!(),
}
}
}
#[inline]
unsafe fn atomic_compare_exchange(
dst: *mut u128,
old: u128,
new: u128,
_success: Ordering,
_failure: Ordering,
) -> Result<u128, u128> {
debug_assert!(dst as usize % 16 == 0);
let old = U128 { whole: old };
let new = U128 { whole: new };
let (prev_hi, prev_lo);
let r;
// SAFETY: the caller must uphold the safety contract.
let prev = unsafe {
// atomic CAS is always SeqCst.
asm!(
"cdsg %r0, %r12, 0({dst})", // atomic { if *dst == r0:r1 { cc = 0; *dst = r12:13 } else { cc = 1; r0:r1 = *dst } }
"ipm {r}", // r[:] = cc
dst = in(reg) ptr_reg!(dst),
r = lateout(reg) r,
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
inout("r0") old.pair.hi => prev_hi,
inout("r1") old.pair.lo => prev_lo,
in("r12") new.pair.hi,
in("r13") new.pair.lo,
// Do not use `preserves_flags` because CDSG modifies the condition code.
options(nostack),
);
U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
};
if extract_cc(r) { Ok(prev) } else { Err(prev) }
}
// cdsg is always strong.
use self::atomic_compare_exchange as atomic_compare_exchange_weak;
// 128-bit atomic load by two 64-bit atomic loads.
#[cfg(not(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
)))]
#[inline]
unsafe fn byte_wise_atomic_load(src: *const u128) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
let (out_hi, out_lo);
asm!(
"lg {out_hi}, 8({src})", // atomic { out_hi = *src.byte_add(8) }
"lg {out_lo}, 0({src})", // atomic { out_lo = *src }
src = in(reg) src,
out_hi = out(reg) out_hi,
out_lo = out(reg) out_lo,
options(pure, nostack, preserves_flags, readonly),
);
U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
}
}
#[cfg(not(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
)))]
#[inline(always)]
unsafe fn atomic_update<F>(dst: *mut u128, order: Ordering, mut f: F) -> u128
where
F: FnMut(u128) -> u128,
{
// SAFETY: the caller must uphold the safety contract.
unsafe {
// These are not single-copy atomic reads, but that is ok because the subsequent
// CAS will check for consistency.
//
// Note that the C++20 memory model does not allow mixed-sized atomic access,
// so we must use inline assembly to implement byte_wise_atomic_load.
// (i.e., a byte-wise atomic load based on the standard library's atomic types
// cannot be used here).
let mut prev = byte_wise_atomic_load(dst);
loop {
let next = f(prev);
match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) {
Ok(x) => return x,
Err(x) => prev = x,
}
}
}
}
#[inline]
unsafe fn atomic_swap(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
let val = U128 { whole: val };
let (mut prev_hi, mut prev_lo);
// SAFETY: the caller must uphold the safety contract.
//
// We could use atomic_update here, but using inline assembly allows omitting
// the comparison of results and the storing/comparing of condition flags.
//
// Do not use atomic_rmw_cas_3 because it needs extra LGR to implement swap.
unsafe {
// atomic swap is always SeqCst.
asm!(
"lg %r0, 8({dst})", // atomic { r0 = *dst.byte_add(8) }
"lg %r1, 0({dst})", // atomic { r1 = *dst }
"2:", // 'retry:
"cdsg %r0, %r12, 0({dst})", // atomic { if *dst == r0:r1 { cc = 0; *dst = r12:r13 } else { cc = 1; r0:r1 = *dst } }
"jl 2b", // if cc == 1 { jump 'retry }
dst = in(reg) ptr_reg!(dst),
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
out("r0") prev_hi,
out("r1") prev_lo,
in("r12") val.pair.hi,
in("r13") val.pair.lo,
// Do not use `preserves_flags` because CDSG modifies the condition code.
options(nostack),
);
U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
}
}
/// Atomic RMW by CAS loop (3 arguments)
/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - val_hi/val_lo pair: val argument (read-only for `$op`)
/// - r0/r1 pair: previous value loaded (read-only for `$op`)
/// - r12/r13 pair: new value that will be stored
// We could use atomic_update here, but using inline assembly allows omitting
// the comparison of results and the storing/comparing of condition flags.
macro_rules! atomic_rmw_cas_3 {
($name:ident, [$($reg:tt)*], $($op:tt)*) => {
#[inline]
unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
let val = U128 { whole: val };
let (mut prev_hi, mut prev_lo);
// SAFETY: the caller must uphold the safety contract.
unsafe {
// atomic RMW is always SeqCst.
asm!(
"lg %r0, 8({dst})", // atomic { r0 = *dst.byte_add(8) }
"lg %r1, 0({dst})", // atomic { r1 = *dst }
"2:", // 'retry:
$($op)*
"cdsg %r0, %r12, 0({dst})", // atomic { if *dst == r0:r1 { cc = 0; *dst = r12:r13 } else { cc = 1; r0:r1 = *dst } }
"jl 2b", // if cc == 1 { jump 'retry }
dst = in(reg) ptr_reg!(dst),
val_hi = in(reg) val.pair.hi,
val_lo = in(reg) val.pair.lo,
$($reg)*
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
out("r0") prev_hi,
out("r1") prev_lo,
out("r12") _,
out("r13") _,
// Do not use `preserves_flags` because CDSG modifies the condition code.
options(nostack),
);
U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
}
}
};
}
/// Atomic RMW by CAS loop (2 arguments)
/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - r0/r1 pair: previous value loaded (read-only for `$op`)
/// - r12/r13 pair: new value that will be stored
// We could use atomic_update here, but using inline assembly allows omitting
// the comparison of results and the storing/comparing of condition flags.
macro_rules! atomic_rmw_cas_2 {
($name:ident, [$($reg:tt)*], $($op:tt)*) => {
#[inline]
unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
let (mut prev_hi, mut prev_lo);
// SAFETY: the caller must uphold the safety contract.
unsafe {
// atomic RMW is always SeqCst.
asm!(
"lg %r0, 8({dst})", // atomic { r0 = *dst.byte_add(8) }
"lg %r1, 0({dst})", // atomic { r1 = *dst }
"2:", // 'retry:
$($op)*
"cdsg %r0, %r12, 0({dst})", // atomic { if *dst == r0:r1 { cc = 0; *dst = r12:r13 } else { cc = 1; r0:r1 = *dst } }
"jl 2b", // if cc == 1 { jump 'retry }
dst = in(reg) ptr_reg!(dst),
$($reg)*
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
out("r0") prev_hi,
out("r1") prev_lo,
out("r12") _,
out("r13") _,
// Do not use `preserves_flags` because CDSG modifies the condition code.
options(nostack),
);
U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
}
}
};
}
atomic_rmw_cas_3! {
atomic_add, [],
distinct_op!("algr", "%r13", "%r1", "{val_lo}"), // r13 = r1 + val_lo; cc = zero | carry
"lgr %r12, %r0", // r12 = r0
"alcgr %r12, {val_hi}", // r12 += val_hi + carry
}
atomic_rmw_cas_3! {
atomic_sub, [],
distinct_op!("slgr", "%r13", "%r1", "{val_lo}"), // r13 = r1 - val_lo; cc = zero | borrow
"lgr %r12, %r0", // r12 = r0
"slbgr %r12, {val_hi}", // r12 -= val_hi + borrow
}
atomic_rmw_cas_3! {
atomic_and, [],
distinct_op!("ngr", "%r13", "%r1", "{val_lo}"), // r13 = r1 & val_lo
distinct_op!("ngr", "%r12", "%r0", "{val_hi}"), // r12 = r0 & val_hi
}
// Use nngrk on z15 or later.
#[cfg(any(
target_feature = "miscellaneous-extensions-3",
portable_atomic_target_feature = "miscellaneous-extensions-3",
))]
atomic_rmw_cas_3! {
atomic_nand, [],
"nngrk %r13, %r1, {val_lo}", // r13 = !(r1 & val_lo)
"nngrk %r12, %r0, {val_hi}", // r12 = !(r0 & val_hi)
}
#[cfg(not(any(
target_feature = "miscellaneous-extensions-3",
portable_atomic_target_feature = "miscellaneous-extensions-3",
)))]
atomic_rmw_cas_3! {
atomic_nand, [],
distinct_op!("ngr", "%r13", "%r1", "{val_lo}"), // r13 = r1 & val_lo
distinct_op!("ngr", "%r12", "%r0", "{val_hi}"), // r12 = r0 & val_hi
"lcgr %r13, %r13", // r13 = !r13 + 1
"aghi %r13, -1", // r13 -= 1
"lcgr %r12, %r12", // r12 = !r12 + 1
"aghi %r12, -1", // r12 -= 1
}
atomic_rmw_cas_3! {
atomic_or, [],
distinct_op!("ogr", "%r13", "%r1", "{val_lo}"), // r13 = r1 | val_lo
distinct_op!("ogr", "%r12", "%r0", "{val_hi}"), // r12 = r0 | val_hi
}
atomic_rmw_cas_3! {
atomic_xor, [],
distinct_op!("xgr", "%r13", "%r1", "{val_lo}"), // r13 = r1 ^ val_lo
distinct_op!("xgr", "%r12", "%r0", "{val_hi}"), // r12 = r0 ^ val_hi
}
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
atomic_rmw_cas_3! {
atomic_max, [],
"clgr %r1, {val_lo}", // if r1(u) < val_lo(u) { cc = 1 } else if r1(u) > val_lo(u) { cc = 2 } else { cc = 0 }
select_op!("h", "%r12", "%r1", "{val_lo}"), // if cc == 2 { r12 = r1 } else { r12 = val_lo }
"cgr %r0, {val_hi}", // if r0(i) < val_hi(i) { cc = 1 } else if r0(i) > val_hi(i) { cc = 2 } else { cc = 0 }
select_op!("h", "%r13", "%r1", "{val_lo}"), // if cc == 2 { r13 = r1 } else { r13 = val_lo }
"locgre %r13, %r12", // if cc == 0 { r13 = r12 }
select_op!("h", "%r12", "%r0", "{val_hi}"), // if cc == 2 { r12 = r0 } else { r12 = val_hi }
}
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
atomic_rmw_cas_3! {
atomic_umax, [tmp = out(reg) _,],
"clgr %r1, {val_lo}", // if r1(u) < val_lo(u) { cc = 1 } else if r1(u) > val_lo(u) { cc = 2 } else { cc = 0 }
select_op!("h", "{tmp}", "%r1", "{val_lo}"), // if cc == 2 { tmp = r1 } else { tmp = val_lo }
"clgr %r0, {val_hi}", // if r0(u) < val_hi(u) { cc = 1 } else if r0(u) > val_hi(u) { cc = 2 } else { cc = 0 }
select_op!("h", "%r12", "%r0", "{val_hi}"), // if cc == 2 { r12 = r0 } else { r12 = val_hi }
select_op!("h", "%r13", "%r1", "{val_lo}"), // if cc == 2 { r13 = r1 } else { r13 = val_lo }
"cgr %r0, {val_hi}", // if r0(i) < val_hi(i) { cc = 1 } else if r0(i) > val_hi(i) { cc = 2 } else { cc = 0 }
"locgre %r13, {tmp}", // if cc == 0 { r13 = tmp }
}
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
atomic_rmw_cas_3! {
atomic_min, [],
"clgr %r1, {val_lo}", // if r1(u) < val_lo(u) { cc = 1 } else if r1(u) > val_lo(u) { cc = 2 } else { cc = 0 }
select_op!("l", "%r12", "%r1", "{val_lo}"), // if cc == 1 { r12 = r1 } else { r12 = val_lo }
"cgr %r0, {val_hi}", // if r0(i) < val_hi(i) { cc = 1 } else if r0(i) > val_hi(i) { cc = 2 } else { cc = 0 }
select_op!("l", "%r13", "%r1", "{val_lo}"), // if cc == 1 { r13 = r1 } else { r13 = val_lo }
"locgre %r13, %r12", // if cc == 0 { r13 = r12 }
select_op!("l", "%r12", "%r0", "{val_hi}"), // if cc == 1 { r12 = r0 } else { r12 = val_hi }
}
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
atomic_rmw_cas_3! {
atomic_umin, [tmp = out(reg) _,],
"clgr %r1, {val_lo}", // if r1(u) < val_lo(u) { cc = 1 } else if r1(u) > val_lo(u) { cc = 2 } else { cc = 0 }
select_op!("l", "{tmp}", "%r1", "{val_lo}"), // if cc == 1 { tmp = r1 } else { tmp = val_lo }
"clgr %r0, {val_hi}", // if r0(u) < val_hi(u) { cc = 1 } else if r0(u) > val_hi(u) { cc = 2 } else { cc = 0 }
select_op!("l", "%r12", "%r0", "{val_hi}"), // if cc == 1 { r12 = r0 } else { r12 = val_hi }
select_op!("l", "%r13", "%r1", "{val_lo}"), // if cc == 1 { r13 = r1 } else { r13 = val_lo }
"cgr %r0, {val_hi}", // if r0(i) < val_hi(i) { cc = 1 } else if r0(i) > val_hi(i) { cc = 2 } else { cc = 0 }
"locgre %r13, {tmp}", // if cc == 0 { r13 = tmp }
}
// We use atomic_update for atomic min/max on pre-z196 because
// z10 doesn't seem to have a good way to implement 128-bit min/max.
// loc{,g}r requires z196 or later.
// https://godbolt.org/z/EqoMEP8b3
#[cfg(not(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
)))]
atomic_rmw_by_atomic_update!(cmp);
atomic_rmw_cas_2! {
atomic_not, [],
"lcgr %r13, %r1", // r13 = !r1 + 1
"aghi %r13, -1", // r13 -= 1
"lcgr %r12, %r0", // r12 = !r0 + 1
"aghi %r12, -1", // r12 -= 1
}
#[cfg(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops"))]
atomic_rmw_cas_2! {
atomic_neg, [zero = in(reg) 0_u64,],
"slgrk %r13, {zero}, %r1", // r13 = 0 - r1; cc = zero | borrow
"lghi %r12, 0", // r12 = 0
"slbgr %r12, %r0", // r12 -= r0 + borrow
}
#[cfg(not(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops")))]
atomic_rmw_cas_2! {
atomic_neg, [],
"lghi %r13, 0", // r13 = 0
"slgr %r13, %r1", // r13 -= r1; cc = zero | borrow
"lghi %r12, 0", // r12 = 0
"slbgr %r12, %r0", // r12 -= r0 + borrow
}
#[inline]
const fn is_lock_free() -> bool {
IS_ALWAYS_LOCK_FREE
}
const IS_ALWAYS_LOCK_FREE: bool = true;
atomic128!(AtomicI128, i128, atomic_max, atomic_min);
atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);
#[cfg(test)]
mod tests {
use super::*;
test_atomic_int!(i128);
test_atomic_int!(u128);
// load/store/swap implementation is not affected by signedness, so it is
// enough to test only unsigned types.
stress_test!(u128);
}


@@ -0,0 +1,872 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
128-bit atomic implementation on x86_64.
This architecture provides the following 128-bit atomic instructions:
- CMPXCHG16B: CAS (CMPXCHG16B)
- VMOVDQA: load/store (Intel, AMD, or Zhaoxin CPU with AVX)
Note: On Miri and ThreadSanitizer, which do not support inline assembly, we don't use
this module and use intrinsics.rs instead.
Refs:
- x86 and amd64 instruction reference https://www.felixcloutier.com/x86
- atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
Generated asm:
- x86_64 (+cmpxchg16b) https://godbolt.org/z/rfs1jxd51
*/
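// At the public API level, whether this CMPXCHG16B-based path or the locked
// fallback is used can be observed via the lock-freedom queries. A caller-side
// sketch (hypothetical usage; assumes the crate's public `AtomicU128` type):
//
//     // compile-time answer: true when cmpxchg16b is enabled at compile time
//     let _ = portable_atomic::AtomicU128::is_always_lock_free();
//     // also reflects run-time detection (outline atomics)
//     if portable_atomic::AtomicU128::is_lock_free() { /* CMPXCHG16B path */ }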
// TODO: use core::arch::x86_64::cmpxchg16b where it is available and more efficient than asm
include!("macros.rs");
#[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))]
#[path = "../fallback/outline_atomics.rs"]
mod fallback;
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(not(target_env = "sgx"))]
#[cfg_attr(
not(target_feature = "sse"),
cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))
)]
#[path = "../detect/x86_64.rs"]
mod detect;
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::sync::atomic::Ordering;
use crate::utils::{Pair, U128};
// Asserts that the function is called in the correct context.
macro_rules! debug_assert_cmpxchg16b {
() => {
#[cfg(not(any(
target_feature = "cmpxchg16b",
portable_atomic_target_feature = "cmpxchg16b",
)))]
{
debug_assert!(detect::detect().cmpxchg16b());
}
};
}
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(target_feature = "sse")]
macro_rules! debug_assert_vmovdqa_atomic {
() => {{
debug_assert_cmpxchg16b!();
debug_assert!(detect::detect().vmovdqa_atomic());
}};
}
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(target_feature = "sse")]
#[cfg(target_pointer_width = "32")]
macro_rules! ptr_modifier {
() => {
":e"
};
}
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(target_feature = "sse")]
#[cfg(target_pointer_width = "64")]
macro_rules! ptr_modifier {
() => {
""
};
}
// Unlike AArch64 and RISC-V, x86's assembler doesn't check instruction
// requirements for the currently enabled target features. In fact, x86 assembly
// has no directive for such checks, unlike Arm's .arch_extension, RISC-V's
// .option arch, PowerPC's .machine, etc.
// However, we set target_feature(enable) when available (Rust 1.69+) in case a
// new codegen backend is added that checks for it in the future, or an option
// is added to the assembler to check for it.
#[cfg_attr(
not(portable_atomic_no_cmpxchg16b_target_feature),
target_feature(enable = "cmpxchg16b")
)]
#[inline]
unsafe fn cmpxchg16b(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
debug_assert!(dst as usize % 16 == 0);
debug_assert_cmpxchg16b!();
// SAFETY: the caller must guarantee that `dst` is valid for both writes and
// reads, 16-byte aligned (required by CMPXCHG16B), that there are no
// concurrent non-atomic operations, and that the CPU supports CMPXCHG16B.
//
// If the value at `dst` (destination operand) and rdx:rax are equal, the
// 128-bit value in rcx:rbx is stored to `dst`; otherwise, the value at
// `dst` is loaded into rdx:rax.
//
// The ZF flag is set if the value at `dst` and rdx:rax are equal,
// otherwise it is cleared. Other flags are unaffected.
//
// Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
unsafe {
// cmpxchg16b is always SeqCst.
let r: u8;
let old = U128 { whole: old };
let new = U128 { whole: new };
let (prev_lo, prev_hi);
macro_rules! cmpxchg16b {
($rdi:tt) => {
asm!(
"xchg {rbx_tmp}, rbx", // save rbx which is reserved by LLVM
concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
"sete cl",
"mov rbx, {rbx_tmp}", // restore rbx
rbx_tmp = inout(reg) new.pair.lo => _,
in("rcx") new.pair.hi,
inout("rax") old.pair.lo => prev_lo,
inout("rdx") old.pair.hi => prev_hi,
in($rdi) dst,
lateout("cl") r,
// Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
options(nostack),
)
};
}
#[cfg(target_pointer_width = "32")]
cmpxchg16b!("edi");
#[cfg(target_pointer_width = "64")]
cmpxchg16b!("rdi");
crate::utils::assert_unchecked(r == 0 || r == 1); // needed to remove extra test
(U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole, r != 0)
}
}
// VMOVDQA is atomic on Intel, AMD, and Zhaoxin CPUs with AVX.
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details.
//
// Refs: https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
//
// Use cfg(target_feature = "sse") here -- SSE is included in the x86_64
// baseline and is always available, but the SSE target feature is disabled for
// use cases such as kernels and firmware that should not use vector registers.
// So, do not use vector registers unless the SSE target feature is enabled.
// See also https://github.com/rust-lang/rust/blob/1.84.0/src/doc/rustc/src/platform-support/x86_64-unknown-none.md.
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(target_feature = "sse")]
#[target_feature(enable = "avx")]
#[inline]
unsafe fn atomic_load_vmovdqa(src: *mut u128) -> u128 {
debug_assert!(src as usize % 16 == 0);
debug_assert_vmovdqa_atomic!();
// SAFETY: the caller must uphold the safety contract.
//
// atomic load by vmovdqa is always SeqCst.
unsafe {
let out: core::arch::x86_64::__m128i;
asm!(
concat!("vmovdqa {out}, xmmword ptr [{src", ptr_modifier!(), "}]"),
src = in(reg) src,
out = out(xmm_reg) out,
options(nostack, preserves_flags),
);
core::mem::transmute(out)
}
}
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(target_feature = "sse")]
#[target_feature(enable = "avx")]
#[inline]
unsafe fn atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) {
debug_assert!(dst as usize % 16 == 0);
debug_assert_vmovdqa_atomic!();
// SAFETY: the caller must uphold the safety contract.
unsafe {
let val: core::arch::x86_64::__m128i = core::mem::transmute(val);
match order {
// Relaxed and Release stores are equivalent.
Ordering::Relaxed | Ordering::Release => {
asm!(
concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"),
dst = in(reg) dst,
val = in(xmm_reg) val,
options(nostack, preserves_flags),
);
}
Ordering::SeqCst => {
let p = core::cell::UnsafeCell::new(core::mem::MaybeUninit::<u64>::uninit());
asm!(
concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"),
// Equivalent to mfence, but is up to 3.1x faster on Coffee Lake and up to 2.4x faster on Raptor Lake-H at least in simple cases.
// - https://github.com/taiki-e/portable-atomic/pull/156
// - LLVM uses lock or for x86_32 64-bit atomic SeqCst store using SSE https://godbolt.org/z/9sKEr8YWc
// - Windows uses xchg for x86_32 for MemoryBarrier https://learn.microsoft.com/en-us/windows/win32/api/winnt/nf-winnt-memorybarrier
// - MSVC STL uses lock inc https://github.com/microsoft/STL/pull/740
// - boost uses lock or https://github.com/boostorg/atomic/commit/559eba81af71386cedd99f170dc6101c6ad7bf22
concat!("xchg qword ptr [{p", ptr_modifier!(), "}], {tmp}"),
dst = in(reg) dst,
val = in(xmm_reg) val,
p = inout(reg) p.get() => _,
tmp = lateout(reg) _,
options(nostack, preserves_flags),
);
}
_ => unreachable!(),
}
}
}
#[cfg(not(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
)))]
macro_rules! load_store_detect {
(
vmovdqa = $vmovdqa:ident
cmpxchg16b = $cmpxchg16b:ident
fallback = $fallback:ident
) => {{
let cpuid = detect::detect();
#[cfg(not(any(
target_feature = "cmpxchg16b",
portable_atomic_target_feature = "cmpxchg16b",
)))]
{
// Check CMPXCHG16B first to prevent mixing atomic and non-atomic access.
if cpuid.cmpxchg16b() {
// We only use VMOVDQA when SSE is enabled. See atomic_load_vmovdqa() for more.
#[cfg(target_feature = "sse")]
{
if cpuid.vmovdqa_atomic() { $vmovdqa } else { $cmpxchg16b }
}
#[cfg(not(target_feature = "sse"))]
{
$cmpxchg16b
}
} else {
fallback::$fallback
}
}
#[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
{
if cpuid.vmovdqa_atomic() { $vmovdqa } else { $cmpxchg16b }
}
}};
}
#[inline]
unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 {
// We only use VMOVDQA when SSE is enabled. See atomic_load_vmovdqa() for more.
// SGX doesn't support CPUID.
#[cfg(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
))]
// SAFETY: the caller must uphold the safety contract.
// cfg guarantees that CMPXCHG16B is available at compile-time.
unsafe {
// cmpxchg16b is always SeqCst.
atomic_load_cmpxchg16b(src)
}
#[cfg(not(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
)))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
ifunc!(unsafe fn(src: *mut u128) -> u128 {
load_store_detect! {
vmovdqa = atomic_load_vmovdqa
cmpxchg16b = atomic_load_cmpxchg16b
// Use SeqCst because cmpxchg16b and atomic load by vmovdqa are always SeqCst.
fallback = atomic_load_seqcst
}
})
}
}
// See cmpxchg16b() for target_feature(enable).
#[cfg_attr(
not(portable_atomic_no_cmpxchg16b_target_feature),
target_feature(enable = "cmpxchg16b")
)]
#[inline]
unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 {
debug_assert!(src as usize % 16 == 0);
debug_assert_cmpxchg16b!();
// SAFETY: the caller must guarantee that `src` is valid for both writes and
// reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
// cfg guarantees that the CPU supports CMPXCHG16B.
//
// See cmpxchg16b function for more.
//
// We could use a CAS loop via atomic_compare_exchange here, but using inline assembly allows
// omitting the storing of condition flags and avoiding the use of xchg to handle rbx.
unsafe {
// cmpxchg16b is always SeqCst.
let (out_lo, out_hi);
macro_rules! cmpxchg16b {
($rdi:tt) => {
asm!(
"mov {rbx_tmp}, rbx", // save rbx which is reserved by LLVM
"xor rbx, rbx", // zeroed rbx
concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
"mov rbx, {rbx_tmp}", // restore rbx
// set old/new args of cmpxchg16b to 0 (rbx is zeroed after being saved to rbx_tmp, to avoid xchg)
rbx_tmp = out(reg) _,
in("rcx") 0_u64,
inout("rax") 0_u64 => out_lo,
inout("rdx") 0_u64 => out_hi,
in($rdi) src,
// Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
options(nostack),
)
};
}
#[cfg(target_pointer_width = "32")]
cmpxchg16b!("edi");
#[cfg(target_pointer_width = "64")]
cmpxchg16b!("rdi");
U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
}
}
#[inline]
unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
// We only use VMOVDQA when SSE is enabled. See atomic_load_vmovdqa() for more.
// SGX doesn't support CPUID.
#[cfg(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
))]
// SAFETY: the caller must uphold the safety contract.
// cfg guarantees that CMPXCHG16B is available at compile-time.
unsafe {
// cmpxchg16b is always SeqCst.
let _ = order;
atomic_store_cmpxchg16b(dst, val);
}
#[cfg(not(all(
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
)))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
#[cfg(target_feature = "sse")]
fn_alias! {
#[target_feature(enable = "avx")]
unsafe fn(dst: *mut u128, val: u128);
// atomic store by vmovdqa has at least release semantics.
atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release);
atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst);
}
match order {
// Relaxed and Release stores are equivalent in all implementations
// that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback).
// core::arch's cmpxchg16b will never be called here.
Ordering::Relaxed | Ordering::Release => {
ifunc!(unsafe fn(dst: *mut u128, val: u128) {
load_store_detect! {
vmovdqa = atomic_store_vmovdqa_non_seqcst
cmpxchg16b = atomic_store_cmpxchg16b
fallback = atomic_store_non_seqcst
}
});
}
Ordering::SeqCst => {
ifunc!(unsafe fn(dst: *mut u128, val: u128) {
load_store_detect! {
vmovdqa = atomic_store_vmovdqa_seqcst
cmpxchg16b = atomic_store_cmpxchg16b
fallback = atomic_store_seqcst
}
});
}
_ => unreachable!(),
}
}
}
// See cmpxchg16b() for target_feature(enable).
#[cfg_attr(
not(portable_atomic_no_cmpxchg16b_target_feature),
target_feature(enable = "cmpxchg16b")
)]
#[inline]
unsafe fn atomic_store_cmpxchg16b(dst: *mut u128, val: u128) {
// SAFETY: the caller must uphold the safety contract.
unsafe {
// cmpxchg16b is always SeqCst.
atomic_swap_cmpxchg16b(dst, val, Ordering::SeqCst);
}
}
#[inline]
unsafe fn atomic_compare_exchange(
dst: *mut u128,
old: u128,
new: u128,
_success: Ordering,
_failure: Ordering,
) -> Result<u128, u128> {
#[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
// SAFETY: the caller must guarantee that `dst` is valid for both writes and
// reads, 16-byte aligned, that there are no concurrent non-atomic operations,
// and cfg guarantees that CMPXCHG16B is available at compile-time.
let (prev, ok) = unsafe { cmpxchg16b(dst, old, new) };
#[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))]
// SAFETY: the caller must guarantee that `dst` is valid for both writes and
// reads, 16-byte aligned, and that there are no different kinds of concurrent accesses.
let (prev, ok) = unsafe {
ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
if detect::detect().cmpxchg16b() {
cmpxchg16b
} else {
// Use SeqCst because cmpxchg16b is always SeqCst.
fallback::atomic_compare_exchange_seqcst
}
})
};
if ok { Ok(prev) } else { Err(prev) }
}
// cmpxchg16b is always strong.
use self::atomic_compare_exchange as atomic_compare_exchange_weak;
// See cmpxchg16b() for target_feature(enable).
#[cfg_attr(
not(portable_atomic_no_cmpxchg16b_target_feature),
target_feature(enable = "cmpxchg16b")
)]
#[inline]
unsafe fn atomic_swap_cmpxchg16b(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
debug_assert_cmpxchg16b!();
// SAFETY: the caller must guarantee that `dst` is valid for both writes and
// reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
// cfg guarantees that the CPU supports CMPXCHG16B.
//
// See cmpxchg16b function for more.
//
// We could use a CAS loop via atomic_compare_exchange here, but using inline assembly allows
// omitting the storing/comparing of condition flags and reducing the use of xchg/mov to handle rbx.
//
// Do not use atomic_rmw_cas_3 because it needs extra MOV to implement swap.
unsafe {
// cmpxchg16b is always SeqCst.
let val = U128 { whole: val };
let (mut prev_lo, mut prev_hi);
macro_rules! cmpxchg16b {
($rdi:tt) => {
asm!(
"xchg {rbx_tmp}, rbx", // save rbx which is reserved by LLVM
// These are not single-copy atomic reads, but that is ok because the subsequent
// CAS will check for consistency.
//
// This is based on the code generated for the first load in DW RMWs by LLVM.
//
// Note that the C++20 memory model does not allow mixed-sized atomic access,
// so we must use inline assembly to implement this.
// (i.e., a byte-wise atomic load based on the standard library's atomic types
// cannot be used here).
concat!("mov rax, qword ptr [", $rdi, "]"),
concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
"2:",
concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
"jne 2b",
"mov rbx, {rbx_tmp}", // restore rbx
rbx_tmp = inout(reg) val.pair.lo => _,
in("rcx") val.pair.hi,
out("rax") prev_lo,
out("rdx") prev_hi,
in($rdi) dst,
// Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
options(nostack),
)
};
}
#[cfg(target_pointer_width = "32")]
cmpxchg16b!("edi");
#[cfg(target_pointer_width = "64")]
cmpxchg16b!("rdi");
U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
}
}
/// Atomic RMW by CAS loop (3 arguments)
/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - rsi/r8 pair: val argument (read-only for `$op`)
/// - rax/rdx pair: previous value loaded (read-only for `$op`)
/// - rbx/rcx pair: new value that will be stored
// We could use a CAS loop via atomic_compare_exchange here, but using inline assembly allows
// omitting the storing/comparing of condition flags and reducing the use of xchg/mov to handle rbx.
macro_rules! atomic_rmw_cas_3 {
($name:ident, $($op:tt)*) => {
// See cmpxchg16b() for target_feature(enable).
#[cfg_attr(
not(portable_atomic_no_cmpxchg16b_target_feature),
target_feature(enable = "cmpxchg16b")
)]
#[inline]
unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
debug_assert_cmpxchg16b!();
// SAFETY: the caller must guarantee that `dst` is valid for both writes and
// reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
// cfg guarantees that the CPU supports CMPXCHG16B.
//
// See cmpxchg16b function for more.
unsafe {
// cmpxchg16b is always SeqCst.
let val = U128 { whole: val };
let (mut prev_lo, mut prev_hi);
macro_rules! cmpxchg16b {
($rdi:tt) => {
asm!(
"mov {rbx_tmp}, rbx", // save rbx which is reserved by LLVM
// These are not single-copy atomic reads, but that is ok because the subsequent
// CAS will check for consistency.
//
// This is based on the code generated for the first load in DW RMWs by LLVM.
//
// Note that the C++20 memory model does not allow mixed-sized atomic access,
// so we must use inline assembly to implement this.
// (i.e., a byte-wise atomic load based on the standard library's atomic types
// cannot be used here).
concat!("mov rax, qword ptr [", $rdi, "]"),
concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
"2:",
$($op)*
concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
"jne 2b",
"mov rbx, {rbx_tmp}", // restore rbx
rbx_tmp = out(reg) _,
out("rcx") _,
out("rax") prev_lo,
out("rdx") prev_hi,
in($rdi) dst,
in("rsi") val.pair.lo,
in("r8") val.pair.hi,
// Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
options(nostack),
)
};
}
#[cfg(target_pointer_width = "32")]
cmpxchg16b!("edi");
#[cfg(target_pointer_width = "64")]
cmpxchg16b!("rdi");
U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
}
}
};
}
/// Atomic RMW by CAS loop (2 arguments)
/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - rax/rdx pair: previous value loaded (read-only for `$op`)
/// - rbx/rcx pair: new value that will be stored
// We could implement this as a CAS loop using atomic_compare_exchange, but inline assembly lets us
// omit storing the condition flags and avoid using xchg to handle rbx.
macro_rules! atomic_rmw_cas_2 {
($name:ident, $($op:tt)*) => {
// See cmpxchg16b() for target_feature(enable).
#[cfg_attr(
not(portable_atomic_no_cmpxchg16b_target_feature),
target_feature(enable = "cmpxchg16b")
)]
#[inline]
unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
debug_assert_cmpxchg16b!();
// SAFETY: the caller must guarantee that `dst` is valid for both writes and
// reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
// cfg guarantees that the CPU supports CMPXCHG16B.
//
// See cmpxchg16b function for more.
unsafe {
// cmpxchg16b is always SeqCst.
let (mut prev_lo, mut prev_hi);
macro_rules! cmpxchg16b {
($rdi:tt) => {
asm!(
"mov {rbx_tmp}, rbx", // save rbx which is reserved by LLVM
// These are not single-copy atomic reads, but that is fine because the subsequent
// CAS will check for consistency.
//
// This is based on the code generated for the first load in DW RMWs by LLVM.
//
// Note that the C++20 memory model does not allow mixed-sized atomic access,
// so we must use inline assembly to implement this.
// (i.e., byte-wise atomic based on the standard library's atomic types
// cannot be used here).
concat!("mov rax, qword ptr [", $rdi, "]"),
concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
"2:",
$($op)*
concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
"jne 2b",
"mov rbx, {rbx_tmp}", // restore rbx
rbx_tmp = out(reg) _,
out("rcx") _,
out("rax") prev_lo,
out("rdx") prev_hi,
in($rdi) dst,
// Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
options(nostack),
)
};
}
#[cfg(target_pointer_width = "32")]
cmpxchg16b!("edi");
#[cfg(target_pointer_width = "64")]
cmpxchg16b!("rdi");
U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
}
}
};
}
atomic_rmw_cas_3! {
atomic_add_cmpxchg16b,
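// new (rbx:rcx) = prev (rax:rdx) + val (rsi:r8): add the low halves, then add the high halves with carry.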
"mov rbx, rax",
"add rbx, rsi",
"mov rcx, rdx",
"adc rcx, r8",
}
atomic_rmw_cas_3! {
atomic_sub_cmpxchg16b,
"mov rbx, rax",
"sub rbx, rsi",
"mov rcx, rdx",
"sbb rcx, r8",
}
atomic_rmw_cas_3! {
atomic_and_cmpxchg16b,
"mov rbx, rax",
"and rbx, rsi",
"mov rcx, rdx",
"and rcx, r8",
}
atomic_rmw_cas_3! {
atomic_nand_cmpxchg16b,
"mov rbx, rax",
"and rbx, rsi",
"not rbx",
"mov rcx, rdx",
"and rcx, r8",
"not rcx",
}
atomic_rmw_cas_3! {
atomic_or_cmpxchg16b,
"mov rbx, rax",
"or rbx, rsi",
"mov rcx, rdx",
"or rcx, r8",
}
atomic_rmw_cas_3! {
atomic_xor_cmpxchg16b,
"mov rbx, rax",
"xor rbx, rsi",
"mov rcx, rdx",
"xor rcx, r8",
}
atomic_rmw_cas_2! {
atomic_not_cmpxchg16b,
"mov rbx, rax",
"not rbx",
"mov rcx, rdx",
"not rcx",
}
atomic_rmw_cas_2! {
atomic_neg_cmpxchg16b,
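// new (rbx:rcx) = 0 - prev (rax:rdx): negate the low half (which sets the borrow), then subtract the high half and the borrow from zero.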
"mov rbx, rax",
"neg rbx",
"mov rcx, 0",
"sbb rcx, rdx",
}
atomic_rmw_cas_3! {
atomic_max_cmpxchg16b,
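// Signed 128-bit max: cmp/sbb compute the flags of val (rsi:r8) - prev (rax:rdx) (the sbb result itself is discarded);
// new defaults to val, and cmovl keeps prev when val < prev.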
"cmp rsi, rax",
"mov rcx, r8",
"sbb rcx, rdx",
"mov rcx, r8",
"cmovl rcx, rdx",
"mov rbx, rsi",
"cmovl rbx, rax",
}
atomic_rmw_cas_3! {
atomic_umax_cmpxchg16b,
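// Unsigned variant of the max scheme above: cmovb keeps prev when val is below prev.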
"cmp rsi, rax",
"mov rcx, r8",
"sbb rcx, rdx",
"mov rcx, r8",
"cmovb rcx, rdx",
"mov rbx, rsi",
"cmovb rbx, rax",
}
atomic_rmw_cas_3! {
atomic_min_cmpxchg16b,
"cmp rsi, rax",
"mov rcx, r8",
"sbb rcx, rdx",
"mov rcx, r8",
"cmovge rcx, rdx",
"mov rbx, rsi",
"cmovge rbx, rax",
}
atomic_rmw_cas_3! {
atomic_umin_cmpxchg16b,
"cmp rsi, rax",
"mov rcx, r8",
"sbb rcx, rdx",
"mov rcx, r8",
"cmovae rcx, rdx",
"mov rbx, rsi",
"cmovae rbx, rax",
}
macro_rules! select_atomic_rmw {
(
unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?;
cmpxchg16b = $cmpxchg16b_fn:ident;
fallback = $seqcst_fallback_fn:ident;
) => {
// If cmpxchg16b is available at compile-time, we can always use cmpxchg16b_fn.
#[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
use self::$cmpxchg16b_fn as $name;
// Otherwise, we need to do run-time detection and can use cmpxchg16b_fn only if cmpxchg16b is available.
#[cfg(not(any(
target_feature = "cmpxchg16b",
portable_atomic_target_feature = "cmpxchg16b",
)))]
#[inline]
unsafe fn $name($($arg)*, _order: Ordering) $(-> $ret_ty)? {
fn_alias! {
// See cmpxchg16b() for target_feature(enable).
#[cfg_attr(
not(portable_atomic_no_cmpxchg16b_target_feature),
target_feature(enable = "cmpxchg16b")
)]
unsafe fn($($arg)*) $(-> $ret_ty)?;
// cmpxchg16b is always SeqCst.
cmpxchg16b_seqcst_fn = $cmpxchg16b_fn(Ordering::SeqCst);
}
// SAFETY: the caller must uphold the safety contract.
// we only call cmpxchg16b_fn if cmpxchg16b is available.
unsafe {
ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
if detect::detect().cmpxchg16b() {
cmpxchg16b_seqcst_fn
} else {
// Use SeqCst because cmpxchg16b is always SeqCst.
fallback::$seqcst_fallback_fn
}
})
}
}
};
}
select_atomic_rmw! {
unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128;
cmpxchg16b = atomic_swap_cmpxchg16b;
fallback = atomic_swap_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128;
cmpxchg16b = atomic_add_cmpxchg16b;
fallback = atomic_add_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128;
cmpxchg16b = atomic_sub_cmpxchg16b;
fallback = atomic_sub_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128;
cmpxchg16b = atomic_and_cmpxchg16b;
fallback = atomic_and_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128;
cmpxchg16b = atomic_nand_cmpxchg16b;
fallback = atomic_nand_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128;
cmpxchg16b = atomic_or_cmpxchg16b;
fallback = atomic_or_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128;
cmpxchg16b = atomic_xor_cmpxchg16b;
fallback = atomic_xor_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128;
cmpxchg16b = atomic_max_cmpxchg16b;
fallback = atomic_max_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128;
cmpxchg16b = atomic_umax_cmpxchg16b;
fallback = atomic_umax_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128;
cmpxchg16b = atomic_min_cmpxchg16b;
fallback = atomic_min_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128;
cmpxchg16b = atomic_umin_cmpxchg16b;
fallback = atomic_umin_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_not(dst: *mut u128) -> u128;
cmpxchg16b = atomic_not_cmpxchg16b;
fallback = atomic_not_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_neg(dst: *mut u128) -> u128;
cmpxchg16b = atomic_neg_cmpxchg16b;
fallback = atomic_neg_seqcst;
}
#[inline]
fn is_lock_free() -> bool {
#[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
{
// CMPXCHG16B is available at compile-time.
true
}
#[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))]
{
detect::detect().cmpxchg16b()
}
}
const IS_ALWAYS_LOCK_FREE: bool =
cfg!(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"));
atomic128!(AtomicI128, i128, atomic_max, atomic_min);
atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);
#[allow(clippy::undocumented_unsafe_blocks, clippy::wildcard_imports)]
#[cfg(test)]
mod tests {
use super::*;
test_atomic_int!(i128);
test_atomic_int!(u128);
// load/store/swap implementation is not affected by signedness, so it is
// enough to test only unsigned types.
stress_test!(u128);
}


@@ -0,0 +1,24 @@
# 64-bit atomic implementations on 32-bit architectures
(See the [`atomic128` module](../atomic128) for 128-bit atomic implementations on 64-bit architectures.)
## 64-bit atomic instructions
Here is the table of targets that support 64-bit atomics and the instructions used:
| target_arch | load | store | CAS | RMW | note |
| ----------- | ---- | ----- | --- | --- | ---- |
| x86 | cmpxchg8b or fild or movlps or movq | cmpxchg8b or fistp or movlps | cmpxchg8b | cmpxchg8b | provided by `core::sync::atomic` |
| arm | ldrexd | ldrexd/strexd | ldrexd/strexd | ldrexd/strexd | provided by `core::sync::atomic` for Armv6+, otherwise provided by us for Linux/Android using kuser_cmpxchg64 (see [arm_linux.rs](arm_linux.rs) for more) |
| riscv32 | amocas.d | amocas.d | amocas.d | amocas.d | Requires `zacas` target feature. Both compile-time and run-time detection are supported. <br> Requires rustc 1.59+ |
| hexagon | memd | memd | memd_locked | memd_locked | Unimplemented |
| sparc | ldx | stx | casx | casx | Unimplemented (unsupported in LLVM). Requires `v8plus` and `v9` target feature (Linux is v8plus+v9 by default) |
| m68k | cas2 | cas2 | cas2 | cas2 | Unimplemented (unsupported in LLVM). Requires M68020 or later (Linux is M68020 by default) |
| mips32r6 | llwp | llwp/scwp | llwp/scwp | llwp/scwp | Unimplemented (unsupported in LLVM). Requires Release 6 Paired LL/SC family of instructions |
If `core::sync::atomic` provides 64-bit atomics, we use them.
On compiler versions or platforms where these are not supported, the implementations listed above are used where available; otherwise the fallback implementation is used.
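As a usage-level illustration (a minimal sketch, not code from these modules; `COUNTER` and `bump` are made-up names), the crate's public 64-bit atomic types are what route into the implementations above or into the fallback:

```rust
use core::sync::atomic::Ordering;
use portable_atomic::AtomicU64;

// Usable even on targets whose core::sync::atomic lacks a 64-bit atomic.
static COUNTER: AtomicU64 = AtomicU64::new(0);

fn bump() -> u64 {
    // On pre-v6 Arm Linux this typically lowers to the kuser_cmpxchg64-based RMW
    // loop, on riscv32 with Zacas to an amocas.d-based CAS loop, and otherwise
    // to the fallback implementation.
    COUNTER.fetch_add(1, Ordering::Relaxed)
}
```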
## Run-time CPU feature detection
See the [`detect` module's readme](../detect/README.md) for run-time CPU feature detection.


@@ -0,0 +1,312 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
64-bit atomic implementation using kuser_cmpxchg64 on pre-v6 Arm Linux/Android.
Refs:
- https://github.com/torvalds/linux/blob/v6.13/Documentation/arch/arm/kernel_user_helpers.rst
- https://github.com/rust-lang/compiler-builtins/blob/compiler_builtins-v0.1.124/src/arm_linux.rs
Note: __kuser_cmpxchg64 is always SeqCst.
https://github.com/torvalds/linux/blob/v6.13/arch/arm/kernel/entry-armv.S#L700-L707
Note: On Miri and ThreadSanitizer, which do not support inline assembly, we don't use
this module and use the fallback implementation instead.
*/
// TODO: Since Rust 1.64, the Linux kernel requirement for Rust when using std is 3.2+, so it should
// be possible to omit the dynamic kernel version check if the std feature is enabled on Rust 1.64+.
// https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html
include!("macros.rs");
#[path = "../fallback/outline_atomics.rs"]
mod fallback;
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::{mem, sync::atomic::Ordering};
use crate::utils::{Pair, U64};
// https://github.com/torvalds/linux/blob/v6.13/Documentation/arch/arm/kernel_user_helpers.rst
const KUSER_HELPER_VERSION: usize = 0xFFFF0FFC;
// __kuser_helper_version >= 5 (kernel version 3.1+)
const KUSER_CMPXCHG64: usize = 0xFFFF0F60;
#[inline]
fn __kuser_helper_version() -> i32 {
use core::sync::atomic::AtomicI32;
static CACHE: AtomicI32 = AtomicI32::new(0);
let mut v = CACHE.load(Ordering::Relaxed);
if v != 0 {
return v;
}
// SAFETY: core assumes that at least __kuser_memory_barrier (__kuser_helper_version >= 3,
// kernel version 2.6.15+) is available on this platform. __kuser_helper_version
// is always available on such a platform.
v = unsafe { crate::utils::ptr::with_exposed_provenance::<i32>(KUSER_HELPER_VERSION).read() };
CACHE.store(v, Ordering::Relaxed);
v
}
#[inline]
fn has_kuser_cmpxchg64() -> bool {
// Note: the detect_false cfg is intended to make it easy for developers to test
// cases where normally-available features are unavailable; it is not a public API.
if cfg!(portable_atomic_test_detect_false) {
return false;
}
__kuser_helper_version() >= 5
}
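// Thin wrapper around the kernel-provided helper at KUSER_CMPXCHG64 (0xFFFF0F60);
// the helper returns 0 when the compare-and-swap succeeded, which is mapped to `true`.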
#[inline]
unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut u64) -> bool {
// SAFETY: the caller must uphold the safety contract.
unsafe {
let f: extern "C" fn(*const u64, *const u64, *mut u64) -> u32 =
mem::transmute(crate::utils::ptr::with_exposed_provenance::<()>(KUSER_CMPXCHG64));
f(old_val, new_val, ptr) == 0
}
}
// 64-bit atomic load by two 32-bit atomic loads.
#[inline]
unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
let (out_lo, out_hi);
asm!(
"ldr {out_lo}, [{src}]", // atomic { out_lo = *src }
"ldr {out_hi}, [{src}, #4]", // atomic { out_hi = *src.byte_add(4) }
src = in(reg) src,
out_lo = out(reg) out_lo,
out_hi = out(reg) out_hi,
options(pure, nostack, preserves_flags, readonly),
);
U64 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
}
}
macro_rules! select_atomic {
(
unsafe fn $name:ident($dst:ident: *mut u64 $(, $($arg:tt)*)?) $(-> $ret_ty:ty)? {
|$kuser_cmpxchg64_fn_binding:ident| $($kuser_cmpxchg64_fn_body:tt)*
}
fallback = $seqcst_fallback_fn:ident
) => {
#[inline]
unsafe fn $name($dst: *mut u64 $(, $($arg)*)?, _: Ordering) $(-> $ret_ty)? {
unsafe fn kuser_cmpxchg64_fn($dst: *mut u64 $(, $($arg)*)?) $(-> $ret_ty)? {
debug_assert!($dst as usize % 8 == 0);
debug_assert!(has_kuser_cmpxchg64());
// SAFETY: the caller must uphold the safety contract.
unsafe {
loop {
// These are not single-copy atomic reads, but that is fine because the subsequent
// CAS will check for consistency.
//
// Arm's memory model allows mixed-sized atomic access.
// https://github.com/rust-lang/unsafe-code-guidelines/issues/345#issuecomment-1172891466
//
// Note that the C++20 memory model does not allow mixed-sized atomic access,
// so we must use inline assembly to implement byte_wise_atomic_load.
// (i.e., byte-wise atomic based on the standard library's atomic types
// cannot be used here).
let prev = byte_wise_atomic_load($dst);
let next = {
let $kuser_cmpxchg64_fn_binding = prev;
$($kuser_cmpxchg64_fn_body)*
};
if __kuser_cmpxchg64(&prev, &next, $dst) {
return prev;
}
}
}
}
// SAFETY: the caller must uphold the safety contract.
// we only call __kuser_cmpxchg64 if it is available.
unsafe {
ifunc!(unsafe fn($dst: *mut u64 $(, $($arg)*)?) $(-> $ret_ty)? {
if has_kuser_cmpxchg64() {
kuser_cmpxchg64_fn
} else {
// Use SeqCst because __kuser_cmpxchg64 is always SeqCst.
fallback::$seqcst_fallback_fn
}
})
}
}
};
}
select_atomic! {
unsafe fn atomic_load(src: *mut u64) -> u64 {
|old| old
}
fallback = atomic_load_seqcst
}
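// The kuser helpers provide no plain 64-bit store, so store is implemented as a
// swap whose previous value is discarded.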
#[inline]
unsafe fn atomic_store(dst: *mut u64, val: u64, order: Ordering) {
// SAFETY: the caller must uphold the safety contract.
unsafe {
atomic_swap(dst, val, order);
}
}
select_atomic! {
unsafe fn atomic_swap(dst: *mut u64, val: u64) -> u64 {
|_x| val
}
fallback = atomic_swap_seqcst
}
#[inline]
unsafe fn atomic_compare_exchange(
dst: *mut u64,
old: u64,
new: u64,
_: Ordering,
_: Ordering,
) -> Result<u64, u64> {
unsafe fn kuser_cmpxchg64_fn(dst: *mut u64, old: u64, new: u64) -> (u64, bool) {
debug_assert!(dst as usize % 8 == 0);
debug_assert!(has_kuser_cmpxchg64());
// SAFETY: the caller must uphold the safety contract.
unsafe {
loop {
// See select_atomic! for more.
let prev = byte_wise_atomic_load(dst);
let next = if prev == old { new } else { prev };
if __kuser_cmpxchg64(&prev, &next, dst) {
return (prev, prev == old);
}
}
}
}
// SAFETY: the caller must uphold the safety contract.
// we only call __kuser_cmpxchg64 if it is available.
let (prev, ok) = unsafe {
ifunc!(unsafe fn(dst: *mut u64, old: u64, new: u64) -> (u64, bool) {
if has_kuser_cmpxchg64() {
kuser_cmpxchg64_fn
} else {
// Use SeqCst because __kuser_cmpxchg64 is always SeqCst.
fallback::atomic_compare_exchange_seqcst
}
})
};
if ok { Ok(prev) } else { Err(prev) }
}
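// The loop above retries until the helper either succeeds or observes a value
// different from `old`, so this strong compare_exchange also serves as the weak variant.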
use self::atomic_compare_exchange as atomic_compare_exchange_weak;
select_atomic! {
unsafe fn atomic_add(dst: *mut u64, val: u64) -> u64 {
|x| x.wrapping_add(val)
}
fallback = atomic_add_seqcst
}
select_atomic! {
unsafe fn atomic_sub(dst: *mut u64, val: u64) -> u64 {
|x| x.wrapping_sub(val)
}
fallback = atomic_sub_seqcst
}
select_atomic! {
unsafe fn atomic_and(dst: *mut u64, val: u64) -> u64 {
|x| x & val
}
fallback = atomic_and_seqcst
}
select_atomic! {
unsafe fn atomic_nand(dst: *mut u64, val: u64) -> u64 {
|x| !(x & val)
}
fallback = atomic_nand_seqcst
}
select_atomic! {
unsafe fn atomic_or(dst: *mut u64, val: u64) -> u64 {
|x| x | val
}
fallback = atomic_or_seqcst
}
select_atomic! {
unsafe fn atomic_xor(dst: *mut u64, val: u64) -> u64 {
|x| x ^ val
}
fallback = atomic_xor_seqcst
}
select_atomic! {
unsafe fn atomic_max(dst: *mut u64, val: u64) -> u64 {
|x| {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
{ core::cmp::max(x as i64, val as i64) as u64 }
}
}
fallback = atomic_max_seqcst
}
select_atomic! {
unsafe fn atomic_umax(dst: *mut u64, val: u64) -> u64 {
|x| core::cmp::max(x, val)
}
fallback = atomic_umax_seqcst
}
select_atomic! {
unsafe fn atomic_min(dst: *mut u64, val: u64) -> u64 {
|x| {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
{ core::cmp::min(x as i64, val as i64) as u64 }
}
}
fallback = atomic_min_seqcst
}
select_atomic! {
unsafe fn atomic_umin(dst: *mut u64, val: u64) -> u64 {
|x| core::cmp::min(x, val)
}
fallback = atomic_umin_seqcst
}
select_atomic! {
unsafe fn atomic_not(dst: *mut u64) -> u64 {
|x| !x
}
fallback = atomic_not_seqcst
}
select_atomic! {
unsafe fn atomic_neg(dst: *mut u64) -> u64 {
|x| x.wrapping_neg()
}
fallback = atomic_neg_seqcst
}
#[inline]
fn is_lock_free() -> bool {
has_kuser_cmpxchg64()
}
const IS_ALWAYS_LOCK_FREE: bool = false;
atomic64!(AtomicI64, i64, atomic_max, atomic_min);
atomic64!(AtomicU64, u64, atomic_umax, atomic_umin);
#[allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks,
clippy::wildcard_imports
)]
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn kuser_helper_version() {
let version = __kuser_helper_version();
assert!(version >= 5, "{:?}", version);
assert_eq!(version, unsafe {
crate::utils::ptr::with_exposed_provenance::<i32>(KUSER_HELPER_VERSION).read()
});
}
test_atomic_int!(i64);
test_atomic_int!(u64);
// load/store/swap implementation is not affected by signedness, so it is
// enough to test only unsigned types.
stress_test!(u64);
}


@@ -0,0 +1,230 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
macro_rules! atomic64 {
($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => {
#[repr(C, align(8))]
pub(crate) struct $atomic_type {
v: core::cell::UnsafeCell<$int_type>,
}
// Send is implicitly implemented.
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock.
unsafe impl Sync for $atomic_type {}
impl_default_no_fetch_ops!($atomic_type, $int_type);
impl_default_bit_opts!($atomic_type, $int_type);
impl $atomic_type {
#[inline]
pub(crate) const fn new(v: $int_type) -> Self {
Self { v: core::cell::UnsafeCell::new(v) }
}
#[inline]
pub(crate) fn is_lock_free() -> bool {
is_lock_free()
}
pub(crate) const IS_ALWAYS_LOCK_FREE: bool = IS_ALWAYS_LOCK_FREE;
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn load(&self, order: Ordering) -> $int_type {
crate::utils::assert_load_ordering(order);
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
atomic_load(self.v.get().cast::<u64>(), order) as $int_type
}
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn store(&self, val: $int_type, order: Ordering) {
crate::utils::assert_store_ordering(order);
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
atomic_store(self.v.get().cast::<u64>(), val as u64, order)
}
}
#[inline]
pub(crate) fn swap(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
atomic_swap(self.v.get().cast::<u64>(), val as u64, order) as $int_type
}
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
crate::utils::assert_compare_exchange_ordering(success, failure);
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
match atomic_compare_exchange(
self.v.get().cast::<u64>(),
current as u64,
new as u64,
success,
failure,
) {
Ok(v) => Ok(v as $int_type),
Err(v) => Err(v as $int_type),
}
}
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange_weak(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
crate::utils::assert_compare_exchange_ordering(success, failure);
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
match atomic_compare_exchange_weak(
self.v.get().cast::<u64>(),
current as u64,
new as u64,
success,
failure,
) {
Ok(v) => Ok(v as $int_type),
Err(v) => Err(v as $int_type),
}
}
}
#[inline]
pub(crate) fn fetch_add(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
atomic_add(self.v.get().cast::<u64>(), val as u64, order) as $int_type
}
}
#[inline]
pub(crate) fn fetch_sub(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
atomic_sub(self.v.get().cast::<u64>(), val as u64, order) as $int_type
}
}
#[inline]
pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
atomic_and(self.v.get().cast::<u64>(), val as u64, order) as $int_type
}
}
#[inline]
pub(crate) fn fetch_nand(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
atomic_nand(self.v.get().cast::<u64>(), val as u64, order) as $int_type
}
}
#[inline]
pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
atomic_or(self.v.get().cast::<u64>(), val as u64, order) as $int_type
}
}
#[inline]
pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
atomic_xor(self.v.get().cast::<u64>(), val as u64, order) as $int_type
}
}
#[inline]
pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
$atomic_max(self.v.get().cast::<u64>(), val as u64, order) as $int_type
}
}
#[inline]
pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
$atomic_min(self.v.get().cast::<u64>(), val as u64, order) as $int_type
}
}
#[inline]
pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
atomic_not(self.v.get().cast::<u64>(), order) as $int_type
}
}
#[inline]
pub(crate) fn not(&self, order: Ordering) {
self.fetch_not(order);
}
#[inline]
pub(crate) fn fetch_neg(&self, order: Ordering) -> $int_type {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
// SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock
// and the raw pointer passed in is valid because we got it from a reference.
unsafe {
atomic_neg(self.v.get().cast::<u64>(), order) as $int_type
}
}
#[inline]
pub(crate) fn neg(&self, order: Ordering) {
self.fetch_neg(order);
}
#[inline]
pub(crate) const fn as_ptr(&self) -> *mut $int_type {
self.v.get()
}
}
};
}


@@ -0,0 +1,40 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
64-bit atomic implementations on 32-bit architectures
See README.md for details.
*/
// pre-v6 Arm Linux
#[cfg(feature = "fallback")]
// Miri and Sanitizer do not support inline assembly.
#[cfg(all(
target_arch = "arm",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
not(portable_atomic_no_outline_atomics),
))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))]
pub(super) mod arm_linux;
// riscv32
// Miri and Sanitizer do not support inline assembly.
#[cfg(all(
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(
target_feature = "zacas",
portable_atomic_target_feature = "zacas",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
any(target_os = "linux", target_os = "android"),
),
),
))]
pub(super) mod riscv32;


@@ -0,0 +1,626 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
64-bit atomic implementation on riscv32.
This architecture provides the following 64-bit atomic instructions:
- amocas.d: CAS (Zacas extension)
See "Atomic operation overview by architecture" in atomic-maybe-uninit for a more comprehensive and
detailed description of the atomic and synchronization instructions in this architecture:
https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md#risc-v
Note: On Miri and ThreadSanitizer, which do not support inline assembly, we don't use
this module and use the fallback implementation instead.
Refs:
- RISC-V Instruction Set Manual
"Zacas" Extension for Atomic Compare-and-Swap (CAS) Instructions
https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/zacas.adoc
- RISC-V Atomics ABI Specification
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/draft-20240829-13bfa9f54634cb60d86b9b333e109f077805b4b3/riscv-atomic.adoc
Generated asm:
- riscv32imac (+zacas) https://godbolt.org/z/9bTdfhKre
*/
// TODO: merge duplicated code with atomic128/riscv64.rs
include!("macros.rs");
#[cfg(not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas")))]
#[path = "../fallback/outline_atomics.rs"]
mod fallback;
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(any(test, not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas"))))]
#[cfg(any(target_os = "linux", target_os = "android"))]
#[path = "../detect/riscv_linux.rs"]
mod detect;
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::sync::atomic::Ordering;
use crate::utils::{Pair, U64};
macro_rules! debug_assert_zacas {
() => {
#[cfg(not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas")))]
{
debug_assert!(detect::detect().zacas());
}
};
}
// The `.option arch, +zacas` directive requires LLVM 20, so we use the .4byte directive for older LLVM.
// Note that the `.insn <value>` directive requires LLVM 19.
// https://github.com/llvm/llvm-project/commit/2a086dce691e3cc34a2fc27f4fb255bb2cbbfac9
// https://github.com/riscv-non-isa/riscv-asm-manual/blob/ad0de8c004e29c9a7ac33cfd054f4d4f9392f2fb/src/asm-manual.adoc#arch
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! start_zacas {
() => {
".option push\n.option arch, +zacas"
};
}
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! end_zacas {
() => {
".option pop"
};
}
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! atomic_rmw_amocas_order {
($op:ident, $order:ident) => {
atomic_rmw_amocas_order!($op, $order, failure = $order)
};
($op:ident, $order:ident, failure = $failure:ident) => {
match $order {
Ordering::Relaxed => $op!("", ""),
Ordering::Acquire => $op!("", ".aq"),
Ordering::Release => $op!("", ".rl"),
Ordering::AcqRel => $op!("", ".aqrl"),
Ordering::SeqCst if $failure == Ordering::SeqCst => $op!("fence rw,rw", ".aqrl"),
Ordering::SeqCst => $op!("", ".aqrl"),
_ => unreachable!(),
}
};
}
#[cfg(portable_atomic_pre_llvm_20)]
macro_rules! atomic_rmw_amocas_order_insn {
($op:ident, $order:ident) => {
atomic_rmw_amocas_order_insn!($op, $order, failure = $order)
};
($op:ident, $order:ident, failure = $failure:ident) => {
match $order {
Ordering::Relaxed => $op!("", "8"),
Ordering::Acquire => $op!("", "c"),
Ordering::Release => $op!("", "a"),
Ordering::AcqRel => $op!("", "e"),
Ordering::SeqCst if $failure == Ordering::SeqCst => $op!("fence rw,rw", "e"),
Ordering::SeqCst => $op!("", "e"),
_ => unreachable!(),
}
};
}
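// The hex digit selected above becomes the second nibble of the .4byte opcodes
// used below; it carries the amocas.d aq (bit 26) and rl (bit 25) ordering bits:
// 8 = neither, c = aq, a = rl, e = aq+rl.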
// If zacas is available at compile-time, we can always use zacas_fn.
#[cfg(any(target_feature = "zacas", portable_atomic_target_feature = "zacas"))]
use self::atomic_load_zacas as atomic_load;
// Otherwise, we need to do run-time detection and can use zacas_fn only if zacas is available.
#[cfg(not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas")))]
#[inline]
unsafe fn atomic_load(src: *mut u64, order: Ordering) -> u64 {
fn_alias! {
// inline(never) is just a hint and not strictly necessary because we use the
// ifunc helper macro, but it is kept for clarity.
#[inline(never)]
unsafe fn(src: *mut u64) -> u64;
atomic_load_zacas_relaxed = atomic_load_zacas(Ordering::Relaxed);
atomic_load_zacas_acquire = atomic_load_zacas(Ordering::Acquire);
atomic_load_zacas_seqcst = atomic_load_zacas(Ordering::SeqCst);
}
// SAFETY: the caller must uphold the safety contract.
// we only call atomic_load_zacas if zacas is available.
unsafe {
match order {
Ordering::Relaxed => {
ifunc!(unsafe fn(src: *mut u64) -> u64 {
if detect::detect().zacas() {
atomic_load_zacas_relaxed
} else {
fallback::atomic_load_non_seqcst
}
})
}
Ordering::Acquire => {
ifunc!(unsafe fn(src: *mut u64) -> u64 {
if detect::detect().zacas() {
atomic_load_zacas_acquire
} else {
fallback::atomic_load_non_seqcst
}
})
}
Ordering::SeqCst => {
ifunc!(unsafe fn(src: *mut u64) -> u64 {
if detect::detect().zacas() {
atomic_load_zacas_seqcst
} else {
fallback::atomic_load_seqcst
}
})
}
_ => unreachable!(),
}
}
}
#[inline]
unsafe fn atomic_load_zacas(src: *mut u64, order: Ordering) -> u64 {
debug_assert!(src as usize % 8 == 0);
debug_assert_zacas!();
let (out_lo, out_hi);
// SAFETY: the caller must uphold the safety contract.
unsafe {
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! load {
($fence:tt, $asm_order:tt) => {
asm!(
start_zacas!(),
$fence, // fence
concat!("amocas.d", $asm_order, " a2, a2, 0({src})"), // atomic { if *dst == a2:a3 { *dst = a2:a3 } else { a2:a3 = *dst } }
end_zacas!(),
src = in(reg) ptr_reg!(src),
inout("a2") 0_u32 => out_lo,
inout("a3") 0_u32 => out_hi,
options(nostack, preserves_flags),
)
};
}
#[cfg(not(portable_atomic_pre_llvm_20))]
atomic_rmw_amocas_order!(load, order);
#[cfg(portable_atomic_pre_llvm_20)]
macro_rules! load {
($fence:tt, $insn_order:tt) => {
asm!(
$fence, // fence
// amocas.d{,.aq,.rl,.aqrl} a2, a2, (a0) // atomic { if *a0 == a2:a3 { *a0 = a2:a3 } else { a2:a3 = *a0 } }
concat!(".4byte 0x2", $insn_order, "c5362f"),
in("a0") ptr_reg!(src),
inout("a2") 0_u32 => out_lo,
inout("a3") 0_u32 => out_hi,
options(nostack, preserves_flags),
)
};
}
#[cfg(portable_atomic_pre_llvm_20)]
atomic_rmw_amocas_order_insn!(load, order);
U64 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
}
}
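// amocas.d is the only 64-bit atomic primitive used here, so store is implemented
// as a swap whose previous value is discarded.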
#[inline]
unsafe fn atomic_store(dst: *mut u64, val: u64, order: Ordering) {
// SAFETY: the caller must uphold the safety contract.
unsafe {
atomic_swap(dst, val, order);
}
}
#[inline]
unsafe fn atomic_compare_exchange(
dst: *mut u64,
old: u64,
new: u64,
success: Ordering,
failure: Ordering,
) -> Result<u64, u64> {
#[cfg(any(target_feature = "zacas", portable_atomic_target_feature = "zacas"))]
// SAFETY: the caller must uphold the safety contract.
// cfg guarantees that zacas instructions are available at compile-time.
let (prev, ok) = unsafe { atomic_compare_exchange_zacas(dst, old, new, success, failure) };
#[cfg(not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas")))]
let (prev, ok) = {
fn_alias! {
// inline(never) is just a hint and not strictly necessary because we use the
// ifunc helper macro, but it is kept for clarity.
#[inline(never)]
unsafe fn(dst: *mut u64, old: u64, new: u64) -> (u64, bool);
zacas_relaxed_fn = atomic_compare_exchange_zacas(Ordering::Relaxed, Ordering::Relaxed);
zacas_acquire_fn = atomic_compare_exchange_zacas(Ordering::Acquire, Ordering::Acquire);
zacas_release_fn = atomic_compare_exchange_zacas(Ordering::Release, Ordering::Relaxed);
zacas_acqrel_fn = atomic_compare_exchange_zacas(Ordering::AcqRel, Ordering::Acquire);
zacas_seqcst_fn = atomic_compare_exchange_zacas(Ordering::SeqCst, Ordering::SeqCst);
}
let order = crate::utils::upgrade_success_ordering(success, failure);
// SAFETY: the caller must uphold the safety contract.
// we only call atomic_compare_exchange_zacas if zacas is available.
unsafe {
match order {
Ordering::Relaxed => {
ifunc!(unsafe fn(dst: *mut u64, old: u64, new: u64) -> (u64, bool) {
if detect::detect().zacas() {
zacas_relaxed_fn
} else {
fallback::atomic_compare_exchange_non_seqcst
}
})
}
Ordering::Acquire => {
ifunc!(unsafe fn(dst: *mut u64, old: u64, new: u64) -> (u64, bool) {
if detect::detect().zacas() {
zacas_acquire_fn
} else {
fallback::atomic_compare_exchange_non_seqcst
}
})
}
Ordering::Release => {
ifunc!(unsafe fn(dst: *mut u64, old: u64, new: u64) -> (u64, bool) {
if detect::detect().zacas() {
zacas_release_fn
} else {
fallback::atomic_compare_exchange_non_seqcst
}
})
}
Ordering::AcqRel => {
ifunc!(unsafe fn(dst: *mut u64, old: u64, new: u64) -> (u64, bool) {
if detect::detect().zacas() {
zacas_acqrel_fn
} else {
fallback::atomic_compare_exchange_non_seqcst
}
})
}
Ordering::SeqCst => {
ifunc!(unsafe fn(dst: *mut u64, old: u64, new: u64) -> (u64, bool) {
if detect::detect().zacas() {
zacas_seqcst_fn
} else {
fallback::atomic_compare_exchange_seqcst
}
})
}
_ => unreachable!(),
}
}
};
if ok { Ok(prev) } else { Err(prev) }
}
#[inline]
unsafe fn atomic_compare_exchange_zacas(
dst: *mut u64,
old: u64,
new: u64,
success: Ordering,
failure: Ordering,
) -> (u64, bool) {
debug_assert!(dst as usize % 8 == 0);
debug_assert_zacas!();
let order = crate::utils::upgrade_success_ordering(success, failure);
let old = U64 { whole: old };
let new = U64 { whole: new };
let (prev_lo, prev_hi);
// SAFETY: the caller must uphold the safety contract.
unsafe {
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! cmpxchg {
($fence:tt, $asm_order:tt) => {
asm!(
start_zacas!(),
$fence, // fence
concat!("amocas.d", $asm_order, " a4, a2, 0({dst})"), // atomic { if *dst == a4:a5 { *dst = a2:a3 } else { a4:a5 = *dst } }
end_zacas!(),
dst = in(reg) ptr_reg!(dst),
// must be allocated to even/odd register pair
inout("a4") old.pair.lo => prev_lo,
inout("a5") old.pair.hi => prev_hi,
// must be allocated to even/odd register pair
in("a2") new.pair.lo,
in("a3") new.pair.hi,
options(nostack, preserves_flags),
)
};
}
#[cfg(not(portable_atomic_pre_llvm_20))]
atomic_rmw_amocas_order!(cmpxchg, order, failure = failure);
#[cfg(portable_atomic_pre_llvm_20)]
macro_rules! cmpxchg {
($fence:tt, $insn_order:tt) => {
asm!(
$fence, // fence
// amocas.d{,.aq,.rl,.aqrl} a4, a2, (a0) // atomic { if *a0 == a4:a5 { *a0 = a2:a3 } else { a4:a5 = *a0 } }
concat!(".4byte 0x2", $insn_order, "c5372f"),
in("a0") ptr_reg!(dst),
// must be allocated to even/odd register pair
inout("a4") old.pair.lo => prev_lo,
inout("a5") old.pair.hi => prev_hi,
// must be allocated to even/odd register pair
in("a2") new.pair.lo,
in("a3") new.pair.hi,
options(nostack, preserves_flags),
)
};
}
#[cfg(portable_atomic_pre_llvm_20)]
atomic_rmw_amocas_order_insn!(cmpxchg, order, failure = failure);
let prev = U64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole;
(prev, prev == old.whole)
}
}
// amocas is always strong.
use self::atomic_compare_exchange as atomic_compare_exchange_weak;
// 64-bit atomic load by two 32-bit atomic loads.
#[inline]
unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
let (out_lo, out_hi);
// SAFETY: the caller must uphold the safety contract.
unsafe {
asm!(
"lw {out_lo}, ({src})", // atomic { out_lo = *src }
"lw {out_hi}, 4({src})", // atomic { out_hi = *src.byte_add(4) }
src = in(reg) ptr_reg!(src),
out_lo = out(reg) out_lo,
out_hi = out(reg) out_hi,
options(pure, nostack, preserves_flags, readonly),
);
U64 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
}
}
macro_rules! select_atomic_rmw {
(
unsafe fn $name:ident(dst: *mut u64 $(, $($arg:tt)*)?) $(-> $ret_ty:ty)? {
|$zacas_fn_binding:ident| $($zacas_fn_body:tt)*
}
zacas = $zacas_fn:ident;
non_seqcst_fallback = $non_seqcst_fallback_fn:ident;
seqcst_fallback = $seqcst_fallback_fn:ident;
) => {
#[inline]
unsafe fn $zacas_fn(dst: *mut u64 $(, $($arg)*)?, order: Ordering) $(-> $ret_ty)? {
// SAFETY: the caller must uphold the safety contract.
unsafe {
// These are not single-copy atomic reads, but that is fine because the subsequent
// CAS will check for consistency.
//
// Note that the C++20 memory model does not allow mixed-sized atomic access,
// so we must use inline assembly to implement byte_wise_atomic_load.
// (i.e., byte-wise atomic based on the standard library's atomic types
// cannot be used here).
let mut prev = byte_wise_atomic_load(dst);
loop {
let next = {
let $zacas_fn_binding = prev;
$($zacas_fn_body)*
};
match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) {
Ok(x) => return x,
Err(x) => prev = x,
}
}
}
}
// If zacas is available at compile-time, we can always use zacas_fn.
#[cfg(any(target_feature = "zacas", portable_atomic_target_feature = "zacas"))]
use self::$zacas_fn as $name;
// Otherwise, we need to do run-time detection and can use zacas_fn only if zacas is available.
#[cfg(not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas")))]
#[inline]
unsafe fn $name(dst: *mut u64 $(, $($arg)*)?, order: Ordering) $(-> $ret_ty)? {
fn_alias! {
// inline(never) is just a hint and not strictly necessary because we use the
// ifunc helper macro, but it is kept for clarity.
#[inline(never)]
unsafe fn(dst: *mut u64 $(, $($arg)*)?) $(-> $ret_ty)?;
zacas_relaxed_fn = $zacas_fn(Ordering::Relaxed);
zacas_acquire_fn = $zacas_fn(Ordering::Acquire);
zacas_release_fn = $zacas_fn(Ordering::Release);
zacas_acqrel_fn = $zacas_fn(Ordering::AcqRel);
zacas_seqcst_fn = $zacas_fn(Ordering::SeqCst);
}
// SAFETY: the caller must uphold the safety contract.
// we only call zacas_fn if zacas is available.
unsafe {
match order {
Ordering::Relaxed => {
ifunc!(unsafe fn(dst: *mut u64 $(, $($arg)*)?) $(-> $ret_ty)? {
if detect::detect().zacas() {
zacas_relaxed_fn
} else {
fallback::$non_seqcst_fallback_fn
}
})
}
Ordering::Acquire => {
ifunc!(unsafe fn(dst: *mut u64 $(, $($arg)*)?) $(-> $ret_ty)? {
if detect::detect().zacas() {
zacas_acquire_fn
} else {
fallback::$non_seqcst_fallback_fn
}
})
}
Ordering::Release => {
ifunc!(unsafe fn(dst: *mut u64 $(, $($arg)*)?) $(-> $ret_ty)? {
if detect::detect().zacas() {
zacas_release_fn
} else {
fallback::$non_seqcst_fallback_fn
}
})
}
Ordering::AcqRel => {
ifunc!(unsafe fn(dst: *mut u64 $(, $($arg)*)?) $(-> $ret_ty)? {
if detect::detect().zacas() {
zacas_acqrel_fn
} else {
fallback::$non_seqcst_fallback_fn
}
})
}
Ordering::SeqCst => {
ifunc!(unsafe fn(dst: *mut u64 $(, $($arg)*)?) $(-> $ret_ty)? {
if detect::detect().zacas() {
zacas_seqcst_fn
} else {
fallback::$seqcst_fallback_fn
}
})
}
_ => unreachable!(),
}
}
}
};
}
select_atomic_rmw! {
unsafe fn atomic_swap(dst: *mut u64, val: u64) -> u64 {
|_x| val
}
zacas = atomic_swap_zacas;
non_seqcst_fallback = atomic_swap_non_seqcst;
seqcst_fallback = atomic_swap_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_add(dst: *mut u64, val: u64) -> u64 {
|x| x.wrapping_add(val)
}
zacas = atomic_add_zacas;
non_seqcst_fallback = atomic_add_non_seqcst;
seqcst_fallback = atomic_add_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_sub(dst: *mut u64, val: u64) -> u64 {
|x| x.wrapping_sub(val)
}
zacas = atomic_sub_zacas;
non_seqcst_fallback = atomic_sub_non_seqcst;
seqcst_fallback = atomic_sub_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_and(dst: *mut u64, val: u64) -> u64 {
|x| x & val
}
zacas = atomic_and_zacas;
non_seqcst_fallback = atomic_and_non_seqcst;
seqcst_fallback = atomic_and_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_nand(dst: *mut u64, val: u64) -> u64 {
|x| !(x & val)
}
zacas = atomic_nand_zacas;
non_seqcst_fallback = atomic_nand_non_seqcst;
seqcst_fallback = atomic_nand_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_or(dst: *mut u64, val: u64) -> u64 {
|x| x | val
}
zacas = atomic_or_zacas;
non_seqcst_fallback = atomic_or_non_seqcst;
seqcst_fallback = atomic_or_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_xor(dst: *mut u64, val: u64) -> u64 {
|x| x ^ val
}
zacas = atomic_xor_zacas;
non_seqcst_fallback = atomic_xor_non_seqcst;
seqcst_fallback = atomic_xor_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_max(dst: *mut u64, val: u64) -> u64 {
|x| {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
{ core::cmp::max(x as i64, val as i64) as u64 }
}
}
zacas = atomic_max_zacas;
non_seqcst_fallback = atomic_max_non_seqcst;
seqcst_fallback = atomic_max_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_umax(dst: *mut u64, val: u64) -> u64 {
|x| core::cmp::max(x, val)
}
zacas = atomic_umax_zacas;
non_seqcst_fallback = atomic_umax_non_seqcst;
seqcst_fallback = atomic_umax_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_min(dst: *mut u64, val: u64) -> u64 {
|x| {
#[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
{ core::cmp::min(x as i64, val as i64) as u64 }
}
}
zacas = atomic_min_zacas;
non_seqcst_fallback = atomic_min_non_seqcst;
seqcst_fallback = atomic_min_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_umin(dst: *mut u64, val: u64) -> u64 {
|x| core::cmp::min(x, val)
}
zacas = atomic_umin_zacas;
non_seqcst_fallback = atomic_umin_non_seqcst;
seqcst_fallback = atomic_umin_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_not(dst: *mut u64) -> u64 {
|x| !x
}
zacas = atomic_not_zacas;
non_seqcst_fallback = atomic_not_non_seqcst;
seqcst_fallback = atomic_not_seqcst;
}
select_atomic_rmw! {
unsafe fn atomic_neg(dst: *mut u64) -> u64 {
|x| x.wrapping_neg()
}
zacas = atomic_neg_zacas;
non_seqcst_fallback = atomic_neg_non_seqcst;
seqcst_fallback = atomic_neg_seqcst;
}
#[inline]
fn is_lock_free() -> bool {
#[cfg(any(target_feature = "zacas", portable_atomic_target_feature = "zacas"))]
{
// zacas is available at compile-time.
true
}
#[cfg(not(any(target_feature = "zacas", portable_atomic_target_feature = "zacas")))]
{
detect::detect().zacas()
}
}
const IS_ALWAYS_LOCK_FREE: bool =
cfg!(any(target_feature = "zacas", portable_atomic_target_feature = "zacas"));
atomic64!(AtomicI64, i64, atomic_max, atomic_min);
atomic64!(AtomicU64, u64, atomic_umax, atomic_umin);
#[allow(clippy::undocumented_unsafe_blocks, clippy::wildcard_imports)]
#[cfg(test)]
mod tests {
use super::*;
test_atomic_int!(i64);
test_atomic_int!(u64);
// load/store/swap implementation is not affected by signedness, so it is
// enough to test only unsigned types.
stress_test!(u64);
}

81
vendor/portable-atomic/src/imp/avr.rs vendored Normal file

@@ -0,0 +1,81 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Atomic load/store implementation on AVR.
Operations not supported here are provided by disabling interrupts.
See also src/imp/interrupt/avr.rs.
See "Atomic operation overview by architecture" in atomic-maybe-uninit for a more comprehensive and
detailed description of the atomic and synchronization instructions in this architecture:
https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md#avr
Note: Ordering is always SeqCst.
Refs:
- AVR® Instruction Set Manual, Rev. DS40002198B
https://ww1.microchip.com/downloads/en/DeviceDoc/AVR-InstructionSet-Manual-DS40002198.pdf
- atomic-maybe-uninit
https://github.com/taiki-e/atomic-maybe-uninit
Generated asm:
- avr https://godbolt.org/z/j49rYbj4d
*/
use core::{arch::asm, cell::UnsafeCell, sync::atomic::Ordering};
macro_rules! atomic8 {
($atomic_type:ident, $value_type:ty) => {
#[repr(transparent)]
pub(crate) struct $atomic_type {
v: UnsafeCell<$value_type>,
}
// Send is implicitly implemented for atomic integers, but not for atomic pointers.
// SAFETY: any data races are prevented by atomic operations.
unsafe impl Send for $atomic_type {}
// SAFETY: any data races are prevented by atomic operations.
unsafe impl Sync for $atomic_type {}
impl $atomic_type {
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn load(&self, order: Ordering) -> $value_type {
crate::utils::assert_load_ordering(order);
let src = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
let out;
asm!(
"ld {out}, Z", // atomic { out = *Z }
in("Z") src,
out = out(reg) out,
options(nostack, preserves_flags),
);
out
}
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn store(&self, val: $value_type, order: Ordering) {
crate::utils::assert_store_ordering(order);
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
asm!(
"st Z, {val}", // atomic { *Z = val }
in("Z") dst,
val = in(reg) val,
options(nostack, preserves_flags),
);
}
}
}
};
}
atomic8!(AtomicI8, i8);
atomic8!(AtomicU8, u8);


@@ -0,0 +1,431 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Wrap the standard library's atomic types in newtype.
This is not a reexport, because we want to backport changes like
https://github.com/rust-lang/rust/pull/98383 to old compilers.
*/
use core::{cell::UnsafeCell, marker::PhantomData, sync::atomic::Ordering};
// core::panic::RefUnwindSafe is only available on Rust 1.56+, so on pre-1.56
// Rust, we implement RefUnwindSafe when "std" feature is enabled.
// However, on pre-1.56 Rust, the standard library's atomic types implement
// RefUnwindSafe when "linked to std", and that's behavior that our other atomic
// implementations can't emulate, so use PhantomData<NotRefUnwindSafe> to match
// conditions where our other atomic implementations implement RefUnwindSafe.
//
// If we do not do this, for example, downstream that is only tested on x86_64
// may incorrectly assume that AtomicU64 always implements RefUnwindSafe even on
// older rustc, and may be broken on platforms where std AtomicU64 is not available.
struct NotRefUnwindSafe(UnsafeCell<()>);
// SAFETY: this is a marker type and we'll never access the value.
unsafe impl Sync for NotRefUnwindSafe {}
#[repr(transparent)]
pub(crate) struct AtomicPtr<T> {
inner: core::sync::atomic::AtomicPtr<T>,
// Prevent RefUnwindSafe from being propagated from the std atomic type. See NotRefUnwindSafe for more.
_not_ref_unwind_safe: PhantomData<NotRefUnwindSafe>,
}
impl<T> AtomicPtr<T> {
#[inline]
pub(crate) const fn new(v: *mut T) -> Self {
Self { inner: core::sync::atomic::AtomicPtr::new(v), _not_ref_unwind_safe: PhantomData }
}
#[inline]
pub(crate) fn is_lock_free() -> bool {
Self::IS_ALWAYS_LOCK_FREE
}
pub(crate) const IS_ALWAYS_LOCK_FREE: bool = true;
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn load(&self, order: Ordering) -> *mut T {
crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
self.inner.load(order)
}
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn store(&self, ptr: *mut T, order: Ordering) {
crate::utils::assert_store_ordering(order); // for track_caller (compiler can omit double check)
self.inner.store(ptr, order);
}
const_fn! {
const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))];
#[inline]
pub(crate) const fn as_ptr(&self) -> *mut *mut T {
// SAFETY: Self is #[repr(C)] and internally UnsafeCell<*mut T>.
// See also https://github.com/rust-lang/rust/pull/66705 and
// https://github.com/rust-lang/rust/issues/66136#issuecomment-557867116.
unsafe { (*(self as *const Self as *const UnsafeCell<*mut T>)).get() }
}
}
}
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
impl<T> AtomicPtr<T> {
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn compare_exchange(
&self,
current: *mut T,
new: *mut T,
success: Ordering,
failure: Ordering,
) -> Result<*mut T, *mut T> {
crate::utils::assert_compare_exchange_ordering(success, failure); // for track_caller (compiler can omit double check)
#[cfg(portable_atomic_no_stronger_failure_ordering)]
let success = crate::utils::upgrade_success_ordering(success, failure);
self.inner.compare_exchange(current, new, success, failure)
}
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn compare_exchange_weak(
&self,
current: *mut T,
new: *mut T,
success: Ordering,
failure: Ordering,
) -> Result<*mut T, *mut T> {
crate::utils::assert_compare_exchange_ordering(success, failure); // for track_caller (compiler can omit double check)
#[cfg(portable_atomic_no_stronger_failure_ordering)]
let success = crate::utils::upgrade_success_ordering(success, failure);
self.inner.compare_exchange_weak(current, new, success, failure)
}
}
impl<T> core::ops::Deref for AtomicPtr<T> {
type Target = core::sync::atomic::AtomicPtr<T>;
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
fn deref(&self) -> &Self::Target {
&self.inner
}
}
macro_rules! atomic_int {
($atomic_type:ident, $int_type:ident) => {
#[repr(transparent)]
pub(crate) struct $atomic_type {
inner: core::sync::atomic::$atomic_type,
// Prevent RefUnwindSafe from being propagated from the std atomic type. See NotRefUnwindSafe for more.
_not_ref_unwind_safe: PhantomData<NotRefUnwindSafe>,
}
#[cfg_attr(
portable_atomic_no_cfg_target_has_atomic,
cfg(not(portable_atomic_no_atomic_cas))
)]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
impl_default_no_fetch_ops!($atomic_type, $int_type);
#[cfg(not(all(
any(target_arch = "x86", target_arch = "x86_64"),
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
)))]
#[cfg_attr(
portable_atomic_no_cfg_target_has_atomic,
cfg(not(portable_atomic_no_atomic_cas))
)]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
impl_default_bit_opts!($atomic_type, $int_type);
impl $atomic_type {
#[inline]
pub(crate) const fn new(v: $int_type) -> Self {
Self {
inner: core::sync::atomic::$atomic_type::new(v),
_not_ref_unwind_safe: PhantomData,
}
}
#[inline]
pub(crate) fn is_lock_free() -> bool {
Self::IS_ALWAYS_LOCK_FREE
}
// ESP-IDF targets' 64-bit atomics are not lock-free.
// https://github.com/rust-lang/rust/pull/115577#issuecomment-1732259297
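// i.e., always lock-free unless this is a 64-bit type on an ESP-IDF (riscv32/xtensa) target.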
pub(crate) const IS_ALWAYS_LOCK_FREE: bool = cfg!(not(all(
any(target_arch = "riscv32", target_arch = "xtensa"),
target_os = "espidf",
))) | (core::mem::size_of::<$int_type>() < 8);
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn load(&self, order: Ordering) -> $int_type {
crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
self.inner.load(order)
}
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn store(&self, val: $int_type, order: Ordering) {
crate::utils::assert_store_ordering(order); // for track_caller (compiler can omit double check)
self.inner.store(val, order);
}
const_fn! {
const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))];
#[inline]
pub(crate) const fn as_ptr(&self) -> *mut $int_type {
// SAFETY: Self is #[repr(C)] and internally UnsafeCell<$int_type>.
// See also https://github.com/rust-lang/rust/pull/66705 and
// https://github.com/rust-lang/rust/issues/66136#issuecomment-557867116.
unsafe {
(*(self as *const Self as *const UnsafeCell<$int_type>)).get()
}
}
}
}
#[cfg_attr(
portable_atomic_no_cfg_target_has_atomic,
cfg(not(portable_atomic_no_atomic_cas))
)]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
impl $atomic_type {
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn compare_exchange(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
crate::utils::assert_compare_exchange_ordering(success, failure); // for track_caller (compiler can omit double check)
#[cfg(portable_atomic_no_stronger_failure_ordering)]
let success = crate::utils::upgrade_success_ordering(success, failure);
self.inner.compare_exchange(current, new, success, failure)
}
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn compare_exchange_weak(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
crate::utils::assert_compare_exchange_ordering(success, failure); // for track_caller (compiler can omit double check)
#[cfg(portable_atomic_no_stronger_failure_ordering)]
let success = crate::utils::upgrade_success_ordering(success, failure);
self.inner.compare_exchange_weak(current, new, success, failure)
}
#[allow(dead_code)]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
fn fetch_update_<F>(&self, order: Ordering, mut f: F) -> $int_type
where
F: FnMut($int_type) -> $int_type,
{
// This is a private function and all instances of `f` only operate on the value
// loaded, so there is no need to synchronize the first load/failed CAS.
let mut prev = self.load(Ordering::Relaxed);
loop {
let next = f(prev);
match self.compare_exchange_weak(prev, next, order, Ordering::Relaxed) {
Ok(x) => return x,
Err(next_prev) => prev = next_prev,
}
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type {
#[cfg(not(portable_atomic_no_atomic_min_max))]
{
#[cfg(any(
all(
any(target_arch = "aarch64", target_arch = "arm64ec"),
any(target_feature = "lse", portable_atomic_target_feature = "lse"),
),
all(
target_arch = "arm",
not(any(
target_feature = "v6",
portable_atomic_target_feature = "v6",
)),
),
target_arch = "mips",
target_arch = "mips32r6",
target_arch = "mips64",
target_arch = "mips64r6",
target_arch = "powerpc",
target_arch = "powerpc64",
))]
{
// HACK: the following operations are currently broken (at least on qemu-user):
// - aarch64's `AtomicI{8,16}::fetch_{max,min}` (release mode + lse)
// - armv5te's `Atomic{I,U}{8,16}::fetch_{max,min}`
// - mips's `AtomicI8::fetch_{max,min}` (release mode)
// - mipsel's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least)
// - mips64's `AtomicI8::fetch_{max,min}` (release mode)
// - mips64el's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least)
// - powerpc's `AtomicI{8,16}::fetch_{max,min}`
// - powerpc64's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least)
// - powerpc64le's `AtomicU{8,16}::fetch_{max,min}` (release mode + fat LTO)
// See also:
// https://github.com/llvm/llvm-project/issues/61880
// https://github.com/llvm/llvm-project/issues/61881
// https://github.com/llvm/llvm-project/issues/61882
// https://github.com/taiki-e/portable-atomic/issues/2
// https://github.com/rust-lang/rust/issues/100650
if core::mem::size_of::<$int_type>() <= 2 {
return self.fetch_update_(order, |x| core::cmp::max(x, val));
}
}
self.inner.fetch_max(val, order)
}
#[cfg(portable_atomic_no_atomic_min_max)]
{
self.fetch_update_(order, |x| core::cmp::max(x, val))
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type {
#[cfg(not(portable_atomic_no_atomic_min_max))]
{
#[cfg(any(
all(
any(target_arch = "aarch64", target_arch = "arm64ec"),
any(target_feature = "lse", portable_atomic_target_feature = "lse"),
),
all(
target_arch = "arm",
not(any(
target_feature = "v6",
portable_atomic_target_feature = "v6",
)),
),
target_arch = "mips",
target_arch = "mips32r6",
target_arch = "mips64",
target_arch = "mips64r6",
target_arch = "powerpc",
target_arch = "powerpc64",
))]
{
// HACK: the following operations are currently broken (at least on qemu-user):
// - aarch64's `AtomicI{8,16}::fetch_{max,min}` (release mode + lse)
// - armv5te's `Atomic{I,U}{8,16}::fetch_{max,min}`
// - mips's `AtomicI8::fetch_{max,min}` (release mode)
// - mipsel's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least)
// - mips64's `AtomicI8::fetch_{max,min}` (release mode)
// - mips64el's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least)
// - powerpc's `AtomicI{8,16}::fetch_{max,min}`
// - powerpc64's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least)
// - powerpc64le's `AtomicU{8,16}::fetch_{max,min}` (release mode + fat LTO)
// See also:
// https://github.com/llvm/llvm-project/issues/61880
// https://github.com/llvm/llvm-project/issues/61881
// https://github.com/llvm/llvm-project/issues/61882
// https://github.com/taiki-e/portable-atomic/issues/2
// https://github.com/rust-lang/rust/issues/100650
if core::mem::size_of::<$int_type>() <= 2 {
return self.fetch_update_(order, |x| core::cmp::min(x, val));
}
}
self.inner.fetch_min(val, order)
}
#[cfg(portable_atomic_no_atomic_min_max)]
{
self.fetch_update_(order, |x| core::cmp::min(x, val))
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type {
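            // XOR with !0 (all bits set) flips every bit, i.e. a bitwise NOT.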
self.fetch_xor(!0, order)
}
#[cfg(not(all(
any(target_arch = "x86", target_arch = "x86_64"),
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
)))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn not(&self, order: Ordering) {
self.fetch_not(order);
}
// TODO: provide asm-based implementation on AArch64 without FEAT_LSE, Armv7, RISC-V, etc.
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_neg(&self, order: Ordering) -> $int_type {
self.fetch_update_(order, $int_type::wrapping_neg)
}
#[cfg(not(all(
any(target_arch = "x86", target_arch = "x86_64"),
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
)))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn neg(&self, order: Ordering) {
self.fetch_neg(order);
}
}
impl core::ops::Deref for $atomic_type {
type Target = core::sync::atomic::$atomic_type;
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
fn deref(&self) -> &Self::Target {
&self.inner
}
}
};
}
atomic_int!(AtomicIsize, isize);
atomic_int!(AtomicUsize, usize);
#[cfg(not(portable_atomic_no_atomic_load_store))]
atomic_int!(AtomicI8, i8);
#[cfg(not(portable_atomic_no_atomic_load_store))]
atomic_int!(AtomicU8, u8);
#[cfg(not(portable_atomic_no_atomic_load_store))]
atomic_int!(AtomicI16, i16);
#[cfg(not(portable_atomic_no_atomic_load_store))]
atomic_int!(AtomicU16, u16);
#[cfg(not(portable_atomic_no_atomic_load_store))]
#[cfg(not(target_pointer_width = "16"))]
atomic_int!(AtomicI32, i32);
#[cfg(not(portable_atomic_no_atomic_load_store))]
#[cfg(not(target_pointer_width = "16"))]
atomic_int!(AtomicU32, u32);
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_64)))]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(any(
target_has_atomic = "64",
not(any(target_pointer_width = "16", target_pointer_width = "32")),
))
)]
atomic_int!(AtomicI64, i64);
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_64)))]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(any(
target_has_atomic = "64",
not(any(target_pointer_width = "16", target_pointer_width = "32")),
))
)]
atomic_int!(AtomicU64, u64);


@@ -0,0 +1,38 @@
# Run-time CPU feature detection
This module has run-time CPU feature detection implementations.
Here is the table of targets that support run-time CPU feature detection and the instruction or API used:
| target_arch | target_os/target_env | instruction/API | features | note |
| ----------- | -------------------- | --------------- | -------- | ---- |
| x86_64 | all (except for sgx) | cpuid | all | Enabled by default |
| aarch64 | linux (gnu/ohos/uclibc) | getauxval | all | Enabled by default (dlsym is used by default if needed for compatibility with older versions) |
| aarch64 | linux (musl) | getauxval | all | Only enabled by default when dynamic linking or `std` feature enabled (both disabled by default, see [auxv.rs](auxv.rs)) |
| aarch64 | android | getauxval | all | Enabled by default |
| aarch64 | freebsd | elf_aux_info | all | Enabled by default |
| aarch64 | netbsd | sysctlbyname | all | Enabled by default |
| aarch64 | openbsd | sysctl | all | Enabled by default |
| aarch64 | macos/ios/tvos/watchos/visionos | sysctlbyname | all | Currently only used in tests (see [aarch64_apple.rs](aarch64_apple.rs)) |
| aarch64 | illumos | getisax | lse, lse2 | Disabled by default (see [aarch64_illumos.rs](aarch64_illumos.rs)) |
| aarch64/arm64ec | windows | IsProcessorFeaturePresent | lse | Enabled by default |
| aarch64 | fuchsia | zx_system_get_features | lse | Enabled by default |
| riscv32/riscv64 | linux/android | riscv_hwprobe | all | Enabled by default |
| powerpc64 | linux (gnu/ohos/uclibc) | getauxval | all | Enabled by default (dlsym is used by default if needed for compatibility with older versions) |
| powerpc64 | linux (musl) | getauxval | all | Only enabled by default when dynamic linking or `std` feature enabled (both disabled by default, see [auxv.rs](auxv.rs)) |
| powerpc64 | freebsd | elf_aux_info | all | Enabled by default (dlsym is used by default for compatibility with older versions) |
| powerpc64 | openbsd | elf_aux_info | all | Enabled by default (dlsym is used by default for compatibility with older versions) |
| powerpc64 | aix | getsystemcfg | all | Requires LLVM 20+. Disabled by default (see [powerpc64_aix.rs](powerpc64_aix.rs)) |
Run-time detection is enabled by default on most targets and can be disabled with `--cfg portable_atomic_no_outline_atomics`.
On some targets, run-time detection is disabled by default, mainly for compatibility with incomplete build environments or because support for it is experimental; it can be enabled with `--cfg portable_atomic_outline_atomics`. (When both cfgs are enabled, the `*_no_*` cfg takes precedence.)
`dlsym` usually does not work with static linking, so detection implementations that use `dlsym` for compatibility are disabled if static linking is enabled.
You can use `--cfg portable_atomic_outline_atomics` to force the use of non-`dlsym` implementations and enable run-time detection in such an environment.
For targets not included in the above table, run-time detection is always disabled and works the same as when `--cfg portable_atomic_no_outline_atomics` is set.
See [auxv.rs](auxv.rs) module-level comments for more details on Linux/Android/FreeBSD/OpenBSD.
See also [docs about `portable_atomic_no_outline_atomics` cfg](https://github.com/taiki-e/portable-atomic/blob/HEAD/README.md#optional-cfg-no-outline-atomics) in the top-level readme.
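For orientation, here is a minimal, self-contained sketch (not this crate's actual code; `probe_cpu` is a hypothetical stand-in for the platform APIs listed in the table above) of the probe-once-and-cache pattern that the per-target modules share:
```rust
use std::sync::atomic::{AtomicU32, Ordering};

// Hypothetical probe standing in for getauxval/cpuid/sysctl/etc.; it always
// reports the feature as present so the example runs anywhere.
fn probe_cpu() -> u32 {
    0b10 // bit 1 = "feature present"
}

// Probe once, cache the result in a static atomic, and reuse it afterwards.
// Bit 0 marks "detection already ran"; the remaining bits are feature flags.
fn detect() -> u32 {
    static CACHE: AtomicU32 = AtomicU32::new(0);
    let cached = CACHE.load(Ordering::Relaxed);
    if cached != 0 {
        return cached;
    }
    let info = probe_cpu() | 0b1;
    CACHE.store(info, Ordering::Relaxed);
    info
}

fn main() {
    println!("feature detected: {}", detect() & 0b10 != 0);
}
```
The real modules replace `probe_cpu` with the platform-specific calls from the table and wrap the bits in the `CpuInfo` helper.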


@@ -0,0 +1,591 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Run-time CPU feature detection on AArch64 Linux/Android/FreeBSD/NetBSD/OpenBSD by parsing system registers.
As of nightly-2024-09-07, is_aarch64_feature_detected doesn't support run-time detection on NetBSD.
https://github.com/rust-lang/stdarch/blob/d9466edb4c53cece8686ee6e17b028436ddf4151/crates/std_detect/src/detect/mod.rs
Run-time detection on OpenBSD by is_aarch64_feature_detected is supported on Rust 1.70+.
https://github.com/rust-lang/stdarch/pull/1374
Refs:
- https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers
- https://github.com/torvalds/linux/blob/v6.13/Documentation/arch/arm64/cpu-feature-registers.rst
- https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/aarch64.rs
Supported platforms:
- Linux 4.11+ (emulate mrs instruction)
https://github.com/torvalds/linux/commit/77c97b4ee21290f5f083173d957843b615abbff2
- FreeBSD 12.0+ (emulate mrs instruction)
https://github.com/freebsd/freebsd-src/commit/398810619cb32abf349f8de23f29510b2ee0839b
- NetBSD 9.0+ (through sysctl/sysctlbyname)
https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb
  sysctl/sysctlbyname returns an unsupported error if the operation is not supported,
so we can safely use this on older versions.
- OpenBSD 7.1+ (through sysctl)
https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8
  sysctl returns an unsupported error if the operation is not supported,
so we can safely use this on older versions.
For now, this module is only used on NetBSD/OpenBSD.
On Linux/Android/FreeBSD, we use auxv.rs and this module is test-only because:
- On Linux/Android, this approach requires a higher kernel version than Rust supports,
and also does not work with qemu-user (as of 7.2) and Valgrind (as of 3.24).
(Looking into HWCAP_CPUID in auxvec, it appears that Valgrind is setting it
to false correctly, but qemu-user is setting it to true.)
  - The qemu-user issue seems to be fixed as of 9.2.
- On FreeBSD, this approach does not work on FreeBSD 12 on QEMU (confirmed on
FreeBSD 12.{2,3,4}), and we got SIGILL (worked on FreeBSD 13 and 14).
*/
include!("common.rs");
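// Raw values of the AArch64 ID registers, read by the per-OS `imp` modules below.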
#[cfg_attr(test, derive(Debug, PartialEq))]
struct AA64Reg {
aa64isar0: u64,
aa64isar1: u64,
#[cfg(test)]
aa64isar3: u64,
aa64mmfr2: u64,
}
#[cold]
fn _detect(info: &mut CpuInfo) {
let AA64Reg {
aa64isar0,
aa64isar1,
#[cfg(test)]
aa64isar3,
aa64mmfr2,
} = imp::aa64reg();
// ID_AA64ISAR0_EL1, AArch64 Instruction Set Attribute Register 0
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0
// Atomic, bits [23:20]
// > FEAT_LSE implements the functionality identified by the value 0b0010.
// > FEAT_LSE128 implements the functionality identified by the value 0b0011.
// > From Armv8.1, the value 0b0000 is not permitted.
let atomic = extract(aa64isar0, 23, 20);
if atomic >= 0b0010 {
info.set(CpuInfoFlag::lse);
if atomic >= 0b0011 {
info.set(CpuInfoFlag::lse128);
}
}
// ID_AA64ISAR1_EL1, AArch64 Instruction Set Attribute Register 1
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64ISAR1-EL1--AArch64-Instruction-Set-Attribute-Register-1
// LRCPC, bits [23:20]
// > FEAT_LRCPC implements the functionality identified by the value 0b0001.
// > FEAT_LRCPC2 implements the functionality identified by the value 0b0010.
// > FEAT_LRCPC3 implements the functionality identified by the value 0b0011.
// > From Armv8.3, the value 0b0000 is not permitted.
// > From Armv8.4, the value 0b0001 is not permitted.
if extract(aa64isar1, 23, 20) >= 0b0011 {
info.set(CpuInfoFlag::rcpc3);
}
// ID_AA64ISAR3_EL1, AArch64 Instruction Set Attribute Register 3
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64ISAR3-EL1--AArch64-Instruction-Set-Attribute-Register-3
// LSFE, bits [19:16]
// > FEAT_LSFE implements the functionality identified by the value 0b0001
#[cfg(test)]
if extract(aa64isar3, 19, 16) >= 0b0001 {
info.set(CpuInfoFlag::lsfe);
}
// ID_AA64MMFR2_EL1, AArch64 Memory Model Feature Register 2
// https://developer.arm.com/documentation/ddi0601/2024-12/AArch64-Registers/ID-AA64MMFR2-EL1--AArch64-Memory-Model-Feature-Register-2
// AT, bits [35:32]
// > FEAT_LSE2 implements the functionality identified by the value 0b0001.
// > From Armv8.4, the value 0b0000 is not permitted.
if extract(aa64mmfr2, 35, 32) >= 0b0001 {
info.set(CpuInfoFlag::lse2);
}
}
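// Extracts the bit field [high:low] (both inclusive) from `x`,
// e.g. extract(aa64isar0, 23, 20) yields the 4-bit Atomic field of ID_AA64ISAR0_EL1.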
fn extract(x: u64, high: usize, low: usize) -> u64 {
(x >> low) & ((1 << (high - low + 1)) - 1)
}
#[cfg(not(any(target_os = "netbsd", target_os = "openbsd")))]
mod imp {
// This module is test-only. See parent module docs for details.
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use super::AA64Reg;
pub(super) fn aa64reg() -> AA64Reg {
// SAFETY: This is safe on FreeBSD 12.0+. FreeBSD 11 was EoL on 2021-09-30.
// Note that stdarch has been doing the same thing since before FreeBSD 11 was EoL.
// https://github.com/rust-lang/stdarch/pull/611
unsafe {
let aa64isar0: u64;
asm!(
"mrs {}, ID_AA64ISAR0_EL1",
out(reg) aa64isar0,
options(pure, nomem, nostack, preserves_flags),
);
let aa64isar1: u64;
asm!(
"mrs {}, ID_AA64ISAR1_EL1",
out(reg) aa64isar1,
options(pure, nomem, nostack, preserves_flags),
);
#[cfg(test)]
#[cfg(not(portable_atomic_pre_llvm_18))]
let aa64isar3: u64;
// ID_AA64ISAR3_EL1 is only recognized on LLVM 18+.
// https://github.com/llvm/llvm-project/commit/17baba9fa2728b1b1134f9dccb9318debd5a9a1b
#[cfg(test)]
#[cfg(not(portable_atomic_pre_llvm_18))]
asm!(
"mrs {}, ID_AA64ISAR3_EL1",
out(reg) aa64isar3,
options(pure, nomem, nostack, preserves_flags),
);
let aa64mmfr2: u64;
asm!(
"mrs {}, ID_AA64MMFR2_EL1",
out(reg) aa64mmfr2,
options(pure, nomem, nostack, preserves_flags),
);
AA64Reg {
aa64isar0,
aa64isar1,
#[cfg(test)]
#[cfg(not(portable_atomic_pre_llvm_18))]
aa64isar3,
#[cfg(test)]
#[cfg(portable_atomic_pre_llvm_18)]
aa64isar3: 0,
aa64mmfr2,
}
}
}
}
#[cfg(target_os = "netbsd")]
mod imp {
// NetBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl.
// https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb
// https://github.com/golang/sys/commit/ef9fd89ba245e184bdd308f7f2b4f3c551fa5b0f
use core::{mem, ptr};
use super::AA64Reg;
// libc requires Rust 1.63
#[allow(non_camel_case_types)]
pub(super) mod ffi {
pub(crate) use crate::utils::ffi::{CStr, c_char, c_int, c_size_t, c_void};
sys_struct!({
// Defined in machine/armreg.h.
// https://github.com/NetBSD/src/blob/432a1357026b10c184d8a0ddb683008a23cc7cd9/sys/arch/aarch64/include/armreg.h#L1863
pub(crate) struct aarch64_sysctl_cpu_id {
// NetBSD 9.0+
// https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb
pub(crate) ac_midr: u64,
pub(crate) ac_revidr: u64,
pub(crate) ac_mpidr: u64,
pub(crate) ac_aa64dfr0: u64,
pub(crate) ac_aa64dfr1: u64,
pub(crate) ac_aa64isar0: u64,
pub(crate) ac_aa64isar1: u64,
pub(crate) ac_aa64mmfr0: u64,
pub(crate) ac_aa64mmfr1: u64,
pub(crate) ac_aa64mmfr2: u64,
pub(crate) ac_aa64pfr0: u64,
pub(crate) ac_aa64pfr1: u64,
pub(crate) ac_aa64zfr0: u64,
pub(crate) ac_mvfr0: u32,
pub(crate) ac_mvfr1: u32,
pub(crate) ac_mvfr2: u32,
// NetBSD 10.0+
// https://github.com/NetBSD/src/commit/0c7bdc13f0e332cccec56e307f023b4888638973
pub(crate) ac_pad: u32,
pub(crate) ac_clidr: u64,
pub(crate) ac_ctr: u64,
}
});
sys_fn!({
extern "C" {
// Defined in sys/sysctl.h.
// https://man.netbsd.org/sysctl.3
// https://github.com/NetBSD/src/blob/432a1357026b10c184d8a0ddb683008a23cc7cd9/sys/sys/sysctl.h
pub(crate) fn sysctlbyname(
name: *const c_char,
old_p: *mut c_void,
old_len_p: *mut c_size_t,
new_p: *const c_void,
new_len: c_size_t,
) -> c_int;
}
});
}
pub(super) fn sysctl_cpu_id(name: &ffi::CStr) -> Option<AA64Reg> {
const OUT_LEN: ffi::c_size_t =
mem::size_of::<ffi::aarch64_sysctl_cpu_id>() as ffi::c_size_t;
// SAFETY: all fields of aarch64_sysctl_cpu_id are zero-able and we use
// the result when machdep.cpuN.cpu_id sysctl was successful.
let mut buf: ffi::aarch64_sysctl_cpu_id = unsafe { mem::zeroed() };
let mut out_len = OUT_LEN;
// SAFETY:
        // - `name` is a valid C string.
// - `out_len` does not exceed the size of the value at `buf`.
// - `sysctlbyname` is thread-safe.
let res = unsafe {
ffi::sysctlbyname(
name.as_ptr(),
(&mut buf as *mut ffi::aarch64_sysctl_cpu_id).cast::<ffi::c_void>(),
&mut out_len,
ptr::null_mut(),
0,
)
};
if res != 0 {
return None;
}
Some(AA64Reg {
aa64isar0: buf.ac_aa64isar0,
aa64isar1: buf.ac_aa64isar1,
#[cfg(test)]
aa64isar3: 0,
aa64mmfr2: buf.ac_aa64mmfr2,
})
}
pub(super) fn aa64reg() -> AA64Reg {
// Get system registers for cpu0.
        // If it fails, returns zeroed defaults because the machdep.cpuN.cpu_id sysctl is not available.
// machdep.cpuN.cpu_id sysctl was added in NetBSD 9.0 so it is not available on older versions.
// It is ok to check only cpu0, even if there are more CPUs.
// https://github.com/NetBSD/src/commit/bd9707e06ea7d21b5c24df6dfc14cb37c2819416
// https://github.com/golang/sys/commit/ef9fd89ba245e184bdd308f7f2b4f3c551fa5b0f
match sysctl_cpu_id(c!("machdep.cpu0.cpu_id")) {
Some(cpu_id) => cpu_id,
None => AA64Reg {
aa64isar0: 0,
aa64isar1: 0,
#[cfg(test)]
aa64isar3: 0,
aa64mmfr2: 0,
},
}
}
}
#[cfg(target_os = "openbsd")]
mod imp {
// OpenBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl.
// https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8
// https://github.com/golang/go/commit/cd54ef1f61945459486e9eea2f016d99ef1da925
use core::{mem, ptr};
use super::AA64Reg;
// libc requires Rust 1.63
pub(super) mod ffi {
pub(crate) use crate::utils::ffi::{c_int, c_size_t, c_uint, c_void};
sys_const!({
// Defined in sys/sysctl.h.
// https://github.com/openbsd/src/blob/ed8f5e8d82ace15e4cefca2c82941b15cb1a7830/sys/sys/sysctl.h#L82
pub(crate) const CTL_MACHDEP: c_int = 7;
// Defined in machine/cpu.h.
// https://github.com/openbsd/src/blob/ed8f5e8d82ace15e4cefca2c82941b15cb1a7830/sys/arch/arm64/include/cpu.h#L25-L40
// OpenBSD 7.1+
// https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8
pub(crate) const CPU_ID_AA64ISAR0: c_int = 2;
pub(crate) const CPU_ID_AA64ISAR1: c_int = 3;
// OpenBSD 7.3+
// https://github.com/openbsd/src/commit/c7654cd65262d532212f65123ee3905ba200365c
// However, on OpenBSD 7.3-7.5, querying CPU_ID_AA64MMFR2 always returns 0.
// https://github.com/openbsd/src/commit/e8331b74e5c20302d4bd948c9db722af688ccfc1
pub(crate) const CPU_ID_AA64MMFR2: c_int = 7;
});
sys_fn!({
extern "C" {
// Defined in sys/sysctl.h.
// https://man.openbsd.org/sysctl.2
// https://github.com/openbsd/src/blob/ed8f5e8d82ace15e4cefca2c82941b15cb1a7830/sys/sys/sysctl.h
pub(crate) fn sysctl(
name: *const c_int,
name_len: c_uint,
old_p: *mut c_void,
old_len_p: *mut c_size_t,
new_p: *mut c_void,
new_len: c_size_t,
) -> c_int;
}
});
}
// sysctl returns an unsupported error if operation is not supported,
// so we can safely use this function on older versions of OpenBSD.
pub(super) fn aa64reg() -> AA64Reg {
let aa64isar0 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR0]).unwrap_or(0);
let aa64isar1 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR1]).unwrap_or(0);
let aa64mmfr2 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64MMFR2]).unwrap_or(0);
AA64Reg {
aa64isar0,
aa64isar1,
#[cfg(test)]
aa64isar3: 0,
aa64mmfr2,
}
}
fn sysctl64(mib: &[ffi::c_int]) -> Option<u64> {
const OUT_LEN: ffi::c_size_t = mem::size_of::<u64>() as ffi::c_size_t;
let mut out = 0_u64;
let mut out_len = OUT_LEN;
#[allow(clippy::cast_possible_truncation)]
let mib_len = mib.len() as ffi::c_uint;
// SAFETY:
// - `mib.len()` does not exceed the size of `mib`.
// - `out_len` does not exceed the size of `out`.
// - `sysctl` is thread-safe.
let res = unsafe {
ffi::sysctl(
mib.as_ptr(),
mib_len,
(&mut out as *mut u64).cast::<ffi::c_void>(),
&mut out_len,
ptr::null_mut(),
0,
)
};
if res == -1 {
return None;
}
debug_assert_eq!(out_len, OUT_LEN);
Some(out)
}
}
#[allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks,
clippy::wildcard_imports
)]
#[cfg(test)]
mod tests {
use super::*;
#[test]
#[cfg_attr(portable_atomic_test_detect_false, ignore = "detection disabled")]
fn test_aa64reg() {
let AA64Reg { aa64isar0, aa64isar1, aa64isar3, aa64mmfr2 } = imp::aa64reg();
test_helper::eprintln_nocapture!(
"aa64isar0={},aa64isar1={},aa64isar3={},aa64mmfr2={}",
aa64isar0,
aa64isar1,
aa64isar3,
aa64mmfr2,
);
let atomic = extract(aa64isar0, 23, 20);
if detect().lse() {
if detect().lse128() {
assert_eq!(atomic, 0b0011);
} else {
assert_eq!(atomic, 0b0010);
}
} else {
assert_eq!(atomic, 0b0000);
}
let lrcpc = extract(aa64isar1, 23, 20);
if detect().rcpc3() {
assert_eq!(lrcpc, 0b0011);
} else {
assert!(lrcpc < 0b0011, "{}", lrcpc);
}
let lsfe = extract(aa64isar3, 19, 16);
if detect().lsfe() {
assert_eq!(lsfe, 0b0001);
} else {
assert_eq!(lsfe, 0b0000);
}
let at = extract(aa64mmfr2, 35, 32);
if detect().lse2() {
assert_eq!(at, 0b0001);
} else {
assert_eq!(at, 0b0000);
}
}
#[allow(clippy::cast_possible_wrap)]
#[cfg(target_os = "netbsd")]
#[test]
fn test_alternative() {
use crate::utils::ffi::*;
use imp::ffi;
#[cfg(not(portable_atomic_no_asm))]
use std::arch::asm;
use std::{mem, ptr, vec, vec::Vec};
use test_helper::sys;
// Call syscall using asm instead of libc.
// Note that NetBSD does not guarantee the stability of raw syscall as
// much as Linux does (It may actually be stable enough, though: https://lists.llvm.org/pipermail/llvm-dev/2019-June/133393.html).
//
// This is currently used only for testing.
fn sysctl_cpu_id_no_libc(name: &[&[u8]]) -> Result<AA64Reg, c_int> {
// https://github.com/golang/go/blob/go1.24.0/src/syscall/asm_netbsd_arm64.s
#[inline]
unsafe fn sysctl(
name: *const c_int,
name_len: c_uint,
old_p: *mut c_void,
old_len_p: *mut c_size_t,
new_p: *const c_void,
new_len: c_size_t,
) -> Result<c_int, c_int> {
// SAFETY: the caller must uphold the safety contract.
unsafe {
let mut n = sys::SYS___sysctl as u64;
let r: i64;
asm!(
"svc 0",
"b.cc 2f",
"mov x17, x0",
"mov x0, #-1",
"2:",
inout("x17") n,
inout("x0") ptr_reg!(name) => r,
inout("x1") name_len as u64 => _,
in("x2") ptr_reg!(old_p),
in("x3") ptr_reg!(old_len_p),
in("x4") ptr_reg!(new_p),
in("x5") new_len as u64,
options(nostack),
);
#[allow(clippy::cast_possible_truncation)]
if r as c_int == -1 { Err(n as c_int) } else { Ok(r as c_int) }
}
}
// https://github.com/golang/sys/blob/v0.31.0/cpu/cpu_netbsd_arm64.go
fn sysctl_nodes(mib: &mut Vec<i32>) -> Result<Vec<sys::sysctlnode>, i32> {
mib.push(sys::CTL_QUERY);
let mut q_node = sys::sysctlnode {
sysctl_flags: sys::SYSCTL_VERS_1,
..unsafe { mem::zeroed() }
};
let qp = (&mut q_node as *mut sys::sysctlnode).cast::<ffi::c_void>();
let sz = mem::size_of::<sys::sysctlnode>();
let mut olen = 0;
#[allow(clippy::cast_possible_truncation)]
let mib_len = mib.len() as c_uint;
unsafe {
sysctl(mib.as_ptr(), mib_len, ptr::null_mut(), &mut olen, qp, sz)?;
}
let mut nodes = Vec::<sys::sysctlnode>::with_capacity(olen / sz);
let np = nodes.as_mut_ptr().cast::<ffi::c_void>();
unsafe {
sysctl(mib.as_ptr(), mib_len, np, &mut olen, qp, sz)?;
nodes.set_len(olen / sz);
}
mib.pop(); // pop CTL_QUERY
Ok(nodes)
}
fn name_to_mib(parts: &[&[u8]]) -> Result<Vec<i32>, i32> {
let mut mib = vec![];
for (part_no, &part) in parts.iter().enumerate() {
let nodes = sysctl_nodes(&mut mib)?;
for node in nodes {
let mut n = vec![];
for b in node.sysctl_name {
if b != 0 {
n.push(b);
}
}
if n == part {
mib.push(node.sysctl_num);
break;
}
}
if mib.len() != part_no + 1 {
return Err(0);
}
}
Ok(mib)
}
const OUT_LEN: ffi::c_size_t =
mem::size_of::<ffi::aarch64_sysctl_cpu_id>() as ffi::c_size_t;
let mib = name_to_mib(name)?;
let mut buf: ffi::aarch64_sysctl_cpu_id = unsafe { mem::zeroed() };
let mut out_len = OUT_LEN;
#[allow(clippy::cast_possible_truncation)]
let mib_len = mib.len() as c_uint;
unsafe {
sysctl(
mib.as_ptr(),
mib_len,
(&mut buf as *mut ffi::aarch64_sysctl_cpu_id).cast::<ffi::c_void>(),
&mut out_len,
ptr::null_mut(),
0,
)?;
}
Ok(AA64Reg {
aa64isar0: buf.ac_aa64isar0,
aa64isar1: buf.ac_aa64isar1,
aa64isar3: 0,
aa64mmfr2: buf.ac_aa64mmfr2,
})
}
assert_eq!(
imp::sysctl_cpu_id(c!("machdep.cpu0.cpu_id")).unwrap(),
sysctl_cpu_id_no_libc(&[b"machdep", b"cpu0", b"cpu_id"]).unwrap()
);
}
#[cfg(target_os = "openbsd")]
#[test]
fn test_alternative() {
use std::{format, process::Command, string::String};
// Call sysctl command instead of libc API.
//
// This is used only for testing.
struct SysctlMachdepOutput(String);
impl SysctlMachdepOutput {
fn new() -> Self {
let output = Command::new("sysctl").arg("machdep").output().unwrap();
assert!(output.status.success());
let stdout = String::from_utf8(output.stdout).unwrap();
Self(stdout)
}
fn field(&self, name: &str) -> Option<u64> {
Some(
self.0
.lines()
.find_map(|s| s.strip_prefix(&format!("{}=", name)))?
.parse()
.unwrap(),
)
}
}
let AA64Reg { aa64isar0, aa64isar1, aa64isar3, aa64mmfr2 } = imp::aa64reg();
let sysctl_output = SysctlMachdepOutput::new();
assert_eq!(aa64isar0, sysctl_output.field("machdep.id_aa64isar0").unwrap_or(0));
assert_eq!(aa64isar1, sysctl_output.field("machdep.id_aa64isar1").unwrap_or(0));
assert_eq!(aa64isar3, sysctl_output.field("machdep.id_aa64isar3").unwrap_or(0));
assert_eq!(aa64mmfr2, sysctl_output.field("machdep.id_aa64mmfr2").unwrap_or(0));
}
}


@@ -0,0 +1,285 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Run-time CPU feature detection on AArch64 Apple targets by using sysctlbyname.
On macOS, this module is currently only enabled in tests because there are no
instructions that this library currently wants to use which were unavailable on
the M1 but are available on the latest Apple hardware:
```console
$ comm -23 <(rustc --print cfg --target aarch64-apple-darwin -C target-cpu=apple-m4 | grep -F target_feature) <(rustc --print cfg --target aarch64-apple-darwin | grep -F target_feature)
target_feature="bf16"
target_feature="bti"
target_feature="ecv"
target_feature="i8mm"
target_feature="sme"
target_feature="sme-f64f64"
target_feature="sme-i16i64"
target_feature="sme2"
target_feature="v8.5a"
target_feature="v8.6a"
target_feature="v8.7a"
target_feature="wfxt"
```
Refs: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
TODO: non-macOS targets don't always support FEAT_LSE2, but sysctl on them on the App Store is...?
- https://developer.apple.com/forums/thread/9440
- https://nabla-c0d3.github.io/blog/2015/06/16/ios9-security-privacy
- https://github.com/rust-lang/stdarch/pull/1636
*/
include!("common.rs");
use core::{mem, ptr};
// libc requires Rust 1.63
mod ffi {
pub(crate) use crate::utils::ffi::{CStr, c_char, c_int, c_size_t, c_void};
sys_fn!({
extern "C" {
// https://developer.apple.com/documentation/kernel/1387446-sysctlbyname
// https://github.com/apple-oss-distributions/xnu/blob/8d741a5de7ff4191bf97d57b9f54c2f6d4a15585/bsd/sys/sysctl.h
pub(crate) fn sysctlbyname(
name: *const c_char,
old_p: *mut c_void,
old_len_p: *mut c_size_t,
new_p: *mut c_void,
new_len: c_size_t,
) -> c_int;
}
});
}
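// Reads a 32-bit sysctl value by name; returns None if the lookup fails
// (e.g. the name does not exist on this OS version).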
fn sysctlbyname32(name: &ffi::CStr) -> Option<u32> {
const OUT_LEN: ffi::c_size_t = mem::size_of::<u32>() as ffi::c_size_t;
let mut out = 0_u32;
let mut out_len = OUT_LEN;
// SAFETY:
    // - `name` is a valid C string.
// - `out_len` does not exceed the size of `out`.
// - `sysctlbyname` is thread-safe.
let res = unsafe {
ffi::sysctlbyname(
name.as_ptr(),
(&mut out as *mut u32).cast::<ffi::c_void>(),
&mut out_len,
ptr::null_mut(),
0,
)
};
if res != 0 {
return None;
}
debug_assert_eq!(out_len, OUT_LEN);
Some(out)
}
#[cold]
fn _detect(info: &mut CpuInfo) {
macro_rules! check {
($flag:ident, $($name:tt) ||+) => {
if $(sysctlbyname32(c!($name)).unwrap_or(0) != 0) ||+ {
info.set(CpuInfoFlag::$flag);
}
};
}
// On macOS, AArch64 support was added in macOS 11,
// hw.optional.armv8_1_atomics is available on macOS 11+,
// hw.optional.arm.FEAT_* are only available on macOS 12+.
// Query both names in case future versions of macOS remove the old name.
// https://github.com/golang/go/commit/c15593197453b8bf90fc3a9080ba2afeaf7934ea
// https://github.com/google/boringssl/commit/91e0b11eba517d83b910b20fe3740eeb39ecb37e
check!(lse, "hw.optional.arm.FEAT_LSE" || "hw.optional.armv8_1_atomics");
check!(lse2, "hw.optional.arm.FEAT_LSE2");
check!(lse128, "hw.optional.arm.FEAT_LSE128");
#[cfg(test)]
check!(lsfe, "hw.optional.arm.FEAT_LSFE");
check!(rcpc3, "hw.optional.arm.FEAT_LRCPC3");
}
#[allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks,
clippy::wildcard_imports
)]
#[cfg(test)]
mod tests {
use std::{format, process::Command, str, string::String};
use super::*;
#[test]
fn test_alternative() {
use crate::utils::ffi::*;
// Call syscall using asm instead of libc.
// Note that macOS does not guarantee the stability of raw syscall.
// (And they actually changed it: https://go-review.googlesource.com/c/go/+/25495)
//
// This is currently used only for testing.
#[cfg(target_pointer_width = "64")]
fn sysctlbyname32_no_libc(name: &CStr) -> Result<u32, c_int> {
#[cfg(not(portable_atomic_no_asm))]
use std::arch::asm;
use std::mem;
use test_helper::sys;
// https://github.com/apple-oss-distributions/xnu/blob/8d741a5de7ff4191bf97d57b9f54c2f6d4a15585/bsd/kern/syscalls.master#L298
#[inline]
unsafe fn sysctl(
name: *const c_int,
name_len: c_uint,
old_p: *mut c_void,
old_len_p: *mut c_size_t,
new_p: *const c_void,
new_len: c_size_t,
) -> Result<c_int, c_int> {
// https://github.com/apple-oss-distributions/xnu/blob/8d741a5de7ff4191bf97d57b9f54c2f6d4a15585/osfmk/mach/i386/syscall_sw.h#L158
#[inline]
const fn syscall_construct_unix(n: u64) -> u64 {
const SYSCALL_CLASS_UNIX: u64 = 2;
const SYSCALL_CLASS_SHIFT: u64 = 24;
const SYSCALL_CLASS_MASK: u64 = 0xFF << SYSCALL_CLASS_SHIFT;
const SYSCALL_NUMBER_MASK: u64 = !SYSCALL_CLASS_MASK;
(SYSCALL_CLASS_UNIX << SYSCALL_CLASS_SHIFT) | (SYSCALL_NUMBER_MASK & n)
}
#[allow(clippy::cast_possible_truncation)]
// SAFETY: the caller must uphold the safety contract.
unsafe {
// https://github.com/apple-oss-distributions/xnu/blob/8d741a5de7ff4191bf97d57b9f54c2f6d4a15585/bsd/kern/syscalls.master#L4
let mut n = syscall_construct_unix(202);
let r: i64;
asm!(
"svc 0",
"b.cc 2f",
"mov x16, x0",
"mov x0, #-1",
"2:",
inout("x16") n,
inout("x0") ptr_reg!(name) => r,
inout("x1") name_len as u64 => _,
in("x2") ptr_reg!(old_p),
in("x3") ptr_reg!(old_len_p),
in("x4") ptr_reg!(new_p),
in("x5") new_len as u64,
options(nostack),
);
if r as c_int == -1 { Err(n as c_int) } else { Ok(r as c_int) }
}
}
// https://github.com/apple-oss-distributions/Libc/blob/af11da5ca9d527ea2f48bb7efbd0f0f2a4ea4812/gen/FreeBSD/sysctlbyname.c
unsafe fn sysctlbyname(
name: &CStr,
old_p: *mut c_void,
old_len_p: *mut c_size_t,
new_p: *mut c_void,
new_len: c_size_t,
) -> Result<c_int, c_int> {
let mut real_oid: [c_int; sys::CTL_MAXNAME as usize + 2] = unsafe { mem::zeroed() };
            // Note that this is an undocumented API, although FreeBSD has defined it
            // in sys/sysctl.h since https://github.com/freebsd/freebsd-src/commit/382e01c8dc7f328f46c61c82a29222f432f510f7
let mut name2oid_oid: [c_int; 2] = [0, 3];
let mut oid_len = mem::size_of_val(&real_oid);
unsafe {
sysctl(
name2oid_oid.as_mut_ptr(),
2,
real_oid.as_mut_ptr().cast::<c_void>(),
&mut oid_len,
name.as_ptr().cast::<c_void>() as *mut c_void,
name.to_bytes_with_nul().len() - 1,
)?;
}
oid_len /= mem::size_of::<c_int>();
#[allow(clippy::cast_possible_truncation)]
unsafe {
sysctl(real_oid.as_mut_ptr(), oid_len as u32, old_p, old_len_p, new_p, new_len)
}
}
const OUT_LEN: ffi::c_size_t = mem::size_of::<u32>() as ffi::c_size_t;
let mut out = 0_u32;
let mut out_len = OUT_LEN;
// SAFETY:
// - `out_len` does not exceed the size of `out`.
// - `sysctlbyname` is thread-safe.
let res = unsafe {
sysctlbyname(
name,
(&mut out as *mut u32).cast::<ffi::c_void>(),
&mut out_len,
ptr::null_mut(),
0,
)?
};
debug_assert_eq!(res, 0);
debug_assert_eq!(out_len, OUT_LEN);
Ok(out)
}
// Call sysctl command instead of libc API.
//
// This is used only for testing.
struct SysctlHwOptionalOutput(String);
impl SysctlHwOptionalOutput {
fn new() -> Self {
let output = Command::new("sysctl").arg("hw.optional").output().unwrap();
assert!(output.status.success());
let stdout = String::from_utf8(output.stdout).unwrap();
test_helper::eprintln_nocapture!("sysctl hw.optional:\n{}", stdout);
Self(stdout)
}
fn field(&self, name: &CStr) -> Option<u32> {
let name = name.to_bytes_with_nul();
let name = str::from_utf8(&name[..name.len() - 1]).unwrap();
Some(
self.0
.lines()
.find_map(|s| s.strip_prefix(&format!("{}: ", name)))?
.parse()
.unwrap(),
)
}
}
let sysctl_output = SysctlHwOptionalOutput::new();
for (name, expected_on_macos) in [
(c!("hw.optional.arm.FEAT_LSE"), Some(1)),
(c!("hw.optional.armv8_1_atomics"), Some(1)),
(c!("hw.optional.arm.FEAT_LSE2"), Some(1)),
(c!("hw.optional.arm.FEAT_LSE128"), None),
(c!("hw.optional.arm.FEAT_LSFE"), None),
(c!("hw.optional.arm.FEAT_LRCPC"), Some(1)),
(c!("hw.optional.arm.FEAT_LRCPC2"), Some(1)),
(c!("hw.optional.arm.FEAT_LRCPC3"), None),
] {
let res = sysctlbyname32(name);
if res.is_none() {
assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::NotFound);
}
if cfg!(any(target_os = "macos", target_abi = "macabi")) {
assert_eq!(res, expected_on_macos);
}
if let Some(res) = res {
#[cfg(target_pointer_width = "64")]
assert_eq!(res, sysctlbyname32_no_libc(name).unwrap());
assert_eq!(res, sysctl_output.field(name).unwrap());
} else {
#[cfg(target_pointer_width = "64")]
assert_eq!(sysctlbyname32_no_libc(name).unwrap_err(), libc::ENOENT);
assert!(sysctl_output.field(name).is_none());
}
}
}
}


@@ -0,0 +1,83 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Run-time CPU feature detection on AArch64 Fuchsia by using zx_system_get_features.
As of nightly-2024-09-07, is_aarch64_feature_detected doesn't support run-time detection on Fuchsia.
https://github.com/rust-lang/stdarch/blob/d9466edb4c53cece8686ee6e17b028436ddf4151/crates/std_detect/src/detect/mod.rs
Refs:
- https://fuchsia.dev/reference/syscalls/system_get_features
- https://github.com/llvm/llvm-project/commit/4e731abc55681751b5d736b613f7720e50eb1ad4
*/
include!("common.rs");
#[allow(non_camel_case_types)]
mod ffi {
sys_type!({
// https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/types.h
pub(crate) type zx_status_t = i32;
});
sys_const!({
// https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/errors.h
pub(crate) const ZX_OK: zx_status_t = 0;
// https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/features.h
pub(crate) const ZX_FEATURE_KIND_CPU: u32 = 0;
pub(crate) const ZX_ARM64_FEATURE_ISA_ATOMICS: u32 = 1 << 8;
});
// TODO: use sys_fn!
#[link(name = "zircon")]
extern "C" {
// https://fuchsia.dev/reference/syscalls/system_get_features
pub(crate) fn zx_system_get_features(kind: u32, features: *mut u32) -> zx_status_t;
}
}
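// Thin safe wrapper over zx_system_get_features; returns 0 (no features) if the syscall fails.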
fn zx_system_get_features(kind: u32) -> u32 {
let mut out = 0_u32;
// SAFETY: the pointer is valid because we got it from a reference.
let res = unsafe { ffi::zx_system_get_features(kind, &mut out) };
if res != ffi::ZX_OK {
return 0;
}
out
}
#[cold]
fn _detect(info: &mut CpuInfo) {
let features = zx_system_get_features(ffi::ZX_FEATURE_KIND_CPU);
macro_rules! check {
($flag:ident, $bit:ident) => {
if features & ffi::$bit != 0 {
info.set(CpuInfoFlag::$flag);
}
};
}
check!(lse, ZX_ARM64_FEATURE_ISA_ATOMICS);
}
#[allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks,
clippy::wildcard_imports
)]
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_fuchsia() {
let features = zx_system_get_features(ffi::ZX_FEATURE_KIND_CPU);
test_helper::eprintln_nocapture!(
"zx_system_get_features(ZX_FEATURE_KIND_CPU): {:b}",
features
);
assert_ne!(features, 0);
}
}


@@ -0,0 +1,55 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Run-time CPU feature detection on AArch64 illumos by using getisax.
As of nightly-2024-09-07, is_aarch64_feature_detected doesn't support run-time detection on illumos.
https://github.com/rust-lang/stdarch/blob/d9466edb4c53cece8686ee6e17b028436ddf4151/crates/std_detect/src/detect/mod.rs
Run-time detection on AArch64 illumos is currently disabled by default as experimental
because the illumos AArch64 port is experimental and we cannot run tests on a VM or real machine.
*/
include!("common.rs");
// libc requires Rust 1.63
mod ffi {
pub(crate) use crate::utils::ffi::c_uint;
sys_const!({
// Defined in sys/auxv_aarch64.h.
// https://github.com/richlowe/illumos-gate/blob/arm64-gate/usr/src/uts/common/sys/auxv_aarch64.h
pub(crate) const AV_AARCH64_LSE: u32 = 1 << 15;
pub(crate) const AV_AARCH64_2_LSE2: u32 = 1 << 2;
});
sys_fn!({
extern "C" {
// Defined in sys/auxv.h.
// https://illumos.org/man/2/getisax
// https://github.com/richlowe/illumos-gate/blob/arm64-gate/usr/src/uts/common/sys/auxv.h
pub(crate) fn getisax(array: *mut u32, n: c_uint) -> c_uint;
}
});
}
#[cold]
fn _detect(info: &mut CpuInfo) {
const OUT_LEN: ffi::c_uint = 2;
let mut out = [0_u32; OUT_LEN as usize];
// SAFETY: the pointer is valid because we got it from a reference.
unsafe {
ffi::getisax(out.as_mut_ptr(), OUT_LEN);
}
macro_rules! check {
($x:ident, $flag:ident, $bit:ident) => {
if $x & ffi::$bit != 0 {
info.set(CpuInfoFlag::$flag);
}
};
}
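    // getisax fills out[0] with the AV_AARCH64_* word and out[1] with the AV_AARCH64_2_* word.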
let v1 = out[0];
check!(v1, lse, AV_AARCH64_LSE);
let v2 = out[1];
check!(v2, lse2, AV_AARCH64_2_LSE2);
}


@@ -0,0 +1,52 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Run-time CPU feature detection on AArch64 Windows by using IsProcessorFeaturePresent.
Run-time detection of FEAT_LSE on Windows by is_aarch64_feature_detected is supported on Rust 1.70+.
https://github.com/rust-lang/stdarch/pull/1373
Refs: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
*/
include!("common.rs");
// windows-sys requires Rust 1.60
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
mod ffi {
sys_type!({
pub(crate) type [Win32::System::Threading] PROCESSOR_FEATURE_ID = u32;
pub(crate) type [Win32::Foundation] BOOL = i32;
});
sys_const!({
pub(crate) const [Win32::Foundation] FALSE: BOOL = 0;
// Defined in winnt.h of Windows SDK.
pub(crate) const [Win32::System::Threading]
PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE: PROCESSOR_FEATURE_ID = 34;
});
sys_fn!({
extern "system" {
// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
pub(crate) fn [Win32::System::Threading] IsProcessorFeaturePresent(
ProcessorFeature: PROCESSOR_FEATURE_ID,
) -> BOOL;
}
});
}
#[cold]
fn _detect(info: &mut CpuInfo) {
macro_rules! check {
($flag:ident, $bit:ident) => {
// SAFETY: calling IsProcessorFeaturePresent is safe, and FALSE is also
// returned if the HAL does not support detection of the specified feature.
if unsafe { ffi::IsProcessorFeaturePresent(ffi::$bit) != ffi::FALSE } {
info.set(CpuInfoFlag::$flag);
}
};
}
check!(lse, PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE);
}

File diff suppressed because it is too large


@@ -0,0 +1,234 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
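// Bitset of detected CPU features: one bit per CpuInfoFlag, with bit 0 (Init) recording that detection has already run.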
#[derive(Clone, Copy)]
#[repr(transparent)]
pub(crate) struct CpuInfo(u32);
impl CpuInfo {
#[inline]
fn set(&mut self, bit: CpuInfoFlag) {
self.0 = set(self.0, bit as u32);
}
#[inline]
#[must_use]
fn test(self, bit: CpuInfoFlag) -> bool {
test(self.0, bit as u32)
}
}
#[inline]
#[must_use]
fn set(x: u32, bit: u32) -> u32 {
x | (1 << bit)
}
#[inline]
#[must_use]
fn test(x: u32, bit: u32) -> bool {
x & (1 << bit) != 0
}
#[inline]
pub(crate) fn detect() -> CpuInfo {
use core::sync::atomic::{AtomicU32, Ordering};
static CACHE: AtomicU32 = AtomicU32::new(0);
let mut info = CpuInfo(CACHE.load(Ordering::Relaxed));
if info.0 != 0 {
return info;
}
info.set(CpuInfoFlag::Init);
    // Note: the detect_false cfg is intended to make it easy for developers to test
    // cases where features that are usually available are not available; it is not a public API.
if !cfg!(portable_atomic_test_detect_false) {
_detect(&mut info);
}
CACHE.store(info.0, Ordering::Relaxed);
info
}
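// The flags! macro below defines one CpuInfoFlag variant per feature together with an
// accessor on CpuInfo; each accessor is compiled only when the feature is not already
// statically enabled via the listed cfgs (or when testing), so statically-known
// features bypass run-time detection.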
macro_rules! flags {
($(
$(#[$attr:meta])*
$func:ident($name:literal, any($($cfg:ident),*)),
)*) => {
#[allow(dead_code, non_camel_case_types)]
#[derive(Clone, Copy)]
#[cfg_attr(test, derive(PartialEq, Eq, PartialOrd, Ord))]
#[repr(u32)]
enum CpuInfoFlag {
Init = 0,
$($func,)*
}
impl CpuInfo {
$(
$(#[$attr])*
#[cfg(any(test, not(any($($cfg = $name),*))))]
#[inline]
#[must_use]
pub(crate) fn $func(self) -> bool {
self.test(CpuInfoFlag::$func)
}
)*
#[cfg(test)] // for test
const ALL_FLAGS: &'static [(&'static str, CpuInfoFlag, bool)] = &[$(
($name, CpuInfoFlag::$func, cfg!(any($($cfg = $name),*))),
)*];
}
#[test]
#[cfg_attr(portable_atomic_test_detect_false, ignore = "detection disabled")]
fn test_detect() {$(
$(#[$attr])*
{
const _: u32 = 1_u32 << CpuInfoFlag::$func as u32;
assert_eq!($name.replace(|c: char| c == '-' || c == '.', "_"), stringify!($func));
if detect().$func() {
assert!(detect().test(CpuInfoFlag::$func));
} else {
assert!(!detect().test(CpuInfoFlag::$func));
}
}
)*}
};
}
// rustc definitions: https://github.com/rust-lang/rust/blob/e6af292f91f21f12ac1aab6825efb7e1e3381cbb/compiler/rustc_target/src/target_features.rs
// LLVM definitions: https://github.com/llvm/llvm-project/blob/llvmorg-20.1.0/llvm/lib/Target/AArch64/AArch64Features.td
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
flags! {
// FEAT_LSE, Large System Extensions
// https://developer.arm.com/documentation/109697/2024_12/Feature-descriptions/The-Armv8-1-architecture-extension
// > This feature is supported in AArch64 state only.
// > FEAT_LSE is OPTIONAL from Armv8.0.
// > FEAT_LSE is mandatory from Armv8.1.
lse("lse", any(target_feature /* 1.61+ */, portable_atomic_target_feature)),
// FEAT_LSE2, Large System Extensions version 2
// https://developer.arm.com/documentation/109697/2024_12/Feature-descriptions/The-Armv8-4-architecture-extension
// > This feature is supported in AArch64 state only.
// > FEAT_LSE2 is OPTIONAL from Armv8.2.
// > FEAT_LSE2 is mandatory from Armv8.4.
#[cfg_attr(not(test), allow(dead_code))]
lse2("lse2", any(target_feature /* nightly */, portable_atomic_target_feature)),
// FEAT_LRCPC3, Load-Acquire RCpc instructions version 3
// https://developer.arm.com/documentation/109697/2024_12/Feature-descriptions/The-Armv8-9-architecture-extension
// > This feature is supported in AArch64 state only.
// > FEAT_LRCPC3 is OPTIONAL from Armv8.2.
// > If FEAT_LRCPC3 is implemented, then FEAT_LRCPC2 is implemented.
#[cfg_attr(not(test), allow(dead_code))]
rcpc3("rcpc3", any(target_feature /* nightly */, portable_atomic_target_feature)),
// FEAT_LSE128, 128-bit Atomics
// https://developer.arm.com/documentation/109697/2024_12/Feature-descriptions/The-Armv9-4-architecture-extension
// > This feature is supported in AArch64 state only.
// > FEAT_LSE128 is OPTIONAL from Armv9.3.
// > If FEAT_LSE128 is implemented, then FEAT_LSE is implemented.
#[cfg_attr(not(test), allow(dead_code))]
lse128("lse128", any(target_feature /* nightly */, portable_atomic_target_feature)),
// FEAT_LSFE, Large System Float Extension
// https://developer.arm.com/documentation/109697/2024_12/Feature-descriptions/The-Armv9-6-architecture-extension
// > This feature is supported in AArch64 state only.
// > FEAT_LSFE is OPTIONAL from Armv9.3.
// > If FEAT_LSFE is implemented, then FEAT_FP is implemented.
#[cfg(test)]
lsfe("lsfe", any(target_feature /* N/A */, portable_atomic_target_feature)),
#[cfg(test)] // test-only
cpuid("cpuid", any(/* no corresponding target feature */)),
}
// LLVM definitions: https://github.com/llvm/llvm-project/blob/llvmorg-20.1.0/llvm/lib/Target/PowerPC/PPC.td
#[cfg(target_arch = "powerpc64")]
flags! {
// lqarx and stqcx.
quadword_atomics("quadword-atomics", any(target_feature /* nightly */, portable_atomic_target_feature)),
}
// LLVM definitions: https://github.com/llvm/llvm-project/blob/llvmorg-20.1.0/llvm/lib/Target/RISCV/RISCVFeatures.td
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
flags! {
// amocas.{w,d,q}
zacas("zacas", any(target_feature /* nightly */, portable_atomic_target_feature)),
}
// LLVM definitions: https://github.com/llvm/llvm-project/blob/llvmorg-20.1.0/llvm/lib/Target/X86/X86.td
#[cfg(target_arch = "x86_64")]
flags! {
// cmpxchg16b
cmpxchg16b("cmpxchg16b", any(target_feature /* 1.69+ */, portable_atomic_target_feature)),
// atomic vmovdqa
#[cfg(target_feature = "sse")]
vmovdqa_atomic("vmovdqa-atomic", any(/* no corresponding target feature */)),
}
#[allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks,
clippy::wildcard_imports
)]
#[cfg(test)]
mod tests_common {
use std::{collections::BTreeSet, vec};
use super::*;
#[test]
fn test_bit_flags() {
let mut flags = vec![("init", CpuInfoFlag::Init)];
flags.extend(CpuInfo::ALL_FLAGS.iter().map(|&(name, flag, _)| (name, flag)));
let flag_set = flags.iter().map(|(_, flag)| flag).collect::<BTreeSet<_>>();
        let name_set = flags.iter().map(|(name, _)| name).collect::<BTreeSet<_>>();
if flag_set.len() != flags.len() {
panic!("CpuInfo flag values must be unique")
}
if name_set.len() != flags.len() {
panic!("CpuInfo flag names must be unique")
}
let mut x = CpuInfo(0);
for &(_, f) in &flags {
assert!(!x.test(f));
}
for i in 0..flags.len() {
x.set(flags[i].1);
for &(_, f) in &flags[..i + 1] {
assert!(x.test(f));
}
for &(_, f) in &flags[i + 1..] {
assert!(!x.test(f));
}
}
for &(_, f) in &flags {
assert!(x.test(f));
}
}
#[test]
fn print_features() {
use std::{fmt::Write as _, string::String};
let mut features = String::new();
features.push_str("\nfeatures:\n");
for &(name, flag, compile_time) in CpuInfo::ALL_FLAGS {
let run_time = detect().test(flag);
if run_time == compile_time {
let _ = writeln!(features, " {}: {}", name, run_time);
} else {
let _ = writeln!(
features,
" {}: {} (compile-time), {} (run-time)",
name, compile_time, run_time
);
}
}
test_helper::eprintln_nocapture!("{}", features);
}
// Static assertions for C type definitions.
// Assertions with core::ffi types are in crate::utils::ffi module.
#[cfg(not(any(windows, target_arch = "x86", target_arch = "x86_64")))]
const _: fn() = || {
use test_helper::sys;
let _: crate::utils::ffi::c_char = 0 as sys::c_char;
};
}


@@ -0,0 +1,55 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Run-time CPU feature detection on PowerPC64 AIX by using getsystemcfg.
Refs:
- https://github.com/golang/go/blob/go1.24.0/src/internal/cpu/cpu_ppc64x_aix.go
As of nightly-2024-09-07, is_powerpc_feature_detected doesn't support run-time detection on AIX.
https://github.com/rust-lang/stdarch/blob/d9466edb4c53cece8686ee6e17b028436ddf4151/crates/std_detect/src/detect/mod.rs
Run-time detection on PowerPC64 AIX is currently disabled by default as experimental
because we cannot run tests on a VM or real machine.
*/
include!("common.rs");
// libc requires Rust 1.63
mod ffi {
pub(crate) use crate::utils::ffi::{c_int, c_ulong};
sys_const!({
// https://github.com/rust-lang/libc/blob/0.2.158/src/unix/aix/mod.rs#L2058
// https://github.com/golang/go/blob/go1.24.0/src/internal/cpu/cpu_ppc64x_aix.go
pub(crate) const SC_IMPL: c_int = 2;
pub(crate) const POWER_8: c_ulong = 0x10000;
pub(crate) const POWER_9: c_ulong = 0x20000;
});
// TODO: use sys_const! once libc crate defined it.
// https://github.com/golang/go/blob/go1.24.0/src/internal/cpu/cpu_ppc64x_aix.go
pub(crate) const POWER_10: c_ulong = 0x40000;
sys_fn!({
extern "C" {
// https://www.ibm.com/docs/en/aix/7.3?topic=g-getsystemcfg-subroutine
// https://github.com/rust-lang/libc/blob/0.2.158/src/unix/aix/powerpc64.rs#L643
pub(crate) fn getsystemcfg(name: c_int) -> c_ulong;
}
});
}
#[cold]
fn _detect(info: &mut CpuInfo) {
// SAFETY: calling getsystemcfg is safe.
let impl_ = unsafe { ffi::getsystemcfg(ffi::SC_IMPL) };
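    // getsystemcfg returns (c_ulong)-1, i.e. c_ulong::MAX, on error, so bail out in that case.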
if impl_ == ffi::c_ulong::MAX {
return;
}
    // Check both POWER_8 and later ISAs (which are supersets of POWER_8) because
// AIX currently doesn't set POWER_8 when POWER_9 is set.
// https://github.com/golang/go/commit/51859ec2292d9c1d82a7054ec672ff551a0d7497
if impl_ & (ffi::POWER_8 | ffi::POWER_9 | ffi::POWER_10) != 0 {
info.set(CpuInfoFlag::quadword_atomics);
}
}


@@ -0,0 +1,173 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Run-time CPU feature detection on RISC-V Linux/Android by using riscv_hwprobe.
On RISC-V, detection using auxv only supports single-letter extensions.
So we use riscv_hwprobe, which supports multi-letter extensions.
Refs: https://github.com/torvalds/linux/blob/v6.13/Documentation/arch/riscv/hwprobe.rst
*/
include!("common.rs");
use core::ptr;
// libc requires Rust 1.63
#[allow(non_camel_case_types, non_upper_case_globals)]
mod ffi {
pub(crate) use crate::utils::ffi::{c_long, c_size_t, c_uint, c_ulong};
sys_struct!({
// https://github.com/torvalds/linux/blob/v6.13/arch/riscv/include/uapi/asm/hwprobe.h
pub(crate) struct riscv_hwprobe {
pub(crate) key: i64,
pub(crate) value: u64,
}
});
sys_const!({
pub(crate) const __NR_riscv_hwprobe: c_long = 258;
// https://github.com/torvalds/linux/blob/v6.13/arch/riscv/include/uapi/asm/hwprobe.h
pub(crate) const RISCV_HWPROBE_KEY_BASE_BEHAVIOR: i64 = 3;
pub(crate) const RISCV_HWPROBE_BASE_BEHAVIOR_IMA: u64 = 1 << 0;
pub(crate) const RISCV_HWPROBE_KEY_IMA_EXT_0: i64 = 4;
// Linux 6.8+
// https://github.com/torvalds/linux/commit/154a3706122978eeb34d8223d49285ed4f3c61fa
pub(crate) const RISCV_HWPROBE_EXT_ZACAS: u64 = 1 << 34;
});
#[cfg(not(all(
target_os = "linux",
any(target_arch = "riscv32", all(target_arch = "riscv64", target_pointer_width = "64")),
)))]
sys_fn!({
extern "C" {
// https://man7.org/linux/man-pages/man2/syscall.2.html
pub(crate) fn syscall(number: c_long, ...) -> c_long;
}
});
// Use asm-based syscall for compatibility with non-libc targets if possible.
#[cfg(all(
target_os = "linux", // https://github.com/bytecodealliance/rustix/issues/1095
any(target_arch = "riscv32", all(target_arch = "riscv64", target_pointer_width = "64")),
))]
#[inline]
pub(crate) unsafe fn syscall(
number: c_long,
a0: *mut riscv_hwprobe,
a1: c_size_t,
a2: c_size_t,
a3: *mut c_ulong,
a4: c_uint,
) -> c_long {
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
// arguments must be extended to 64-bit if RV64
let a4 = a4 as usize;
let r;
// SAFETY: the caller must uphold the safety contract.
// Refs:
// - https://github.com/bminor/musl/blob/v1.2.5/arch/riscv32/syscall_arch.h
// - https://github.com/bminor/musl/blob/v1.2.5/arch/riscv64/syscall_arch.h
unsafe {
asm!(
"ecall",
in("a7") number,
inout("a0") a0 => r,
in("a1") a1,
in("a2") a2,
in("a3") a3,
in("a4") a4,
options(nostack, preserves_flags)
);
}
r
}
// https://github.com/torvalds/linux/blob/v6.13/Documentation/arch/riscv/hwprobe.rst
pub(crate) unsafe fn __riscv_hwprobe(
pairs: *mut riscv_hwprobe,
pair_count: c_size_t,
cpu_set_size: c_size_t,
cpus: *mut c_ulong,
flags: c_uint,
) -> c_long {
// SAFETY: the caller must uphold the safety contract.
unsafe { syscall(__NR_riscv_hwprobe, pairs, pair_count, cpu_set_size, cpus, flags) }
}
}
// syscall returns an unsupported error if riscv_hwprobe is not supported,
// so we can safely use this function on older versions of Linux.
fn riscv_hwprobe(out: &mut [ffi::riscv_hwprobe]) -> bool {
let len = out.len();
// SAFETY: We've passed the valid pointer and length,
// passing null ptr for cpus is safe because cpu_set_size is zero.
unsafe { ffi::__riscv_hwprobe(out.as_mut_ptr(), len, 0, ptr::null_mut(), 0) == 0 }
}
#[cold]
fn _detect(info: &mut CpuInfo) {
let mut out = [
ffi::riscv_hwprobe { key: ffi::RISCV_HWPROBE_KEY_BASE_BEHAVIOR, value: 0 },
ffi::riscv_hwprobe { key: ffi::RISCV_HWPROBE_KEY_IMA_EXT_0, value: 0 },
];
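    // The kernel sets `key` to -1 for keys it does not recognize, so check the keys
    // before trusting the corresponding values.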
if riscv_hwprobe(&mut out)
&& out[0].key != -1
&& out[0].value & ffi::RISCV_HWPROBE_BASE_BEHAVIOR_IMA != 0
&& out[1].key != -1
{
let value = out[1].value;
macro_rules! check {
($flag:ident, $bit:ident) => {
if value & ffi::$bit != 0 {
info.set(CpuInfoFlag::$flag);
}
};
}
check!(zacas, RISCV_HWPROBE_EXT_ZACAS);
}
}
#[allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks,
clippy::wildcard_imports
)]
#[cfg(test)]
mod tests {
use super::*;
// We use asm-based syscall for compatibility with non-libc targets.
    // This test checks that our asm-based implementation and libc::syscall return the same result.
#[test]
fn test_alternative() {
unsafe fn __riscv_hwprobe_libc(
pairs: *mut ffi::riscv_hwprobe,
pair_count: ffi::c_size_t,
cpu_set_size: ffi::c_size_t,
cpus: *mut ffi::c_ulong,
flags: ffi::c_uint,
) -> ffi::c_long {
// SAFETY: the caller must uphold the safety contract.
unsafe {
libc::syscall(ffi::__NR_riscv_hwprobe, pairs, pair_count, cpu_set_size, cpus, flags)
}
}
fn riscv_hwprobe_libc(out: &mut [ffi::riscv_hwprobe]) -> bool {
let len = out.len();
unsafe { __riscv_hwprobe_libc(out.as_mut_ptr(), len, 0, ptr::null_mut(), 0) == 0 }
}
let mut out = [
ffi::riscv_hwprobe { key: ffi::RISCV_HWPROBE_KEY_BASE_BEHAVIOR, value: 0 },
ffi::riscv_hwprobe { key: ffi::RISCV_HWPROBE_KEY_IMA_EXT_0, value: 0 },
];
let mut libc_out = out;
assert_eq!(riscv_hwprobe(&mut out), riscv_hwprobe_libc(&mut libc_out));
assert_eq!(out, libc_out);
}
}


@@ -0,0 +1,157 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Run-time CPU feature detection on x86_64 by using CPUID.
Adapted from https://github.com/rust-lang/stdarch.
*/
#![cfg_attr(portable_atomic_sanitize_thread, allow(dead_code))]
// Miri doesn't support inline assembly used in __cpuid: https://github.com/rust-lang/miri/issues/932
// SGX doesn't support CPUID: https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs#L102-L105
#[cfg(any(target_env = "sgx", miri))]
compile_error!("internal error: this module is not supported on this environment");
include!("common.rs");
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::arch::x86_64::CpuidResult;
// Workaround for https://github.com/rust-lang/rust/issues/101346
// It is not clear if our use cases are affected, but we implement this just in case.
//
// Refs:
// - https://www.felixcloutier.com/x86/cpuid
// - https://en.wikipedia.org/wiki/CPUID
// - https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs
#[cfg(not(target_env = "sgx"))]
fn __cpuid(leaf: u32) -> CpuidResult {
let eax;
let mut ebx;
let ecx;
let edx;
// SAFETY: Calling `__cpuid` is safe on all x86_64 CPUs except for SGX,
// which doesn't support `cpuid`.
// https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs#L102-L109
unsafe {
asm!(
"mov {ebx_tmp:r}, rbx", // save rbx which is reserved by LLVM
"cpuid",
"xchg {ebx_tmp:r}, rbx", // restore rbx
ebx_tmp = out(reg) ebx,
inout("eax") leaf => eax,
inout("ecx") 0 => ecx,
out("edx") edx,
options(nostack, preserves_flags),
);
}
CpuidResult { eax, ebx, ecx, edx }
}
// https://en.wikipedia.org/wiki/CPUID
const _VENDOR_ID_INTEL: [u32; 3] = _vender(b"GenuineIntel"); // Intel
const _VENDOR_ID_INTEL2: [u32; 3] = _vender(b"GenuineIotel"); // Intel https://github.com/InstLatx64/InstLatx64/commit/8fdd319884c67d2c6ec1ca0c595b42c1c4b8d803
const _VENDOR_ID_AMD: [u32; 3] = _vender(b"AuthenticAMD"); // AMD
const _VENDOR_ID_CENTAUR: [u32; 3] = _vender(b"CentaurHauls"); // Centaur/VIA/Zhaoxin
const _VENDOR_ID_ZHAOXIN: [u32; 3] = _vender(b" Shanghai "); // Zhaoxin
const fn _vender(b: &[u8; 12]) -> [u32; 3] {
[
u32::from_ne_bytes([b[0], b[1], b[2], b[3]]),
u32::from_ne_bytes([b[4], b[5], b[6], b[7]]),
u32::from_ne_bytes([b[8], b[9], b[10], b[11]]),
]
}
fn _vendor_id() -> [u32; 3] {
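// CPUID leaf 0 returns the vendor string in register order EBX, EDX, ECX (e.g. "Genu", "ineI", "ntel").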
let CpuidResult { ebx, ecx, edx, .. } = __cpuid(0);
[ebx, edx, ecx]
}
fn _vendor_has_vmovdqa_atomic(vendor_id: [u32; 3], family: u32) -> bool {
// VMOVDQA is atomic on Intel, AMD, and Zhaoxin CPUs with AVX.
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details.
vendor_id == _VENDOR_ID_INTEL
|| vendor_id == _VENDOR_ID_INTEL2
|| vendor_id == _VENDOR_ID_AMD
|| vendor_id == _VENDOR_ID_ZHAOXIN
|| vendor_id == _VENDOR_ID_CENTAUR && family > 6
}
#[cold]
fn _detect(info: &mut CpuInfo) {
let CpuidResult {
#[cfg(target_feature = "sse")]
eax: proc_info_eax,
ecx: proc_info_ecx,
..
} = __cpuid(1);
// https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L111
if test(proc_info_ecx, 13) {
info.set(CpuInfoFlag::cmpxchg16b);
}
// We only use VMOVDQA when SSE is enabled. See atomic_load_vmovdqa() in atomic128/x86_64.rs for more.
#[cfg(target_feature = "sse")]
{
use core::arch::x86_64::_xgetbv;
// https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L131-L224
let cpu_xsave = test(proc_info_ecx, 26);
if cpu_xsave {
let cpu_osxsave = test(proc_info_ecx, 27);
if cpu_osxsave {
// SAFETY: Calling `_xgetbv` is safe because the CPU has `xsave` support
// and OS has set `osxsave`.
let xcr0 = unsafe { _xgetbv(0) };
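// XCR0 bit 1 (SSE/XMM state) and bit 2 (AVX/YMM state) must both be set by the OS for AVX to be usable.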
let os_avx_support = xcr0 & 6 == 6;
if os_avx_support && test(proc_info_ecx, 28) {
let vendor_id = _vendor_id();
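// Base family ID is bits 11:8 of the version information returned in EAX.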
let family = (proc_info_eax >> 8) & 0x0F;
if _vendor_has_vmovdqa_atomic(vendor_id, family) {
info.set(CpuInfoFlag::vmovdqa_atomic);
}
}
}
}
}
}
#[allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks,
clippy::wildcard_imports
)]
#[cfg(test)]
mod tests {
use std::{mem, str};
use super::*;
#[test]
#[cfg_attr(portable_atomic_test_detect_false, ignore = "detection disabled")]
fn test_cpuid() {
assert_eq!(std::is_x86_feature_detected!("cmpxchg16b"), detect().cmpxchg16b());
let vendor_id = _vendor_id();
test_helper::eprintln_nocapture!(
"\n vendor_id: {} (ebx: {:x}, edx: {:x}, ecx: {:x})",
str::from_utf8(&unsafe { mem::transmute::<[u32; 3], [u8; 12]>(vendor_id) }).unwrap(),
vendor_id[0],
vendor_id[1],
vendor_id[2],
);
let CpuidResult { eax: proc_info_eax, .. } = __cpuid(1);
let family = (proc_info_eax >> 8) & 0x0F;
if _vendor_has_vmovdqa_atomic(vendor_id, family) {
assert_eq!(std::is_x86_feature_detected!("avx"), detect().vmovdqa_atomic());
} else {
assert!(!detect().vmovdqa_atomic());
}
assert_eq!(
unsafe { mem::transmute::<[u32; 3], [u8; 12]>(_VENDOR_ID_INTEL) },
*b"GenuineIntel"
);
}
}

View File

@@ -0,0 +1,471 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Fallback implementation using global locks.
This implementation uses seqlock for global locks.
This is basically based on global locks in crossbeam-utils's `AtomicCell`,
but seqlock is implemented in a way that does not depend on UB
(see comments in optimistic_read method in atomic! macro for details).
Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic
type and the value type must be the same.
*/
#![cfg_attr(
any(
all(
target_arch = "x86_64",
not(portable_atomic_no_outline_atomics),
not(any(target_env = "sgx", miri)),
),
all(
target_arch = "powerpc64",
feature = "fallback",
not(portable_atomic_no_outline_atomics),
any(
all(
target_os = "linux",
any(
all(
target_env = "gnu",
any(target_endian = "little", not(target_feature = "crt-static")),
),
all(
target_env = "musl",
any(not(target_feature = "crt-static"), feature = "std"),
),
target_env = "ohos",
all(target_env = "uclibc", not(target_feature = "crt-static")),
portable_atomic_outline_atomics,
),
),
target_os = "android",
all(
target_os = "freebsd",
any(
target_endian = "little",
not(target_feature = "crt-static"),
portable_atomic_outline_atomics,
),
),
target_os = "openbsd",
all(
target_os = "aix",
not(portable_atomic_pre_llvm_20),
portable_atomic_outline_atomics, // TODO(aix): currently disabled by default
),
),
not(any(miri, portable_atomic_sanitize_thread)),
),
all(
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(
target_feature = "zacas",
portable_atomic_target_feature = "zacas",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
any(target_os = "linux", target_os = "android"),
),
),
),
all(
target_arch = "riscv64",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(
target_feature = "zacas",
portable_atomic_target_feature = "zacas",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
any(target_os = "linux", target_os = "android"),
),
),
),
all(
target_arch = "arm",
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_os = "linux", target_os = "android"),
not(portable_atomic_no_outline_atomics),
),
),
allow(dead_code)
)]
#[macro_use]
pub(crate) mod utils;
// Use "wide" sequence lock if the pointer width <= 32 for preventing its counter against wrap
// around.
//
// On narrow architectures (pointer width <= 16), the counter is still <= 32-bit and may be
// vulnerable to wrap around. But this is mostly okay, since on such primitive hardware the
// counter will not be incremented that fast.
//
// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast,
// so use it to implement normal sequence lock.
cfg_has_fast_atomic_64! {
mod seq_lock;
}
cfg_no_fast_atomic_64! {
#[path = "seq_lock_wide.rs"]
mod seq_lock;
}
use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};
use self::{
seq_lock::{SeqLock, SeqLockWriteGuard},
utils::CachePadded,
};
#[cfg(portable_atomic_no_strict_provenance)]
use crate::utils::ptr::PtrExt as _;
// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast,
// so use it to reduce chunks of byte-wise atomic memcpy.
use self::seq_lock::{AtomicChunk, Chunk};
// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016.
#[inline]
#[must_use]
fn lock(addr: usize) -> &'static SeqLock {
// The number of locks is a prime number because we want to make sure `addr % LEN` gets
// dispersed across all locks.
//
// crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded,
// so the actual concurrency level will be smaller.
const LEN: usize = 67;
const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new());
static LOCKS: [CachePadded<SeqLock>; LEN] = [
L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
L, L, L, L, L, L, L,
];
// If the modulus is a constant number, the compiler will use crazy math to transform this into
// a sequence of cheap arithmetic operations rather than using the slow modulo instruction.
&LOCKS[addr % LEN]
}
macro_rules! atomic {
($atomic_type:ident, $int_type:ident, $align:literal) => {
#[repr(C, align($align))]
pub(crate) struct $atomic_type {
v: UnsafeCell<$int_type>,
}
impl $atomic_type {
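// Number of Chunk-sized pieces that make up one $int_type value.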
const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>();
#[inline]
unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] {
static_assert!($atomic_type::LEN > 1);
static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0);
// SAFETY: the caller must uphold the safety contract for `chunks`.
unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) }
}
#[inline]
fn optimistic_read(&self) -> $int_type {
// Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M
let mut dst: [Chunk; Self::LEN] = [0; Self::LEN];
// SAFETY:
// - There are no threads that perform non-atomic concurrent write operations.
// - There is no writer that updates the value using atomic operations of different granularity.
//
// If the atomic operation is not used here, it will cause a data race
// when `write` performs concurrent write operation.
// Such a data race is sometimes considered virtually unproblematic
// in SeqLock implementations:
//
// - https://github.com/Amanieu/seqlock/issues/2
// - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116
// - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F
//
// However, in our use case, the implementation that loads/stores the value as
// chunks of usize is fast enough and sound, so we use that implementation.
//
// See also atomic-memcpy crate, a generic implementation of this pattern:
// https://github.com/taiki-e/atomic-memcpy
let chunks = unsafe { self.chunks() };
for i in 0..Self::LEN {
dst[i] = chunks[i].load(Ordering::Relaxed);
}
// SAFETY: integers are plain old data types so we can always transmute to them.
unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) }
}
#[inline]
fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type {
// This calls optimistic_read, which can return a torn value, but the resulting value
// is guaranteed not to be torn because we hold the write lock.
self.optimistic_read()
}
#[inline]
fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) {
// SAFETY: integers are plain old data types so we can always transmute them to arrays of integers.
let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) };
// SAFETY:
// - The guard guarantees that we hold the lock to write.
// - There are no threads that perform non-atomic concurrent read or write operations.
//
// See optimistic_read for the reason that atomic operations are used here.
let chunks = unsafe { self.chunks() };
for i in 0..Self::LEN {
chunks[i].store(val[i], Ordering::Relaxed);
}
}
}
// Send is implicitly implemented.
// SAFETY: any data races are prevented by the lock and atomic operation.
unsafe impl Sync for $atomic_type {}
impl_default_no_fetch_ops!($atomic_type, $int_type);
impl_default_bit_opts!($atomic_type, $int_type);
impl $atomic_type {
#[inline]
pub(crate) const fn new(v: $int_type) -> Self {
Self { v: UnsafeCell::new(v) }
}
#[inline]
pub(crate) fn is_lock_free() -> bool {
Self::IS_ALWAYS_LOCK_FREE
}
pub(crate) const IS_ALWAYS_LOCK_FREE: bool = false;
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn load(&self, order: Ordering) -> $int_type {
crate::utils::assert_load_ordering(order);
let lock = lock(self.v.get().addr());
// Try doing an optimistic read first.
if let Some(stamp) = lock.optimistic_read() {
let val = self.optimistic_read();
if lock.validate_read(stamp) {
return val;
}
}
// Grab a regular write lock so that writers don't starve this load.
let guard = lock.write();
let val = self.read(&guard);
// The value hasn't been changed. Drop the guard without incrementing the stamp.
guard.abort();
val
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn store(&self, val: $int_type, order: Ordering) {
crate::utils::assert_store_ordering(order);
let guard = lock(self.v.get().addr()).write();
self.write(val, &guard)
}
#[inline]
pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get().addr()).write();
let prev = self.read(&guard);
self.write(val, &guard);
prev
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
crate::utils::assert_compare_exchange_ordering(success, failure);
let guard = lock(self.v.get().addr()).write();
let prev = self.read(&guard);
if prev == current {
self.write(new, &guard);
Ok(prev)
} else {
// The value hasn't been changed. Drop the guard without incrementing the stamp.
guard.abort();
Err(prev)
}
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange_weak(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
self.compare_exchange(current, new, success, failure)
}
#[inline]
pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get().addr()).write();
let prev = self.read(&guard);
self.write(prev.wrapping_add(val), &guard);
prev
}
#[inline]
pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get().addr()).write();
let prev = self.read(&guard);
self.write(prev.wrapping_sub(val), &guard);
prev
}
#[inline]
pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get().addr()).write();
let prev = self.read(&guard);
self.write(prev & val, &guard);
prev
}
#[inline]
pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get().addr()).write();
let prev = self.read(&guard);
self.write(!(prev & val), &guard);
prev
}
#[inline]
pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get().addr()).write();
let prev = self.read(&guard);
self.write(prev | val, &guard);
prev
}
#[inline]
pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get().addr()).write();
let prev = self.read(&guard);
self.write(prev ^ val, &guard);
prev
}
#[inline]
pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get().addr()).write();
let prev = self.read(&guard);
self.write(core::cmp::max(prev, val), &guard);
prev
}
#[inline]
pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
let guard = lock(self.v.get().addr()).write();
let prev = self.read(&guard);
self.write(core::cmp::min(prev, val), &guard);
prev
}
#[inline]
pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type {
let guard = lock(self.v.get().addr()).write();
let prev = self.read(&guard);
self.write(!prev, &guard);
prev
}
#[inline]
pub(crate) fn not(&self, order: Ordering) {
self.fetch_not(order);
}
#[inline]
pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
let guard = lock(self.v.get().addr()).write();
let prev = self.read(&guard);
self.write(prev.wrapping_neg(), &guard);
prev
}
#[inline]
pub(crate) fn neg(&self, order: Ordering) {
self.fetch_neg(order);
}
#[inline]
pub(crate) const fn as_ptr(&self) -> *mut $int_type {
self.v.get()
}
}
};
}
#[cfg_attr(
portable_atomic_no_cfg_target_has_atomic,
cfg(any(
test,
not(any(
not(portable_atomic_no_atomic_64),
all(
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
),
))
))
)]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(any(
test,
not(any(
target_has_atomic = "64",
all(
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
),
))
))
)]
cfg_no_fast_atomic_64! {
atomic!(AtomicI64, i64, 8);
atomic!(AtomicU64, u64, 8);
}
atomic!(AtomicI128, i128, 16);
atomic!(AtomicU128, u128, 16);
#[cfg(test)]
mod tests {
use super::*;
cfg_no_fast_atomic_64! {
test_atomic_int!(i64);
test_atomic_int!(u64);
}
test_atomic_int!(i128);
test_atomic_int!(u128);
// load/store/swap implementation is not affected by signedness, so it is
// enough to test only unsigned types.
cfg_no_fast_atomic_64! {
stress_test!(u64);
}
stress_test!(u128);
}

View File

@@ -0,0 +1,184 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Helper for outline-atomics.
On architectures where double-word (DW) atomics are not supported on older CPUs, we use
the fallback implementation when DW atomic instructions are unavailable and
outline-atomics is enabled.
This module provides helpers to implement them.
*/
use core::sync::atomic::Ordering;
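// `Udw` is the unsigned double-word integer type for the target; `AtomicUdw`/`AtomicIdw` are the
// corresponding fallback atomic types.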
#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "riscv64"))]
pub(crate) type Udw = u128;
#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "riscv64"))]
pub(crate) type AtomicUdw = super::super::super::fallback::AtomicU128;
#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "riscv64"))]
pub(crate) type AtomicIdw = super::super::super::fallback::AtomicI128;
#[cfg(any(target_arch = "arm", target_arch = "riscv32"))]
pub(crate) type Udw = u64;
#[cfg(any(target_arch = "arm", target_arch = "riscv32"))]
pub(crate) type AtomicUdw = super::super::super::fallback::AtomicU64;
#[cfg(any(target_arch = "arm", target_arch = "riscv32"))]
pub(crate) type AtomicIdw = super::super::super::fallback::AtomicI64;
// Asserts that the function is called in the correct context.
macro_rules! debug_assert_outline_atomics {
() => {
#[cfg(target_arch = "x86_64")]
{
debug_assert!(!super::detect::detect().cmpxchg16b());
}
#[cfg(target_arch = "powerpc64")]
{
debug_assert!(!super::detect::detect().quadword_atomics());
}
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
{
debug_assert!(!super::detect::detect().zacas());
}
#[cfg(target_arch = "arm")]
{
debug_assert!(!super::has_kuser_cmpxchg64());
}
};
}
#[cold]
pub(crate) unsafe fn atomic_load(src: *mut Udw, order: Ordering) -> Udw {
debug_assert_outline_atomics!();
#[allow(clippy::cast_ptr_alignment)]
// SAFETY: the caller must uphold the safety contract.
unsafe {
(*(src as *const AtomicUdw)).load(order)
}
}
fn_alias! {
#[cold]
pub(crate) unsafe fn(src: *mut Udw) -> Udw;
// fallback's atomic load has at least acquire semantics.
#[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))]
atomic_load_non_seqcst = atomic_load(Ordering::Acquire);
atomic_load_seqcst = atomic_load(Ordering::SeqCst);
}
#[cfg(not(any(target_arch = "arm", target_arch = "riscv32", target_arch = "riscv64")))]
#[cold]
pub(crate) unsafe fn atomic_store(dst: *mut Udw, val: Udw, order: Ordering) {
debug_assert_outline_atomics!();
#[allow(clippy::cast_ptr_alignment)]
// SAFETY: the caller must uphold the safety contract.
unsafe {
(*(dst as *const AtomicUdw)).store(val, order);
}
}
#[cfg(not(any(target_arch = "arm", target_arch = "riscv32", target_arch = "riscv64")))]
fn_alias! {
#[cold]
pub(crate) unsafe fn(dst: *mut Udw, val: Udw);
// fallback's atomic store has at least release semantics.
atomic_store_non_seqcst = atomic_store(Ordering::Release);
atomic_store_seqcst = atomic_store(Ordering::SeqCst);
}
#[cold]
pub(crate) unsafe fn atomic_compare_exchange(
dst: *mut Udw,
old: Udw,
new: Udw,
success: Ordering,
failure: Ordering,
) -> (Udw, bool) {
debug_assert_outline_atomics!();
#[allow(clippy::cast_ptr_alignment)]
// SAFETY: the caller must uphold the safety contract.
unsafe {
match (*(dst as *const AtomicUdw)).compare_exchange(old, new, success, failure) {
Ok(v) => (v, true),
Err(v) => (v, false),
}
}
}
fn_alias! {
#[cold]
pub(crate) unsafe fn(dst: *mut Udw, old: Udw, new: Udw) -> (Udw, bool);
// fallback's atomic CAS has at least AcqRel semantics.
#[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))]
atomic_compare_exchange_non_seqcst
= atomic_compare_exchange(Ordering::AcqRel, Ordering::Acquire);
atomic_compare_exchange_seqcst
= atomic_compare_exchange(Ordering::SeqCst, Ordering::SeqCst);
}
macro_rules! atomic_rmw_3 {
(
$name:ident($atomic_type:ident::$method_name:ident),
$non_seqcst_alias:ident, $seqcst_alias:ident
) => {
#[cold]
pub(crate) unsafe fn $name(dst: *mut Udw, val: Udw, order: Ordering) -> Udw {
debug_assert_outline_atomics!();
#[allow(
clippy::as_underscore,
clippy::cast_possible_wrap,
clippy::cast_ptr_alignment,
clippy::cast_sign_loss
)]
// SAFETY: the caller must uphold the safety contract.
unsafe {
(*(dst as *const $atomic_type)).$method_name(val as _, order) as Udw
}
}
fn_alias! {
#[cold]
pub(crate) unsafe fn(dst: *mut Udw, val: Udw) -> Udw;
// fallback's atomic RMW has at least AcqRel semantics.
#[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))]
$non_seqcst_alias = $name(Ordering::AcqRel);
$seqcst_alias = $name(Ordering::SeqCst);
}
};
}
macro_rules! atomic_rmw_2 {
(
$name:ident($atomic_type:ident::$method_name:ident),
$non_seqcst_alias:ident, $seqcst_alias:ident
) => {
#[cold]
pub(crate) unsafe fn $name(dst: *mut Udw, order: Ordering) -> Udw {
debug_assert_outline_atomics!();
#[allow(clippy::cast_ptr_alignment)]
// SAFETY: the caller must uphold the safety contract.
unsafe {
(*(dst as *const $atomic_type)).$method_name(order) as Udw
}
}
fn_alias! {
#[cold]
pub(crate) unsafe fn(dst: *mut Udw) -> Udw;
// fallback's atomic RMW has at least AcqRel semantics.
#[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))]
$non_seqcst_alias = $name(Ordering::AcqRel);
$seqcst_alias = $name(Ordering::SeqCst);
}
};
}
atomic_rmw_3!(atomic_swap(AtomicUdw::swap), atomic_swap_non_seqcst, atomic_swap_seqcst);
atomic_rmw_3!(atomic_add(AtomicUdw::fetch_add), atomic_add_non_seqcst, atomic_add_seqcst);
atomic_rmw_3!(atomic_sub(AtomicUdw::fetch_sub), atomic_sub_non_seqcst, atomic_sub_seqcst);
atomic_rmw_3!(atomic_and(AtomicUdw::fetch_and), atomic_and_non_seqcst, atomic_and_seqcst);
atomic_rmw_3!(atomic_nand(AtomicUdw::fetch_nand), atomic_nand_non_seqcst, atomic_nand_seqcst);
atomic_rmw_3!(atomic_or(AtomicUdw::fetch_or), atomic_or_non_seqcst, atomic_or_seqcst);
atomic_rmw_3!(atomic_xor(AtomicUdw::fetch_xor), atomic_xor_non_seqcst, atomic_xor_seqcst);
atomic_rmw_3!(atomic_max(AtomicIdw::fetch_max), atomic_max_non_seqcst, atomic_max_seqcst);
atomic_rmw_3!(atomic_umax(AtomicUdw::fetch_max), atomic_umax_non_seqcst, atomic_umax_seqcst);
atomic_rmw_3!(atomic_min(AtomicIdw::fetch_min), atomic_min_non_seqcst, atomic_min_seqcst);
atomic_rmw_3!(atomic_umin(AtomicUdw::fetch_min), atomic_umin_non_seqcst, atomic_umin_seqcst);
atomic_rmw_2!(atomic_not(AtomicUdw::fetch_not), atomic_not_non_seqcst, atomic_not_seqcst);
atomic_rmw_2!(atomic_neg(AtomicUdw::fetch_neg), atomic_neg_non_seqcst, atomic_neg_seqcst);

View File

@@ -0,0 +1,143 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock.rs.
use core::{
mem::ManuallyDrop,
sync::atomic::{self, Ordering},
};
use super::utils::Backoff;
// See mod.rs for details.
#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))]
pub(super) use core::sync::atomic::AtomicU64 as AtomicStamp;
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
pub(super) use core::sync::atomic::AtomicUsize as AtomicStamp;
#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
pub(super) type Stamp = usize;
#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))]
pub(super) type Stamp = u64;
// See mod.rs for details.
pub(super) type AtomicChunk = AtomicStamp;
pub(super) type Chunk = Stamp;
/// A simple stamped lock.
pub(super) struct SeqLock {
/// The current state of the lock.
///
/// All bits except the least significant one hold the current stamp. When locked, the state
/// equals 1 and doesn't contain a valid stamp.
state: AtomicStamp,
}
impl SeqLock {
#[inline]
pub(super) const fn new() -> Self {
Self { state: AtomicStamp::new(0) }
}
/// If not locked, returns the current stamp.
///
/// This method should be called before optimistic reads.
#[inline]
pub(super) fn optimistic_read(&self) -> Option<Stamp> {
let state = self.state.load(Ordering::Acquire);
if state == 1 { None } else { Some(state) }
}
/// Returns `true` if the current stamp is equal to `stamp`.
///
/// This method should be called after optimistic reads to check whether they are valid. The
/// argument `stamp` should correspond to the one returned by method `optimistic_read`.
#[inline]
pub(super) fn validate_read(&self, stamp: Stamp) -> bool {
atomic::fence(Ordering::Acquire);
self.state.load(Ordering::Relaxed) == stamp
}
/// Grabs the lock for writing.
#[inline]
pub(super) fn write(&self) -> SeqLockWriteGuard<'_> {
let mut backoff = Backoff::new();
loop {
let previous = self.state.swap(1, Ordering::Acquire);
if previous != 1 {
atomic::fence(Ordering::Release);
return SeqLockWriteGuard { lock: self, state: previous };
}
while self.state.load(Ordering::Relaxed) == 1 {
backoff.snooze();
}
}
}
}
/// An RAII guard that releases the lock and increments the stamp when dropped.
#[must_use]
pub(super) struct SeqLockWriteGuard<'a> {
/// The parent lock.
lock: &'a SeqLock,
/// The stamp before locking.
state: Stamp,
}
impl SeqLockWriteGuard<'_> {
/// Releases the lock without incrementing the stamp.
#[inline]
pub(super) fn abort(self) {
// We specifically don't want to call drop(), since that's
// what increments the stamp.
let this = ManuallyDrop::new(self);
// Restore the stamp.
//
// Release ordering for synchronizing with `optimistic_read`.
this.lock.state.store(this.state, Ordering::Release);
}
}
impl Drop for SeqLockWriteGuard<'_> {
#[inline]
fn drop(&mut self) {
// Release the lock and increment the stamp.
//
// Release ordering for synchronizing with `optimistic_read`.
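// Adding 2 advances the stamp while keeping the least significant (lock) bit clear.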
self.lock.state.store(self.state.wrapping_add(2), Ordering::Release);
}
}
#[cfg(test)]
mod tests {
use super::SeqLock;
#[test]
fn smoke() {
let lock = SeqLock::new();
let before = lock.optimistic_read().unwrap();
assert!(lock.validate_read(before));
{
let _guard = lock.write();
}
assert!(!lock.validate_read(before));
let after = lock.optimistic_read().unwrap();
assert_ne!(before, after);
}
#[test]
fn test_abort() {
let lock = SeqLock::new();
let before = lock.optimistic_read().unwrap();
{
let guard = lock.write();
guard.abort();
}
let after = lock.optimistic_read().unwrap();
assert_eq!(before, after, "aborted write does not update the stamp");
}
}

View File

@@ -0,0 +1,176 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock_wide.rs.
use core::{
mem::ManuallyDrop,
sync::atomic::{self, AtomicUsize, Ordering},
};
use super::utils::Backoff;
// See mod.rs for details.
pub(super) type AtomicChunk = AtomicUsize;
pub(super) type Chunk = usize;
/// A simple stamped lock.
///
/// The state is represented as two `AtomicUsize`: `state_hi` for high bits and `state_lo` for low
/// bits.
pub(super) struct SeqLock {
/// The high bits of the current state of the lock.
state_hi: AtomicUsize,
/// The low bits of the current state of the lock.
///
/// All bits except the least significant one hold the current stamp. When locked, the state_lo
/// equals 1 and doesn't contain a valid stamp.
state_lo: AtomicUsize,
}
impl SeqLock {
#[inline]
pub(super) const fn new() -> Self {
Self { state_hi: AtomicUsize::new(0), state_lo: AtomicUsize::new(0) }
}
/// If not locked, returns the current stamp.
///
/// This method should be called before optimistic reads.
#[inline]
pub(super) fn optimistic_read(&self) -> Option<(usize, usize)> {
// The acquire loads from `state_hi` and `state_lo` synchronize with the release stores in
// `SeqLockWriteGuard::drop` and `SeqLockWriteGuard::abort`.
//
// As a consequence, we can make sure that (1) all writes within the era of `state_hi - 1`
// happens before now; and therefore, (2) if `state_lo` is even, all writes within the
// critical section of (`state_hi`, `state_lo`) happens before now.
let state_hi = self.state_hi.load(Ordering::Acquire);
let state_lo = self.state_lo.load(Ordering::Acquire);
if state_lo == 1 { None } else { Some((state_hi, state_lo)) }
}
/// Returns `true` if the current stamp is equal to `stamp`.
///
/// This method should be called after optimistic reads to check whether they are valid. The
/// argument `stamp` should correspond to the one returned by method `optimistic_read`.
#[inline]
pub(super) fn validate_read(&self, stamp: (usize, usize)) -> bool {
// Thanks to the fence, if we're noticing any modification to the data at the critical
// section of `(stamp.0, stamp.1)`, then the critical section's write of 1 to state_lo should be
// visible.
atomic::fence(Ordering::Acquire);
// So if `state_lo` coincides with `stamp.1`, then either (1) we're noticing no modification
// to the data after the critical section of `(stamp.0, stamp.1)`, or (2) `state_lo` wrapped
// around.
//
// If (2) is the case, the acquire ordering ensures we see the new value of `state_hi`.
let state_lo = self.state_lo.load(Ordering::Acquire);
// If (2) is the case and `state_hi` coincides with `stamp.0`, then `state_hi` has also wrapped
// around, in which case we give up on validating the read correctly.
let state_hi = self.state_hi.load(Ordering::Relaxed);
// Except for the case that both `state_hi` and `state_lo` wrapped around, the following
// condition implies that we're noticing no modification to the data after the critical
// section of `(stamp.0, stamp.1)`.
(state_hi, state_lo) == stamp
}
/// Grabs the lock for writing.
#[inline]
pub(super) fn write(&self) -> SeqLockWriteGuard<'_> {
let mut backoff = Backoff::new();
loop {
let previous = self.state_lo.swap(1, Ordering::Acquire);
if previous != 1 {
// To synchronize with the acquire fence in `validate_read` via any modification to
// the data at the critical section of `(state_hi, previous)`.
atomic::fence(Ordering::Release);
return SeqLockWriteGuard { lock: self, state_lo: previous };
}
while self.state_lo.load(Ordering::Relaxed) == 1 {
backoff.snooze();
}
}
}
}
/// An RAII guard that releases the lock and increments the stamp when dropped.
#[must_use]
pub(super) struct SeqLockWriteGuard<'a> {
/// The parent lock.
lock: &'a SeqLock,
/// The stamp before locking.
state_lo: usize,
}
impl SeqLockWriteGuard<'_> {
/// Releases the lock without incrementing the stamp.
#[inline]
pub(super) fn abort(self) {
// We specifically don't want to call drop(), since that's
// what increments the stamp.
let this = ManuallyDrop::new(self);
// Restore the stamp.
//
// Release ordering for synchronizing with `optimistic_read`.
this.lock.state_lo.store(this.state_lo, Ordering::Release);
}
}
impl Drop for SeqLockWriteGuard<'_> {
#[inline]
fn drop(&mut self) {
let state_lo = self.state_lo.wrapping_add(2);
// Increase the high bits if the low bits wrap around.
//
// Release ordering for synchronizing with `optimistic_read`.
if state_lo == 0 {
let state_hi = self.lock.state_hi.load(Ordering::Relaxed);
self.lock.state_hi.store(state_hi.wrapping_add(1), Ordering::Release);
}
// Release the lock and increment the stamp.
//
// Release ordering for synchronizing with `optimistic_read`.
self.lock.state_lo.store(state_lo, Ordering::Release);
}
}
#[cfg(test)]
mod tests {
use super::SeqLock;
#[test]
fn smoke() {
let lock = SeqLock::new();
let before = lock.optimistic_read().unwrap();
assert!(lock.validate_read(before));
{
let _guard = lock.write();
}
assert!(!lock.validate_read(before));
let after = lock.optimistic_read().unwrap();
assert_ne!(before, after);
}
#[test]
fn test_abort() {
let lock = SeqLock::new();
let before = lock.optimistic_read().unwrap();
{
let guard = lock.write();
guard.abort();
}
let after = lock.optimistic_read().unwrap();
assert_eq!(before, after, "aborted write does not update the stamp");
}
}

View File

@@ -0,0 +1,147 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
use core::ops;
// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.21/crossbeam-utils/src/cache_padded.rs.
/// Pads and aligns a value to the length of a cache line.
// Starting from Intel's Sandy Bridge, spatial prefetcher is now pulling pairs of 64-byte cache
// lines at a time, so we have to align to 128 bytes rather than 64.
//
// Sources:
// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
// - https://github.com/facebook/folly/blob/1b5288e6eea6df074758f877c849b6e73bbb9fbb/folly/lang/Align.h#L107
//
// aarch64/arm64ec's big.LITTLE architecture has asymmetric cores and "big" cores have 128-byte cache line size.
//
// Sources:
// - https://www.mono-project.com/news/2016/09/12/arm64-icache/
//
// powerpc64 has 128-byte cache line size.
//
// Sources:
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/powerpc/include/asm/cache.h#L26
#[cfg_attr(
any(
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "powerpc64",
),
repr(align(128))
)]
// arm, mips, mips64, sparc, and hexagon have 32-byte cache line size.
//
// Sources:
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L17
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/hexagon/include/asm/cache.h#L12
#[cfg_attr(
any(
target_arch = "arm",
target_arch = "mips",
target_arch = "mips32r6",
target_arch = "mips64",
target_arch = "mips64r6",
target_arch = "sparc",
target_arch = "hexagon",
),
repr(align(32))
)]
// m68k has 16-byte cache line size.
//
// Sources:
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/m68k/include/asm/cache.h#L9
#[cfg_attr(target_arch = "m68k", repr(align(16)))]
// s390x has 256-byte cache line size.
//
// Sources:
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/s390/include/asm/cache.h#L13
#[cfg_attr(target_arch = "s390x", repr(align(256)))]
// x86, wasm, riscv, and sparc64 have 64-byte cache line size.
//
// Sources:
// - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/riscv/include/asm/cache.h#L10
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L19
//
// All others are assumed to have 64-byte cache line size.
#[cfg_attr(
not(any(
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "arm",
target_arch = "mips",
target_arch = "mips32r6",
target_arch = "mips64",
target_arch = "mips64r6",
target_arch = "sparc",
target_arch = "hexagon",
target_arch = "m68k",
target_arch = "s390x",
)),
repr(align(64))
)]
pub(crate) struct CachePadded<T> {
value: T,
}
impl<T> CachePadded<T> {
#[inline]
pub(crate) const fn new(value: T) -> Self {
Self { value }
}
}
impl<T> ops::Deref for CachePadded<T> {
type Target = T;
#[inline]
fn deref(&self) -> &Self::Target {
&self.value
}
}
// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/backoff.rs.
// Adjusted to reduce spinning.
/// Performs exponential backoff in spin loops.
pub(crate) struct Backoff {
step: u32,
}
// https://github.com/oneapi-src/oneTBB/blob/v2021.5.0/include/oneapi/tbb/detail/_utils.h#L46-L48
const SPIN_LIMIT: u32 = 4;
impl Backoff {
#[inline]
pub(crate) const fn new() -> Self {
Self { step: 0 }
}
#[inline]
pub(crate) fn snooze(&mut self) {
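// Spin with exponential backoff while step <= SPIN_LIMIT; beyond that, yield to the OS
// scheduler when std is available, otherwise keep spinning.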
if self.step <= SPIN_LIMIT {
for _ in 0..1 << self.step {
#[allow(deprecated)]
core::sync::atomic::spin_loop_hint();
}
self.step += 1;
} else {
#[cfg(not(feature = "std"))]
for _ in 0..1 << self.step {
#[allow(deprecated)]
core::sync::atomic::spin_loop_hint();
}
#[cfg(feature = "std")]
std::thread::yield_now();
}
}
}

View File

@@ -0,0 +1,266 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Atomic float implementation based on AArch64 with FEAT_LSFE.
This module provides atomic float implementations using FEAT_LSFE instructions.
Generated asm:
- aarch64 (+lsfe) https://godbolt.org/z/7vaxeofv1
*/
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::sync::atomic::Ordering;
#[cfg(portable_atomic_unstable_f16)]
use super::int::AtomicF16;
#[cfg(portable_atomic_unstable_f128)]
use super::int::AtomicF128;
use super::int::{AtomicF32, AtomicF64};
// TODO: optimize no return cases:
// https://developer.arm.com/documentation/ddi0602/2024-12/SIMD-FP-Instructions/STFADD--STFADDL--Floating-point-atomic-add-in-memory--without-return-
// https://developer.arm.com/documentation/ddi0602/2024-12/SIMD-FP-Instructions/STFMAXNM--STFMAXNML--Floating-point-atomic-maximum-number-in-memory--without-return-
// https://developer.arm.com/documentation/ddi0602/2024-12/SIMD-FP-Instructions/STFMINNM--STFMINNML--Floating-point-atomic-minimum-number-in-memory--without-return-
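// `.arch_extension lsfe` lets the assembler accept FEAT_LSFE mnemonics in the asm blocks below.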
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! start_lsfe {
() => {
".arch_extension lsfe"
};
}
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! atomic_rmw {
($op:ident, $order:ident) => {
atomic_rmw!($op, $order, write = $order)
};
($op:ident, $order:ident, write = $write:ident) => {
match $order {
Ordering::Relaxed => $op!("", "", ""),
Ordering::Acquire => $op!("a", "", ""),
Ordering::Release => $op!("", "l", ""),
Ordering::AcqRel => $op!("a", "l", ""),
// In MSVC environments, SeqCst stores/writes need fences after writes.
// https://reviews.llvm.org/D141748
#[cfg(target_env = "msvc")]
Ordering::SeqCst if $write == Ordering::SeqCst => $op!("a", "l", "dmb ish"),
// AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
Ordering::SeqCst => $op!("a", "l", ""),
_ => unreachable!(),
}
};
}
#[cfg(portable_atomic_pre_llvm_20)]
macro_rules! atomic_rmw_inst {
($op:ident, $order:ident) => {
atomic_rmw_inst!($op, $order, write = $order)
};
($op:ident, $order:ident, write = $write:ident) => {
match $order {
Ordering::Relaxed => $op!("2", ""), // ""
Ordering::Acquire => $op!("a", ""), // "a"
Ordering::Release => $op!("6", ""), // "l"
Ordering::AcqRel => $op!("e", ""), // "al"
// In MSVC environments, SeqCst stores/writes need fences after writes.
// https://reviews.llvm.org/D141748
#[cfg(target_env = "msvc")]
Ordering::SeqCst if $write == Ordering::SeqCst => $op!("e", "dmb ish"),
// AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
Ordering::SeqCst => $op!("e", ""),
_ => unreachable!(),
}
};
}
macro_rules! atomic_float {
($atomic_type:ident, $float_type:ident, $modifier:tt, $inst_modifier:tt) => {
impl $atomic_type {
#[inline]
pub(crate) fn fetch_add(&self, val: $float_type, order: Ordering) -> $float_type {
let dst = self.as_ptr();
let out;
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
//
// Refs: https://developer.arm.com/documentation/ddi0602/2024-12/SIMD-FP-Instructions/LDFADD--LDFADDA--LDFADDAL--LDFADDL--Floating-point-atomic-add-in-memory-
unsafe {
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! add {
($acquire:tt, $release:tt, $fence:tt) => {
asm!(
start_lsfe!(),
concat!("ldfadd", $acquire, $release, " {out:", $modifier, "}, {val:", $modifier, "}, [{dst}]"),
$fence,
dst = in(reg) ptr_reg!(dst),
val = in(vreg) val,
out = lateout(vreg) out,
options(nostack),
)
};
}
#[cfg(not(portable_atomic_pre_llvm_20))]
atomic_rmw!(add, order);
// LLVM supports FEAT_LSFE instructions on LLVM 20+, so use .inst directive on old LLVM.
// https://github.com/llvm/llvm-project/commit/67ff5ba9af9754261abe11d762af11532a816126
#[cfg(portable_atomic_pre_llvm_20)]
macro_rules! add {
($order:tt, $fence:tt) => {
asm!(
// ldfadd{,a,l,al} {h,s,d}0, {h,s,d}1, [x2]
concat!(".inst 0x", $inst_modifier, "c", $order, "00041"),
$fence,
in("x2") ptr_reg!(dst),
in("v1") val,
out("v0") out,
options(nostack),
)
};
}
#[cfg(portable_atomic_pre_llvm_20)]
atomic_rmw_inst!(add, order);
}
out
}
#[inline]
pub(crate) fn fetch_sub(&self, val: $float_type, order: Ordering) -> $float_type {
// There is no atomic sub instruction, so add `-val`.
self.fetch_add(-val, order)
}
#[inline]
pub(crate) fn fetch_max(&self, val: $float_type, order: Ordering) -> $float_type {
let dst = self.as_ptr();
let out;
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
//
// Refs: https://developer.arm.com/documentation/ddi0602/2024-12/SIMD-FP-Instructions/LDFMAXNM--LDFMAXNMA--LDFMAXNMAL--LDFMAXNML--Floating-point-atomic-maximum-number-in-memory-
unsafe {
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! max {
($acquire:tt, $release:tt, $fence:tt) => {
asm!(
start_lsfe!(),
concat!("ldfmaxnm", $acquire, $release, " {out:", $modifier, "}, {val:", $modifier, "}, [{dst}]"),
$fence,
dst = in(reg) ptr_reg!(dst),
val = in(vreg) val,
out = lateout(vreg) out,
options(nostack),
)
};
}
#[cfg(not(portable_atomic_pre_llvm_20))]
atomic_rmw!(max, order);
// LLVM supports FEAT_LSFE instructions on LLVM 20+, so use .inst directive on old LLVM.
// https://github.com/llvm/llvm-project/commit/67ff5ba9af9754261abe11d762af11532a816126
#[cfg(portable_atomic_pre_llvm_20)]
macro_rules! max {
($order:tt, $fence:tt) => {
asm!(
// ldfmaxnm{,a,l,al} {h,s,d}0, {h,s,d}1, [x2]
concat!(".inst 0x", $inst_modifier, "c", $order, "06041"),
$fence,
in("x2") ptr_reg!(dst),
in("v1") val,
out("v0") out,
options(nostack),
)
};
}
#[cfg(portable_atomic_pre_llvm_20)]
atomic_rmw_inst!(max, order);
}
out
}
#[inline]
pub(crate) fn fetch_min(&self, val: $float_type, order: Ordering) -> $float_type {
let dst = self.as_ptr();
let out;
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
//
// Refs: https://developer.arm.com/documentation/ddi0602/2024-12/SIMD-FP-Instructions/LDFMINNM--LDFMINNMA--LDFMINNMAL--LDFMINNML--Floating-point-atomic-minimum-number-in-memory-
unsafe {
#[cfg(not(portable_atomic_pre_llvm_20))]
macro_rules! min {
($acquire:tt, $release:tt, $fence:tt) => {
asm!(
start_lsfe!(),
concat!("ldfminnm", $acquire, $release, " {out:", $modifier, "}, {val:", $modifier, "}, [{dst}]"),
$fence,
dst = in(reg) ptr_reg!(dst),
val = in(vreg) val,
out = lateout(vreg) out,
options(nostack),
)
};
}
#[cfg(not(portable_atomic_pre_llvm_20))]
atomic_rmw!(min, order);
// LLVM supports FEAT_LSFE instructions on LLVM 20+, so use .inst directive on old LLVM.
// https://github.com/llvm/llvm-project/commit/67ff5ba9af9754261abe11d762af11532a816126
#[cfg(portable_atomic_pre_llvm_20)]
macro_rules! min {
($order:tt, $fence:tt) => {
asm!(
// ldfminnm{,a,l,al} {h,s,d}0, {h,s,d}1, [x2]
concat!(".inst 0x", $inst_modifier, "c", $order, "07041"),
$fence,
in("x2") ptr_reg!(dst),
in("v1") val,
out("v0") out,
options(nostack),
)
};
}
#[cfg(portable_atomic_pre_llvm_20)]
atomic_rmw_inst!(min, order);
}
out
}
}
};
}
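// The third macro argument is the asm register template modifier selecting the h/s/d view of the
// FP register; the fourth is the leading hex digit of the `.inst` encoding, which encodes the
// operand size on pre-LLVM-20 toolchains.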
#[cfg(portable_atomic_unstable_f16)]
atomic_float!(AtomicF16, f16, "h", "7");
atomic_float!(AtomicF32, f32, "s", "b");
atomic_float!(AtomicF64, f64, "d", "f");
#[cfg(portable_atomic_unstable_f128)]
impl AtomicF128 {
#[inline]
pub(crate) fn fetch_add(&self, val: f128, order: Ordering) -> f128 {
self.fetch_update_(order, |x| x + val)
}
#[inline]
pub(crate) fn fetch_sub(&self, val: f128, order: Ordering) -> f128 {
self.fetch_update_(order, |x| x - val)
}
#[inline]
pub(super) fn fetch_update_<F>(&self, order: Ordering, mut f: F) -> f128
where
F: FnMut(f128) -> f128,
{
// This is a private function and all instances of `f` only operate on the value
// loaded, so there is no need to synchronize the first load/failed CAS.
let mut prev = self.load(Ordering::Relaxed);
loop {
let next = f(prev);
match self.compare_exchange_weak(prev, next, order, Ordering::Relaxed) {
Ok(x) => return x,
Err(next_prev) => prev = next_prev,
}
}
}
#[inline]
pub(crate) fn fetch_max(&self, val: f128, order: Ordering) -> f128 {
self.fetch_update_(order, |x| x.max(val))
}
#[inline]
pub(crate) fn fetch_min(&self, val: f128, order: Ordering) -> f128 {
self.fetch_update_(order, |x| x.min(val))
}
}

View File

@@ -0,0 +1,251 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Atomic float implementation based on atomic integer.
This module provides atomic float implementations using atomic integer.
Note that most `fetch_*` operations on atomic floats are implemented using
CAS loops, which can be slower than the equivalent operations on atomic integers.
AArch64 with FEAT_LSFE and GPU targets have atomic instructions for float.
See aarch64.rs for AArch64 with FEAT_LSFE.
GPU targets will also use architecture-specific implementations instead of this implementation in the
future: https://github.com/taiki-e/portable-atomic/issues/34 / https://github.com/taiki-e/portable-atomic/pull/45
*/
// TODO: fetch_{minimum,maximum}* https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p3008r2.html / https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p0493r5.pdf
#![cfg_attr(
all(target_pointer_width = "16", not(feature = "fallback")),
allow(unused_imports, unused_macros)
)]
use core::{cell::UnsafeCell, sync::atomic::Ordering};
macro_rules! atomic_float {
(
$atomic_type:ident, $float_type:ident, $atomic_int_type:ident, $int_type:ident,
$align:literal
) => {
#[repr(C, align($align))]
pub(crate) struct $atomic_type {
v: UnsafeCell<$float_type>,
}
// Send is implicitly implemented.
// SAFETY: any data races are prevented by atomic operations.
unsafe impl Sync for $atomic_type {}
impl $atomic_type {
#[inline]
pub(crate) const fn new(v: $float_type) -> Self {
Self { v: UnsafeCell::new(v) }
}
#[inline]
pub(crate) fn is_lock_free() -> bool {
crate::$atomic_int_type::is_lock_free()
}
pub(crate) const IS_ALWAYS_LOCK_FREE: bool =
crate::$atomic_int_type::is_always_lock_free();
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn load(&self, order: Ordering) -> $float_type {
$float_type::from_bits(self.as_bits().load(order))
}
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn store(&self, val: $float_type, order: Ordering) {
self.as_bits().store(val.to_bits(), order)
}
const_fn! {
const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))];
#[inline(always)]
pub(crate) const fn as_bits(&self) -> &crate::$atomic_int_type {
// SAFETY: $atomic_type and $atomic_int_type have the same layout,
// and there is no concurrent access to the value that does not go through this method.
unsafe { &*(self as *const Self as *const crate::$atomic_int_type) }
}
}
#[inline]
pub(crate) const fn as_ptr(&self) -> *mut $float_type {
self.v.get()
}
}
cfg_has_atomic_cas_or_amo32! {
impl $atomic_type {
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn swap(&self, val: $float_type, order: Ordering) -> $float_type {
$float_type::from_bits(self.as_bits().swap(val.to_bits(), order))
}
cfg_has_atomic_cas! {
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn compare_exchange(
&self,
current: $float_type,
new: $float_type,
success: Ordering,
failure: Ordering,
) -> Result<$float_type, $float_type> {
match self.as_bits().compare_exchange(
current.to_bits(),
new.to_bits(),
success,
failure,
) {
Ok(v) => Ok($float_type::from_bits(v)),
Err(v) => Err($float_type::from_bits(v)),
}
}
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn compare_exchange_weak(
&self,
current: $float_type,
new: $float_type,
success: Ordering,
failure: Ordering,
) -> Result<$float_type, $float_type> {
match self.as_bits().compare_exchange_weak(
current.to_bits(),
new.to_bits(),
success,
failure,
) {
Ok(v) => Ok($float_type::from_bits(v)),
Err(v) => Err($float_type::from_bits(v)),
}
}
#[cfg(not(all(
any(target_arch = "aarch64", target_arch = "arm64ec"),
any(target_feature = "lsfe", portable_atomic_target_feature = "lsfe"),
target_feature = "neon", // for vreg
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
)))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_add(&self, val: $float_type, order: Ordering) -> $float_type {
self.fetch_update_(order, |x| x + val)
}
#[cfg(not(all(
any(target_arch = "aarch64", target_arch = "arm64ec"),
any(target_feature = "lsfe", portable_atomic_target_feature = "lsfe"),
target_feature = "neon", // for vreg
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
)))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_sub(&self, val: $float_type, order: Ordering) -> $float_type {
self.fetch_update_(order, |x| x - val)
}
#[cfg(not(all(
any(target_arch = "aarch64", target_arch = "arm64ec"),
any(target_feature = "lsfe", portable_atomic_target_feature = "lsfe"),
target_feature = "neon", // for vreg
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
)))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
fn fetch_update_<F>(&self, order: Ordering, mut f: F) -> $float_type
where
F: FnMut($float_type) -> $float_type,
{
// This is a private function and all instances of `f` only operate on the value
// loaded, so there is no need to synchronize the first load/failed CAS.
let mut prev = self.load(Ordering::Relaxed);
loop {
let next = f(prev);
match self.compare_exchange_weak(prev, next, order, Ordering::Relaxed) {
Ok(x) => return x,
Err(next_prev) => prev = next_prev,
}
}
}
#[cfg(not(all(
any(target_arch = "aarch64", target_arch = "arm64ec"),
any(target_feature = "lsfe", portable_atomic_target_feature = "lsfe"),
target_feature = "neon", // for vreg
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
)))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_max(&self, val: $float_type, order: Ordering) -> $float_type {
self.fetch_update_(order, |x| x.max(val))
}
#[cfg(not(all(
any(target_arch = "aarch64", target_arch = "arm64ec"),
any(target_feature = "lsfe", portable_atomic_target_feature = "lsfe"),
target_feature = "neon", // for vreg
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
)))]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_min(&self, val: $float_type, order: Ordering) -> $float_type {
self.fetch_update_(order, |x| x.min(val))
}
} // cfg_has_atomic_cas!
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_neg(&self, order: Ordering) -> $float_type {
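// `!0 / 2 + 1` is the sign-bit mask (e.g. 0x8000_0000 for u32); XOR-ing with it flips the
// sign of the IEEE 754 value.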
const NEG_MASK: $int_type = !0 / 2 + 1;
$float_type::from_bits(self.as_bits().fetch_xor(NEG_MASK, order))
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn fetch_abs(&self, order: Ordering) -> $float_type {
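// `!0 / 2` keeps every bit except the sign bit (e.g. 0x7FFF_FFFF for u32), so AND-ing with it
// yields the absolute value of the IEEE 754 value.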
const ABS_MASK: $int_type = !0 / 2;
$float_type::from_bits(self.as_bits().fetch_and(ABS_MASK, order))
}
}
} // cfg_has_atomic_cas_or_amo32!
};
}
#[cfg(portable_atomic_unstable_f16)]
cfg_has_atomic_16! {
atomic_float!(AtomicF16, f16, AtomicU16, u16, 2);
}
cfg_has_atomic_32! {
atomic_float!(AtomicF32, f32, AtomicU32, u32, 4);
}
cfg_has_atomic_64! {
atomic_float!(AtomicF64, f64, AtomicU64, u64, 8);
}
#[cfg(portable_atomic_unstable_f128)]
cfg_has_atomic_128! {
atomic_float!(AtomicF128, f128, AtomicU128, u128, 16);
}

View File

@@ -0,0 +1,33 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Atomic float implementations
*/
#![allow(clippy::float_arithmetic)]
mod int;
#[cfg(all(
any(target_arch = "aarch64", target_arch = "arm64ec"),
any(target_feature = "lsfe", portable_atomic_target_feature = "lsfe"),
target_feature = "neon", // for vreg
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
))]
mod aarch64;
#[cfg(portable_atomic_unstable_f16)]
cfg_has_atomic_16! {
pub(crate) use self::int::AtomicF16;
}
cfg_has_atomic_32! {
pub(crate) use self::int::AtomicF32;
}
cfg_has_atomic_64! {
pub(crate) use self::int::AtomicF64;
}
#[cfg(portable_atomic_unstable_f128)]
cfg_has_atomic_128! {
pub(crate) use self::int::AtomicF128;
}

View File

@@ -0,0 +1,36 @@
# Implementation of disabling interrupts
This module is used to provide atomic CAS for targets where atomic CAS is not available in the standard library.
- MSP430 and AVR are always single-core and have no unprivileged mode, so this module is always used.
- Armv6-M (thumbv6m), pre-v6 Arm (e.g., thumbv4t, thumbv5te), RISC-V without the A extension, and Xtensa could be multi-core, so this module is used when the `unsafe-assume-single-core` feature (or `portable_atomic_unsafe_assume_single_core` cfg) is enabled.
The `unsafe-assume-single-core` implementation uses privileged instructions to disable interrupts, so it usually doesn't work in unprivileged mode.
Enabling this feature in an environment where privileged instructions are not available, or where the instructions used are not sufficient to disable interrupts in the system, is also usually considered **unsound**, although the details are system-dependent.
Consider using the [`critical-section` feature](../../../README.md#optional-features-critical-section) for systems that cannot use the `unsafe-assume-single-core` feature (or `portable_atomic_unsafe_assume_single_core` cfg).
For some targets, the implementation can be changed by explicitly enabling features.
- On Armv6-M, this disables interrupts by modifying the PRIMASK register.
- On pre-v6 Arm, this disables interrupts by modifying the I (IRQ mask) bit of the CPSR.
- On pre-v6 Arm with the `disable-fiq` feature (or `portable_atomic_disable_fiq` cfg), this disables interrupts by modifying the I (IRQ mask) bit and F (FIQ mask) bit of the CPSR.
- On RISC-V (without A-extension), this disables interrupts by modifying the MIE (Machine Interrupt Enable) bit of the `mstatus` register.
- On RISC-V (without A-extension) with the `s-mode` feature (or `portable_atomic_s_mode` cfg), this disables interrupts by modifying the SIE (Supervisor Interrupt Enable) bit of the `sstatus` register.
- On RISC-V (without A-extension) with the `zaamo` target feature (or `force-amo` feature or `portable_atomic_force_amo` cfg), this uses AMO instructions for RMWs that have corresponding AMO instructions even if A-extension is disabled. For other RMWs, this disables interrupts as usual.
- On MSP430, this disables interrupts by modifying the GIE (Global Interrupt Enable) bit of the status register (SR).
- On AVR, this disables interrupts by modifying the I (Global Interrupt Enable) bit of the status register (SREG).
- On Xtensa, this disables interrupts by modifying the PS special register.
Some operations don't require disabling interrupts:
- On architectures except for AVR: loads and stores with pointer size or smaller
- On AVR: 8-bit loads and stores
- On MSP430 additionally: {8,16}-bit `add,sub,and,or,xor,not`
- On RISC-V with the `zaamo` target feature (or `portable_atomic_target_feature="zaamo"` cfg or `force-amo` feature or `portable_atomic_force_amo` cfg) additionally: 32-bit(RV32)/{32,64}-bit(RV64) `swap,fetch_{add,sub,and,or,xor,not,max,min},add,sub,and,or,xor,not`, {8,16}-bit `fetch_{and,or,xor,not},and,or,xor,not`[^1], and all operations of `AtomicBool`
However, when the `critical-section` feature is enabled, critical sections are taken for all atomic operations.
Feel free to submit an issue if your target is not supported yet.
[^1]: With the `zabha` target feature, {8,16}-bit `swap,fetch_{add,sub,max,min},add,sub` too.
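The CAS provided by this module is, conceptually, just a plain read-modify-write performed while interrupts are masked. The following is a minimal sketch of that idea, not this module's actual code; `disable`/`restore` are hypothetical stand-ins for the architecture-specific sequences listed above:

```rust
use core::cell::UnsafeCell;

type State = u32;

fn disable() -> State {
    // A real implementation executes the architecture-specific privileged
    // instruction here and returns the previous interrupt state.
    0
}

unsafe fn restore(_prev: State) {
    // A real implementation writes the saved interrupt state back.
}

// With interrupts masked on a single-core system, nothing can run between
// the load and the store below, so the sequence behaves as an atomic CAS.
fn compare_exchange_u32(v: &UnsafeCell<u32>, current: u32, new: u32) -> Result<u32, u32> {
    let state = disable();
    // No concurrent access to `v` is possible here: interrupts are disabled
    // and this is the only core.
    let prev = unsafe { v.get().read() };
    let result = if prev == current {
        unsafe { v.get().write(new) };
        Ok(prev)
    } else {
        Err(prev)
    };
    // SAFETY: `state` was returned by the matching `disable` call above.
    unsafe { restore(state) };
    result
}
```

Restoring the previously saved state, rather than unconditionally re-enabling interrupts, keeps the pattern correct when it is entered with interrupts already disabled.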

View File

@@ -0,0 +1,151 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Refs: https://developer.arm.com/documentation/ddi0406/cb/System-Level-Architecture/The-System-Level-Programmers--Model/ARM-processor-modes-and-ARM-core-registers/Program-Status-Registers--PSRs-
Generated asm:
- armv5te https://godbolt.org/z/fhaW3d9Kv
*/
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
// - 0x80 - I (IRQ mask) bit (1 << 7)
// - 0x40 - F (FIQ mask) bit (1 << 6)
// We disable only IRQs by default. See also https://github.com/taiki-e/portable-atomic/pull/28#issuecomment-1214146912.
#[cfg(not(portable_atomic_disable_fiq))]
macro_rules! mask {
() => {
"0x80"
};
}
#[cfg(portable_atomic_disable_fiq)]
macro_rules! mask {
() => {
"0xC0" // 0x80 | 0x40
};
}
pub(super) type State = u32;
/// Disables interrupts and returns the previous interrupt state.
#[inline]
#[instruction_set(arm::a32)]
pub(super) fn disable() -> State {
let cpsr: State;
// SAFETY: reading CPSR and disabling interrupts are safe.
// (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
unsafe {
asm!(
"mrs {prev}, cpsr", // prev = CPSR
concat!("orr {new}, {prev}, ", mask!()), // new = prev | mask
"msr cpsr_c, {new}", // CPSR.{I,F,T,M} = new.{I,F,T,M}
prev = out(reg) cpsr,
new = out(reg) _,
// Do not use `nomem` and `readonly` because we need to prevent subsequent memory accesses from being reordered before interrupts are disabled.
options(nostack, preserves_flags),
);
}
cpsr
}
/// Restores the previous interrupt state.
///
/// # Safety
///
/// The state must be the one retrieved by the previous `disable`.
#[inline]
#[instruction_set(arm::a32)]
pub(super) unsafe fn restore(prev_cpsr: State) {
// SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`.
//
// This clobbers the control field mask byte of CPSR. See msp430.rs for why this is safe.
// (preserves_flags is fine because we only clobber the I, F, T, and M bits of CPSR.)
//
// Refs: https://developer.arm.com/documentation/dui0473/m/arm-and-thumb-instructions/msr--general-purpose-register-to-psr-
unsafe {
// Do not use `nomem` and `readonly` because we need to prevent preceding memory accesses from being reordered after interrupts are enabled.
asm!(
"msr cpsr_c, {prev_cpsr}", // CPSR.{I,F,T,M} = prev_cpsr.{I,F,T,M}
prev_cpsr = in(reg) prev_cpsr,
options(nostack, preserves_flags),
);
}
}
// On pre-v6 Arm, we cannot use core::sync::atomic here because its non-relaxed
// load/store call the `__sync_*` builtins (pre-v6 Arm doesn't have the Data
// Memory Barrier instruction).
//
// Generated asm:
// - armv5te https://godbolt.org/z/deqTqPzqz
pub(crate) mod atomic {
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::{cell::UnsafeCell, sync::atomic::Ordering};
macro_rules! atomic {
($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty $(as $cast:ty)?, $suffix:tt) => {
#[repr(transparent)]
pub(crate) struct $atomic_type $(<$($generics)*>)? {
v: UnsafeCell<$value_type>,
}
// Send is implicitly implemented for atomic integers, but not for atomic pointers.
// SAFETY: any data races are prevented by atomic operations.
unsafe impl $(<$($generics)*>)? Send for $atomic_type $(<$($generics)*>)? {}
// SAFETY: any data races are prevented by atomic operations.
unsafe impl $(<$($generics)*>)? Sync for $atomic_type $(<$($generics)*>)? {}
impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
#[inline]
pub(crate) fn load(&self, _order: Ordering) -> $value_type {
let src = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
let out $(: $cast)?;
// inline asm without nomem/readonly implies compiler fence.
// And compiler fence is fine because the user explicitly declares that
// the system is single-core by using an unsafe cfg.
asm!(
concat!("ldr", $suffix, " {out}, [{src}]"), // atomic { out = *src }
src = in(reg) src,
out = lateout(reg) out,
options(nostack, preserves_flags),
);
out $(as $cast as $value_type)?
}
}
#[inline]
pub(crate) fn store(&self, val: $value_type, _order: Ordering) {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
// inline asm without nomem/readonly implies compiler fence.
// And compiler fence is fine because the user explicitly declares that
// the system is single-core by using an unsafe cfg.
asm!(
concat!("str", $suffix, " {val}, [{dst}]"), // atomic { *dst = val }
dst = in(reg) dst,
val = in(reg) val $(as $cast)?,
options(nostack, preserves_flags),
);
}
}
}
};
}
atomic!(AtomicI8, i8, "b");
atomic!(AtomicU8, u8, "b");
atomic!(AtomicI16, i16, "h");
atomic!(AtomicU16, u16, "h");
atomic!(AtomicI32, i32, "");
atomic!(AtomicU32, u32, "");
atomic!(AtomicIsize, isize, "");
atomic!(AtomicUsize, usize, "");
atomic!([T] AtomicPtr, *mut T as *mut u8, "");
}

View File

@@ -0,0 +1,56 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Adapted from https://github.com/rust-embedded/cortex-m.
Refs: https://developer.arm.com/documentation/ddi0419/c/System-Level-Architecture/System-Level-Programmers--Model/Registers/The-special-purpose-mask-register--PRIMASK
Generated asm:
- armv6-m https://godbolt.org/z/1sqKnsY6n
*/
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
pub(super) use core::sync::atomic;
pub(super) type State = u32;
/// Disables interrupts and returns the previous interrupt state.
#[inline(always)]
pub(super) fn disable() -> State {
let primask: State;
// SAFETY: reading the priority mask register and disabling interrupts are safe.
// (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
unsafe {
// Do not use `nomem` and `readonly` because we need to prevent subsequent memory accesses from being reordered before interrupts are disabled.
asm!(
"mrs {primask}, PRIMASK", // primask = PRIMASK
"cpsid i", // PRIMASK.PM = 1
primask = out(reg) primask,
options(nostack, preserves_flags),
);
}
primask
}
/// Restores the previous interrupt state.
///
/// # Safety
///
/// The state must be the one retrieved by the previous `disable`.
#[inline(always)]
pub(super) unsafe fn restore(prev_primask: State) {
// SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
// and we've checked that interrupts were enabled before disabling interrupts.
unsafe {
// This clobbers the entire PRIMASK register. See msp430.rs for why this is safe.
//
// Do not use `nomem` and `readonly` because we need to prevent preceding memory accesses from being reordered after interrupts are enabled.
asm!(
"msr PRIMASK, {prev_primask}", // PRIMASK = prev_primask
prev_primask = in(reg) prev_primask,
options(nostack, preserves_flags),
);
}
}

View File

@@ -0,0 +1,72 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Adapted from https://github.com/Rahix/avr-device.
See also src/imp/avr.rs.
Refs:
- AVR® Instruction Set Manual, Rev. DS40002198B
https://ww1.microchip.com/downloads/en/DeviceDoc/AVR-InstructionSet-Manual-DS40002198.pdf
Generated asm:
- avr https://godbolt.org/z/W5jxGsToc
*/
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
#[cfg(not(portable_atomic_no_asm))]
pub(super) use super::super::avr as atomic;
pub(super) type State = u8;
/// Disables interrupts and returns the previous interrupt state.
#[inline(always)]
pub(super) fn disable() -> State {
let sreg: State;
// SAFETY: reading the status register (SREG) and disabling interrupts are safe.
// (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
unsafe {
// Do not use `nomem` and `readonly` because we need to prevent subsequent memory accesses from being reordered before interrupts are disabled.
// Do not use `preserves_flags` because CLI modifies the I bit of the status register (SREG).
// Refs: https://ww1.microchip.com/downloads/en/DeviceDoc/AVR-InstructionSet-Manual-DS40002198.pdf#page=58
#[cfg(not(portable_atomic_no_asm))]
asm!(
"in {sreg}, 0x3F", // sreg = SREG
"cli", // SREG.I = 0
sreg = out(reg) sreg,
options(nostack),
);
#[cfg(portable_atomic_no_asm)]
{
llvm_asm!("in $0, 0x3F" : "=r"(sreg) ::: "volatile");
llvm_asm!("cli" ::: "memory" : "volatile");
}
}
sreg
}
/// Restores the previous interrupt state.
///
/// # Safety
///
/// The state must be the one retrieved by the previous `disable`.
#[inline(always)]
pub(super) unsafe fn restore(prev_sreg: State) {
// SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`.
unsafe {
// This clobbers the entire status register. See msp430.rs for why this is safe.
//
// Do not use `nomem` and `readonly` because we need to prevent preceding memory accesses from being reordered after interrupts are enabled.
// Do not use `preserves_flags` because OUT modifies the status register (SREG).
#[cfg(not(portable_atomic_no_asm))]
asm!(
"out 0x3F, {prev_sreg}", // SREG = prev_sreg
prev_sreg = in(reg) prev_sreg,
options(nostack),
);
#[cfg(portable_atomic_no_asm)]
llvm_asm!("out 0x3F, $0" :: "r"(prev_sreg) : "memory" : "volatile");
}
}

View File

@@ -0,0 +1,944 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Critical section based fallback implementations
This module supports two different critical section implementations:
- Built-in "disable all interrupts".
- Call into the `critical-section` crate (which allows the user to plug any implementation).
The `critical-section`-based fallback is enabled when the user asks for it with the `critical-section`
Cargo feature.
The "disable interrupts" fallback is not sound on multi-core systems.
Also, this uses privileged instructions to disable interrupts, so it usually
doesn't work on unprivileged mode. Using this fallback in an environment where privileged
instructions are not available is also usually considered **unsound**,
although the details are system-dependent.
Therefore, this implementation will only be enabled in one of the following cases:
- When the user explicitly declares that the system is single-core and that
privileged instructions are available using an unsafe cfg.
- When we can safely assume that the system is single-core and that
privileged instructions are available on the system.
AVR, which is single-core[^avr1] and for which LLVM already generates code that
disables interrupts in atomic ops by default[^avr2], falls into the latter category,
as does MSP430.
See also README.md of this directory.
[^avr1]: https://github.com/llvm/llvm-project/blob/llvmorg-20.1.0/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp#L1074
[^avr2]: https://github.com/llvm/llvm-project/blob/llvmorg-20.1.0/llvm/test/CodeGen/AVR/atomics/load16.ll#L5
*/
// On some platforms, atomic load/store can be implemented in a more efficient
// way than disabling interrupts. On MSP430, some RMWs that do not return the
// previous value can also be optimized.
//
// Note: On single-core systems, it is okay to use critical-section-based
// CAS together with atomic load/store. The load/store will not be
// called while interrupts are disabled, and since the load/store is
// atomic, it is not affected by interrupts even if interrupts are enabled.
#[cfg(not(any(
all(target_arch = "avr", portable_atomic_no_asm),
feature = "critical-section",
)))]
use self::arch::atomic;
#[cfg(not(feature = "critical-section"))]
#[cfg_attr(
all(
target_arch = "arm",
any(target_feature = "mclass", portable_atomic_target_feature = "mclass"),
),
path = "armv6m.rs"
)]
#[cfg_attr(
all(
target_arch = "arm",
not(any(target_feature = "mclass", portable_atomic_target_feature = "mclass")),
),
path = "armv4t.rs"
)]
#[cfg_attr(target_arch = "avr", path = "avr.rs")]
#[cfg_attr(target_arch = "msp430", path = "msp430.rs")]
#[cfg_attr(any(target_arch = "riscv32", target_arch = "riscv64"), path = "riscv.rs")]
#[cfg_attr(target_arch = "xtensa", path = "xtensa.rs")]
mod arch;
use core::{cell::UnsafeCell, ptr, sync::atomic::Ordering};
// Critical section implementations might use locks internally.
#[cfg(feature = "critical-section")]
const IS_ALWAYS_LOCK_FREE: bool = false;
// We consider atomic operations based on disabling interrupts on single-core
// systems to be lock-free. (We likewise consider the atomic operations provided
// in a similar way by the Linux kernel on pre-v6 Arm Linux to be lock-free.)
#[cfg(not(feature = "critical-section"))]
const IS_ALWAYS_LOCK_FREE: bool = true;
#[cfg(feature = "critical-section")]
#[inline]
fn with<F, R>(f: F) -> R
where
F: FnOnce() -> R,
{
critical_section::with(|_| f())
}
#[cfg(not(feature = "critical-section"))]
#[inline(always)]
fn with<F, R>(f: F) -> R
where
F: FnOnce() -> R,
{
// Get current interrupt state and disable interrupts
let state = arch::disable();
let r = f();
// Restore interrupt state
// SAFETY: the state was retrieved by the previous `disable`.
unsafe { arch::restore(state) }
r
}
#[cfg_attr(target_pointer_width = "16", repr(C, align(2)))]
#[cfg_attr(target_pointer_width = "32", repr(C, align(4)))]
#[cfg_attr(target_pointer_width = "64", repr(C, align(8)))]
#[cfg_attr(target_pointer_width = "128", repr(C, align(16)))]
pub(crate) struct AtomicPtr<T> {
p: UnsafeCell<*mut T>,
}
// SAFETY: any data races are prevented by disabling interrupts or
// atomic intrinsics (see module-level comments).
unsafe impl<T> Send for AtomicPtr<T> {}
// SAFETY: any data races are prevented by disabling interrupts or
// atomic intrinsics (see module-level comments).
unsafe impl<T> Sync for AtomicPtr<T> {}
impl<T> AtomicPtr<T> {
#[inline]
pub(crate) const fn new(p: *mut T) -> Self {
Self { p: UnsafeCell::new(p) }
}
#[inline]
pub(crate) fn is_lock_free() -> bool {
Self::IS_ALWAYS_LOCK_FREE
}
pub(crate) const IS_ALWAYS_LOCK_FREE: bool = IS_ALWAYS_LOCK_FREE;
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn load(&self, order: Ordering) -> *mut T {
crate::utils::assert_load_ordering(order);
#[cfg(not(any(target_arch = "avr", feature = "critical-section")))]
{
self.as_native().load(order)
}
#[cfg(any(target_arch = "avr", feature = "critical-section"))]
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe { self.p.get().read() })
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn store(&self, ptr: *mut T, order: Ordering) {
crate::utils::assert_store_ordering(order);
#[cfg(not(any(target_arch = "avr", feature = "critical-section")))]
{
self.as_native().store(ptr, order);
}
#[cfg(any(target_arch = "avr", feature = "critical-section"))]
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe { self.p.get().write(ptr) });
}
#[inline]
pub(crate) fn swap(&self, ptr: *mut T, order: Ordering) -> *mut T {
let _ = order;
#[cfg(all(
any(target_arch = "riscv32", target_arch = "riscv64"),
not(feature = "critical-section"),
any(
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
),
))]
{
self.as_native().swap(ptr, order)
}
#[cfg(not(all(
any(target_arch = "riscv32", target_arch = "riscv64"),
not(feature = "critical-section"),
any(
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
),
)))]
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.p.get().read();
self.p.get().write(ptr);
prev
})
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange(
&self,
current: *mut T,
new: *mut T,
success: Ordering,
failure: Ordering,
) -> Result<*mut T, *mut T> {
crate::utils::assert_compare_exchange_ordering(success, failure);
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.p.get().read();
if ptr::eq(prev, current) {
self.p.get().write(new);
Ok(prev)
} else {
Err(prev)
}
})
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange_weak(
&self,
current: *mut T,
new: *mut T,
success: Ordering,
failure: Ordering,
) -> Result<*mut T, *mut T> {
self.compare_exchange(current, new, success, failure)
}
#[inline]
pub(crate) const fn as_ptr(&self) -> *mut *mut T {
self.p.get()
}
#[cfg(not(any(target_arch = "avr", feature = "critical-section")))]
#[inline(always)]
fn as_native(&self) -> &atomic::AtomicPtr<T> {
// SAFETY: AtomicPtr and atomic::AtomicPtr have the same layout and
// guarantee atomicity in a compatible way. (see module-level comments)
unsafe { &*(self as *const Self as *const atomic::AtomicPtr<T>) }
}
}
macro_rules! atomic_int {
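// Common struct definition, constructor, and lock-free queries shared by the arms below.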
(base, $atomic_type:ident, $int_type:ident, $align:literal) => {
#[repr(C, align($align))]
pub(crate) struct $atomic_type {
v: UnsafeCell<$int_type>,
}
// Send is implicitly implemented.
// SAFETY: any data races are prevented by disabling interrupts or
// atomic intrinsics (see module-level comments).
unsafe impl Sync for $atomic_type {}
impl $atomic_type {
#[inline]
pub(crate) const fn new(v: $int_type) -> Self {
Self { v: UnsafeCell::new(v) }
}
#[inline]
pub(crate) fn is_lock_free() -> bool {
Self::IS_ALWAYS_LOCK_FREE
}
pub(crate) const IS_ALWAYS_LOCK_FREE: bool = IS_ALWAYS_LOCK_FREE;
#[inline]
pub(crate) const fn as_ptr(&self) -> *mut $int_type {
self.v.get()
}
}
};
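// Atomics whose load/store use the native (interrupt-free) implementation where
// available (not on AVR without asm support, nor with the critical-section feature);
// RMWs use AMO instructions on RISC-V with Zaamo and are otherwise emulated by
// disabling interrupts (see the cas arms below).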
(load_store_atomic $([$kind:ident])?, $atomic_type:ident, $int_type:ident, $align:literal) => {
atomic_int!(base, $atomic_type, $int_type, $align);
#[cfg(all(
any(target_arch = "riscv32", target_arch = "riscv64"),
not(feature = "critical-section"),
any(
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
),
))]
atomic_int!(cas $([$kind])?, $atomic_type, $int_type);
#[cfg(not(all(
any(target_arch = "riscv32", target_arch = "riscv64"),
not(feature = "critical-section"),
any(
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
),
)))]
atomic_int!(cas[emulate], $atomic_type, $int_type);
impl $atomic_type {
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn load(&self, order: Ordering) -> $int_type {
crate::utils::assert_load_ordering(order);
#[cfg(not(any(
all(target_arch = "avr", portable_atomic_no_asm),
feature = "critical-section",
)))]
{
self.as_native().load(order)
}
#[cfg(any(
all(target_arch = "avr", portable_atomic_no_asm),
feature = "critical-section",
))]
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe { self.v.get().read() })
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn store(&self, val: $int_type, order: Ordering) {
crate::utils::assert_store_ordering(order);
#[cfg(not(any(
all(target_arch = "avr", portable_atomic_no_asm),
feature = "critical-section",
)))]
{
self.as_native().store(val, order);
}
#[cfg(any(
all(target_arch = "avr", portable_atomic_no_asm),
feature = "critical-section",
))]
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe { self.v.get().write(val) });
}
#[cfg(not(any(
all(target_arch = "avr", portable_atomic_no_asm),
feature = "critical-section",
)))]
#[inline(always)]
fn as_native(&self) -> &atomic::$atomic_type {
// SAFETY: $atomic_type and atomic::$atomic_type have the same layout and
// guarantee atomicity in a compatible way. (see module-level comments)
unsafe { &*(self as *const Self as *const atomic::$atomic_type) }
}
}
#[cfg(not(all(target_arch = "msp430", not(feature = "critical-section"))))]
impl_default_no_fetch_ops!($atomic_type, $int_type);
impl_default_bit_opts!($atomic_type, $int_type);
#[cfg(not(all(target_arch = "msp430", not(feature = "critical-section"))))]
impl $atomic_type {
#[inline]
pub(crate) fn not(&self, order: Ordering) {
self.fetch_not(order);
}
}
#[cfg(all(target_arch = "msp430", not(feature = "critical-section")))]
impl $atomic_type {
#[inline]
pub(crate) fn add(&self, val: $int_type, order: Ordering) {
self.as_native().add(val, order);
}
#[inline]
pub(crate) fn sub(&self, val: $int_type, order: Ordering) {
self.as_native().sub(val, order);
}
#[inline]
pub(crate) fn and(&self, val: $int_type, order: Ordering) {
self.as_native().and(val, order);
}
#[inline]
pub(crate) fn or(&self, val: $int_type, order: Ordering) {
self.as_native().or(val, order);
}
#[inline]
pub(crate) fn xor(&self, val: $int_type, order: Ordering) {
self.as_native().xor(val, order);
}
#[inline]
pub(crate) fn not(&self, order: Ordering) {
self.as_native().not(order);
}
}
};
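// Atomics where every operation, including load/store, runs inside the critical
// section (used on AVR and for types wider than the target's native atomics).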
(all_critical_session, $atomic_type:ident, $int_type:ident, $align:literal) => {
atomic_int!(base, $atomic_type, $int_type, $align);
atomic_int!(cas[emulate], $atomic_type, $int_type);
impl_default_no_fetch_ops!($atomic_type, $int_type);
impl_default_bit_opts!($atomic_type, $int_type);
impl $atomic_type {
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn load(&self, order: Ordering) -> $int_type {
crate::utils::assert_load_ordering(order);
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe { self.v.get().read() })
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn store(&self, val: $int_type, order: Ordering) {
crate::utils::assert_store_ordering(order);
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe { self.v.get().write(val) });
}
#[inline]
pub(crate) fn not(&self, order: Ordering) {
self.fetch_not(order);
}
}
};
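// CAS and RMW operations emulated with plain reads/writes inside the critical section.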
(cas[emulate], $atomic_type:ident, $int_type:ident) => {
impl $atomic_type {
#[inline]
pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(val);
prev
})
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
crate::utils::assert_compare_exchange_ordering(success, failure);
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
if prev == current {
self.v.get().write(new);
Ok(prev)
} else {
Err(prev)
}
})
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange_weak(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
self.compare_exchange(current, new, success, failure)
}
#[inline]
pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(prev.wrapping_add(val));
prev
})
}
#[inline]
pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(prev.wrapping_sub(val));
prev
})
}
#[inline]
pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(prev & val);
prev
})
}
#[inline]
pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(!(prev & val));
prev
})
}
#[inline]
pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(prev | val);
prev
})
}
#[inline]
pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(prev ^ val);
prev
})
}
#[inline]
pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(core::cmp::max(prev, val));
prev
})
}
#[inline]
pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(core::cmp::min(prev, val));
prev
})
}
#[inline]
pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(!prev);
prev
})
}
#[inline]
pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(prev.wrapping_neg());
prev
})
}
#[inline]
pub(crate) fn neg(&self, order: Ordering) {
self.fetch_neg(order);
}
}
};
// RISC-V 32-bit(RV32)/{32,64}-bit(RV64) RMW with Zaamo extension
// RISC-V 8-bit/16-bit RMW with Zabha extension
(cas, $atomic_type:ident, $int_type:ident) => {
impl $atomic_type {
#[inline]
pub(crate) fn swap(&self, val: $int_type, order: Ordering) -> $int_type {
self.as_native().swap(val, order)
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
crate::utils::assert_compare_exchange_ordering(success, failure);
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
if prev == current {
self.v.get().write(new);
Ok(prev)
} else {
Err(prev)
}
})
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange_weak(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
self.compare_exchange(current, new, success, failure)
}
#[inline]
pub(crate) fn fetch_add(&self, val: $int_type, order: Ordering) -> $int_type {
self.as_native().fetch_add(val, order)
}
#[inline]
pub(crate) fn fetch_sub(&self, val: $int_type, order: Ordering) -> $int_type {
self.as_native().fetch_sub(val, order)
}
#[inline]
pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type {
self.as_native().fetch_and(val, order)
}
#[inline]
pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(!(prev & val));
prev
})
}
#[inline]
pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type {
self.as_native().fetch_or(val, order)
}
#[inline]
pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type {
self.as_native().fetch_xor(val, order)
}
#[inline]
pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type {
self.as_native().fetch_max(val, order)
}
#[inline]
pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type {
self.as_native().fetch_min(val, order)
}
#[inline]
pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type {
self.as_native().fetch_not(order)
}
#[inline]
pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(prev.wrapping_neg());
prev
})
}
#[inline]
pub(crate) fn neg(&self, order: Ordering) {
self.fetch_neg(order);
}
}
};
// RISC-V 8-bit/16-bit RMW with Zaamo extension
(cas[sub_word], $atomic_type:ident, $int_type:ident) => {
#[cfg(any(target_feature = "zabha", portable_atomic_target_feature = "zabha"))]
atomic_int!(cas, $atomic_type, $int_type);
#[cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))]
impl $atomic_type {
#[inline]
pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(val);
prev
})
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
crate::utils::assert_compare_exchange_ordering(success, failure);
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
if prev == current {
self.v.get().write(new);
Ok(prev)
} else {
Err(prev)
}
})
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn compare_exchange_weak(
&self,
current: $int_type,
new: $int_type,
success: Ordering,
failure: Ordering,
) -> Result<$int_type, $int_type> {
self.compare_exchange(current, new, success, failure)
}
#[inline]
pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(prev.wrapping_add(val));
prev
})
}
#[inline]
pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(prev.wrapping_sub(val));
prev
})
}
#[inline]
pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type {
self.as_native().fetch_and(val, order)
}
#[inline]
pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(!(prev & val));
prev
})
}
#[inline]
pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type {
self.as_native().fetch_or(val, order)
}
#[inline]
pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type {
self.as_native().fetch_xor(val, order)
}
#[inline]
pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(core::cmp::max(prev, val));
prev
})
}
#[inline]
pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(core::cmp::min(prev, val));
prev
})
}
#[inline]
pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type {
self.as_native().fetch_not(order)
}
#[inline]
pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
// SAFETY: any data races are prevented by disabling interrupts (see
// module-level comments) and the raw pointer is valid because we got it
// from a reference.
with(|| unsafe {
let prev = self.v.get().read();
self.v.get().write(prev.wrapping_neg());
prev
})
}
#[inline]
pub(crate) fn neg(&self, order: Ordering) {
self.fetch_neg(order);
}
}
};
}
#[cfg(target_pointer_width = "16")]
#[cfg(not(target_arch = "avr"))]
atomic_int!(load_store_atomic, AtomicIsize, isize, 2);
#[cfg(target_pointer_width = "16")]
#[cfg(not(target_arch = "avr"))]
atomic_int!(load_store_atomic, AtomicUsize, usize, 2);
#[cfg(target_arch = "avr")]
atomic_int!(all_critical_session, AtomicIsize, isize, 2);
#[cfg(target_arch = "avr")]
atomic_int!(all_critical_session, AtomicUsize, usize, 2);
#[cfg(target_pointer_width = "32")]
atomic_int!(load_store_atomic, AtomicIsize, isize, 4);
#[cfg(target_pointer_width = "32")]
atomic_int!(load_store_atomic, AtomicUsize, usize, 4);
#[cfg(target_pointer_width = "64")]
atomic_int!(load_store_atomic, AtomicIsize, isize, 8);
#[cfg(target_pointer_width = "64")]
atomic_int!(load_store_atomic, AtomicUsize, usize, 8);
#[cfg(target_pointer_width = "128")]
atomic_int!(load_store_atomic, AtomicIsize, isize, 16);
#[cfg(target_pointer_width = "128")]
atomic_int!(load_store_atomic, AtomicUsize, usize, 16);
#[cfg(not(all(target_arch = "avr", portable_atomic_no_asm)))]
atomic_int!(load_store_atomic[sub_word], AtomicI8, i8, 1);
#[cfg(not(all(target_arch = "avr", portable_atomic_no_asm)))]
atomic_int!(load_store_atomic[sub_word], AtomicU8, u8, 1);
#[cfg(all(target_arch = "avr", portable_atomic_no_asm))]
atomic_int!(all_critical_session, AtomicI8, i8, 1);
#[cfg(all(target_arch = "avr", portable_atomic_no_asm))]
atomic_int!(all_critical_session, AtomicU8, u8, 1);
#[cfg(not(target_arch = "avr"))]
atomic_int!(load_store_atomic[sub_word], AtomicI16, i16, 2);
#[cfg(not(target_arch = "avr"))]
atomic_int!(load_store_atomic[sub_word], AtomicU16, u16, 2);
#[cfg(target_arch = "avr")]
atomic_int!(all_critical_session, AtomicI16, i16, 2);
#[cfg(target_arch = "avr")]
atomic_int!(all_critical_session, AtomicU16, u16, 2);
#[cfg(not(target_pointer_width = "16"))]
atomic_int!(load_store_atomic, AtomicI32, i32, 4);
#[cfg(not(target_pointer_width = "16"))]
atomic_int!(load_store_atomic, AtomicU32, u32, 4);
#[cfg(target_pointer_width = "16")]
#[cfg(any(test, feature = "fallback"))]
atomic_int!(all_critical_session, AtomicI32, i32, 4);
#[cfg(target_pointer_width = "16")]
#[cfg(any(test, feature = "fallback"))]
atomic_int!(all_critical_session, AtomicU32, u32, 4);
cfg_has_fast_atomic_64! {
atomic_int!(load_store_atomic, AtomicI64, i64, 8);
atomic_int!(load_store_atomic, AtomicU64, u64, 8);
}
#[cfg(any(test, feature = "fallback"))]
cfg_no_fast_atomic_64! {
atomic_int!(all_critical_session, AtomicI64, i64, 8);
atomic_int!(all_critical_session, AtomicU64, u64, 8);
}
#[cfg(any(test, feature = "fallback"))]
atomic_int!(all_critical_session, AtomicI128, i128, 16);
#[cfg(any(test, feature = "fallback"))]
atomic_int!(all_critical_session, AtomicU128, u128, 16);
#[cfg(test)]
mod tests {
use super::*;
test_atomic_ptr_single_thread!();
test_atomic_int_single_thread!(i8);
test_atomic_int_single_thread!(u8);
test_atomic_int_single_thread!(i16);
test_atomic_int_single_thread!(u16);
test_atomic_int_single_thread!(i32);
test_atomic_int_single_thread!(u32);
test_atomic_int_single_thread!(i64);
test_atomic_int_single_thread!(u64);
test_atomic_int_single_thread!(i128);
test_atomic_int_single_thread!(u128);
test_atomic_int_single_thread!(isize);
test_atomic_int_single_thread!(usize);
}

View File

@@ -0,0 +1,77 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Adapted from https://github.com/rust-embedded/msp430.
See also src/imp/msp430.rs.
Refs:
- MSP430x5xx and MSP430x6xx Family User's Guide, Rev. Q
https://www.ti.com/lit/ug/slau208q/slau208q.pdf
Generated asm:
- msp430 https://godbolt.org/z/fc6h89xac
*/
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
pub(super) use super::super::msp430 as atomic;
pub(super) type State = u16;
/// Disables interrupts and returns the previous interrupt state.
#[inline(always)]
pub(super) fn disable() -> State {
let sr: State;
// SAFETY: reading the status register and disabling interrupts are safe.
// (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
unsafe {
// Do not use `nomem` and `readonly` because we need to prevent subsequent memory accesses from being reordered before interrupts are disabled.
// Do not use `preserves_flags` because DINT modifies the GIE (global interrupt enable) bit of the status register.
// See "NOTE: Enable and Disable Interrupt" of User's Guide for NOP: https://www.ti.com/lit/ug/slau208q/slau208q.pdf#page=60
#[cfg(not(portable_atomic_no_asm))]
asm!(
"mov r2, {sr}", // sr = SR
"dint {{ nop", // SR.GIE = 0
sr = out(reg) sr,
options(nostack),
);
#[cfg(portable_atomic_no_asm)]
{
llvm_asm!("mov r2, $0" : "=r"(sr) ::: "volatile");
llvm_asm!("dint { nop" ::: "memory" : "volatile");
}
}
sr
}
/// Restores the previous interrupt state.
///
/// # Safety
///
/// The state must be the one retrieved by the previous `disable`.
#[inline(always)]
pub(super) unsafe fn restore(prev_sr: State) {
// SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`.
unsafe {
// This clobbers the entire status register, but we never explicitly modify
// flags within a critical section, and the only flags that may change within
// a critical section are the arithmetic flags changed as a side effect of
// arithmetic operations, etc., which LLVM recognizes, so it is safe to
// clobber them here.
// See also the discussion at https://github.com/taiki-e/portable-atomic/pull/40.
//
// Do not use `nomem` and `readonly` because we need to prevent preceding memory accesses from being reordered after interrupts are enabled.
// Do not use `preserves_flags` because MOV modifies the status register.
// See "NOTE: Enable and Disable Interrupt" of User's Guide for NOP: https://www.ti.com/lit/ug/slau208q/slau208q.pdf#page=60
#[cfg(not(portable_atomic_no_asm))]
asm!(
"nop {{ mov {prev_sr}, r2 {{ nop", // SR = prev_sr
prev_sr = in(reg) prev_sr,
options(nostack),
);
#[cfg(portable_atomic_no_asm)]
llvm_asm!("nop { mov $0, r2 { nop" :: "r"(prev_sr) : "memory" : "volatile");
}
}

View File

@@ -0,0 +1,95 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Refs:
- RISC-V Instruction Set Manual
Machine Status (mstatus and mstatush) Registers
https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/machine.adoc#machine-status-mstatus-and-mstatush-registers
Supervisor Status (sstatus) Register
https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/supervisor.adoc#supervisor-status-sstatus-register
See also src/imp/riscv.rs.
Generated asm:
- riscv64gc https://godbolt.org/z/zTrzT1Ee7
*/
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
pub(super) use super::super::riscv as atomic;
// Status register
#[cfg(not(portable_atomic_s_mode))]
macro_rules! status {
() => {
"mstatus"
};
}
#[cfg(portable_atomic_s_mode)]
macro_rules! status {
() => {
"sstatus"
};
}
// MIE (Machine Interrupt Enable) bit (1 << 3)
#[cfg(not(portable_atomic_s_mode))]
const MASK: State = 0x8;
#[cfg(not(portable_atomic_s_mode))]
macro_rules! mask {
() => {
"0x8"
};
}
// SIE (Supervisor Interrupt Enable) bit (1 << 1)
#[cfg(portable_atomic_s_mode)]
const MASK: State = 0x2;
#[cfg(portable_atomic_s_mode)]
macro_rules! mask {
() => {
"0x2"
};
}
#[cfg(target_arch = "riscv32")]
pub(super) type State = u32;
#[cfg(target_arch = "riscv64")]
pub(super) type State = u64;
/// Disables interrupts and returns the previous interrupt state.
#[inline(always)]
pub(super) fn disable() -> State {
let status: State;
// SAFETY: reading mstatus/sstatus and disabling interrupts is safe.
// (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
unsafe {
// Do not use `nomem` and `readonly` because we need to prevent subsequent memory accesses from being reordered before interrupts are disabled.
asm!(
concat!("csrrci {status}, ", status!(), ", ", mask!()), // atomic { status = status!(); status!() &= !mask!() }
status = out(reg) status,
options(nostack, preserves_flags),
);
}
status
}
/// Restores the previous interrupt state.
///
/// # Safety
///
/// The state must be the one retrieved by the previous `disable`.
#[inline(always)]
pub(super) unsafe fn restore(status: State) {
if status & MASK != 0 {
// SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
// and we've checked that interrupts were enabled before disabling interrupts.
unsafe {
// Do not use `nomem` and `readonly` because we need to prevent preceding memory accesses from being reordered after interrupts are enabled.
asm!(
concat!("csrsi ", status!(), ", ", mask!()), // atomic { status!() |= mask!() }
options(nostack, preserves_flags),
);
}
}
}

View File

@@ -0,0 +1,56 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Refs:
- Xtensa Instruction Set Architecture (ISA) Summary for all Xtensa LX Processors
https://www.cadence.com/content/dam/cadence-www/global/en_US/documents/tools/silicon-solutions/compute-ip/isa-summary.pdf
- Linux kernel's Xtensa atomic implementation
https://github.com/torvalds/linux/blob/v6.13/arch/xtensa/include/asm/atomic.h
*/
use core::arch::asm;
pub(super) use core::sync::atomic;
pub(super) type State = u32;
/// Disables interrupts and returns the previous interrupt state.
#[inline(always)]
pub(super) fn disable() -> State {
let ps: State;
// SAFETY: reading the PS special register and disabling all interrupts is safe.
// (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
unsafe {
// Do not use `nomem` and `readonly` because we need to prevent subsequent memory accesses from being reordered before interrupts are disabled.
// Interrupt level 15 to disable all interrupts.
// SYNC after RSIL is not required.
asm!(
"rsil {ps}, 15", // ps = PS; PS.INTLEVEL = 15
ps = out(reg) ps,
options(nostack),
);
}
ps
}
/// Restores the previous interrupt state.
///
/// # Safety
///
/// The state must be the one retrieved by the previous `disable`.
#[inline(always)]
pub(super) unsafe fn restore(prev_ps: State) {
// SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
// and we've checked that interrupts were enabled before disabling interrupts.
unsafe {
// Do not use `nomem` and `readonly` because we need to prevent preceding memory accesses from being reordered after interrupts are enabled.
// SYNC after WSR is required to guarantee that subsequent RSIL read the written value.
// See also 3.8.10 Processor Control Instructions of Xtensa Instruction Set Architecture (ISA) Summary for all Xtensa LX Processors.
asm!(
"wsr.ps {prev_ps}", // PS = prev_ps
"rsync", // wait
prev_ps = in(reg) prev_ps,
options(nostack),
);
}
}

572
vendor/portable-atomic/src/imp/mod.rs vendored Normal file
View File

@@ -0,0 +1,572 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
// -----------------------------------------------------------------------------
// Lock-free implementations
#[cfg(not(any(
all(
portable_atomic_no_atomic_load_store,
not(all(target_arch = "bpf", not(feature = "critical-section"))),
),
target_arch = "avr",
target_arch = "msp430",
)))]
#[cfg_attr(
portable_atomic_no_cfg_target_has_atomic,
cfg(not(all(
any(
target_arch = "riscv32",
target_arch = "riscv64",
feature = "critical-section",
portable_atomic_unsafe_assume_single_core,
),
portable_atomic_no_atomic_cas,
)))
)]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(not(all(
any(
target_arch = "riscv32",
target_arch = "riscv64",
feature = "critical-section",
portable_atomic_unsafe_assume_single_core,
),
not(target_has_atomic = "ptr"),
)))
)]
mod core_atomic;
// AVR
#[cfg(target_arch = "avr")]
#[cfg(not(portable_atomic_no_asm))]
#[cfg(not(feature = "critical-section"))]
mod avr;
// MSP430
#[cfg(target_arch = "msp430")]
pub(crate) mod msp430;
// RISC-V without A-extension
#[cfg(any(test, not(feature = "critical-section")))]
#[cfg_attr(
portable_atomic_no_cfg_target_has_atomic,
cfg(any(
all(test, not(any(miri, portable_atomic_sanitize_thread))),
portable_atomic_no_atomic_cas,
))
)]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(any(
all(test, not(any(miri, portable_atomic_sanitize_thread))),
not(target_has_atomic = "ptr"),
))
)]
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
mod riscv;
// x86-specific optimizations
// Miri and Sanitizer do not support inline assembly.
#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
))]
mod x86;
// 64-bit atomic implementations on 32-bit architectures
#[cfg(any(target_arch = "arm", target_arch = "riscv32"))]
mod atomic64;
// 128-bit atomic implementations on 64-bit architectures
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "s390x",
target_arch = "x86_64",
))]
mod atomic128;
// -----------------------------------------------------------------------------
// Lock-based fallback implementations
#[cfg(feature = "fallback")]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
#[cfg(any(
test,
not(any(
all(
target_arch = "aarch64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
),
all(
target_arch = "arm64ec",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
not(portable_atomic_no_asm),
),
all(
target_arch = "x86_64",
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
),
all(
target_arch = "riscv64",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
),
all(
target_arch = "powerpc64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
portable_atomic_unstable_asm_experimental_arch,
any(
target_feature = "quadword-atomics",
portable_atomic_target_feature = "quadword-atomics",
),
),
all(
target_arch = "s390x",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
not(portable_atomic_no_asm),
),
))
))]
mod fallback;
// -----------------------------------------------------------------------------
// Critical section based fallback implementations
// On AVR, we always use the critical-section-based fallback implementation.
// AVR can be safely assumed to be single-core, so this is sound.
// The same applies to MSP430.
// See the module-level comments of interrupt module for more.
#[cfg(any(
all(test, target_os = "none"),
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_cas)))]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(any(test, not(target_has_atomic = "ptr")))
)]
#[cfg(any(
target_arch = "arm",
target_arch = "avr",
target_arch = "msp430",
target_arch = "riscv32",
target_arch = "riscv64",
target_arch = "xtensa",
feature = "critical-section",
))]
mod interrupt;
// -----------------------------------------------------------------------------
// Atomic float implementations
#[cfg(feature = "float")]
pub(crate) mod float;
// -----------------------------------------------------------------------------
// has CAS | (has core atomic & !(avr | msp430 | critical section)) => core atomic
#[cfg(not(any(
portable_atomic_no_atomic_load_store,
target_arch = "avr",
target_arch = "msp430",
)))]
#[cfg_attr(
portable_atomic_no_cfg_target_has_atomic,
cfg(not(all(
any(
target_arch = "riscv32",
target_arch = "riscv64",
feature = "critical-section",
portable_atomic_unsafe_assume_single_core,
),
portable_atomic_no_atomic_cas,
)))
)]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(not(all(
any(
target_arch = "riscv32",
target_arch = "riscv64",
feature = "critical-section",
portable_atomic_unsafe_assume_single_core,
),
not(target_has_atomic = "ptr"),
)))
)]
items! {
pub(crate) use self::core_atomic::{
AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU32, AtomicU8,
AtomicUsize,
};
#[cfg_attr(
portable_atomic_no_cfg_target_has_atomic,
cfg(any(
not(portable_atomic_no_atomic_64),
not(any(target_pointer_width = "16", target_pointer_width = "32")),
))
)]
#[cfg_attr(
not(portable_atomic_no_cfg_target_has_atomic),
cfg(any(
target_has_atomic = "64",
not(any(target_pointer_width = "16", target_pointer_width = "32")),
))
)]
pub(crate) use self::core_atomic::{AtomicI64, AtomicU64};
}
// bpf & !(critical section) => core atomic
#[cfg(all(
target_arch = "bpf",
portable_atomic_no_atomic_load_store,
not(feature = "critical-section"),
))]
pub(crate) use self::core_atomic::{AtomicI64, AtomicIsize, AtomicPtr, AtomicU64, AtomicUsize};
// RISC-V without A-extension & !(assume single core | critical section)
#[cfg(not(any(portable_atomic_unsafe_assume_single_core, feature = "critical-section")))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))]
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
items! {
pub(crate) use self::riscv::{
AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU32, AtomicU8,
AtomicUsize,
};
#[cfg(target_arch = "riscv64")]
pub(crate) use self::riscv::{AtomicI64, AtomicU64};
}
// no core atomic CAS & (assume single core | critical section) => critical section based fallback
#[cfg(any(
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
target_arch = "avr",
target_arch = "msp430",
))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))]
items! {
pub(crate) use self::interrupt::{
AtomicI16, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU8, AtomicUsize,
};
#[cfg(any(not(target_pointer_width = "16"), feature = "fallback"))]
pub(crate) use self::interrupt::{AtomicI32, AtomicU32};
#[cfg(any(
not(any(target_pointer_width = "16", target_pointer_width = "32")),
feature = "fallback",
))]
pub(crate) use self::interrupt::{AtomicI64, AtomicU64};
#[cfg(feature = "fallback")]
pub(crate) use self::interrupt::{AtomicI128, AtomicU128};
}
// no core (64-bit | 128-bit) atomic & has CAS => use lock-based fallback
#[cfg(feature = "fallback")]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
items! {
#[cfg(not(any(
all(
target_arch = "arm",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
not(portable_atomic_no_outline_atomics),
),
all(
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(
target_feature = "zacas",
portable_atomic_target_feature = "zacas",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
any(target_os = "linux", target_os = "android"),
),
),
),
)))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))]
pub(crate) use self::fallback::{AtomicI64, AtomicU64};
#[cfg(not(any(
all(
target_arch = "aarch64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
),
all(
target_arch = "arm64ec",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
not(portable_atomic_no_asm),
),
all(
target_arch = "x86_64",
not(all(
any(miri, portable_atomic_sanitize_thread),
portable_atomic_no_cmpxchg16b_intrinsic,
)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(
target_feature = "cmpxchg16b",
portable_atomic_target_feature = "cmpxchg16b",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
not(any(target_env = "sgx", miri)),
),
),
),
all(
target_arch = "riscv64",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(
target_feature = "zacas",
portable_atomic_target_feature = "zacas",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
any(target_os = "linux", target_os = "android"),
),
),
),
all(
target_arch = "powerpc64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
portable_atomic_unstable_asm_experimental_arch,
any(
target_feature = "quadword-atomics",
portable_atomic_target_feature = "quadword-atomics",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
any(
all(
target_os = "linux",
any(
all(
target_env = "gnu",
any(target_endian = "little", not(target_feature = "crt-static")),
),
all(
target_env = "musl",
any(not(target_feature = "crt-static"), feature = "std"),
),
target_env = "ohos",
all(target_env = "uclibc", not(target_feature = "crt-static")),
portable_atomic_outline_atomics,
),
),
target_os = "android",
all(
target_os = "freebsd",
any(
target_endian = "little",
not(target_feature = "crt-static"),
portable_atomic_outline_atomics,
),
),
target_os = "openbsd",
all(
target_os = "aix",
not(portable_atomic_pre_llvm_20),
portable_atomic_outline_atomics, // TODO(aix): currently disabled by default
),
),
not(any(miri, portable_atomic_sanitize_thread)),
),
),
),
all(
target_arch = "s390x",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
not(portable_atomic_no_asm),
),
)))]
pub(crate) use self::fallback::{AtomicI128, AtomicU128};
}
// 64-bit atomics (platform-specific)
// pre-v6 Arm Linux
#[cfg(feature = "fallback")]
#[cfg(all(
target_arch = "arm",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
not(portable_atomic_no_outline_atomics),
))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))]
pub(crate) use self::atomic64::arm_linux::{AtomicI64, AtomicU64};
// riscv32 & (zacas | outline-atomics)
#[cfg(all(
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(
target_feature = "zacas",
portable_atomic_target_feature = "zacas",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
any(target_os = "linux", target_os = "android"),
),
),
))]
pub(crate) use self::atomic64::riscv32::{AtomicI64, AtomicU64};
// 128-bit atomics (platform-specific)
// AArch64
#[cfg(any(
all(
target_arch = "aarch64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
),
all(
target_arch = "arm64ec",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
not(portable_atomic_no_asm),
),
))]
pub(crate) use self::atomic128::aarch64::{AtomicI128, AtomicU128};
// x86_64 & (cmpxchg16b | outline-atomics)
#[cfg(all(
target_arch = "x86_64",
not(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_no_cmpxchg16b_intrinsic)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(
target_feature = "cmpxchg16b",
portable_atomic_target_feature = "cmpxchg16b",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
not(any(target_env = "sgx", miri)),
),
),
))]
pub(crate) use self::atomic128::x86_64::{AtomicI128, AtomicU128};
// riscv64 & (zacas | outline-atomics)
#[cfg(all(
target_arch = "riscv64",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(
target_feature = "zacas",
portable_atomic_target_feature = "zacas",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
any(target_os = "linux", target_os = "android"),
),
),
))]
pub(crate) use self::atomic128::riscv64::{AtomicI128, AtomicU128};
// powerpc64 & (pwr8 | outline-atomics)
#[cfg(all(
target_arch = "powerpc64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
portable_atomic_unstable_asm_experimental_arch,
any(
target_feature = "quadword-atomics",
portable_atomic_target_feature = "quadword-atomics",
all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
any(
all(
target_os = "linux",
any(
all(
target_env = "gnu",
any(target_endian = "little", not(target_feature = "crt-static")),
),
all(
target_env = "musl",
any(not(target_feature = "crt-static"), feature = "std"),
),
target_env = "ohos",
all(target_env = "uclibc", not(target_feature = "crt-static")),
portable_atomic_outline_atomics,
),
),
target_os = "android",
all(
target_os = "freebsd",
any(
target_endian = "little",
not(target_feature = "crt-static"),
portable_atomic_outline_atomics,
),
),
target_os = "openbsd",
all(
target_os = "aix",
not(portable_atomic_pre_llvm_20),
portable_atomic_outline_atomics, // TODO(aix): currently disabled by default
),
),
not(any(miri, portable_atomic_sanitize_thread)),
),
),
))]
pub(crate) use self::atomic128::powerpc64::{AtomicI128, AtomicU128};
// s390x
#[cfg(all(
target_arch = "s390x",
not(all(any(miri, portable_atomic_sanitize_thread), not(portable_atomic_atomic_intrinsics))),
not(portable_atomic_no_asm),
))]
pub(crate) use self::atomic128::s390x::{AtomicI128, AtomicU128};

vendor/portable-atomic/src/imp/msp430.rs vendored Normal file

@@ -0,0 +1,282 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Atomic implementation on MSP430.
Adapted from https://github.com/pftbest/msp430-atomic.
Operations not supported here are provided by disabling interrupts.
See also src/imp/interrupt/msp430.rs.
See "Atomic operation overview by architecture" in atomic-maybe-uninit for a more comprehensive and
detailed description of the atomic and synchronization instructions in this architecture:
https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md#msp430
Note: Ordering is always SeqCst.
Refs:
- MSP430x5xx and MSP430x6xx Family User's Guide, Rev. Q
https://www.ti.com/lit/ug/slau208q/slau208q.pdf
- atomic-maybe-uninit
https://github.com/taiki-e/atomic-maybe-uninit
Generated asm:
- msp430 https://godbolt.org/z/MGrd4jPoq
*/
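// Illustrative sketch (not part of the upstream implementation): the "disabling interrupts"
// fallback mentioned above amounts to wrapping a plain read-modify-write in an interrupt-free
// critical section on this single-core chip, roughly (hypothetical `with_interrupts_disabled`
// helper):
//
//     with_interrupts_disabled(|| {
//         let old = a.load(Ordering::SeqCst);
//         a.store(old.wrapping_add(1), Ordering::SeqCst);
//         old
//     });
//
// That is the strategy of src/imp/interrupt/msp430.rs; this file only covers operations that
// each compile to a single MSP430 instruction (see the `atomic!` macro below).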
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
#[cfg(not(feature = "critical-section"))]
use core::cell::UnsafeCell;
use core::sync::atomic::Ordering;
/// An atomic fence.
///
/// # Panics
///
/// Panics if `order` is [`Relaxed`](Ordering::Relaxed).
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub fn fence(order: Ordering) {
match order {
Ordering::Relaxed => panic!("there is no such thing as a relaxed fence"),
// MSP430 is single-core and a compiler fence works as an atomic fence.
_ => compiler_fence(order),
}
}
/// A compiler memory fence.
///
/// # Panics
///
/// Panics if `order` is [`Relaxed`](Ordering::Relaxed).
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub fn compiler_fence(order: Ordering) {
match order {
Ordering::Relaxed => panic!("there is no such thing as a relaxed compiler fence"),
_ => {}
}
// SAFETY: using an empty asm is safe.
unsafe {
// Do not use `nomem` and `readonly` because the asm must prevent preceding and subsequent memory accesses from being reordered.
#[cfg(not(portable_atomic_no_asm))]
asm!("", options(nostack, preserves_flags));
#[cfg(portable_atomic_no_asm)]
llvm_asm!("" ::: "memory" : "volatile");
}
}
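// Minimal usage sketch (illustrative; `DATA` and `READY` are hypothetical statics): because the
// empty asm above acts as a full compiler barrier, the usual flag-publishing pattern works even
// though every access lowers to a plain MSP430 load/store:
//
//     DATA.store(x, Ordering::Relaxed);
//     compiler_fence(Ordering::Release); // keep the data store ordered before the flag store
//     READY.store(true, Ordering::Relaxed);
//
// `Relaxed` is rejected by both fences above because a relaxed fence has no effect.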
macro_rules! atomic {
(load_store,
$([$($generics:tt)*])? $atomic_type:ident, $value_type:ty $(as $cast:ty)?, $size:tt
) => {
#[cfg(not(feature = "critical-section"))]
#[repr(transparent)]
pub(crate) struct $atomic_type $(<$($generics)*>)? {
v: UnsafeCell<$value_type>,
}
#[cfg(not(feature = "critical-section"))]
// Send is implicitly implemented for atomic integers, but not for atomic pointers.
// SAFETY: any data races are prevented by atomic operations.
unsafe impl $(<$($generics)*>)? Send for $atomic_type $(<$($generics)*>)? {}
#[cfg(not(feature = "critical-section"))]
// SAFETY: any data races are prevented by atomic operations.
unsafe impl $(<$($generics)*>)? Sync for $atomic_type $(<$($generics)*>)? {}
#[cfg(not(feature = "critical-section"))]
impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn load(&self, order: Ordering) -> $value_type {
crate::utils::assert_load_ordering(order);
let src = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
let out $(: $cast)?;
#[cfg(not(portable_atomic_no_asm))]
asm!(
concat!("mov.", $size, " @{src}, {out}"), // atomic { out = *src }
src = in(reg) src,
out = lateout(reg) out,
options(nostack, preserves_flags),
);
#[cfg(portable_atomic_no_asm)]
llvm_asm!(
concat!("mov.", $size, " $1, $0")
: "=r"(out) : "*m"(src) : "memory" : "volatile"
);
out $(as $cast as $value_type)?
}
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn store(&self, val: $value_type, order: Ordering) {
crate::utils::assert_store_ordering(order);
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
#[cfg(not(portable_atomic_no_asm))]
asm!(
concat!("mov.", $size, " {val}, 0({dst})"), // atomic { *dst = val }
dst = in(reg) dst,
val = in(reg) val $(as $cast)?,
options(nostack, preserves_flags),
);
#[cfg(portable_atomic_no_asm)]
llvm_asm!(
concat!("mov.", $size, " $1, $0")
:: "*m"(dst), "ir"(val) : "memory" : "volatile"
);
}
}
}
};
($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty $(as $cast:ty)?, $size:tt) => {
atomic!(load_store, $([$($generics)*])? $atomic_type, $value_type $(as $cast)?, $size);
#[cfg(not(feature = "critical-section"))]
impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
#[inline]
pub(crate) fn add(&self, val: $value_type, _order: Ordering) {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
#[cfg(not(portable_atomic_no_asm))]
asm!(
concat!("add.", $size, " {val}, 0({dst})"), // atomic { *dst += val }
dst = in(reg) dst,
val = in(reg) val $(as $cast)?,
// Do not use `preserves_flags` because ADD modifies the V, N, Z, and C bits of the status register.
options(nostack),
);
#[cfg(portable_atomic_no_asm)]
llvm_asm!(
concat!("add.", $size, " $1, $0")
:: "*m"(dst), "ir"(val) : "memory" : "volatile"
);
}
}
#[inline]
pub(crate) fn sub(&self, val: $value_type, _order: Ordering) {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
#[cfg(not(portable_atomic_no_asm))]
asm!(
concat!("sub.", $size, " {val}, 0({dst})"), // atomic { *dst -= val }
dst = in(reg) dst,
val = in(reg) val $(as $cast)?,
// Do not use `preserves_flags` because SUB modifies the V, N, Z, and C bits of the status register.
options(nostack),
);
#[cfg(portable_atomic_no_asm)]
llvm_asm!(
concat!("sub.", $size, " $1, $0")
:: "*m"(dst), "ir"(val) : "memory" : "volatile"
);
}
}
#[inline]
pub(crate) fn and(&self, val: $value_type, _order: Ordering) {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
#[cfg(not(portable_atomic_no_asm))]
asm!(
concat!("and.", $size, " {val}, 0({dst})"), // atomic { *dst &= val }
dst = in(reg) dst,
val = in(reg) val $(as $cast)?,
// Do not use `preserves_flags` because AND modifies the V, N, Z, and C bits of the status register.
options(nostack),
);
#[cfg(portable_atomic_no_asm)]
llvm_asm!(
concat!("and.", $size, " $1, $0")
:: "*m"(dst), "ir"(val) : "memory" : "volatile"
);
}
}
#[inline]
pub(crate) fn or(&self, val: $value_type, _order: Ordering) {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
#[cfg(not(portable_atomic_no_asm))]
asm!(
concat!("bis.", $size, " {val}, 0({dst})"), // atomic { *dst |= val }
dst = in(reg) dst,
val = in(reg) val $(as $cast)?,
options(nostack, preserves_flags),
);
#[cfg(portable_atomic_no_asm)]
llvm_asm!(
concat!("bis.", $size, " $1, $0")
:: "*m"(dst), "ir"(val) : "memory" : "volatile"
);
}
}
#[inline]
pub(crate) fn xor(&self, val: $value_type, _order: Ordering) {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
#[cfg(not(portable_atomic_no_asm))]
asm!(
concat!("xor.", $size, " {val}, 0({dst})"), // atomic { *dst ^= val }
dst = in(reg) dst,
val = in(reg) val $(as $cast)?,
// Do not use `preserves_flags` because XOR modifies the V, N, Z, and C bits of the status register.
options(nostack),
);
#[cfg(portable_atomic_no_asm)]
llvm_asm!(
concat!("xor.", $size, " $1, $0")
:: "*m"(dst), "ir"(val) : "memory" : "volatile"
);
}
}
#[inline]
pub(crate) fn not(&self, _order: Ordering) {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
#[cfg(not(portable_atomic_no_asm))]
asm!(
concat!("inv.", $size, " 0({dst})"), // atomic { *dst = !*dst }
dst = in(reg) dst,
// Do not use `preserves_flags` because INV modifies the V, N, Z, and C bits of the status register.
options(nostack),
);
#[cfg(portable_atomic_no_asm)]
llvm_asm!(
concat!("inv.", $size, " $0")
:: "*m"(dst) : "memory" : "volatile"
);
}
}
}
};
}
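// Expansion sketch (illustrative, not generated by the macro): `AtomicU16::add(1, SeqCst)` boils
// down to
//
//     add.w {val}, 0({dst})   ; *dst += val
//
// a single instruction, so on this single-core target there is no window in which an interrupt
// could observe a half-finished update. Only operations that must return the previous value
// (swap, fetch_*, compare_exchange) need the interrupt-based implementation instead.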
atomic!(AtomicI8, i8, "b");
atomic!(AtomicU8, u8, "b");
atomic!(AtomicI16, i16, "w");
atomic!(AtomicU16, u16, "w");
atomic!(AtomicIsize, isize, "w");
atomic!(AtomicUsize, usize, "w");
atomic!(load_store, [T] AtomicPtr, *mut T as *mut u8, "w");

vendor/portable-atomic/src/imp/riscv.rs vendored Normal file

@@ -0,0 +1,934 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Atomic load/store implementation on RISC-V.
This is for RISC-V targets without the A extension. (pre-1.76 rustc doesn't provide atomics
at all on such targets. https://github.com/rust-lang/rust/pull/114499)
Also, an RMW implementation is optionally provided when the Zaamo extension or the force-amo feature is enabled.
See "Atomic operation overview by architecture" in atomic-maybe-uninit for a more comprehensive and
detailed description of the atomic and synchronization instructions in this architecture:
https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md#risc-v
Refs:
- RISC-V Instruction Set Manual
"Zaamo" Extension for Atomic Memory Operations
https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/a-st-ext.adoc#zaamo-extension-for-atomic-memory-operations
"Zabha" Extension for Byte and Halfword Atomic Memory Operations
https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/zabha.adoc
- RISC-V Atomics ABI Specification
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/draft-20240829-13bfa9f54634cb60d86b9b333e109f077805b4b3/riscv-atomic.adoc
- atomic-maybe-uninit
https://github.com/taiki-e/atomic-maybe-uninit
Generated asm:
- riscv64gc https://godbolt.org/z/Ws933n9jE
- riscv64gc (+zabha) https://godbolt.org/z/zEKPPW11f
- riscv32imac https://godbolt.org/z/TKbYdbaE9
- riscv32imac (+zabha) https://godbolt.org/z/TnePfK6co
*/
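// Illustrative note (not upstream documentation): the RMW path below is compiled in only when one
// of the cfgs it checks is set, e.g. (assuming the usual RUSTFLAGS mechanism for this crate's
// cfgs; the cfg name is the one tested throughout this file):
//
//     RUSTFLAGS="--cfg portable_atomic_force_amo" cargo build --target riscv32imc-unknown-none-elf
//
// Without one of these cfgs or the zaamo target feature, this file provides load/store only.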
// TODO: Zacas/Zalrsc extension
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::{cell::UnsafeCell, sync::atomic::Ordering};
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
#[cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))]
#[cfg(target_arch = "riscv32")]
macro_rules! w {
() => {
""
};
}
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
#[cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))]
#[cfg(target_arch = "riscv64")]
macro_rules! w {
() => {
"w"
};
}
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
macro_rules! atomic_rmw_amo_ext {
// Use +a also for zaamo because `option arch +zaamo` requires LLVM 19.
// https://github.com/llvm/llvm-project/commit/8be079cdddfd628d356d9ddb5ab397ea95fb1030
("w") => {
"+a"
};
("d") => {
"+a"
};
("b") => {
"+a,+zabha"
};
("h") => {
"+a,+zabha"
};
}
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
macro_rules! atomic_rmw_amo {
($op:ident, $dst:ident, $val:ident $(as $cast:ty)?, $order:ident, $size:tt) => {{
let out $(: $cast)?;
macro_rules! op {
($asm_order:tt) => {
// SAFETY: The user guaranteed that the AMO instruction is available in this
// system by setting the portable_atomic_force_amo/target_feature and
// portable_atomic_unsafe_assume_single_core.
// The caller of this macro must guarantee the validity of the pointer.
asm!(
".option push",
// https://github.com/riscv-non-isa/riscv-asm-manual/blob/ad0de8c004e29c9a7ac33cfd054f4d4f9392f2fb/src/asm-manual.adoc#arch
// LLVM supports `.option arch` directive on LLVM 17+.
// https://github.com/llvm/llvm-project/commit/9e8ed3403c191ab9c4903e8eeb8f732ff8a43cb4
// Note that `.insn <value>` directive requires LLVM 19.
// https://github.com/llvm/llvm-project/commit/2a086dce691e3cc34a2fc27f4fb255bb2cbbfac9
concat!(".option arch, ", atomic_rmw_amo_ext!($size)),
concat!("amo", stringify!($op), ".", $size, $asm_order, " {out}, {val}, 0({dst})"), // atomic { _x = *dst; *dst = op(_x, val); out = _x }
".option pop",
dst = in(reg) ptr_reg!($dst),
val = in(reg) $val $(as $cast)?,
out = lateout(reg) out,
options(nostack, preserves_flags),
)
};
}
match $order {
Ordering::Relaxed => op!(""),
Ordering::Acquire => op!(".aq"),
Ordering::Release => op!(".rl"),
// AcqRel and SeqCst RMWs are equivalent.
Ordering::AcqRel | Ordering::SeqCst => op!(".aqrl"),
_ => unreachable!(),
}
out
}};
}
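// Expansion sketch (illustrative): `atomic_rmw_amo!(add, dst, val, Ordering::AcqRel, "w")` picks
// the ".aqrl" suffix in the `match` above, so the instruction emitted inside the `.option arch`
// block is
//
//     amoadd.w.aqrl {out}, {val}, 0({dst})   # out = *dst; *dst = *dst + val
//
// a single AMO that also returns the previous value. Relaxed, Acquire, and Release map to the
// "", ".aq", and ".rl" suffixes respectively.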
// 32-bit val.wrapping_shl(shift) but no extra `& (u32::BITS - 1)`
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
#[cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))]
#[inline(always)]
fn sllw(val: u32, shift: u32) -> u32 {
// SAFETY: Calling sll{,w} is safe.
unsafe {
let out;
asm!(
concat!("sll", w!(), " {out}, {val}, {shift}"), // out = val << shift & 31
out = lateout(reg) out,
val = in(reg) val,
shift = in(reg) shift,
options(pure, nomem, nostack, preserves_flags),
);
out
}
}
// 32-bit val.wrapping_shr(shift) but no extra `& (u32::BITS - 1)`
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
#[cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))]
macro_rules! srlw {
($val:expr, $shift:expr) => {
// SAFETY: Calling srl{,w} is safe.
unsafe {
let val: u32 = $val;
let shift: u32 = $shift;
let out;
asm!(
concat!("srl", w!(), " {out}, {val}, {shift}"), // out = val >> shift & 31
out = lateout(reg) out,
val = in(reg) val,
shift = in(reg) shift,
options(pure, nomem, nostack, preserves_flags),
);
out
}
};
}
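// Worked example (for orientation): with shift = 8, `sllw(0x0000_00FF, 8)` yields 0x0000_FF00 and
// `srlw!(0x0000_FF00, 8)` yields 0x0000_00FF again; the hardware already masks the shift amount
// to the low 5 bits, which is why no extra `& (u32::BITS - 1)` is needed on the Rust side.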
macro_rules! atomic_load_store {
($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty $(as $cast:ty)?, $size:tt) => {
#[repr(transparent)]
pub(crate) struct $atomic_type $(<$($generics)*>)? {
v: UnsafeCell<$value_type>,
}
// Send is implicitly implemented for atomic integers, but not for atomic pointers.
// SAFETY: any data races are prevented by atomic operations.
unsafe impl $(<$($generics)*>)? Send for $atomic_type $(<$($generics)*>)? {}
// SAFETY: any data races are prevented by atomic operations.
unsafe impl $(<$($generics)*>)? Sync for $atomic_type $(<$($generics)*>)? {}
#[cfg(any(test, not(portable_atomic_unsafe_assume_single_core)))]
impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
#[inline]
pub(crate) const fn new(v: $value_type) -> Self {
Self { v: UnsafeCell::new(v) }
}
#[inline]
pub(crate) fn is_lock_free() -> bool {
Self::IS_ALWAYS_LOCK_FREE
}
pub(crate) const IS_ALWAYS_LOCK_FREE: bool = true;
#[inline]
pub(crate) const fn as_ptr(&self) -> *mut $value_type {
self.v.get()
}
}
impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn load(&self, order: Ordering) -> $value_type {
crate::utils::assert_load_ordering(order);
let src = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
let out $(: $cast)?;
macro_rules! atomic_load {
($acquire:tt, $release:tt) => {
asm!(
$release, // fence
concat!("l", $size, " {out}, 0({src})"), // atomic { out = *src }
$acquire, // fence
src = in(reg) ptr_reg!(src),
out = lateout(reg) out,
options(nostack, preserves_flags),
)
};
}
match order {
Ordering::Relaxed => atomic_load!("", ""),
Ordering::Acquire => atomic_load!("fence r, rw", ""),
Ordering::SeqCst => atomic_load!("fence r, rw", "fence rw, rw"),
_ => unreachable!(),
}
out $(as $cast as $value_type)?
}
}
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn store(&self, val: $value_type, order: Ordering) {
crate::utils::assert_store_ordering(order);
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
macro_rules! atomic_store {
($acquire:tt, $release:tt) => {
asm!(
$release, // fence
concat!("s", $size, " {val}, 0({dst})"), // atomic { *dst = val }
$acquire, // fence
dst = in(reg) ptr_reg!(dst),
val = in(reg) val $(as $cast)?,
options(nostack, preserves_flags),
)
};
}
match order {
Ordering::Relaxed => atomic_store!("", ""),
Ordering::Release => atomic_store!("", "fence rw, w"),
// https://github.com/llvm/llvm-project/commit/3ea8f2526541884e03d5bd4f4e46f4eb190990b6
Ordering::SeqCst => atomic_store!("fence rw, rw", "fence rw, w"),
_ => unreachable!(),
}
}
}
}
};
}
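// Resulting instruction sequences (sketch, 32-bit case), as selected by the `match` arms above:
//
//     load,  Relaxed: lw
//     load,  Acquire: lw; fence r, rw
//     load,  SeqCst:  fence rw, rw; lw; fence r, rw
//     store, Relaxed: sw
//     store, Release: fence rw, w; sw
//     store, SeqCst:  fence rw, w; sw; fence rw, rw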
macro_rules! atomic_ptr {
($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty $(as $cast:ty)?, $size:tt) => {
atomic_load_store!($([$($generics)*])? $atomic_type, $value_type $(as $cast)?, $size);
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
#[inline]
pub(crate) fn swap(&self, val: $value_type, order: Ordering) -> $value_type {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
atomic_rmw_amo!(swap, dst, val $(as $cast)?, order, $size)
$(as $cast as $value_type)?
}
}
}
};
}
macro_rules! atomic {
($atomic_type:ident, $value_type:ty, $size:tt, $max:tt, $min:tt) => {
atomic_load_store!($atomic_type, $value_type, $size);
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
#[cfg(not(any(portable_atomic_unsafe_assume_single_core, feature = "critical-section")))]
impl_default_no_fetch_ops!($atomic_type, $value_type);
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
#[cfg(not(any(portable_atomic_unsafe_assume_single_core, feature = "critical-section")))]
impl_default_bit_opts!($atomic_type, $value_type);
// There is no amo{sub,nand,neg}.
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
impl $atomic_type {
#[inline]
pub(crate) fn swap(&self, val: $value_type, order: Ordering) -> $value_type {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe { atomic_rmw_amo!(swap, dst, val, order, $size) }
}
#[inline]
pub(crate) fn fetch_add(&self, val: $value_type, order: Ordering) -> $value_type {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe { atomic_rmw_amo!(add, dst, val, order, $size) }
}
#[inline]
pub(crate) fn fetch_sub(&self, val: $value_type, order: Ordering) -> $value_type {
self.fetch_add(val.wrapping_neg(), order)
}
#[inline]
pub(crate) fn fetch_and(&self, val: $value_type, order: Ordering) -> $value_type {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe { atomic_rmw_amo!(and, dst, val, order, $size) }
}
#[inline]
pub(crate) fn fetch_or(&self, val: $value_type, order: Ordering) -> $value_type {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe { atomic_rmw_amo!(or, dst, val, order, $size) }
}
#[inline]
pub(crate) fn fetch_xor(&self, val: $value_type, order: Ordering) -> $value_type {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe { atomic_rmw_amo!(xor, dst, val, order, $size) }
}
#[inline]
pub(crate) fn fetch_not(&self, order: Ordering) -> $value_type {
let dst = self.v.get();
#[cfg(target_arch = "riscv32")]
let val: u32 = !0;
#[cfg(target_arch = "riscv64")]
let val: u64 = !0;
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe { atomic_rmw_amo!(xor, dst, val, order, $size) }
}
#[cfg(not(any(
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
)))]
#[inline]
pub(crate) fn not(&self, order: Ordering) {
self.fetch_not(order);
}
#[inline]
pub(crate) fn fetch_max(&self, val: $value_type, order: Ordering) -> $value_type {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe { atomic_rmw_amo!($max, dst, val, order, $size) }
}
#[inline]
pub(crate) fn fetch_min(&self, val: $value_type, order: Ordering) -> $value_type {
let dst = self.v.get();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe { atomic_rmw_amo!($min, dst, val, order, $size) }
}
}
};
}
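// Arithmetic note (illustrative): since there is no amosub, `fetch_sub(3, order)` on a u32 above
// is `fetch_add(3u32.wrapping_neg(), order)`, i.e. an `amoadd` of 0xFFFF_FFFD, which gives the
// same result modulo 2^32. Likewise `fetch_not` is an `amoxor` with an all-ones operand.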
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
#[cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))]
trait ZeroExtend: Copy {
/// Zero-extends `self` to `u32` if it is smaller than 32-bit.
fn zero_extend(self) -> u32;
}
macro_rules! zero_extend {
($int:ident, $uint:ident) => {
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
#[cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))]
impl ZeroExtend for $uint {
#[inline(always)]
fn zero_extend(self) -> u32 {
self as u32
}
}
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
#[cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))]
impl ZeroExtend for $int {
#[allow(clippy::cast_sign_loss)]
#[inline(always)]
fn zero_extend(self) -> u32 {
self as $uint as u32
}
}
};
}
zero_extend!(i8, u8);
zero_extend!(i16, u16);
macro_rules! atomic_sub_word {
($atomic_type:ident, $value_type:ty, $size:tt, $max:tt, $min:tt) => {
#[cfg(any(target_feature = "zabha", portable_atomic_target_feature = "zabha"))]
atomic!($atomic_type, $value_type, $size, $max, $min);
#[cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))]
atomic_load_store!($atomic_type, $value_type, $size);
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
#[cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))]
#[cfg(not(any(portable_atomic_unsafe_assume_single_core, feature = "critical-section")))]
impl_default_bit_opts!($atomic_type, $value_type);
#[cfg(any(
test,
portable_atomic_force_amo,
target_feature = "zaamo",
portable_atomic_target_feature = "zaamo",
))]
#[cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))]
impl $atomic_type {
#[inline]
pub(crate) fn fetch_and(&self, val: $value_type, order: Ordering) -> $value_type {
let dst = self.v.get();
let (dst, shift, mut mask) = crate::utils::create_sub_word_mask_values(dst);
mask = !sllw(mask, shift);
let mut val = sllw(ZeroExtend::zero_extend(val), shift);
val |= mask;
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
let out: u32 = unsafe { atomic_rmw_amo!(and, dst, val, order, "w") };
srlw!(out, shift)
}
#[cfg(not(any(
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
)))]
#[inline]
pub(crate) fn and(&self, val: $value_type, order: Ordering) {
self.fetch_and(val, order);
}
#[inline]
pub(crate) fn fetch_or(&self, val: $value_type, order: Ordering) -> $value_type {
let dst = self.v.get();
let (dst, shift, _mask) = crate::utils::create_sub_word_mask_values(dst);
let val = sllw(ZeroExtend::zero_extend(val), shift);
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
let out: u32 = unsafe { atomic_rmw_amo!(or, dst, val, order, "w") };
srlw!(out, shift)
}
#[cfg(not(any(
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
)))]
#[inline]
pub(crate) fn or(&self, val: $value_type, order: Ordering) {
self.fetch_or(val, order);
}
#[inline]
pub(crate) fn fetch_xor(&self, val: $value_type, order: Ordering) -> $value_type {
let dst = self.v.get();
let (dst, shift, _mask) = crate::utils::create_sub_word_mask_values(dst);
let val = sllw(ZeroExtend::zero_extend(val), shift);
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
let out: u32 = unsafe { atomic_rmw_amo!(xor, dst, val, order, "w") };
srlw!(out, shift)
}
#[cfg(not(any(
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
)))]
#[inline]
pub(crate) fn xor(&self, val: $value_type, order: Ordering) {
self.fetch_xor(val, order);
}
#[inline]
pub(crate) fn fetch_not(&self, order: Ordering) -> $value_type {
self.fetch_xor(!0, order)
}
#[cfg(not(any(
portable_atomic_unsafe_assume_single_core,
feature = "critical-section",
)))]
#[inline]
pub(crate) fn not(&self, order: Ordering) {
self.fetch_not(order);
}
}
};
}
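// Worked example (sketch; assumes `create_sub_word_mask_values` returns the 4-byte-aligned word
// pointer, the bit offset of the byte/halfword within it, and the unshifted value mask, which is
// what the shifts above rely on): for `AtomicU8::fetch_or(0x0F, order)` on a byte sitting at bit
// offset 8 of its word, `val` becomes 0x0000_0F00, the `amoor.w` touches only those bits of the
// containing word, and `srlw!(out, 8)` recovers the byte's previous value. `fetch_and`
// additionally ORs the inverted, shifted mask into `val` so the other bytes are ANDed with 1s
// and stay unchanged.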
atomic_sub_word!(AtomicI8, i8, "b", max, min);
atomic_sub_word!(AtomicU8, u8, "b", maxu, minu);
atomic_sub_word!(AtomicI16, i16, "h", max, min);
atomic_sub_word!(AtomicU16, u16, "h", maxu, minu);
atomic!(AtomicI32, i32, "w", max, min);
atomic!(AtomicU32, u32, "w", maxu, minu);
#[cfg(target_arch = "riscv64")]
atomic!(AtomicI64, i64, "d", max, min);
#[cfg(target_arch = "riscv64")]
atomic!(AtomicU64, u64, "d", maxu, minu);
#[cfg(target_pointer_width = "32")]
atomic!(AtomicIsize, isize, "w", max, min);
#[cfg(target_pointer_width = "32")]
atomic!(AtomicUsize, usize, "w", maxu, minu);
#[cfg(target_pointer_width = "32")]
atomic_ptr!([T] AtomicPtr, *mut T as *mut u8, "w");
#[cfg(target_pointer_width = "64")]
atomic!(AtomicIsize, isize, "d", max, min);
#[cfg(target_pointer_width = "64")]
atomic!(AtomicUsize, usize, "d", maxu, minu);
#[cfg(target_pointer_width = "64")]
atomic_ptr!([T] AtomicPtr, *mut T as *mut u8, "d");
#[cfg(test)]
mod tests {
use super::*;
test_atomic_ptr_load_store!();
test_atomic_int_load_store!(i8);
test_atomic_int_load_store!(u8);
test_atomic_int_load_store!(i16);
test_atomic_int_load_store!(u16);
test_atomic_int_load_store!(i32);
test_atomic_int_load_store!(u32);
#[cfg(target_arch = "riscv64")]
test_atomic_int_load_store!(i64);
#[cfg(target_arch = "riscv64")]
test_atomic_int_load_store!(u64);
test_atomic_int_load_store!(isize);
test_atomic_int_load_store!(usize);
macro_rules! test_atomic_ptr_amo {
() => {
#[allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks
)]
mod test_atomic_ptr_amo {
use super::*;
test_atomic_ptr_amo!(AtomicPtr<u8>);
}
};
($atomic_type:ty) => {
use crate::tests::helper;
::quickcheck::quickcheck! {
fn quickcheck_swap(x: usize, y: usize) -> bool {
let x = sptr::invalid_mut(x);
let y = sptr::invalid_mut(y);
for &order in &helper::SWAP_ORDERINGS {
let a = <$atomic_type>::new(x);
assert_eq!(a.swap(y, order), x);
assert_eq!(a.swap(x, order), y);
}
true
}
}
};
}
macro_rules! test_atomic_int_amo {
($int_type:ident) => {
paste::paste! {
#[allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks
)]
mod [<test_atomic_ $int_type _amo>] {
use super::*;
test_atomic_int_amo!([<Atomic $int_type:camel>], $int_type);
}
}
};
($atomic_type:ty, $int_type:ident) => {
use crate::tests::helper;
::quickcheck::quickcheck! {
fn quickcheck_swap(x: $int_type, y: $int_type) -> bool {
for &order in &helper::SWAP_ORDERINGS {
let a = <$atomic_type>::new(x);
assert_eq!(a.swap(y, order), x);
assert_eq!(a.swap(x, order), y);
}
true
}
fn quickcheck_fetch_add(x: $int_type, y: $int_type) -> bool {
for &order in &helper::SWAP_ORDERINGS {
let a = <$atomic_type>::new(x);
assert_eq!(a.fetch_add(y, order), x);
assert_eq!(a.load(Ordering::Relaxed), x.wrapping_add(y));
let a = <$atomic_type>::new(y);
assert_eq!(a.fetch_add(x, order), y);
assert_eq!(a.load(Ordering::Relaxed), y.wrapping_add(x));
}
true
}
fn quickcheck_fetch_sub(x: $int_type, y: $int_type) -> bool {
for &order in &helper::SWAP_ORDERINGS {
let a = <$atomic_type>::new(x);
assert_eq!(a.fetch_sub(y, order), x);
assert_eq!(a.load(Ordering::Relaxed), x.wrapping_sub(y));
let a = <$atomic_type>::new(y);
assert_eq!(a.fetch_sub(x, order), y);
assert_eq!(a.load(Ordering::Relaxed), y.wrapping_sub(x));
}
true
}
fn quickcheck_fetch_and(x: $int_type, y: $int_type) -> bool {
for &order in &helper::SWAP_ORDERINGS {
let a = <$atomic_type>::new(x);
assert_eq!(a.fetch_and(y, order), x);
assert_eq!(a.load(Ordering::Relaxed), x & y);
let a = <$atomic_type>::new(y);
assert_eq!(a.fetch_and(x, order), y);
assert_eq!(a.load(Ordering::Relaxed), y & x);
}
true
}
fn quickcheck_fetch_or(x: $int_type, y: $int_type) -> bool {
for &order in &helper::SWAP_ORDERINGS {
let a = <$atomic_type>::new(x);
assert_eq!(a.fetch_or(y, order), x);
assert_eq!(a.load(Ordering::Relaxed), x | y);
let a = <$atomic_type>::new(y);
assert_eq!(a.fetch_or(x, order), y);
assert_eq!(a.load(Ordering::Relaxed), y | x);
}
true
}
fn quickcheck_fetch_xor(x: $int_type, y: $int_type) -> bool {
for &order in &helper::SWAP_ORDERINGS {
let a = <$atomic_type>::new(x);
assert_eq!(a.fetch_xor(y, order), x);
assert_eq!(a.load(Ordering::Relaxed), x ^ y);
let a = <$atomic_type>::new(y);
assert_eq!(a.fetch_xor(x, order), y);
assert_eq!(a.load(Ordering::Relaxed), y ^ x);
}
true
}
fn quickcheck_fetch_max(x: $int_type, y: $int_type) -> bool {
for &order in &helper::SWAP_ORDERINGS {
let a = <$atomic_type>::new(x);
assert_eq!(a.fetch_max(y, order), x);
assert_eq!(a.load(Ordering::Relaxed), core::cmp::max(x, y));
let a = <$atomic_type>::new(y);
assert_eq!(a.fetch_max(x, order), y);
assert_eq!(a.load(Ordering::Relaxed), core::cmp::max(y, x));
}
true
}
fn quickcheck_fetch_min(x: $int_type, y: $int_type) -> bool {
for &order in &helper::SWAP_ORDERINGS {
let a = <$atomic_type>::new(x);
assert_eq!(a.fetch_min(y, order), x);
assert_eq!(a.load(Ordering::Relaxed), core::cmp::min(x, y));
let a = <$atomic_type>::new(y);
assert_eq!(a.fetch_min(x, order), y);
assert_eq!(a.load(Ordering::Relaxed), core::cmp::min(y, x));
}
true
}
fn quickcheck_fetch_not(x: $int_type) -> bool {
for &order in &helper::SWAP_ORDERINGS {
let a = <$atomic_type>::new(x);
assert_eq!(a.fetch_not(order), x);
assert_eq!(a.load(Ordering::Relaxed), !x);
assert_eq!(a.fetch_not(order), !x);
assert_eq!(a.load(Ordering::Relaxed), x);
}
true
}
}
};
}
macro_rules! test_atomic_int_amo_sub_word {
($int_type:ident) => {
paste::paste! {
#[allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks
)]
mod [<test_atomic_ $int_type _amo>] {
use super::*;
#[cfg(any(target_feature = "zabha", portable_atomic_target_feature = "zabha"))]
test_atomic_int_amo!([<Atomic $int_type:camel>], $int_type);
#[cfg(not(any(target_feature = "zabha", portable_atomic_target_feature = "zabha")))]
test_atomic_int_amo_sub_word!([<Atomic $int_type:camel>], $int_type);
}
}
};
($atomic_type:ty, $int_type:ident) => {
use crate::tests::helper::{self, *};
::quickcheck::quickcheck! {
fn quickcheck_fetch_and(x: $int_type, y: $int_type) -> bool {
let mut rng = fastrand::Rng::new();
for &order in &helper::SWAP_ORDERINGS {
for base in [0, !0] {
let mut arr = Align16([
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
]);
let a_idx = rng.usize(3..=6);
arr.0[a_idx] = <$atomic_type>::new(x);
let a = &arr.0[a_idx];
assert_eq!(a.fetch_and(y, order), x);
assert_eq!(a.load(Ordering::Relaxed), x & y);
for i in 0..a_idx {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
for i in a_idx + 1..arr.0.len() {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
arr.0[a_idx] = <$atomic_type>::new(y);
let a = &arr.0[a_idx];
assert_eq!(a.fetch_and(x, order), y);
assert_eq!(a.load(Ordering::Relaxed), y & x);
for i in 0..a_idx {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
for i in a_idx + 1..arr.0.len() {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
}
}
true
}
fn quickcheck_fetch_or(x: $int_type, y: $int_type) -> bool {
let mut rng = fastrand::Rng::new();
for &order in &helper::SWAP_ORDERINGS {
for base in [0, !0] {
let mut arr = Align16([
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
]);
let a_idx = rng.usize(3..=6);
arr.0[a_idx] = <$atomic_type>::new(x);
let a = &arr.0[a_idx];
assert_eq!(a.fetch_or(y, order), x);
assert_eq!(a.load(Ordering::Relaxed), x | y);
for i in 0..a_idx {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
for i in a_idx + 1..arr.0.len() {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
arr.0[a_idx] = <$atomic_type>::new(y);
let a = &arr.0[a_idx];
assert_eq!(a.fetch_or(x, order), y);
assert_eq!(a.load(Ordering::Relaxed), y | x);
for i in 0..a_idx {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
for i in a_idx + 1..arr.0.len() {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
}
}
true
}
fn quickcheck_fetch_xor(x: $int_type, y: $int_type) -> bool {
let mut rng = fastrand::Rng::new();
for &order in &helper::SWAP_ORDERINGS {
for base in [0, !0] {
let mut arr = Align16([
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
]);
let a_idx = rng.usize(3..=6);
arr.0[a_idx] = <$atomic_type>::new(x);
let a = &arr.0[a_idx];
assert_eq!(a.fetch_xor(y, order), x);
assert_eq!(a.load(Ordering::Relaxed), x ^ y);
for i in 0..a_idx {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
for i in a_idx + 1..arr.0.len() {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
arr.0[a_idx] = <$atomic_type>::new(y);
let a = &arr.0[a_idx];
assert_eq!(a.fetch_xor(x, order), y);
assert_eq!(a.load(Ordering::Relaxed), y ^ x);
for i in 0..a_idx {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
for i in a_idx + 1..arr.0.len() {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
}
}
true
}
fn quickcheck_fetch_not(x: $int_type) -> bool {
let mut rng = fastrand::Rng::new();
for &order in &helper::SWAP_ORDERINGS {
for base in [0, !0] {
let mut arr = Align16([
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
<$atomic_type>::new(base),
]);
let a_idx = rng.usize(3..=6);
arr.0[a_idx] = <$atomic_type>::new(x);
let a = &arr.0[a_idx];
assert_eq!(a.fetch_not(order), x);
assert_eq!(a.load(Ordering::Relaxed), !x);
assert_eq!(a.fetch_not(order), !x);
assert_eq!(a.load(Ordering::Relaxed), x);
for i in 0..a_idx {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
for i in a_idx + 1..arr.0.len() {
assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
}
}
}
true
}
}
};
}
test_atomic_ptr_amo!();
test_atomic_int_amo_sub_word!(i8);
test_atomic_int_amo_sub_word!(u8);
test_atomic_int_amo_sub_word!(i16);
test_atomic_int_amo_sub_word!(u16);
test_atomic_int_amo!(i32);
test_atomic_int_amo!(u32);
#[cfg(target_arch = "riscv64")]
test_atomic_int_amo!(i64);
#[cfg(target_arch = "riscv64")]
test_atomic_int_amo!(u64);
test_atomic_int_amo!(isize);
test_atomic_int_amo!(usize);
}

vendor/portable-atomic/src/imp/x86.rs vendored Normal file

@@ -0,0 +1,236 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
/*
Atomic operations implementation on x86/x86_64.
This module provides atomic operations not supported by LLVM or optimizes
cases where LLVM code generation is not optimal.
Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
this module and use CAS loop instead.
Refs:
- x86 and amd64 instruction reference https://www.felixcloutier.com/x86
Generated asm:
- x86_64 https://godbolt.org/z/ETa1MGTP3
*/
#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::sync::atomic::Ordering;
use super::core_atomic::{
AtomicI8, AtomicI16, AtomicI32, AtomicI64, AtomicIsize, AtomicU8, AtomicU16, AtomicU32,
AtomicU64, AtomicUsize,
};
#[cfg(target_pointer_width = "32")]
macro_rules! ptr_modifier {
() => {
":e"
};
}
#[cfg(target_pointer_width = "64")]
macro_rules! ptr_modifier {
() => {
""
};
}
macro_rules! atomic_int {
($atomic_type:ident, $ptr_size:tt) => {
impl $atomic_type {
#[inline]
pub(crate) fn not(&self, _order: Ordering) {
let dst = self.as_ptr();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
//
// https://www.felixcloutier.com/x86/not
unsafe {
// atomic RMW is always SeqCst.
asm!(
concat!("lock not ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}]"),
dst = in(reg) dst,
options(nostack, preserves_flags),
);
}
}
#[inline]
pub(crate) fn neg(&self, _order: Ordering) {
let dst = self.as_ptr();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
//
// https://www.felixcloutier.com/x86/neg
unsafe {
// atomic RMW is always SeqCst.
asm!(
concat!("lock neg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}]"),
dst = in(reg) dst,
// Do not use `preserves_flags` because NEG modifies the CF, OF, SF, ZF, AF, and PF flags.
options(nostack),
);
}
}
}
};
}
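// Expansion sketch (illustrative): for `AtomicU32::not` the asm above becomes
//
//     lock not dword ptr [reg]
//
// a single locked RMW instruction, hence "atomic RMW is always SeqCst" regardless of the ordering
// argument. `neg` is identical except that it cannot claim `preserves_flags`.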
atomic_int!(AtomicI8, "byte");
atomic_int!(AtomicU8, "byte");
atomic_int!(AtomicI16, "word");
atomic_int!(AtomicU16, "word");
atomic_int!(AtomicI32, "dword");
atomic_int!(AtomicU32, "dword");
#[cfg(target_arch = "x86_64")]
atomic_int!(AtomicI64, "qword");
#[cfg(target_arch = "x86_64")]
atomic_int!(AtomicU64, "qword");
#[cfg(target_pointer_width = "32")]
atomic_int!(AtomicIsize, "dword");
#[cfg(target_pointer_width = "32")]
atomic_int!(AtomicUsize, "dword");
#[cfg(target_pointer_width = "64")]
atomic_int!(AtomicIsize, "qword");
#[cfg(target_pointer_width = "64")]
atomic_int!(AtomicUsize, "qword");
#[cfg(target_arch = "x86")]
impl AtomicI64 {
#[inline]
pub(crate) fn not(&self, order: Ordering) {
self.fetch_not(order);
}
#[inline]
pub(crate) fn neg(&self, order: Ordering) {
self.fetch_neg(order);
}
}
#[cfg(target_arch = "x86")]
impl AtomicU64 {
#[inline]
pub(crate) fn not(&self, order: Ordering) {
self.fetch_not(order);
}
#[inline]
pub(crate) fn neg(&self, order: Ordering) {
self.fetch_neg(order);
}
}
macro_rules! atomic_bit_opts {
($atomic_type:ident, $int_type:ident, $val_modifier:tt, $ptr_size:tt) => {
// LLVM 14 and older don't support generating `lock bt{s,r,c}`.
// LLVM 15 only supports generating `lock bt{s,r,c}` for immediate bit offsets.
// LLVM 16+ can generate `lock bt{s,r,c}` for both immediate and register bit offsets.
// https://godbolt.org/z/TGhr5z4ds
// So, use fetch_* based implementations on LLVM 16+, otherwise use asm based implementations.
#[cfg(not(portable_atomic_pre_llvm_16))]
impl_default_bit_opts!($atomic_type, $int_type);
#[cfg(portable_atomic_pre_llvm_16)]
impl $atomic_type {
// `<integer>::BITS` requires Rust 1.53
const BITS: u32 = (core::mem::size_of::<$int_type>() * 8) as u32;
#[inline]
pub(crate) fn bit_set(&self, bit: u32, _order: Ordering) -> bool {
let dst = self.as_ptr();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
// the masking by the bit size of the type ensures that we do not shift
// out of bounds.
//
// https://www.felixcloutier.com/x86/bts
unsafe {
let r: u8;
// atomic RMW is always SeqCst.
asm!(
concat!("lock bts ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"),
"setb {r}",
dst = in(reg) dst,
bit = in(reg) (bit & (Self::BITS - 1)) as $int_type,
r = out(reg_byte) r,
// Do not use `preserves_flags` because BTS modifies the CF flag.
options(nostack),
);
crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
r != 0
}
}
#[inline]
pub(crate) fn bit_clear(&self, bit: u32, _order: Ordering) -> bool {
let dst = self.as_ptr();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
// the masking by the bit size of the type ensures that we do not shift
// out of bounds.
//
// https://www.felixcloutier.com/x86/btr
unsafe {
let r: u8;
// atomic RMW is always SeqCst.
asm!(
concat!("lock btr ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"),
"setb {r}",
dst = in(reg) dst,
bit = in(reg) (bit & (Self::BITS - 1)) as $int_type,
r = out(reg_byte) r,
// Do not use `preserves_flags` because BTR modifies the CF flag.
options(nostack),
);
crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
r != 0
}
}
#[inline]
pub(crate) fn bit_toggle(&self, bit: u32, _order: Ordering) -> bool {
let dst = self.as_ptr();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
// the masking by the bit size of the type ensures that we do not shift
// out of bounds.
//
// https://www.felixcloutier.com/x86/btc
unsafe {
let r: u8;
// atomic RMW is always SeqCst.
asm!(
concat!("lock btc ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"),
"setb {r}",
dst = in(reg) dst,
bit = in(reg) (bit & (Self::BITS - 1)) as $int_type,
r = out(reg_byte) r,
// Do not use `preserves_flags` because BTC modifies the CF flag.
options(nostack),
);
crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
r != 0
}
}
}
};
}
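// Semantics sketch (matching the fetch_*-based defaults used on LLVM 16+): `bit_set(bit, order)`
// behaves like
//
//     let mask = 1 << (bit % Self::BITS);
//     self.fetch_or(mask, order) & mask != 0
//
// with `fetch_and(!mask, ..)` for `bit_clear` and `fetch_xor(mask, ..)` for `bit_toggle`; the
// inline-asm versions above just let `lock bt{s,r,c}` do the masking and report the old bit via
// the carry flag.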
impl_default_bit_opts!(AtomicI8, i8);
impl_default_bit_opts!(AtomicU8, u8);
atomic_bit_opts!(AtomicI16, i16, ":x", "word");
atomic_bit_opts!(AtomicU16, u16, ":x", "word");
atomic_bit_opts!(AtomicI32, i32, ":e", "dword");
atomic_bit_opts!(AtomicU32, u32, ":e", "dword");
#[cfg(target_arch = "x86_64")]
atomic_bit_opts!(AtomicI64, i64, "", "qword");
#[cfg(target_arch = "x86_64")]
atomic_bit_opts!(AtomicU64, u64, "", "qword");
#[cfg(target_arch = "x86")]
impl_default_bit_opts!(AtomicI64, i64);
#[cfg(target_arch = "x86")]
impl_default_bit_opts!(AtomicU64, u64);
#[cfg(target_pointer_width = "32")]
atomic_bit_opts!(AtomicIsize, isize, ":e", "dword");
#[cfg(target_pointer_width = "32")]
atomic_bit_opts!(AtomicUsize, usize, ":e", "dword");
#[cfg(target_pointer_width = "64")]
atomic_bit_opts!(AtomicIsize, isize, "", "qword");
#[cfg(target_pointer_width = "64")]
atomic_bit_opts!(AtomicUsize, usize, "", "qword");

vendor/portable-atomic/src/lib.rs vendored Normal file

File diff suppressed because it is too large

vendor/portable-atomic/src/tests/helper.rs vendored Normal file

File diff suppressed because it is too large

vendor/portable-atomic/src/tests/mod.rs vendored Normal file

@@ -0,0 +1,494 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
#![allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks,
clippy::wildcard_imports
)]
#[macro_use]
pub(crate) mod helper;
#[allow(dead_code)]
#[path = "../../version.rs"]
mod version;
use test_helper as _;
use super::*;
test_atomic_bool_pub!();
test_atomic_ptr_pub!();
test_atomic_int_pub!(isize);
test_atomic_int_pub!(usize);
test_atomic_int_pub!(i8);
test_atomic_int_pub!(u8);
test_atomic_int_pub!(i16);
test_atomic_int_pub!(u16);
test_atomic_int_pub!(i32);
test_atomic_int_pub!(u32);
test_atomic_int_pub!(i64);
test_atomic_int_pub!(u64);
test_atomic_int_pub!(i128);
test_atomic_int_pub!(u128);
#[cfg(all(feature = "float", portable_atomic_unstable_f16))]
test_atomic_float_pub!(f16);
#[cfg(feature = "float")]
test_atomic_float_pub!(f32);
#[cfg(feature = "float")]
test_atomic_float_pub!(f64);
#[cfg(all(feature = "float", portable_atomic_unstable_f128))]
test_atomic_float_pub!(f128);
#[deny(improper_ctypes)]
extern "C" {
fn _atomic_bool_ffi_safety(_: AtomicBool);
fn _atomic_ptr_ffi_safety(_: AtomicPtr<u8>);
fn _atomic_isize_ffi_safety(_: AtomicIsize);
fn _atomic_usize_ffi_safety(_: AtomicUsize);
fn _atomic_i8_ffi_safety(_: AtomicI8);
fn _atomic_u8_ffi_safety(_: AtomicU8);
fn _atomic_i16_ffi_safety(_: AtomicI16);
fn _atomic_u16_ffi_safety(_: AtomicU16);
fn _atomic_i32_ffi_safety(_: AtomicI32);
fn _atomic_u32_ffi_safety(_: AtomicU32);
fn _atomic_i64_ffi_safety(_: AtomicI64);
fn _atomic_u64_ffi_safety(_: AtomicU64);
// TODO: https://github.com/rust-lang/lang-team/issues/255
// fn _atomic_i128_ffi_safety(_: AtomicI128);
// fn _atomic_u128_ffi_safety(_: AtomicU128);
#[cfg(all(feature = "float", portable_atomic_unstable_f16))]
fn _atomic_f16_ffi_safety(_: AtomicF16);
#[cfg(feature = "float")]
fn _atomic_f32_ffi_safety(_: AtomicF32);
#[cfg(feature = "float")]
fn _atomic_f64_ffi_safety(_: AtomicF64);
#[cfg(all(feature = "float", portable_atomic_unstable_f128))]
fn _atomic_f128_ffi_safety(_: AtomicF128);
}
#[test]
fn test_is_lock_free() {
assert!(AtomicI8::is_always_lock_free());
assert!(AtomicI8::is_lock_free());
assert!(AtomicU8::is_always_lock_free());
assert!(AtomicU8::is_lock_free());
assert!(AtomicI16::is_always_lock_free());
assert!(AtomicI16::is_lock_free());
assert!(AtomicU16::is_always_lock_free());
assert!(AtomicU16::is_lock_free());
#[cfg(all(feature = "float", portable_atomic_unstable_f16))]
assert!(AtomicF16::is_always_lock_free());
#[cfg(all(feature = "float", portable_atomic_unstable_f16))]
assert!(AtomicF16::is_lock_free());
assert!(AtomicI32::is_always_lock_free());
assert!(AtomicI32::is_lock_free());
assert!(AtomicU32::is_always_lock_free());
assert!(AtomicU32::is_lock_free());
#[cfg(feature = "float")]
assert!(AtomicF32::is_always_lock_free());
#[cfg(feature = "float")]
assert!(AtomicF32::is_lock_free());
#[cfg(not(portable_atomic_no_cfg_target_has_atomic))]
{
if cfg!(any(
target_has_atomic = "64",
all(
target_arch = "riscv32",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
),
)) {
assert!(AtomicI64::is_always_lock_free());
assert!(AtomicI64::is_lock_free());
assert!(AtomicU64::is_always_lock_free());
assert!(AtomicU64::is_lock_free());
#[cfg(feature = "float")]
assert!(AtomicF64::is_always_lock_free());
#[cfg(feature = "float")]
assert!(AtomicF64::is_lock_free());
} else if cfg!(all(
feature = "fallback",
target_arch = "arm",
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
not(portable_atomic_no_outline_atomics),
not(target_has_atomic = "64"),
not(portable_atomic_test_detect_false),
)) {
assert!(!AtomicI64::is_always_lock_free());
assert!(!AtomicU64::is_always_lock_free());
#[cfg(feature = "float")]
assert!(!AtomicF64::is_always_lock_free());
assert!(AtomicI64::is_lock_free());
assert!(AtomicU64::is_lock_free());
#[cfg(feature = "float")]
assert!(AtomicF64::is_lock_free());
} else {
assert!(!AtomicI64::is_always_lock_free());
assert!(!AtomicU64::is_always_lock_free());
#[cfg(feature = "float")]
assert!(!AtomicF64::is_always_lock_free());
#[cfg(not(target_arch = "riscv32"))]
{
assert!(!AtomicI64::is_lock_free());
assert!(!AtomicU64::is_lock_free());
#[cfg(feature = "float")]
assert!(!AtomicF64::is_lock_free());
}
#[cfg(target_arch = "riscv32")]
{
// TODO(riscv): check detect.has_zacas
}
}
}
if cfg!(portable_atomic_no_asm) && cfg!(not(portable_atomic_unstable_asm)) {
assert!(!AtomicI128::is_always_lock_free());
assert!(!AtomicI128::is_lock_free());
assert!(!AtomicU128::is_always_lock_free());
assert!(!AtomicU128::is_lock_free());
#[cfg(all(feature = "float", portable_atomic_unstable_f128))]
assert!(!AtomicF128::is_always_lock_free());
#[cfg(all(feature = "float", portable_atomic_unstable_f128))]
assert!(!AtomicF128::is_lock_free());
} else if cfg!(any(
all(
target_arch = "aarch64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
),
all(
target_arch = "arm64ec",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
not(portable_atomic_no_asm),
),
all(
target_arch = "x86_64",
any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
),
all(
target_arch = "riscv64",
any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
),
all(
target_arch = "powerpc64",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
portable_atomic_unstable_asm_experimental_arch,
any(
target_feature = "quadword-atomics",
portable_atomic_target_feature = "quadword-atomics",
),
),
all(
target_arch = "s390x",
not(all(
any(miri, portable_atomic_sanitize_thread),
not(portable_atomic_atomic_intrinsics),
)),
not(portable_atomic_no_asm),
),
)) {
assert!(AtomicI128::is_always_lock_free());
assert!(AtomicI128::is_lock_free());
assert!(AtomicU128::is_always_lock_free());
assert!(AtomicU128::is_lock_free());
#[cfg(all(feature = "float", portable_atomic_unstable_f128))]
assert!(AtomicF128::is_always_lock_free());
#[cfg(all(feature = "float", portable_atomic_unstable_f128))]
assert!(AtomicF128::is_lock_free());
} else {
assert!(!AtomicI128::is_always_lock_free());
assert!(!AtomicU128::is_always_lock_free());
#[cfg(all(feature = "float", portable_atomic_unstable_f128))]
assert!(!AtomicF128::is_always_lock_free());
#[cfg(not(any(
target_arch = "x86_64",
target_arch = "powerpc64",
target_arch = "riscv64",
)))]
{
assert!(!AtomicI128::is_lock_free());
assert!(!AtomicU128::is_lock_free());
#[cfg(all(feature = "float", portable_atomic_unstable_f128))]
assert!(!AtomicF128::is_lock_free());
}
#[cfg(target_arch = "x86_64")]
{
let has_cmpxchg16b = cfg!(all(
feature = "fallback",
not(portable_atomic_no_outline_atomics),
not(any(target_env = "sgx", miri)),
not(portable_atomic_test_detect_false),
)) && std::is_x86_feature_detected!("cmpxchg16b");
assert_eq!(AtomicI128::is_lock_free(), has_cmpxchg16b);
assert_eq!(AtomicU128::is_lock_free(), has_cmpxchg16b);
#[cfg(all(feature = "float", portable_atomic_unstable_f128))]
assert_eq!(AtomicF128::is_lock_free(), has_cmpxchg16b);
}
#[cfg(target_arch = "powerpc64")]
{
// TODO(powerpc64): is_powerpc_feature_detected is unstable
}
#[cfg(target_arch = "riscv64")]
{
// TODO(riscv): check detect.has_zacas
}
}
}
// test version parsing code used in the build script.
#[test]
fn test_rustc_version() {
use self::version::Version;
// rustc 1.34 (rustup)
let v = Version::parse(
"rustc 1.34.2 (6c2484dc3 2019-05-13)
binary: rustc
commit-hash: 6c2484dc3c532c052f159264e970278d8b77cdc9
commit-date: 2019-05-13
host: x86_64-apple-darwin
release: 1.34.2
LLVM version: 8.0",
)
.unwrap();
assert_eq!(v, Version::stable(34, 8));
// rustc 1.50 (rustup)
let v = Version::parse(
"rustc 1.50.0 (cb75ad5db 2021-02-10)
binary: rustc
commit-hash: cb75ad5db02783e8b0222fee363c5f63f7e2cf5b
commit-date: 2021-02-10
host: aarch64-unknown-linux-gnu
release: 1.50.0",
)
.unwrap();
assert_eq!(v, Version::stable(50, 0));
// rustc 1.67 (rustup)
let v = Version::parse(
"rustc 1.67.0 (fc594f156 2023-01-24)
binary: rustc
commit-hash: fc594f15669680fa70d255faec3ca3fb507c3405
commit-date: 2023-01-24
host: aarch64-apple-darwin
release: 1.67.0
LLVM version: 15.0.6",
)
.unwrap();
assert_eq!(v, Version::stable(67, 15));
// rustc 1.68-beta (rustup)
let v = Version::parse(
"rustc 1.68.0-beta.2 (10b73bf73 2023-02-01)
binary: rustc
commit-hash: 10b73bf73a6b770cd92ad8ff538173bc3298411c
commit-date: 2023-02-01
host: aarch64-apple-darwin
release: 1.68.0-beta.2
LLVM version: 15.0.6",
)
.unwrap();
// We do not distinguish between stable and beta because we are only
// interested in whether unstable features are potentially available.
assert_eq!(v, Version::stable(68, 15));
// rustc nightly-2019-01-27 (rustup)
let v = Version::parse(
"rustc 1.33.0-nightly (20c2cba61 2019-01-26)
binary: rustc
commit-hash: 20c2cba61dc83e612d25ed496025171caa3db30f
commit-date: 2019-01-26
host: x86_64-apple-darwin
release: 1.33.0-nightly
LLVM version: 8.0",
)
.unwrap();
assert_eq!(v.minor, 33);
assert!(v.nightly);
assert_eq!(v.llvm, 8);
assert_eq!(v.commit_date().year, 2019);
assert_eq!(v.commit_date().month, 1);
assert_eq!(v.commit_date().day, 26);
// rustc 1.69-nightly (rustup)
let v = Version::parse(
"rustc 1.69.0-nightly (bd39bbb4b 2023-02-07)
binary: rustc
commit-hash: bd39bbb4bb92df439bf6d85470e296cc6a47ffbd
commit-date: 2023-02-07
host: aarch64-apple-darwin
release: 1.69.0-nightly
LLVM version: 15.0.7",
)
.unwrap();
assert_eq!(v.minor, 69);
assert!(v.nightly);
assert_eq!(v.llvm, 15);
assert_eq!(v.commit_date().year, 2023);
assert_eq!(v.commit_date().month, 2);
assert_eq!(v.commit_date().day, 7);
// clippy-driver 1.69-nightly (rustup)
let v = Version::parse(
"rustc 1.69.0-nightly (bd39bbb4b 2023-02-07)
binary: rustc
commit-hash: bd39bbb4bb92df439bf6d85470e296cc6a47ffbd
commit-date: 2023-02-07
host: aarch64-apple-darwin
release: 1.69.0-nightly
LLVM version: 15.0.7",
)
.unwrap();
assert_eq!(v.minor, 69);
assert!(v.nightly);
assert_eq!(v.llvm, 15);
assert_eq!(v.commit_date().year, 2023);
assert_eq!(v.commit_date().month, 2);
assert_eq!(v.commit_date().day, 7);
// rustc 1.69-dev (from source: ./x.py build)
let v = Version::parse(
"rustc 1.69.0-dev
binary: rustc
commit-hash: unknown
commit-date: unknown
host: aarch64-unknown-linux-gnu
release: 1.69.0-dev
LLVM version: 16.0.0",
)
.unwrap();
assert_eq!(v.minor, 69);
assert!(v.nightly);
assert_eq!(v.llvm, 16);
assert_eq!(v.commit_date().year, 0);
assert_eq!(v.commit_date().month, 0);
assert_eq!(v.commit_date().day, 0);
// rustc 1.48 (debian 11: apt-get install cargo)
let v = Version::parse(
"rustc 1.48.0
binary: rustc
commit-hash: unknown
commit-date: unknown
host: aarch64-unknown-linux-gnu
release: 1.48.0
LLVM version: 11.0",
)
.unwrap();
assert_eq!(v, Version::stable(48, 11));
// rustc 1.67 (fedora: dnf install cargo)
let v = Version::parse(
"rustc 1.67.0 (fc594f156 2023-01-24) (Fedora 1.67.0-2.fc37)
binary: rustc
commit-hash: fc594f15669680fa70d255faec3ca3fb507c3405
commit-date: 2023-01-24
host: aarch64-unknown-linux-gnu
release: 1.67.0
LLVM version: 15.0.7",
)
.unwrap();
assert_eq!(v, Version::stable(67, 15));
// rustc 1.64 (alpine: apk add cargo)
let v = Version::parse(
"rustc 1.64.0
binary: rustc
commit-hash: unknown
commit-date: unknown
host: aarch64-alpine-linux-musl
release: 1.64.0
LLVM version: 15.0.3",
)
.unwrap();
assert_eq!(v, Version::stable(64, 15));
}
#[cfg(feature = "serde")]
#[test]
fn test_serde() {
use std::fmt;
use serde::{
de::{Deserialize, Deserializer},
ser::{Serialize, Serializer},
};
use serde_test::{Token, assert_tokens};
#[derive(Debug)]
struct DebugPartialEq<T>(T);
impl<T: fmt::Debug> PartialEq for DebugPartialEq<T> {
fn eq(&self, other: &Self) -> bool {
std::format!("{:?}", self) == std::format!("{:?}", other)
}
}
impl<T: Serialize> Serialize for DebugPartialEq<T> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
self.0.serialize(serializer)
}
}
impl<'de, T: Deserialize<'de>> Deserialize<'de> for DebugPartialEq<T> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
T::deserialize(deserializer).map(Self)
}
}
macro_rules! t {
($atomic_type:ty, $value_type:ident $(as $token_value_type:ident)?, $token_type:ident) => {
std::eprint!("test_serde {} ... ", stringify!($value_type));
assert_tokens(&DebugPartialEq(<$atomic_type>::new($value_type::MAX)), &[
Token::$token_type($value_type::MAX $(as $token_value_type)?),
]);
assert_tokens(&DebugPartialEq(<$atomic_type>::new($value_type::MIN)), &[
Token::$token_type($value_type::MIN $(as $token_value_type)?),
]);
std::eprintln!("ok");
};
}
assert_tokens(&DebugPartialEq(AtomicBool::new(true)), &[Token::Bool(true)]);
assert_tokens(&DebugPartialEq(AtomicBool::new(false)), &[Token::Bool(false)]);
t!(AtomicIsize, isize as i64, I64);
t!(AtomicUsize, usize as u64, U64);
t!(AtomicI8, i8, I8);
t!(AtomicU8, u8, U8);
t!(AtomicI16, i16, I16);
t!(AtomicU16, u16, U16);
t!(AtomicI32, i32, I32);
t!(AtomicU32, u32, U32);
t!(AtomicI64, i64, I64);
t!(AtomicU64, u64, U64);
t!(AtomicI128, i128, I128);
t!(AtomicU128, u128, U128);
// TODO(f16_and_f128): Test f16 & f128 once stabilized.
#[cfg(feature = "float")]
t!(AtomicF32, f32, F32);
#[cfg(feature = "float")]
// TODO: fixed in LLVM 18?
#[cfg(not(target_arch = "mips"))] // LLVM 17 (nightly-2023-08-09) bug: assertion failed at core/src/num/diy_float.rs:78:9
t!(AtomicF64, f64, F64);
}

984
vendor/portable-atomic/src/utils.rs vendored Normal file
View File

@@ -0,0 +1,984 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
#![cfg_attr(not(all(test, feature = "float")), allow(dead_code, unused_macros))]
#[macro_use]
#[path = "gen/utils.rs"]
mod generated;
use core::sync::atomic::Ordering;
macro_rules! static_assert {
($cond:expr $(,)?) => {{
let [] = [(); true as usize - $crate::utils::_assert_is_bool($cond) as usize];
}};
}
pub(crate) const fn _assert_is_bool(v: bool) -> bool {
v
}
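// How the assertion works: `true as usize - cond as usize` becomes the array
// length, which is 0 when the condition holds (so the empty pattern `[]`
// matches) and 1 when it does not (a compile-time pattern length mismatch).
// A minimal usage sketch, mirroring how this macro is used later in this file:
//
// const _: () = static_assert!(core::mem::size_of::<u64>() == 8);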
macro_rules! static_assert_layout {
($atomic_type:ty, $value_type:ty) => {
static_assert!(
core::mem::align_of::<$atomic_type>() == core::mem::size_of::<$atomic_type>()
);
static_assert!(core::mem::size_of::<$atomic_type>() == core::mem::size_of::<$value_type>());
};
}
// #[doc = concat!(...)] requires Rust 1.54
macro_rules! doc_comment {
($doc:expr, $($tt:tt)*) => {
#[doc = $doc]
$($tt)*
};
}
// Adapted from https://github.com/BurntSushi/memchr/blob/2.4.1/src/memchr/x86/mod.rs#L9-L71.
/// # Safety
///
/// - the caller must uphold the safety contract for the function returned by $detect_body.
/// - the memory pointed to by the function pointer returned by $detect_body must be visible from all threads.
///
/// The second requirement is always met if the function pointer points to a function definition.
/// (Currently, all uses of this macro in our code fall into this case.)
#[allow(unused_macros)]
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "riscv32",
target_arch = "riscv64",
all(target_arch = "x86_64", not(any(target_env = "sgx", miri))),
))]
macro_rules! ifunc {
(unsafe fn($($arg_pat:ident: $arg_ty:ty),*) $(-> $ret_ty:ty)? { $($detect_body:tt)* }) => {{
type FnTy = unsafe fn($($arg_ty),*) $(-> $ret_ty)?;
static FUNC: core::sync::atomic::AtomicPtr<()>
= core::sync::atomic::AtomicPtr::new(detect as *mut ());
#[cold]
unsafe fn detect($($arg_pat: $arg_ty),*) $(-> $ret_ty)? {
let func: FnTy = { $($detect_body)* };
FUNC.store(func as *mut (), core::sync::atomic::Ordering::Relaxed);
// SAFETY: the caller must uphold the safety contract for the function returned by $detect_body.
unsafe { func($($arg_pat),*) }
}
// SAFETY: `FnTy` is a function pointer, which is always safe to transmute with a `*mut ()`.
// (To force the caller to use unsafe block for this macro, do not use
// unsafe block here.)
let func = {
core::mem::transmute::<*mut (), FnTy>(FUNC.load(core::sync::atomic::Ordering::Relaxed))
};
// SAFETY: the caller must uphold the safety contract for the function returned by $detect_body.
// (To force the caller to use unsafe block for this macro, do not use
// unsafe block here.)
func($($arg_pat),*)
}};
}
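// A hedged usage sketch of ifunc! (all names below are hypothetical, not part
// of this crate): pick one of two implementations with identical signatures
// based on a runtime feature check; the chosen pointer is cached in `FUNC` so
// detection runs only on the first call.
//
// unsafe fn atomic_add_u64(dst: *mut u64, val: u64) -> u64 {
//     // SAFETY: the caller must uphold the safety contract of both
//     // `add_with_feature` and `add_fallback`.
//     unsafe {
//         ifunc!(unsafe fn(dst: *mut u64, val: u64) -> u64 {
//             if detect_feature() { add_with_feature } else { add_fallback }
//         })
//     }
// }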
#[allow(unused_macros)]
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "riscv32",
target_arch = "riscv64",
all(target_arch = "x86_64", not(any(target_env = "sgx", miri))),
))]
macro_rules! fn_alias {
(
$(#[$($fn_attr:tt)*])*
$vis:vis unsafe fn($($arg_pat:ident: $arg_ty:ty),*) $(-> $ret_ty:ty)?;
$(#[$($alias_attr:tt)*])*
$new:ident = $from:ident($($last_args:tt)*);
$($rest:tt)*
) => {
$(#[$($fn_attr)*])*
$(#[$($alias_attr)*])*
$vis unsafe fn $new($($arg_pat: $arg_ty),*) $(-> $ret_ty)? {
// SAFETY: the caller must uphold the safety contract.
unsafe { $from($($arg_pat,)* $($last_args)*) }
}
fn_alias! {
$(#[$($fn_attr)*])*
$vis unsafe fn($($arg_pat: $arg_ty),*) $(-> $ret_ty)?;
$($rest)*
}
};
(
$(#[$($attr:tt)*])*
$vis:vis unsafe fn($($arg_pat:ident: $arg_ty:ty),*) $(-> $ret_ty:ty)?;
) => {}
}
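// A hedged usage sketch of fn_alias! (hypothetical names): each alias forwards
// to the named function with the listed trailing arguments appended after the
// shared ones, so `load_acquire(dst)` expands to `load(dst, Ordering::Acquire)`.
//
// fn_alias! {
//     pub(crate) unsafe fn(dst: *mut u64) -> u64;
//     load_acquire = load(Ordering::Acquire);
//     load_seqcst = load(Ordering::SeqCst);
// }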
/// Make the given function const if the given condition is true.
macro_rules! const_fn {
(
const_if: #[cfg($($cfg:tt)+)];
$(#[$($attr:tt)*])*
$vis:vis const $($rest:tt)*
) => {
#[cfg($($cfg)+)]
$(#[$($attr)*])*
$vis const $($rest)*
#[cfg(not($($cfg)+))]
$(#[$($attr)*])*
$vis $($rest)*
};
}
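// A hedged usage sketch of const_fn! (`portable_atomic_no_const_fn_example` is
// a hypothetical cfg used only for illustration): the item is emitted as a
// `const fn` when the cfg condition holds and as a plain fn otherwise.
//
// const_fn! {
//     const_if: #[cfg(not(portable_atomic_no_const_fn_example))];
//     pub(crate) const fn splat(v: u8) -> u32 {
//         u32::from_ne_bytes([v; 4])
//     }
// }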
/// Implements `core::fmt::Debug` and `serde::{Serialize, Deserialize}` (when serde
/// feature is enabled) for atomic bool, integer, or float.
macro_rules! impl_debug_and_serde {
// TODO(f16_and_f128): Implement serde traits for f16 & f128 once stabilized.
(AtomicF16) => {
impl_debug!(AtomicF16);
};
(AtomicF128) => {
impl_debug!(AtomicF128);
};
($atomic_type:ident) => {
impl_debug!($atomic_type);
#[cfg(feature = "serde")]
#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
impl serde::ser::Serialize for $atomic_type {
#[allow(clippy::missing_inline_in_public_items)] // serde doesn't use inline on std atomic's Serialize/Deserialize impl
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::ser::Serializer,
{
// https://github.com/serde-rs/serde/blob/v1.0.152/serde/src/ser/impls.rs#L958-L959
self.load(Ordering::Relaxed).serialize(serializer)
}
}
#[cfg(feature = "serde")]
#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
impl<'de> serde::de::Deserialize<'de> for $atomic_type {
#[allow(clippy::missing_inline_in_public_items)] // serde doesn't use inline on std atomic's Serialize/Deserialize impl
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::de::Deserializer<'de>,
{
serde::de::Deserialize::deserialize(deserializer).map(Self::new)
}
}
};
}
macro_rules! impl_debug {
($atomic_type:ident) => {
impl fmt::Debug for $atomic_type {
#[inline] // fmt is not a hot path, but #[inline] on fmt still seems to be useful: https://github.com/rust-lang/rust/pull/117727
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// std atomic types use Relaxed in Debug::fmt: https://github.com/rust-lang/rust/blob/1.84.0/library/core/src/sync/atomic.rs#L2188
fmt::Debug::fmt(&self.load(Ordering::Relaxed), f)
}
}
};
}
// We do not provide `nand` because it cannot be optimized on either x86 or MSP430.
// https://godbolt.org/z/ahWejchbT
macro_rules! impl_default_no_fetch_ops {
($atomic_type:ident, bool) => {
impl $atomic_type {
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn and(&self, val: bool, order: Ordering) {
self.fetch_and(val, order);
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn or(&self, val: bool, order: Ordering) {
self.fetch_or(val, order);
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn xor(&self, val: bool, order: Ordering) {
self.fetch_xor(val, order);
}
}
};
($atomic_type:ident, $int_type:ty) => {
impl $atomic_type {
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn add(&self, val: $int_type, order: Ordering) {
self.fetch_add(val, order);
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn sub(&self, val: $int_type, order: Ordering) {
self.fetch_sub(val, order);
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn and(&self, val: $int_type, order: Ordering) {
self.fetch_and(val, order);
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn or(&self, val: $int_type, order: Ordering) {
self.fetch_or(val, order);
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn xor(&self, val: $int_type, order: Ordering) {
self.fetch_xor(val, order);
}
}
};
}
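// These return-value-less wrappers exist because, when the previous value is
// not needed, `add`/`sub`/`and`/`or`/`xor` can often be lowered to a single
// RMW instruction (e.g., `lock and` on x86) instead of a compare-exchange
// loop, which is also why `nand` is excluded above. A hedged semantic sketch:
//
// let a = AtomicU8::new(0b0011);
// a.and(0b0110, Ordering::Relaxed); // no return value
// assert_eq!(a.load(Ordering::Relaxed), 0b0010);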
macro_rules! impl_default_bit_opts {
($atomic_type:ident, $int_type:ty) => {
impl $atomic_type {
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn bit_set(&self, bit: u32, order: Ordering) -> bool {
let mask = <$int_type>::wrapping_shl(1, bit);
self.fetch_or(mask, order) & mask != 0
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn bit_clear(&self, bit: u32, order: Ordering) -> bool {
let mask = <$int_type>::wrapping_shl(1, bit);
self.fetch_and(!mask, order) & mask != 0
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn bit_toggle(&self, bit: u32, order: Ordering) -> bool {
let mask = <$int_type>::wrapping_shl(1, bit);
self.fetch_xor(mask, order) & mask != 0
}
}
};
}
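// Worked example of the bit-op semantics above (a hedged sketch): each method
// builds `mask = 1 << bit` with a wrapping shift and returns whether that bit
// was previously set.
//
// let a = AtomicU32::new(0b0100);
// assert!(!a.bit_set(1, Ordering::Relaxed));   // bit 1 was clear; value is now 0b0110
// assert!(a.bit_toggle(2, Ordering::Relaxed)); // bit 2 was set;   value is now 0b0010
// assert!(a.bit_clear(1, Ordering::Relaxed));  // bit 1 was set;   value is now 0b0000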
// This just outputs the input as-is, but can be used like an item-level block when combined with cfg attributes.
macro_rules! items {
($($tt:tt)*) => {
$($tt)*
};
}
#[allow(dead_code)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
// Stable version of https://doc.rust-lang.org/nightly/std/hint/fn.assert_unchecked.html.
// TODO: use real core::hint::assert_unchecked on 1.81+ https://github.com/rust-lang/rust/pull/123588
#[inline(always)]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) unsafe fn assert_unchecked(cond: bool) {
if !cond {
if cfg!(debug_assertions) {
unreachable!()
} else {
// SAFETY: the caller promised `cond` is true.
unsafe { core::hint::unreachable_unchecked() }
}
}
}
// https://github.com/rust-lang/rust/blob/1.84.0/library/core/src/sync/atomic.rs#L3338
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn assert_load_ordering(order: Ordering) {
match order {
Ordering::Acquire | Ordering::Relaxed | Ordering::SeqCst => {}
Ordering::Release => panic!("there is no such thing as a release load"),
Ordering::AcqRel => panic!("there is no such thing as an acquire-release load"),
_ => unreachable!(),
}
}
// https://github.com/rust-lang/rust/blob/1.84.0/library/core/src/sync/atomic.rs#L3323
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn assert_store_ordering(order: Ordering) {
match order {
Ordering::Release | Ordering::Relaxed | Ordering::SeqCst => {}
Ordering::Acquire => panic!("there is no such thing as an acquire store"),
Ordering::AcqRel => panic!("there is no such thing as an acquire-release store"),
_ => unreachable!(),
}
}
// https://github.com/rust-lang/rust/blob/1.84.0/library/core/src/sync/atomic.rs#L3404
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub(crate) fn assert_compare_exchange_ordering(success: Ordering, failure: Ordering) {
match success {
Ordering::AcqRel
| Ordering::Acquire
| Ordering::Relaxed
| Ordering::Release
| Ordering::SeqCst => {}
_ => unreachable!(),
}
match failure {
Ordering::Acquire | Ordering::Relaxed | Ordering::SeqCst => {}
Ordering::Release => panic!("there is no such thing as a release failure ordering"),
Ordering::AcqRel => panic!("there is no such thing as an acquire-release failure ordering"),
_ => unreachable!(),
}
}
// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0418r2.html
// https://github.com/rust-lang/rust/pull/98383
#[allow(dead_code)]
#[inline]
pub(crate) fn upgrade_success_ordering(success: Ordering, failure: Ordering) -> Ordering {
match (success, failure) {
(Ordering::Relaxed, Ordering::Acquire) => Ordering::Acquire,
(Ordering::Release, Ordering::Acquire) => Ordering::AcqRel,
(_, Ordering::SeqCst) => Ordering::SeqCst,
_ => success,
}
}
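// Worked example of the upgrade rule above: `(Relaxed, Acquire)` upgrades the
// success ordering to `Acquire`, and `(Release, SeqCst)` upgrades it to
// `SeqCst`, so the success ordering is always at least as strong as the
// failure ordering (see the p0418r2 link above).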
/// Zero-extends the given 32-bit pointer to `MaybeUninit<u64>`.
/// This is used for the 32-bit ABIs of 64-bit architectures (e.g., the AArch64 ILP32 ABI).
/// See ptr_reg! macro in src/gen/utils.rs for details.
#[cfg(not(portable_atomic_no_asm_maybe_uninit))]
#[cfg(target_pointer_width = "32")]
#[allow(dead_code)]
#[inline]
pub(crate) fn zero_extend64_ptr(v: *mut ()) -> core::mem::MaybeUninit<u64> {
#[repr(C)]
struct ZeroExtended {
#[cfg(target_endian = "big")]
pad: *mut (),
v: *mut (),
#[cfg(target_endian = "little")]
pad: *mut (),
}
// SAFETY: we can safely transmute any 64-bit value to MaybeUninit<u64>.
unsafe { core::mem::transmute(ZeroExtended { v, pad: core::ptr::null_mut() }) }
}
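// Layout sketch: on little-endian targets the struct is `{ v, pad }`, so the
// pointer occupies the low 32 bits of the resulting u64; on big-endian targets
// it is `{ pad, v }`, which again places the pointer in the numerically low
// 32 bits. Either way the result is the zero-extended address, e.g. a pointer
// with address 0x1234 becomes 0x0000_0000_0000_1234.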
#[allow(dead_code)]
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm64ec",
target_arch = "powerpc64",
target_arch = "riscv64",
target_arch = "s390x",
target_arch = "x86_64",
))]
/// A 128-bit value represented as a pair of 64-bit values.
///
/// This type is `#[repr(C)]`; both fields have the same in-memory representation
/// and are plain old data types, so access to the fields is always safe.
#[derive(Clone, Copy)]
#[repr(C)]
pub(crate) union U128 {
pub(crate) whole: u128,
pub(crate) pair: Pair<u64>,
}
#[allow(dead_code)]
#[cfg(any(target_arch = "arm", target_arch = "riscv32"))]
/// A 64-bit value represented as a pair of 32-bit values.
///
/// This type is `#[repr(C)]`; both fields have the same in-memory representation
/// and are plain old data types, so access to the fields is always safe.
#[derive(Clone, Copy)]
#[repr(C)]
pub(crate) union U64 {
pub(crate) whole: u64,
pub(crate) pair: Pair<u32>,
}
#[allow(dead_code)]
#[derive(Clone, Copy)]
#[repr(C)]
pub(crate) struct Pair<T: Copy> {
// little endian order
#[cfg(any(
target_endian = "little",
target_arch = "aarch64",
target_arch = "arm",
target_arch = "arm64ec",
))]
pub(crate) lo: T,
pub(crate) hi: T,
// big endian order
#[cfg(not(any(
target_endian = "little",
target_arch = "aarch64",
target_arch = "arm",
target_arch = "arm64ec",
)))]
pub(crate) lo: T,
}
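// A hedged usage sketch of the unions above (reading a union field still needs
// an `unsafe` block, although it is always sound here per the docs above): on
// a little-endian target,
// `U128 { whole: 0x1122_3344_5566_7788_99AA_BBCC_DDEE_FF00 }.pair` yields
// `lo == 0x99AA_BBCC_DDEE_FF00` and `hi == 0x1122_3344_5566_7788`. The cfg
// attributes on `Pair` reorder the fields per target so that `lo`/`hi` line up
// with how the corresponding backends consume the two halves.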
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
type MinWord = u32;
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
type RetInt = u32;
// Adapted from https://github.com/taiki-e/atomic-maybe-uninit/blob/v0.3.6/src/utils.rs#L255.
// Helper for implementing sub-word atomic operations using a word-sized LL/SC loop or CAS loop.
//
// Refs: https://github.com/llvm/llvm-project/blob/llvmorg-20.1.0/llvm/lib/CodeGen/AtomicExpandPass.cpp#L799
// (aligned_ptr, shift, mask)
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
#[allow(dead_code)]
#[inline]
pub(crate) fn create_sub_word_mask_values<T>(ptr: *mut T) -> (*mut MinWord, RetInt, RetInt) {
#[cfg(portable_atomic_no_strict_provenance)]
use self::ptr::PtrExt as _;
use core::mem;
// RISC-V, MIPS, SPARC, LoongArch, Xtensa, BPF: shift amount of 32-bit shift instructions is 5 bits unsigned (0-31).
// PowerPC, C-SKY: shift amount of 32-bit shift instructions is 6 bits unsigned (0-63) and shift amount 32-63 means "clear".
// Arm: shift amount of 32-bit shift instructions is 8 bits unsigned (0-255).
// Hexagon: shift amount of 32-bit shift instructions is 7 bits signed (-64 to 63) and a negative shift amount means "reverse the direction of the shift".
// (On s390x, we don't use the mask returned from this function.)
// (See also https://devblogs.microsoft.com/oldnewthing/20230904-00/?p=108704 for others)
const SHIFT_MASK: bool = !cfg!(any(
target_arch = "bpf",
target_arch = "loongarch32",
target_arch = "loongarch64",
target_arch = "mips",
target_arch = "mips32r6",
target_arch = "mips64",
target_arch = "mips64r6",
target_arch = "riscv32",
target_arch = "riscv64",
target_arch = "s390x",
target_arch = "sparc",
target_arch = "sparc64",
target_arch = "xtensa",
));
let ptr_mask = mem::size_of::<MinWord>() - 1;
let aligned_ptr = ptr.with_addr(ptr.addr() & !ptr_mask) as *mut MinWord;
let ptr_lsb = if SHIFT_MASK {
ptr.addr() & ptr_mask
} else {
// We use 32-bit wrapping shift instructions in asm on these platforms.
ptr.addr()
};
let shift = if cfg!(any(target_endian = "little", target_arch = "s390x")) {
ptr_lsb.wrapping_mul(8)
} else {
(ptr_lsb ^ (mem::size_of::<MinWord>() - mem::size_of::<T>())).wrapping_mul(8)
};
let mut mask: RetInt = (1 << (mem::size_of::<T>() * 8)) - 1; // !(0 as T) as RetInt
if SHIFT_MASK {
mask <<= shift;
}
#[allow(clippy::cast_possible_truncation)]
{
(aligned_ptr, shift as RetInt, mask)
}
}
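// Worked example (a sketch for the little-endian RISC-V targets this is
// compiled for, where SHIFT_MASK is false): for a `u8` at address 0x1003,
// `aligned_ptr` is 0x1000, `mask` is 0xFF (left unshifted), and `shift` is
// 0x1003 * 8 = 0x8018; the 32-bit shift instructions used in the asm only
// consider the low 5 bits of the shift amount, so the effective shift is 24,
// selecting the byte occupying bits 24..32 of the aligned word.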
// This module provides core::ptr strict_provenance/exposed_provenance polyfill for pre-1.84 rustc.
#[allow(dead_code)]
pub(crate) mod ptr {
#[cfg(portable_atomic_no_strict_provenance)]
use core::mem;
#[cfg(not(portable_atomic_no_strict_provenance))]
#[allow(unused_imports)]
pub(crate) use core::ptr::{with_exposed_provenance, with_exposed_provenance_mut};
#[cfg(portable_atomic_no_strict_provenance)]
#[inline(always)]
#[must_use]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn with_exposed_provenance<T>(addr: usize) -> *const T {
addr as *const T
}
#[cfg(portable_atomic_no_strict_provenance)]
#[inline(always)]
#[must_use]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub(crate) fn with_exposed_provenance_mut<T>(addr: usize) -> *mut T {
addr as *mut T
}
#[cfg(portable_atomic_no_strict_provenance)]
pub(crate) trait PtrExt<T: ?Sized>: Copy {
#[must_use]
fn addr(self) -> usize;
#[must_use]
fn with_addr(self, addr: usize) -> Self
where
T: Sized;
}
#[cfg(portable_atomic_no_strict_provenance)]
impl<T: ?Sized> PtrExt<T> for *mut T {
#[inline(always)]
#[must_use]
fn addr(self) -> usize {
// A pointer-to-integer transmute currently has exactly the right semantics: it returns the
// address without exposing the provenance. Note that this is *not* a stable guarantee about
// transmute semantics, it relies on sysroot crates having special status.
// SAFETY: Pointer-to-integer transmutes are valid (if you are okay with losing the
// provenance).
#[allow(clippy::transmutes_expressible_as_ptr_casts)]
unsafe {
mem::transmute(self as *mut ())
}
}
#[allow(clippy::cast_possible_wrap)]
#[inline]
#[must_use]
fn with_addr(self, addr: usize) -> Self
where
T: Sized,
{
// This should probably be an intrinsic to avoid doing any sort of arithmetic, but
// meanwhile, we can implement it with `wrapping_offset`, which preserves the pointer's
// provenance.
let self_addr = self.addr() as isize;
let dest_addr = addr as isize;
let offset = dest_addr.wrapping_sub(self_addr);
(self as *mut u8).wrapping_offset(offset) as *mut T
}
}
}
// This module provides:
// - core::ffi polyfill (c_* type aliases and CStr) for pre-1.64 rustc compatibility.
// (core::ffi::* (except c_void) requires Rust 1.64)
// - safe abstraction (c! macro) for creating static C strings without runtime checks.
// (c"..." requires Rust 1.77)
// - helper macros for defining FFI bindings.
#[cfg(any(
test,
portable_atomic_test_no_std_static_assert_ffi,
not(any(target_arch = "x86", target_arch = "x86_64"))
))]
#[cfg(any(not(portable_atomic_no_asm), portable_atomic_unstable_asm))]
#[allow(dead_code, non_camel_case_types, unused_macros)]
#[macro_use]
pub(crate) mod ffi {
pub(crate) type c_void = core::ffi::c_void;
// c_{,u}int is {i,u}16 on 16-bit targets, otherwise {i,u}32.
// https://github.com/rust-lang/rust/blob/1.84.0/library/core/src/ffi/mod.rs#L156
#[cfg(target_pointer_width = "16")]
pub(crate) type c_int = i16;
#[cfg(target_pointer_width = "16")]
pub(crate) type c_uint = u16;
#[cfg(not(target_pointer_width = "16"))]
pub(crate) type c_int = i32;
#[cfg(not(target_pointer_width = "16"))]
pub(crate) type c_uint = u32;
// c_{,u}long is {i,u}64 on non-Windows 64-bit targets, otherwise {i,u}32.
// https://github.com/rust-lang/rust/blob/1.84.0/library/core/src/ffi/mod.rs#L168
#[cfg(all(target_pointer_width = "64", not(windows)))]
pub(crate) type c_long = i64;
#[cfg(all(target_pointer_width = "64", not(windows)))]
pub(crate) type c_ulong = u64;
#[cfg(not(all(target_pointer_width = "64", not(windows))))]
pub(crate) type c_long = i32;
#[cfg(not(all(target_pointer_width = "64", not(windows))))]
pub(crate) type c_ulong = u32;
// c_size_t is currently always usize.
// https://github.com/rust-lang/rust/blob/1.84.0/library/core/src/ffi/mod.rs#L76
pub(crate) type c_size_t = usize;
// c_char is u8 by default on non-Apple/non-Windows/non-Vita Arm/C-SKY/Hexagon/MSP430/PowerPC/RISC-V/s390x/Xtensa targets, otherwise i8 by default.
// See references in https://github.com/rust-lang/rust/issues/129945 for details.
#[cfg(all(
not(any(target_vendor = "apple", windows, target_os = "vita")),
any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "csky",
target_arch = "hexagon",
target_arch = "msp430",
target_arch = "powerpc",
target_arch = "powerpc64",
target_arch = "riscv32",
target_arch = "riscv64",
target_arch = "s390x",
target_arch = "xtensa",
),
))]
pub(crate) type c_char = u8;
#[cfg(not(all(
not(any(target_vendor = "apple", windows, target_os = "vita")),
any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "csky",
target_arch = "hexagon",
target_arch = "msp430",
target_arch = "powerpc",
target_arch = "powerpc64",
target_arch = "riscv32",
target_arch = "riscv64",
target_arch = "s390x",
target_arch = "xtensa",
),
)))]
pub(crate) type c_char = i8;
// Static assertions for C type definitions.
#[cfg(test)]
const _: fn() = || {
let _: c_int = 0 as std::os::raw::c_int;
let _: c_uint = 0 as std::os::raw::c_uint;
let _: c_long = 0 as std::os::raw::c_long;
let _: c_ulong = 0 as std::os::raw::c_ulong;
#[cfg(unix)]
let _: c_size_t = 0 as libc::size_t; // std::os::raw::c_size_t is unstable
let _: c_char = 0 as std::os::raw::c_char;
};
#[repr(transparent)]
pub(crate) struct CStr([c_char]);
impl CStr {
#[inline]
#[must_use]
pub(crate) const fn as_ptr(&self) -> *const c_char {
self.0.as_ptr()
}
/// # Safety
///
/// The provided slice **must** be nul-terminated and not contain any interior
/// nul bytes.
#[inline]
#[must_use]
pub(crate) unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr {
// SAFETY: Casting to CStr is safe because *our* CStr is #[repr(transparent)]
// and its internal representation is a [u8] too. (Note that std's CStr
// is not #[repr(transparent)].)
// Dereferencing the obtained pointer is safe because it comes from a
// reference. Making a reference is then safe because its lifetime
// is bound by the lifetime of the given `bytes`.
unsafe { &*(bytes as *const [u8] as *const CStr) }
}
#[cfg(test)]
#[inline]
#[must_use]
pub(crate) fn to_bytes_with_nul(&self) -> &[u8] {
// SAFETY: Transmuting a slice of `c_char`s to a slice of `u8`s
// is safe on all supported targets.
#[allow(clippy::unnecessary_cast)] // triggered for targets that c_char is u8
unsafe {
&*(&self.0 as *const [c_char] as *const [u8])
}
}
}
macro_rules! c {
($s:expr) => {{
const BYTES: &[u8] = concat!($s, "\0").as_bytes();
const _: () = static_assert!(crate::utils::ffi::_const_is_c_str(BYTES));
#[allow(unused_unsafe)]
// SAFETY: we've checked `BYTES` is a valid C string
unsafe {
crate::utils::ffi::CStr::from_bytes_with_nul_unchecked(BYTES)
}
}};
}
#[must_use]
pub(crate) const fn _const_is_c_str(bytes: &[u8]) -> bool {
#[cfg(portable_atomic_no_track_caller)]
{
// const_if_match/const_loop was stabilized (nightly-2020-06-30) 2 days before
// track_caller was stabilized (nightly-2020-07-02), so we reuse the cfg for
// track_caller here instead of emitting a cfg for const_if_match/const_loop.
// https://github.com/rust-lang/rust/pull/72437
// track_caller was stabilized 11 days after the oldest nightly version
// that uses this module, and is included in the same 1.46 stable release.
// The check here is insufficient in this case, but that is fine: this function is
// internal code that never processes user input, and our CI checks all builtin
// targets and some custom targets with several newer compiler versions.
!bytes.is_empty()
}
#[cfg(not(portable_atomic_no_track_caller))]
{
// Based on https://github.com/rust-lang/rust/blob/1.84.0/library/core/src/ffi/c_str.rs#L417
// - bytes must be nul-terminated.
// - bytes must not contain any interior nul bytes.
if bytes.is_empty() {
return false;
}
let mut i = bytes.len() - 1;
if bytes[i] != 0 {
return false;
}
// The terminating nul byte exists; now check the rest for interior nul bytes.
while i != 0 {
i -= 1;
if bytes[i] == 0 {
return false;
}
}
true
}
}
/// Defines types with #[cfg(test)] static assertions which check that the
/// types are the same as those in the platform's latest header files.
// Note: This macro is sys_type!({ }), not sys_type! { }.
// An extra brace is used in input to make contents rustfmt-able.
macro_rules! sys_type {
({$(
$(#[$attr:meta])*
$vis:vis type $([$($windows_path:ident)::+])? $name:ident = $ty:ty;
)*}) => {
$(
$(#[$attr])*
$vis type $name = $ty;
)*
// Static assertions for FFI bindings.
// This checks that FFI bindings defined in this crate and FFI bindings generated for
// the platform's latest header file using bindgen have the same types.
// Since this is a static assertion, we can detect problems with
// `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
// without actually running tests on these platforms.
// See also https://github.com/taiki-e/test-helper/blob/HEAD/tools/codegen/src/ffi.rs.
#[cfg(any(test, portable_atomic_test_no_std_static_assert_ffi))]
#[allow(
unused_imports,
clippy::cast_possible_wrap,
clippy::cast_sign_loss,
clippy::cast_possible_truncation
)]
const _: fn() = || {
#[cfg(not(any(target_os = "aix", windows)))]
use test_helper::sys;
#[cfg(target_os = "aix")]
use libc as sys;
$(
$(#[$attr])*
{
$(use windows_sys::$($windows_path)::+ as sys;)?
let _: $name = 0 as sys::$name;
}
)*
};
};
}
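// A hedged usage sketch of sys_type! (hypothetical binding; real uses live in
// the OS-specific modules): defines the alias and, in test builds, checks it
// against the bindgen-generated `sys` module.
//
// sys_type!({
//     pub(crate) type pthread_t = usize;
// });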
/// Defines #[repr(C)] structs with #[cfg(test)] static assertions which check that the
/// fields are the same as those in the platform's latest header files.
// Note: This macro is sys_struct!({ }), not sys_struct! { }.
// An extra brace is used in input to make contents rustfmt-able.
macro_rules! sys_struct {
({$(
$(#[$attr:meta])*
$vis:vis struct $([$($windows_path:ident)::+])? $name:ident {$(
$(#[$field_attr:meta])*
$field_vis:vis $field_name:ident: $field_ty:ty,
)*}
)*}) => {
$(
$(#[$attr])*
#[derive(Clone, Copy)]
#[cfg_attr(
any(test, portable_atomic_test_no_std_static_assert_ffi),
derive(Debug, PartialEq)
)]
#[repr(C)]
$vis struct $name {$(
$(#[$field_attr])*
$field_vis $field_name: $field_ty,
)*}
)*
// Static assertions for FFI bindings.
// This checks that FFI bindings defined in this crate and FFI bindings generated for
// the platform's latest header file using bindgen have the same fields.
// Since this is a static assertion, we can detect problems with
// `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
// without actually running tests on these platforms.
// See also https://github.com/taiki-e/test-helper/blob/HEAD/tools/codegen/src/ffi.rs.
#[cfg(any(test, portable_atomic_test_no_std_static_assert_ffi))]
#[allow(unused_imports, clippy::undocumented_unsafe_blocks)]
const _: fn() = || {
#[cfg(not(any(target_os = "aix", windows)))]
use test_helper::sys;
#[cfg(target_os = "aix")]
use libc as sys;
$(
$(#[$attr])*
{
$(use windows_sys::$($windows_path)::+ as sys;)?
static_assert!(
core::mem::size_of::<$name>()
== core::mem::size_of::<sys::$name>()
);
let s: $name = unsafe { core::mem::zeroed() };
// field names and types
let _ = sys::$name {$(
$(#[$field_attr])*
$field_name: s.$field_name,
)*};
// field offsets
#[cfg(not(portable_atomic_no_offset_of))]
{$(
$(#[$field_attr])*
static_assert!(
core::mem::offset_of!($name, $field_name) ==
core::mem::offset_of!(sys::$name, $field_name),
);
)*}
}
)*
};
};
}
/// Defines constants with #[cfg(test)] static assertions which check that the
/// values are the same as those in the platform's latest header files.
// Note: This macro is sys_const!({ }), not sys_const! { }.
// An extra brace is used in input to make contents rustfmt-able.
macro_rules! sys_const {
({$(
$(#[$attr:meta])*
$vis:vis const $([$($windows_path:ident)::+])? $name:ident: $ty:ty = $val:expr;
)*}) => {
$(
$(#[$attr])*
$vis const $name: $ty = $val;
)*
// Static assertions for FFI bindings.
// This checks that FFI bindings defined in this crate and FFI bindings generated for
// the platform's latest header file using bindgen have the same values.
// Since this is a static assertion, we can detect problems with
// `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
// without actually running tests on these platforms.
// See also https://github.com/taiki-e/test-helper/blob/HEAD/tools/codegen/src/ffi.rs.
#[cfg(any(test, portable_atomic_test_no_std_static_assert_ffi))]
#[allow(
unused_attributes, // for #[allow(..)] in $(#[$attr])*
unused_imports,
clippy::cast_possible_wrap,
clippy::cast_sign_loss,
clippy::cast_possible_truncation,
)]
const _: fn() = || {
#[cfg(not(any(target_os = "aix", windows)))]
use test_helper::sys;
#[cfg(target_os = "aix")]
use libc as sys;
$(
$(#[$attr])*
{
$(use windows_sys::$($windows_path)::+ as sys;)?
sys_const_cmp!($name, $ty);
}
)*
};
};
}
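// A hedged usage sketch of sys_const! (illustrative binding): defines the
// constant and, in test builds, asserts that it matches the bindgen-generated
// `sys` value via sys_const_cmp! below.
//
// sys_const!({
//     pub(crate) const AT_HWCAP: c_ulong = 16;
// });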
#[cfg(any(test, portable_atomic_test_no_std_static_assert_ffi))]
macro_rules! sys_const_cmp {
(RTLD_DEFAULT, $ty:ty) => {
// ptr comparison and ptr-to-int cast are not stable on const context, so use ptr-to-int
// transmute and compare its result.
static_assert!(
// SAFETY: Pointer-to-integer transmutes are valid (since we are okay with losing the
// provenance here). (Same as <pointer>::addr().)
unsafe {
core::mem::transmute::<$ty, usize>(RTLD_DEFAULT)
== core::mem::transmute::<$ty, usize>(sys::RTLD_DEFAULT)
}
);
};
($name:ident, $ty:ty) => {
static_assert!($name == sys::$name as $ty);
};
}
/// Defines functions with #[cfg(test)] static assertions which check that the
/// signatures are the same as those in the platform's latest header files.
// Note: This macro is sys_fn!({ }), not sys_fn! { }.
// An extra brace is used in input to make contents rustfmt-able.
macro_rules! sys_fn {
({
$(#[$extern_attr:meta])*
extern $abi:literal {$(
$(#[$fn_attr:meta])*
$vis:vis fn $([$($windows_path:ident)::+])? $name:ident(
$($args:tt)*
) $(-> $ret_ty:ty)?;
)*}
}) => {
$(#[$extern_attr])*
extern $abi {$(
$(#[$fn_attr])*
$vis fn $name($($args)*) $(-> $ret_ty)?;
)*}
// Static assertions for FFI bindings.
// This checks that FFI bindings defined in this crate and FFI bindings generated for
// the platform's latest header file using bindgen have the same signatures.
// Since this is a static assertion, we can detect problems with
// `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
// without actually running tests on these platforms.
// See also https://github.com/taiki-e/test-helper/blob/HEAD/tools/codegen/src/ffi.rs.
#[cfg(any(test, portable_atomic_test_no_std_static_assert_ffi))]
#[allow(unused_imports)]
const _: fn() = || {
#[cfg(not(any(target_os = "aix", windows)))]
use test_helper::sys;
#[cfg(target_os = "aix")]
use libc as sys;
$(
$(#[$fn_attr])*
{
$(use windows_sys::$($windows_path)::+ as sys;)?
sys_fn_cmp!($abi fn $name($($args)*) $(-> $ret_ty)?);
}
)*
};
};
}
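// A hedged usage sketch of sys_fn! (illustrative binding): declares the extern
// fn and, in test builds, checks its signature against the bindgen-generated
// one by assigning both to the same function-pointer variable.
//
// sys_fn!({
//     extern "C" {
//         pub(crate) fn getauxval(type_: c_ulong) -> c_ulong;
//     }
// });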
#[cfg(any(test, portable_atomic_test_no_std_static_assert_ffi))]
macro_rules! sys_fn_cmp {
(
$abi:literal fn $name:ident($($_arg_pat:ident: $arg_ty:ty),*, ...) $(-> $ret_ty:ty)?
) => {
let mut _f: unsafe extern $abi fn($($arg_ty),*, ...) $(-> $ret_ty)? = $name;
_f = sys::$name;
};
(
$abi:literal fn $name:ident($($_arg_pat:ident: $arg_ty:ty),* $(,)?) $(-> $ret_ty:ty)?
) => {
let mut _f: unsafe extern $abi fn($($arg_ty),*) $(-> $ret_ty)? = $name;
_f = sys::$name;
};
}
#[allow(
clippy::alloc_instead_of_core,
clippy::std_instead_of_alloc,
clippy::std_instead_of_core,
clippy::undocumented_unsafe_blocks,
clippy::wildcard_imports
)]
#[cfg(test)]
mod tests {
#[test]
fn test_c_macro() {
#[track_caller]
fn t(s: &crate::utils::ffi::CStr, raw: &[u8]) {
assert_eq!(s.to_bytes_with_nul(), raw);
}
t(c!(""), b"\0");
t(c!("a"), b"a\0");
t(c!("abc"), b"abc\0");
t(c!(concat!("abc", "d")), b"abcd\0");
}
#[test]
fn test_is_c_str() {
#[track_caller]
fn t(bytes: &[u8]) {
assert_eq!(
super::_const_is_c_str(bytes),
std::ffi::CStr::from_bytes_with_nul(bytes).is_ok()
);
}
t(b"\0");
t(b"a\0");
t(b"abc\0");
t(b"");
t(b"a");
t(b"abc");
t(b"\0a");
t(b"\0a\0");
t(b"ab\0c\0");
t(b"\0\0");
}
}
}