Optimize non-relaxed load/store on pre-v6 ARM Linux/Android
taiki-e committed Aug 1, 2023
1 parent 0fa669d commit 5b9bc15
Showing 7 changed files with 183 additions and 29 deletions.
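Summary of the change: on pre-v6 ARM Linux/Android, non-relaxed loads and stores are now lowered to a plain ldr/str combined with calls into the kernel-provided __kuser_memory_barrier helper, while Relaxed operations keep using the plain core atomics. An Acquire or SeqCst load is an ldr followed by a barrier call, a Release store is a barrier call followed by an str, and a SeqCst store puts a barrier call on both sides of the str. A minimal sketch of the Acquire-load shape, modeled on the asm! blocks added to src/imp/arm_linux.rs below and assuming an ARMv5TE+ target where blx can be used:

    use core::arch::asm;

    // Address of the kernel's __kuser_memory_barrier helper (see
    // Documentation/arm/kernel_user_helpers.txt; requires __kuser_helper_version >= 3).
    const KUSER_MEMORY_BARRIER: usize = 0xFFFF_0FA0;

    /// # Safety
    /// `src` must be valid for reads and properly aligned.
    unsafe fn atomic_load_acquire_u32(src: *const u32) -> u32 {
        let out;
        asm!(
            "ldr {out}, [{src}]",
            "blx {kuser_memory_barrier}",
            src = in(reg) src,
            out = lateout(reg) out,
            kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _,
            out("lr") _, // the helper call clobbers the link register
            options(nostack, preserves_flags),
        );
        out
    }

The atomic_load_store! macro in the diff generates this shape for 8-, 16-, and 32-bit integers, usize/isize, and pointers.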
4 changes: 4 additions & 0 deletions bench/benches/bench.rs
@@ -15,6 +15,7 @@ use criterion::{criterion_group, criterion_main, Criterion};
#[allow(dead_code, unused_macros)]
#[path = "../../src/utils.rs"]
mod utils;

#[allow(dead_code, unused_macros)]
#[macro_use]
#[path = "../../src/tests"]
@@ -43,6 +44,9 @@ mod arch;
#[allow(dead_code, unused_imports)]
#[path = "../../src/imp/arm_linux.rs"]
mod arch;
#[allow(dead_code, unused_imports)]
#[path = "../../src/imp/mod.rs"]
mod imp;
#[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
#[allow(dead_code, unused_imports)]
#[path = "../../src/imp/atomic128/intrinsics.rs"]
6 changes: 2 additions & 4 deletions bench/benches/imp/spinlock_fallback.rs
@@ -7,12 +7,10 @@
//
// This module is currently only enabled on benchmark.

use core::{
cell::UnsafeCell,
sync::atomic::{AtomicUsize, Ordering},
};
use core::{cell::UnsafeCell, sync::atomic::Ordering};

use super::fallback::utils::{Backoff, CachePadded};
use crate::imp::AtomicUsize;

struct Spinlock {
state: AtomicUsize,
9 changes: 6 additions & 3 deletions build.rs
@@ -260,12 +260,14 @@ fn main() {
}
}
target_feature_if("mclass", is_mclass, &version, None, true);
let mut v5te = known && subarch.starts_with("v5te");
let v6 = known
&& (subarch.starts_with("v6")
|| subarch.starts_with("v7")
|| subarch.starts_with("v8")
|| subarch.starts_with("v9"));
target_feature_if("v6", v6, &version, None, true);
v5te |= target_feature_if("v6", v6, &version, None, true);
target_feature_if("v5te", v5te, &version, None, true);
}
"powerpc64" => {
let target_endian =
@@ -335,7 +337,7 @@ fn target_feature_if(
version: &Version,
stabilized: Option<u32>,
is_rustc_target_feature: bool,
) {
) -> bool {
// HACK: Currently, it seems that the only way to handle unstable target
// features on the stable is to parse the `-C target-feature` in RUSTFLAGS.
//
@@ -350,7 +352,7 @@
&& (version.nightly || stabilized.map_or(false, |stabilized| version.minor >= stabilized))
{
// In this case, cfg(target_feature = "...") would work, so skip emitting our own target_feature cfg.
return;
return false;
} else if let Some(rustflags) = env::var_os("CARGO_ENCODED_RUSTFLAGS") {
for mut flag in rustflags.to_string_lossy().split('\x1f') {
flag = strip_prefix(flag, "-C").unwrap_or(flag);
@@ -370,6 +372,7 @@
if has_target_feature {
println!("cargo:rustc-cfg=portable_atomic_target_feature=\"{}\"", name);
}
has_target_feature
}

fn target_cpu() -> Option<String> {
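The build.rs hunk above records that every ARMv6+ subarchitecture also provides the ARMv5TE instructions: target_feature_if now returns whether the feature ended up enabled, and the result of the v6 check is OR-ed into v5te before the v5te cfg is emitted, so v5te is also set when v6 is only known from -C target-feature. The v5te cfg is what the ARM backend below uses to pick blx over the mov lr, pc / bx call sequence. A rough sketch of the subarch relation, ignoring the RUSTFLAGS handling (the function name is illustrative, not from build.rs):

    fn implies_v5te(subarch: &str) -> bool {
        // v6, v7, v8, and v9 are supersets of v5te, so detecting any of them
        // also enables the v5te cfg.
        let v6 = subarch.starts_with("v6")
            || subarch.starts_with("v7")
            || subarch.starts_with("v8")
            || subarch.starts_with("v9");
        subarch.starts_with("v5te") || v6
    }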
156 changes: 139 additions & 17 deletions src/imp/arm_linux.rs
@@ -11,34 +11,29 @@
// be possible to omit the dynamic kernel version check if the std feature is enabled on Rust 1.64+.
// https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html

#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[path = "fallback/outline_atomics.rs"]
mod fallback;

#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};
use core::sync::atomic::Ordering;
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
use core::{cell::UnsafeCell, mem};

/// A 64-bit value represented as a pair of 32-bit values.
///
/// This type is `#[repr(C)]`, both fields have the same in-memory representation
/// and are plain old datatypes, so access to the fields is always safe.
#[derive(Clone, Copy)]
#[repr(C)]
union U64 {
whole: u64,
pair: Pair,
}
#[derive(Clone, Copy)]
#[repr(C)]
struct Pair {
lo: u32,
hi: u32,
}
use super::core_atomic::{
AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU32, AtomicU8,
AtomicUsize,
};

// https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt
const KUSER_HELPER_VERSION: usize = 0xFFFF0FFC;
// __kuser_helper_version >= 3 (kernel version 2.6.15+)
const KUSER_MEMORY_BARRIER: usize = 0xFFFF0FA0;
// __kuser_helper_version >= 5 (kernel version 3.1+)
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
const KUSER_CMPXCHG64: usize = 0xFFFF0F60;

#[inline]
fn __kuser_helper_version() -> i32 {
use core::sync::atomic::AtomicI32;
@@ -54,6 +49,123 @@ fn __kuser_helper_version() -> i32 {
CACHE.store(v, Ordering::Relaxed);
v
}
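The body of __kuser_helper_version is collapsed above. A minimal sketch of the caching pattern suggested by the visible lines and by the test at the bottom of this file (the details are an assumption): the kernel publishes the helper version as a plain i32 at the fixed address 0xFFFF0FFC, and the value is read once and cached in a static AtomicI32.

    fn kuser_helper_version_sketch() -> i32 {
        use core::sync::atomic::{AtomicI32, Ordering};
        static CACHE: AtomicI32 = AtomicI32::new(0);
        let cached = CACHE.load(Ordering::Relaxed);
        if cached != 0 {
            return cached;
        }
        // SAFETY (assumed): the kernel maps the kuser helper page, so the
        // version word at 0xFFFF0FFC is readable on ARM Linux/Android.
        let v = unsafe { (0xFFFF_0FFC as *const i32).read() };
        CACHE.store(v, Ordering::Relaxed);
        v
    }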

#[cfg(any(target_feature = "v5te", portable_atomic_target_feature = "v5te"))]
macro_rules! blx {
($addr:tt) => {
concat!("blx ", $addr)
};
}
#[cfg(not(any(target_feature = "v5te", portable_atomic_target_feature = "v5te")))]
macro_rules! blx {
($addr:tt) => {
concat!("mov lr, pc", "\n", "bx ", $addr)
};
}
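The blx! macro hides a calling-convention detail: the register form of blx was only introduced with ARMv5T, so when the v5te feature is not available the return address has to be set up by hand before the bx. In ARM state, reading pc yields the address of the current instruction plus 8, i.e. the instruction right after the bx, so mov lr, pc stores exactly the return address that blx would write implicitly. The two expansions:

    // with v5te:        blx <reg>
    // without v5te:     mov lr, pc
    //                   bx  <reg>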

macro_rules! atomic_load_store {
($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
#[inline]
pub(crate) fn load(&self, order: Ordering) -> $value_type {
crate::utils::assert_load_ordering(order);
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
match order {
Ordering::Relaxed => self.inner.load(Ordering::Relaxed),
// Acquire and SeqCst loads are equivalent.
Ordering::Acquire | Ordering::SeqCst => {
debug_assert!(__kuser_helper_version() >= 3);
let src = self.as_ptr();
let out;
asm!(
concat!("ldr", $asm_suffix, " {out}, [{src}]"),
blx!("{kuser_memory_barrier}"),
src = in(reg) src,
out = lateout(reg) out,
kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _,
out("lr") _,
options(nostack, preserves_flags),
);
out
}
_ => unreachable!("{:?}", order),
}
}
}
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn store(&self, val: $value_type, order: Ordering) {
crate::utils::assert_store_ordering(order);
let dst = self.as_ptr();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
macro_rules! atomic_store_release {
($acquire:expr) => {{
debug_assert!(__kuser_helper_version() >= 3);
asm!(
blx!("{kuser_memory_barrier}"),
concat!("str", $asm_suffix, " {val}, [{dst}]"),
$acquire,
dst = in(reg) dst,
val = in(reg) val,
kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _,
out("lr") _,
options(nostack, preserves_flags),
)
}};
}
match order {
Ordering::Relaxed => self.inner.store(val, Ordering::Relaxed),
Ordering::Release => atomic_store_release!(""),
Ordering::SeqCst => atomic_store_release!(blx!("{kuser_memory_barrier}")),
_ => unreachable!("{:?}", order),
}
}
}
}
};
}

atomic_load_store!(AtomicI8, i8, "b");
atomic_load_store!(AtomicU8, u8, "b");
atomic_load_store!(AtomicI16, i16, "h");
atomic_load_store!(AtomicU16, u16, "h");
atomic_load_store!(AtomicI32, i32, "");
atomic_load_store!(AtomicU32, u32, "");
atomic_load_store!(AtomicIsize, isize, "");
atomic_load_store!(AtomicUsize, usize, "");
atomic_load_store!([T] AtomicPtr, *mut T, "");

/// A 64-bit value represented as a pair of 32-bit values.
///
/// This type is `#[repr(C)]`, both fields have the same in-memory representation
/// and are plain old datatypes, so access to the fields is always safe.
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[derive(Clone, Copy)]
#[repr(C)]
union U64 {
whole: u64,
pair: Pair,
}
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[derive(Clone, Copy)]
#[repr(C)]
struct Pair {
lo: u32,
hi: u32,
}
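A small usage example of the U64/Pair overlap, assuming a little-endian target (the common case for ARM Linux/Android; the example is illustrative, not from the crate):

    fn u64_pair_view_example() {
        let v = U64 { whole: 0x1122_3344_5566_7788 };
        // SAFETY: both union fields are plain old data with the same size and
        // layout, so reading either view is always defined.
        let Pair { lo, hi } = unsafe { v.pair };
        assert_eq!(lo, 0x5566_7788);
        assert_eq!(hi, 0x1122_3344);
    }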

#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[inline]
fn has_kuser_cmpxchg64() -> bool {
// Note: detect_false cfg is intended to make it easy for portable-atomic developers to
@@ -64,6 +176,7 @@ fn has_kuser_cmpxchg64() -> bool {
}
__kuser_helper_version() >= 5
}
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[inline]
unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut u64) -> bool {
// SAFETY: the caller must uphold the safety contract.
@@ -75,6 +188,7 @@ unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut u64) -> bool {
}
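The __kuser_cmpxchg64 body is collapsed above. For context, the helper sits at the fixed address 0xFFFF0F60 and, per the kernel's kernel_user_helpers.txt, has the C prototype int (*)(const int64_t *oldval, const int64_t *newval, volatile int64_t *ptr), returning 0 when the store succeeded. A hedged sketch of how such a fixed-address helper can be called from Rust (not necessarily the crate's exact code):

    /// # Safety
    /// Same contract as the kernel helper: all pointers must be valid and
    /// suitably aligned, and __kuser_helper_version must be >= 5.
    unsafe fn kuser_cmpxchg64_sketch(old_val: *const u64, new_val: *const u64, ptr: *mut u64) -> bool {
        let f: extern "C" fn(*const u64, *const u64, *mut u64) -> u32 =
            core::mem::transmute(KUSER_CMPXCHG64);
        // Zero means `*ptr` matched `*old_val` and was replaced by `*new_val`.
        f(old_val, new_val, ptr) == 0
    }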

// 64-bit atomic load by two 32-bit atomic loads.
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[inline]
unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
// SAFETY: the caller must uphold the safety contract.
@@ -92,6 +206,7 @@ unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
}
}
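The byte_wise_atomic_load body is collapsed as well. Going by the comment above it, it reads the 64-bit value as two 32-bit atomic loads; the sketch below is an assumption about that collapsed body (little-endian layout assumed). The result can be torn between the two halves, which a compare-exchange loop can tolerate because the value is re-validated by __kuser_cmpxchg64 before being acted on.

    /// # Safety
    /// `src` must be valid for reads and 8-byte aligned.
    unsafe fn byte_wise_atomic_load_sketch(src: *const u64) -> u64 {
        use core::sync::atomic::{AtomicU32, Ordering};
        let halves = src as *const AtomicU32;
        let lo = (*halves).load(Ordering::Relaxed);
        let hi = (*halves.add(1)).load(Ordering::Relaxed);
        U64 { pair: Pair { lo, hi } }.whole
    }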

#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[inline(always)]
unsafe fn atomic_update_kuser_cmpxchg64<F>(dst: *mut u64, mut f: F) -> u64
where
@@ -123,6 +238,7 @@ macro_rules! atomic_with_ifunc {
unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)? { $($kuser_cmpxchg64_fn_body:tt)* }
fallback = $seqcst_fallback_fn:ident
) => {
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[inline]
unsafe fn $name($($arg)*) $(-> $ret_ty)? {
unsafe fn kuser_cmpxchg64_fn($($arg)*) $(-> $ret_ty)? {
@@ -265,6 +381,7 @@ atomic_with_ifunc! {
fallback = atomic_neg_seqcst
}

#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
macro_rules! atomic64 {
($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => {
#[repr(C, align(8))]
@@ -454,7 +571,9 @@ macro_rules! atomic64 {
};
}

#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
atomic64!(AtomicI64, i64, atomic_max, atomic_min);
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
atomic64!(AtomicU64, u64, atomic_umax, atomic_umin);

#[allow(
Expand All @@ -475,10 +594,13 @@ mod tests {
assert_eq!(version, unsafe { (KUSER_HELPER_VERSION as *const i32).read() });
}

#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
test_atomic_int!(i64);
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
test_atomic_int!(u64);

// load/store/swap implementation is not affected by signedness, so it is
// enough to test only unsigned types.
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
stress_test!(u64);
}
32 changes: 30 additions & 2 deletions src/imp/core_atomic.rs
@@ -20,7 +20,7 @@ unsafe impl Sync for NoRefUnwindSafe {}

#[repr(transparent)]
pub(crate) struct AtomicPtr<T> {
inner: core::sync::atomic::AtomicPtr<T>,
pub(crate) inner: core::sync::atomic::AtomicPtr<T>,
// Prevent RefUnwindSafe from being propagated from the std atomic type.
_marker: PhantomData<NoRefUnwindSafe>,
}
@@ -45,6 +45,13 @@ impl<T> AtomicPtr<T> {
pub(crate) fn into_inner(self) -> *mut T {
self.inner.into_inner()
}
#[cfg(not(all(
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
target_arch = "arm",
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
)))]
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
@@ -54,6 +61,13 @@
crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
self.inner.load(order)
}
#[cfg(not(all(
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
target_arch = "arm",
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
)))]
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
@@ -125,7 +139,7 @@ macro_rules! atomic_int {
($atomic_type:ident, $int_type:ident) => {
#[repr(transparent)]
pub(crate) struct $atomic_type {
inner: core::sync::atomic::$atomic_type,
pub(crate) inner: core::sync::atomic::$atomic_type,
// Prevent RefUnwindSafe from being propagated from the std atomic type.
_marker: PhantomData<NoRefUnwindSafe>,
}
@@ -167,6 +181,13 @@ macro_rules! atomic_int {
pub(crate) fn into_inner(self) -> $int_type {
self.inner.into_inner()
}
#[cfg(not(all(
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
target_arch = "arm",
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
)))]
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
@@ -176,6 +197,13 @@
crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
self.inner.load(order)
}
#[cfg(not(all(
not(any(miri, portable_atomic_sanitize_thread)),
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
target_arch = "arm",
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
)))]
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
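The core_atomic.rs part of the change makes room for the ARM implementation: the inner field of the wrapper types becomes pub(crate), and the wrappers' own load/store methods are compiled out on the pre-v6 ARM Linux/Android configuration, where src/imp/arm_linux.rs supplies them instead through the atomic_load_store! impls shown earlier. Reduced to its shape, with a hypothetical cfg name standing in for the long all(...) condition in the diff:

    use core::sync::atomic::Ordering;

    pub(crate) struct AtomicU32 {
        // pub(crate) so another module can implement methods in terms of it
        pub(crate) inner: core::sync::atomic::AtomicU32,
    }

    #[cfg(not(pre_v6_arm_linux))] // hypothetical name for the real condition
    impl AtomicU32 {
        #[inline]
        pub(crate) fn load(&self, order: Ordering) -> u32 {
            self.inner.load(order)
        }
    }

    #[cfg(pre_v6_arm_linux)]
    impl AtomicU32 {
        #[inline]
        pub(crate) fn load(&self, order: Ordering) -> u32 {
            // In the real crate this impl lives in arm_linux.rs and uses
            // ldr + __kuser_memory_barrier for Acquire/SeqCst loads.
            let _ = order;
            unimplemented!()
        }
    }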
3 changes: 2 additions & 1 deletion src/imp/fallback/seq_lock_wide.rs
@@ -2,10 +2,11 @@

use core::{
mem::ManuallyDrop,
sync::atomic::{self, AtomicUsize, Ordering},
sync::atomic::{self, Ordering},
};

use super::utils::Backoff;
use crate::imp::AtomicUsize;

// See mod.rs for details.
pub(super) type AtomicChunk = AtomicUsize;
(1 more changed file not shown)
