From 29c59d161fd05ceb67ac7d0a7933130fafdc2526 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Wed, 20 Jul 2022 22:49:22 +0900 Subject: [PATCH 1/3] Revert "Fix fence on non-x86 arch and miri (#16)" This reverts commit 54df36a543bdb508bfacb4f371ba8f46187b32a6. --- src/bounded.rs | 6 ++++-- src/lib.rs | 11 ++--------- src/unbounded.rs | 3 ++- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/bounded.rs b/src/bounded.rs index b98846c..7a7fb71 100644 --- a/src/bounded.rs +++ b/src/bounded.rs @@ -129,7 +129,8 @@ impl Bounded { } } } else if stamp.wrapping_add(self.one_lap) == tail + 1 { - let head = crate::full_fence_for_load(|| self.head.load(Ordering::Relaxed)); + crate::full_fence(); + let head = self.head.load(Ordering::Relaxed); // If the head lags one lap behind the tail as well... if head.wrapping_add(self.one_lap) == tail { @@ -190,7 +191,8 @@ impl Bounded { } } } else if stamp == head { - let tail = crate::full_fence_for_load(|| self.tail.load(Ordering::Relaxed)); + crate::full_fence(); + let tail = self.tail.load(Ordering::Relaxed); // If the tail equals the head, that means the queue is empty. if (tail & !self.mark_bit) == head { diff --git a/src/lib.rs b/src/lib.rs index 3133e6b..c4e5abd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -445,11 +445,8 @@ impl fmt::Display for PushError { /// Equivalent to `atomic::fence(Ordering::SeqCst)`, but in some cases faster. #[inline] -fn full_fence_for_load(load_op: impl FnOnce() -> T) -> T { - if cfg!(all( - any(target_arch = "x86", target_arch = "x86_64"), - not(miri) - )) { +fn full_fence() { + if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { // HACK(stjepang): On x86 architectures there are two different ways of executing // a `SeqCst` fence. // @@ -464,11 +461,7 @@ fn full_fence_for_load(load_op: impl FnOnce() -> T) -> T { // x86 platforms is going to optimize this away. let a = AtomicUsize::new(0); let _ = a.compare_exchange(0, 1, Ordering::SeqCst, Ordering::SeqCst); - // On x86, `lock cmpxchg; mov` is fine. See also https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html. - load_op() } else { - let res = load_op(); atomic::fence(Ordering::SeqCst); - res } } diff --git a/src/unbounded.rs b/src/unbounded.rs index df1b674..c7cba68 100644 --- a/src/unbounded.rs +++ b/src/unbounded.rs @@ -237,7 +237,8 @@ impl Unbounded { let mut new_head = head + (1 << SHIFT); if new_head & MARK_BIT == 0 { - let tail = crate::full_fence_for_load(|| self.tail.index.load(Ordering::Relaxed)); + crate::full_fence(); + let tail = self.tail.index.load(Ordering::Relaxed); // If the tail equals the head, that means the queue is empty. if head >> SHIFT == tail >> SHIFT { From 93b090266525d49d3ef7441bdcdc730bff0ffd3c Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Wed, 20 Jul 2022 22:54:53 +0900 Subject: [PATCH 2/3] Use compiler_fence in full_fence on x86 --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index c4e5abd..85efd1f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -459,8 +459,10 @@ fn full_fence() { // The ideal solution here would be to use inline assembly, but we're instead creating a // temporary atomic variable and compare-and-exchanging its value. No sane compiler to // x86 platforms is going to optimize this away. + atomic::compiler_fence(Ordering::SeqCst); let a = AtomicUsize::new(0); let _ = a.compare_exchange(0, 1, Ordering::SeqCst, Ordering::SeqCst); + atomic::compiler_fence(Ordering::SeqCst); } else { atomic::fence(Ordering::SeqCst); } From 33d863af8ade0ac4c3d3fb74f5e3deeea3ceb002 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Wed, 20 Jul 2022 22:57:54 +0900 Subject: [PATCH 3/3] Do not use x86 specific fence on Miri --- src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 85efd1f..d54cee5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -446,7 +446,10 @@ impl fmt::Display for PushError { /// Equivalent to `atomic::fence(Ordering::SeqCst)`, but in some cases faster. #[inline] fn full_fence() { - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { + if cfg!(all( + any(target_arch = "x86", target_arch = "x86_64"), + not(miri) + )) { // HACK(stjepang): On x86 architectures there are two different ways of executing // a `SeqCst` fence. //