Skip to content

Commit 28e0907

Browse files
committed
nontemporal_store: make sure that the intrinsic is truly just a hint
1 parent 29e9248 commit 28e0907

File tree

5 files changed

+49
-15
lines changed

5 files changed

+49
-15
lines changed

compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,8 @@ fn codegen_regular_intrinsic_call<'tcx>(
725725

726726
// Cranelift treats stores as volatile by default
727727
// FIXME correctly handle unaligned_volatile_store
728-
// FIXME actually do nontemporal stores if requested
728+
// FIXME actually do nontemporal stores if requested (but do not just emit MOVNT on x86;
729+
// see the LLVM backend for details)
729730
let dest = CPlace::for_ptr(Pointer::new(ptr), val.layout());
730731
dest.write_cvalue(fx, val);
731732
}

compiler/rustc_codegen_gcc/src/builder.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1127,6 +1127,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
11271127
self.llbb().add_assignment(self.location, aligned_destination, val);
11281128
// TODO(antoyo): handle align and flags.
11291129
// NOTE: dummy value here since it's never used. FIXME(antoyo): API should not return a value here?
1130+
// When adding support for NONTEMPORAL, make sure to not just emit MOVNT on x86; see the
1131+
// LLVM backend for details.
11301132
self.cx.context.new_rvalue_zero(self.type_i32())
11311133
}
11321134

compiler/rustc_codegen_llvm/src/builder.rs

+23-7
Original file line numberDiff line numberDiff line change
@@ -727,13 +727,29 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
727727
llvm::LLVMSetVolatile(store, llvm::True);
728728
}
729729
if flags.contains(MemFlags::NONTEMPORAL) {
730-
// According to LLVM [1] building a nontemporal store must
731-
// *always* point to a metadata value of the integer 1.
732-
//
733-
// [1]: https://llvm.org/docs/LangRef.html#store-instruction
734-
let one = self.cx.const_i32(1);
735-
let node = llvm::LLVMMDNodeInContext(self.cx.llcx, &one, 1);
736-
llvm::LLVMSetMetadata(store, llvm::MD_nontemporal as c_uint, node);
730+
// Make sure that the current target architectures supports "sane" non-temporal
731+
// stores, i.e., non-temporal stores that are equivalent to regular stores except
732+
// for performance. LLVM doesn't seem to care about this, and will happily treat
733+
// `!nontemporal` stores as-if they were normal stores (for reordering optimizations
734+
// etc) even on x86, despite later lowering them to MOVNT which do *not* behave like
735+
// regular stores but require special fences.
736+
// So we keep a list of architectures where `!nontemporal` is known to be truly just
737+
// a hint, and use regular stores everywhere else.
738+
// (In the future, we could alternatively ensure that an sfence gets emitted after a sequence of movnt
739+
// before any kind of synchronizing operation. But it's not clear how to do that with LLVM.)
740+
const WELL_BEHAVED_NONTEMPORAL_ARCHS: &[&str] = &["aarch64", "arm"];
741+
742+
let use_nontemporal =
743+
WELL_BEHAVED_NONTEMPORAL_ARCHS.contains(&&*self.cx.tcx.sess.target.arch);
744+
if use_nontemporal {
745+
// According to LLVM [1] building a nontemporal store must
746+
// *always* point to a metadata value of the integer 1.
747+
//
748+
// [1]: https://llvm.org/docs/LangRef.html#store-instruction
749+
let one = self.cx.const_i32(1);
750+
let node = llvm::LLVMMDNodeInContext(self.cx.llcx, &one, 1);
751+
llvm::LLVMSetMetadata(store, llvm::MD_nontemporal as c_uint, node);
752+
}
737753
}
738754
store
739755
}

library/core/src/intrinsics.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -2386,12 +2386,12 @@ extern "rust-intrinsic" {
23862386
#[rustc_nounwind]
23872387
pub fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32;
23882388

2389-
/// Emits a `!nontemporal` store according to LLVM (see their docs).
2390-
/// Probably will never become stable.
2389+
/// Emits a `nontemporal` store, which gives a hint to the CPU that the data should not be held
2390+
/// in cache. Except for performance, this is fully equivalent to `ptr.write(val)`.
23912391
///
2392-
/// Do NOT use this intrinsic; "nontemporal" operations do not exist in our memory model!
2393-
/// It exists to support current stdarch, but the plan is to change stdarch and remove this intrinsic.
2394-
/// See <https://github.com/rust-lang/rust/issues/114582> for some more discussion.
2392+
/// Not all architectures provide such an operation. For instance, x86 does not: while `MOVNT`
2393+
/// exists, that operation is *not* equivalent to `ptr.write(val)` (`MOVNT` writes can be reordered
2394+
/// in ways that are not allowed for regular writes).
23952395
#[rustc_nounwind]
23962396
pub fn nontemporal_store<T>(ptr: *mut T, val: T);
23972397

+17-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,28 @@
11
//@ compile-flags: -O
2+
//@ compile-flags: --target aarch64-unknown-linux-gnu
3+
//@ needs-llvm-components: aarch64
24

3-
#![feature(core_intrinsics)]
5+
#![feature(no_core, lang_items, intrinsics)]
6+
#![no_core]
47
#![crate_type = "lib"]
58

9+
#[lang = "sized"]
10+
pub trait Sized {}
11+
#[lang = "copy"]
12+
pub trait Copy {}
13+
14+
impl Copy for u32 {}
15+
impl<T> Copy for *mut T {}
16+
17+
extern "rust-intrinsic" {
18+
pub fn nontemporal_store<T>(ptr: *mut T, val: T);
19+
}
20+
621
#[no_mangle]
722
pub fn a(a: &mut u32, b: u32) {
823
// CHECK-LABEL: define{{.*}}void @a
924
// CHECK: store i32 %b, ptr %a, align 4, !nontemporal
1025
unsafe {
11-
std::intrinsics::nontemporal_store(a, b);
26+
nontemporal_store(a, b);
1227
}
1328
}

0 commit comments

Comments
 (0)