diff --git a/Cargo.toml b/Cargo.toml index 3f1abd73519..9802386e456 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,5 +2,6 @@ members = [ "crates/core_simd", + "crates/std_float", "crates/test_helpers", ] diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml index a103ef115a5..d2ff5f3b1b1 100644 --- a/crates/core_simd/Cargo.toml +++ b/crates/core_simd/Cargo.toml @@ -26,3 +26,6 @@ features = ["alloc"] [dev-dependencies.test_helpers] path = "../test_helpers" + +[dev-dependencies] +std_float = { path = "../std_float/", features = ["as_crate"] } diff --git a/crates/core_simd/examples/nbody.rs b/crates/core_simd/examples/nbody.rs index 43280feebbd..7b1e6840f64 100644 --- a/crates/core_simd/examples/nbody.rs +++ b/crates/core_simd/examples/nbody.rs @@ -1,11 +1,13 @@ -#![cfg_attr(feature = "std", feature(portable_simd))] +#![feature(portable_simd)] +extern crate std_float; /// Benchmarks game nbody code /// Taken from the `packed_simd` crate /// Run this benchmark with `cargo test --example nbody` -#[cfg(feature = "std")] mod nbody { - use core_simd::*; + use core_simd::simd::*; + #[allow(unused)] // False positive? + use std_float::StdFloat; use std::f64::consts::PI; const SOLAR_MASS: f64 = 4.0 * PI * PI; @@ -167,7 +169,6 @@ mod nbody { } } -#[cfg(feature = "std")] #[cfg(test)] mod tests { // Good enough for demonstration purposes, not going for strictness here. @@ -184,7 +185,6 @@ mod tests { } fn main() { - #[cfg(feature = "std")] { let (energy_before, energy_after) = nbody::run(1000); println!("Energy before: {}", energy_before); diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs index 6a6d26d10a7..0bc241af1f1 100644 --- a/crates/core_simd/src/intrinsics.rs +++ b/crates/core_simd/src/intrinsics.rs @@ -87,29 +87,3 @@ extern "platform-intrinsic" { #[allow(unused)] pub(crate) fn simd_select_bitmask(m: M, a: T, b: T) -> T; } - -#[cfg(feature = "std")] -mod std { - extern "platform-intrinsic" { - // ceil - pub(crate) fn simd_ceil(x: T) -> T; - - // floor - pub(crate) fn simd_floor(x: T) -> T; - - // round - pub(crate) fn simd_round(x: T) -> T; - - // trunc - pub(crate) fn simd_trunc(x: T) -> T; - - // fsqrt - pub(crate) fn simd_fsqrt(x: T) -> T; - - // fma - pub(crate) fn simd_fma(x: T, y: T, z: T) -> T; - } -} - -#[cfg(feature = "std")] -pub(crate) use crate::simd::intrinsics::std::*; diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs index 09789e11492..06ccab3ec49 100644 --- a/crates/core_simd/src/round.rs +++ b/crates/core_simd/src/round.rs @@ -5,47 +5,6 @@ macro_rules! implement { { $type:ty, $int_type:ty } => { - #[cfg(feature = "std")] - impl Simd<$type, LANES> - where - LaneCount: SupportedLaneCount, - { - /// Returns the smallest integer greater than or equal to each lane. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn ceil(self) -> Self { - unsafe { intrinsics::simd_ceil(self) } - } - - /// Returns the largest integer value less than or equal to each lane. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn floor(self) -> Self { - unsafe { intrinsics::simd_floor(self) } - } - - /// Rounds to the nearest integer value. Ties round toward zero. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn round(self) -> Self { - unsafe { intrinsics::simd_round(self) } - } - - /// Returns the floating point's integer value, with its fractional part removed. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn trunc(self) -> Self { - unsafe { intrinsics::simd_trunc(self) } - } - - /// Returns the floating point's fractional value, with its integer part removed. - #[must_use = "method returns a new vector and does not mutate the original value"] - #[inline] - pub fn fract(self) -> Self { - self - self.trunc() - } - } - impl Simd<$type, LANES> where LaneCount: SupportedLaneCount, diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs index 4a4b23238c4..3528a420351 100644 --- a/crates/core_simd/src/vector/float.rs +++ b/crates/core_simd/src/vector/float.rs @@ -38,29 +38,6 @@ macro_rules! impl_float_vector { unsafe { intrinsics::simd_fabs(self) } } - /// Fused multiply-add. Computes `(self * a) + b` with only one rounding error, - /// yielding a more accurate result than an unfused multiply-add. - /// - /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target - /// architecture has a dedicated `fma` CPU instruction. However, this is not always - /// true, and will be heavily dependent on designing algorithms with specific target - /// hardware in mind. - #[cfg(feature = "std")] - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn mul_add(self, a: Self, b: Self) -> Self { - unsafe { intrinsics::simd_fma(self, a, b) } - } - - /// Produces a vector where every lane has the square root value - /// of the equivalently-indexed lane in `self` - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - #[cfg(feature = "std")] - pub fn sqrt(self) -> Self { - unsafe { intrinsics::simd_fsqrt(self) } - } - /// Takes the reciprocal (inverse) of each lane, `1/x`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 43ddde4c55e..4fb9de198ee 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -546,6 +546,8 @@ macro_rules! impl_float_tests { #[cfg(feature = "std")] mod std { + use std_float::StdFloat; + use super::*; test_helpers::test_lanes! { fn sqrt() { diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index 11d617a6c2c..1a1bc9ebca7 100644 --- a/crates/core_simd/tests/round.rs +++ b/crates/core_simd/tests/round.rs @@ -3,6 +3,8 @@ macro_rules! float_rounding_test { { $scalar:tt, $int_scalar:tt } => { mod $scalar { + use std_float::StdFloat; + type Vector = core_simd::Simd<$scalar, LANES>; type Scalar = $scalar; type IntScalar = $int_scalar; diff --git a/crates/std_float/Cargo.toml b/crates/std_float/Cargo.toml new file mode 100644 index 00000000000..82f66b8dcb7 --- /dev/null +++ b/crates/std_float/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "std_float" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +core_simd = { path = "../core_simd" } + +[features] +default = ["as_crate"] +as_crate = [] diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs new file mode 100644 index 00000000000..4bd4d4c05e3 --- /dev/null +++ b/crates/std_float/src/lib.rs @@ -0,0 +1,165 @@ +#![cfg_attr(feature = "as_crate", no_std)] // We are std! +#![cfg_attr( + feature = "as_crate", + feature(platform_intrinsics), + feature(portable_simd) +)] +#[cfg(not(feature = "as_crate"))] +use core::simd; +#[cfg(feature = "as_crate")] +use core_simd::simd; + +use simd::{LaneCount, Simd, SupportedLaneCount}; + +#[cfg(feature = "as_crate")] +mod experimental { + pub trait Sealed {} +} + +#[cfg(feature = "as_crate")] +use experimental as sealed; + +use crate::sealed::Sealed; + +// "platform intrinsics" are essentially "codegen intrinsics" +// each of these may be scalarized and lowered to a libm call +extern "platform-intrinsic" { + // ceil + fn simd_ceil(x: T) -> T; + + // floor + fn simd_floor(x: T) -> T; + + // round + fn simd_round(x: T) -> T; + + // trunc + fn simd_trunc(x: T) -> T; + + // fsqrt + fn simd_fsqrt(x: T) -> T; + + // fma + fn simd_fma(x: T, y: T, z: T) -> T; +} + +/// This trait provides a possibly-temporary implementation of float functions +/// that may, in the absence of hardware support, canonicalize to calling an +/// operating system's `math.h` dynamically-loaded library (also known as a +/// shared object). As these conditionally require runtime support, they +/// should only appear in binaries built assuming OS support: `std`. +/// +/// However, there is no reason SIMD types, in general, need OS support, +/// as for many architectures an embedded binary may simply configure that +/// support itself. This means these types must be visible in `core` +/// but have these functions available in `std`. +/// +/// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but +/// due to compiler limitations, it is harder to implement this approach for +/// abstract data types like [`Simd`]. From that need, this trait is born. +/// +/// It is possible this trait will be replaced in some manner in the future, +/// when either the compiler or its supporting runtime functions are improved. +/// For now this trait is available to permit experimentation with SIMD float +/// operations that may lack hardware support, such as `mul_add`. +pub trait StdFloat: Sealed + Sized { + /// Fused multiply-add. Computes `(self * a) + b` with only one rounding error, + /// yielding a more accurate result than an unfused multiply-add. + /// + /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target + /// architecture has a dedicated `fma` CPU instruction. However, this is not always + /// true, and will be heavily dependent on designing algorithms with specific target + /// hardware in mind. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn mul_add(self, a: Self, b: Self) -> Self { + unsafe { simd_fma(self, a, b) } + } + + /// Produces a vector where every lane has the square root value + /// of the equivalently-indexed lane in `self` + #[inline] + #[must_use = "method returns a new vector and does not mutate the original value"] + fn sqrt(self) -> Self { + unsafe { simd_fsqrt(self) } + } + + /// Returns the smallest integer greater than or equal to each lane. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn ceil(self) -> Self { + unsafe { simd_ceil(self) } + } + + /// Returns the largest integer value less than or equal to each lane. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn floor(self) -> Self { + unsafe { simd_floor(self) } + } + + /// Rounds to the nearest integer value. Ties round toward zero. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn round(self) -> Self { + unsafe { simd_round(self) } + } + + /// Returns the floating point's integer value, with its fractional part removed. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn trunc(self) -> Self { + unsafe { simd_trunc(self) } + } + + /// Returns the floating point's fractional value, with its integer part removed. + #[must_use = "method returns a new vector and does not mutate the original value"] + fn fract(self) -> Self; +} + +impl Sealed for Simd where LaneCount: SupportedLaneCount {} +impl Sealed for Simd where LaneCount: SupportedLaneCount {} + +// We can safely just use all the defaults. +impl StdFloat for Simd +where + LaneCount: SupportedLaneCount, +{ + /// Returns the floating point's fractional value, with its integer part removed. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } +} + +impl StdFloat for Simd +where + LaneCount: SupportedLaneCount, +{ + /// Returns the floating point's fractional value, with its integer part removed. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use simd::*; + + #[test] + fn everything_works() { + let x = f32x4::from_array([0.1, 0.5, 0.6, -1.5]); + let x2 = x + x; + let _xc = x.ceil(); + let _xf = x.floor(); + let _xr = x.round(); + let _xt = x.trunc(); + let _xfma = x.mul_add(x, x); + let _xsqrt = x.sqrt(); + let _ = x2.abs() * x2; + } +}