diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1ff377fce34..ed1589be4f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -167,40 +167,33 @@ jobs: RUSTFLAGS: ${{ matrix.rustflags }} cross-tests: - name: "${{ matrix.target }} (via cross)" + name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)" runs-on: ubuntu-latest strategy: fail-fast: false - # TODO: Sadly, we cant configure target-feature in a meaningful way - # because `cross` doesn't tell qemu to enable any non-default cpu - # features, nor does it give us a way to do so. - # - # Ultimately, we'd like to do something like [rust-lang/stdarch][stdarch]. - # This is a lot more complex... but in practice it's likely that we can just - # snarf the docker config from around [here][1000-dockerfiles]. - # - # [stdarch]: https://github.com/rust-lang/stdarch/blob/a5db4eaf/.github/workflows/main.yml#L67 - # [1000-dockerfiles]: https://github.com/rust-lang/stdarch/tree/a5db4eaf/ci/docker matrix: target: - - i586-unknown-linux-gnu - # 32-bit arm has a few idiosyncracies like having subnormal flushing - # to zero on by default. Ideally we'd set - armv7-unknown-linux-gnueabihf - - aarch64-unknown-linux-gnu - # Note: The issue above means neither of these mips targets will use - # MSA (mips simd) but MIPS uses a nonstandard binary representation - # for NaNs which makes it worth testing on despite that. + - thumbv7neon-unknown-linux-gnueabihf # includes neon by default + - aarch64-unknown-linux-gnu # includes neon by default + - powerpc-unknown-linux-gnu + - powerpc64le-unknown-linux-gnu # includes altivec by default + - riscv64gc-unknown-linux-gnu + # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing + # non-nightly since https://github.com/rust-lang/rust/pull/113274 # - mips-unknown-linux-gnu # - mips64-unknown-linux-gnuabi64 - - riscv64gc-unknown-linux-gnu - # TODO this test works, but it appears to time out - # - powerpc-unknown-linux-gnu - # TODO this test is broken, but it appears to be a problem with QEMU, not us. - # - powerpc64le-unknown-linux-gnu - # TODO enable this once a new version of cross is released + # Lots of errors in QEMU and no real hardware to test on. Not clear if it's QEMU or bad codegen. # - powerpc64-unknown-linux-gnu + target_feature: [default] + include: + - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" } + # Fails due to QEMU floating point errors, probably handling subnormals incorrectly. + # This target is somewhat redundant, since ppc64le has altivec as well. + # - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" } + # We should test this, but cross currently can't run it + # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" } steps: - uses: actions/checkout@v2 @@ -217,11 +210,27 @@ jobs: # being part of the tarball means we can't just use the download/latest # URL :( run: | - CROSS_URL=https://github.com/rust-embedded/cross/releases/download/v0.2.1/cross-v0.2.1-x86_64-unknown-linux-gnu.tar.gz + CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz mkdir -p "$HOME/.bin" curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin" echo "$HOME/.bin" >> $GITHUB_PATH + - name: Configure Emulated CPUs + run: | + echo "CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> $GITHUB_ENV + # echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV + + - name: Configure RUSTFLAGS + shell: bash + run: | + case "${{ matrix.target_feature }}" in + default) + echo "RUSTFLAGS=" >> $GITHUB_ENV;; + *) + echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV + ;; + esac + - name: Test (debug) run: cross test --verbose --target=${{ matrix.target }} diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs index 501c1c5ddd3..d700011ff9c 100644 --- a/crates/core_simd/src/elements/float.rs +++ b/crates/core_simd/src/elements/float.rs @@ -336,7 +336,10 @@ macro_rules! impl_trait { #[inline] fn is_subnormal(self) -> Self::Mask { - self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0)) + // On some architectures (e.g. armv7 and some ppc) subnormals are flushed to zero, + // so this comparison must be done with integers. + let not_zero = self.abs().to_bits().simd_ne(Self::splat(0.0).to_bits()); + not_zero & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0)) } #[inline] diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs index 3a02f3f01e1..f6ded66e9fc 100644 --- a/crates/core_simd/tests/ops_macros.rs +++ b/crates/core_simd/tests/ops_macros.rs @@ -6,7 +6,7 @@ macro_rules! impl_unary_op_test { { $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => { test_helpers::test_lanes! { fn $fn() { - test_helpers::test_unary_elementwise( + test_helpers::test_unary_elementwise_flush_subnormals( & as core::ops::$trait>::$fn, &$scalar_fn, &|_| true, @@ -31,7 +31,7 @@ macro_rules! impl_binary_op_test { test_helpers::test_lanes! { fn normal() { - test_helpers::test_binary_elementwise( + test_helpers::test_binary_elementwise_flush_subnormals( & as core::ops::$trait>::$fn, &$scalar_fn, &|_, _| true, @@ -39,7 +39,7 @@ macro_rules! impl_binary_op_test { } fn assign() { - test_helpers::test_binary_elementwise( + test_helpers::test_binary_elementwise_flush_subnormals( &|mut a, b| { as core::ops::$trait_assign>::$fn_assign(&mut a, b); a }, &$scalar_fn, &|_, _| true, @@ -96,9 +96,11 @@ macro_rules! impl_common_integer_tests { test_helpers::test_lanes! { fn reduce_sum() { test_helpers::test_1(&|x| { + use test_helpers::subnormals::{flush, flush_in}; test_helpers::prop_assert_biteq! ( $vector::::from_array(x).reduce_sum(), x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add), + flush(x.iter().copied().map(flush_in).fold(0 as $scalar, $scalar::wrapping_add)), ); Ok(()) }); @@ -106,9 +108,11 @@ macro_rules! impl_common_integer_tests { fn reduce_product() { test_helpers::test_1(&|x| { + use test_helpers::subnormals::{flush, flush_in}; test_helpers::prop_assert_biteq! ( $vector::::from_array(x).reduce_product(), x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul), + flush(x.iter().copied().map(flush_in).fold(1 as $scalar, $scalar::wrapping_mul)), ); Ok(()) }); @@ -433,7 +437,7 @@ macro_rules! impl_float_tests { } fn to_degrees() { - test_helpers::test_unary_elementwise( + test_helpers::test_unary_elementwise_flush_subnormals( &Vector::::to_degrees, &Scalar::to_degrees, &|_| true, @@ -441,7 +445,7 @@ macro_rules! impl_float_tests { } fn to_radians() { - test_helpers::test_unary_elementwise( + test_helpers::test_unary_elementwise_flush_subnormals( &Vector::::to_radians, &Scalar::to_radians, &|_| true, @@ -511,7 +515,12 @@ macro_rules! impl_float_tests { } fn simd_clamp() { + if cfg!(all(target_arch = "powerpc64", target_feature = "vsx")) { + // https://gitlab.com/qemu-project/qemu/-/issues/1780 + return; + } test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| { + use test_helpers::subnormals::flush_in; for (min, max) in min.iter_mut().zip(max.iter_mut()) { if max < min { core::mem::swap(min, max); @@ -528,8 +537,20 @@ macro_rules! impl_float_tests { for i in 0..LANES { result_scalar[i] = value[i].clamp(min[i], max[i]); } + let mut result_scalar_flush = [Scalar::default(); LANES]; + for i in 0..LANES { + // Comparisons flush-to-zero, but return value selection is _not_ flushed. + let mut value = value[i]; + if flush_in(value) < flush_in(min[i]) { + value = min[i]; + } + if flush_in(value) > flush_in(max[i]) { + value = max[i]; + } + result_scalar_flush[i] = value + } let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array(); - test_helpers::prop_assert_biteq!(result_scalar, result_vector); + test_helpers::prop_assert_biteq!(result_vector, result_scalar, result_scalar_flush); Ok(()) }) } diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index aacf7bd3bcc..191c39e2370 100644 --- a/crates/core_simd/tests/round.rs +++ b/crates/core_simd/tests/round.rs @@ -43,7 +43,7 @@ macro_rules! float_rounding_test { } fn fract() { - test_helpers::test_unary_elementwise( + test_helpers::test_unary_elementwise_flush_subnormals( &Vector::::fract, &Scalar::fract, &|_| true, diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml index 1d2bc8b519a..23dae7c9338 100644 --- a/crates/test_helpers/Cargo.toml +++ b/crates/test_helpers/Cargo.toml @@ -4,10 +4,8 @@ version = "0.1.0" edition = "2021" publish = false -[dependencies.proptest] -version = "0.10" -default-features = false -features = ["alloc"] +[dependencies] +proptest = { version = "0.10", default-features = false, features = ["alloc"] } [features] all_lane_counts = [] diff --git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs index 7d91260d838..cbc20cda0d6 100644 --- a/crates/test_helpers/src/biteq.rs +++ b/crates/test_helpers/src/biteq.rs @@ -113,6 +113,27 @@ impl core::fmt::Debug for BitEqWrapper<'_, T> { } } +#[doc(hidden)] +pub struct BitEqEitherWrapper<'a, T>(pub &'a T, pub &'a T); + +impl PartialEq> for BitEqWrapper<'_, T> { + fn eq(&self, other: &BitEqEitherWrapper<'_, T>) -> bool { + self.0.biteq(other.0) || self.0.biteq(other.1) + } +} + +impl core::fmt::Debug for BitEqEitherWrapper<'_, T> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if self.0.biteq(self.1) { + self.0.fmt(f) + } else { + self.0.fmt(f)?; + write!(f, " or ")?; + self.1.fmt(f) + } + } +} + #[macro_export] macro_rules! prop_assert_biteq { { $a:expr, $b:expr $(,)? } => { @@ -122,5 +143,14 @@ macro_rules! prop_assert_biteq { let b = $b; proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqWrapper(&b)); } - } + }; + { $a:expr, $b:expr, $c:expr $(,)? } => { + { + use $crate::biteq::{BitEqWrapper, BitEqEitherWrapper}; + let a = $a; + let b = $b; + let c = $c; + proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqEitherWrapper(&b, &c)); + } + }; } diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index b26cdc311a2..b80c745aaf2 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -1,3 +1,5 @@ +#![feature(stdsimd, powerpc_target_feature)] + pub mod array; #[cfg(target_arch = "wasm32")] @@ -6,6 +8,9 @@ pub mod wasm; #[macro_use] pub mod biteq; +pub mod subnormals; +use subnormals::FlushSubnormals; + /// Specifies the default strategy for testing a type. /// /// This strategy should be what "makes sense" to test. @@ -151,7 +156,6 @@ pub fn test_3< } /// Test a unary vector function against a unary scalar function, applied elementwise. -#[inline(never)] pub fn test_unary_elementwise( fv: &dyn Fn(Vector) -> VectorResult, fs: &dyn Fn(Scalar) -> ScalarResult, @@ -177,6 +181,48 @@ pub fn test_unary_elementwise( + fv: &dyn Fn(Vector) -> VectorResult, + fs: &dyn Fn(Scalar) -> ScalarResult, + check: &dyn Fn([Scalar; LANES]) -> bool, +) where + Scalar: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals, + ScalarResult: Copy + biteq::BitEq + core::fmt::Debug + DefaultStrategy + FlushSubnormals, + Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy, + VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, +{ + let flush = |x: Scalar| subnormals::flush(fs(subnormals::flush_in(x))); + test_1(&|x: [Scalar; LANES]| { + proptest::prop_assume!(check(x)); + let result_v: [ScalarResult; LANES] = fv(x.into()).into(); + let result_s: [ScalarResult; LANES] = x + .iter() + .copied() + .map(fs) + .collect::>() + .try_into() + .unwrap(); + let result_sf: [ScalarResult; LANES] = x + .iter() + .copied() + .map(flush) + .collect::>() + .try_into() + .unwrap(); + crate::prop_assert_biteq!(result_v, result_s, result_sf); + Ok(()) + }); +} + /// Test a unary vector function against a unary scalar function, applied elementwise. #[inline(never)] pub fn test_unary_mask_elementwise( @@ -204,7 +250,6 @@ pub fn test_unary_mask_elementwise( } /// Test a binary vector function against a binary scalar function, applied elementwise. -#[inline(never)] pub fn test_binary_elementwise< Scalar1, Scalar2, @@ -241,6 +286,85 @@ pub fn test_binary_elementwise< }); } +/// Test a binary vector function against a binary scalar function, applied elementwise. +/// +/// Where subnormals are flushed, use approximate equality. +pub fn test_binary_elementwise_flush_subnormals< + Scalar1, + Scalar2, + ScalarResult, + Vector1, + Vector2, + VectorResult, + const LANES: usize, +>( + fv: &dyn Fn(Vector1, Vector2) -> VectorResult, + fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult, + check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool, +) where + Scalar1: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals, + Scalar2: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals, + ScalarResult: Copy + biteq::BitEq + core::fmt::Debug + DefaultStrategy + FlushSubnormals, + Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy, + Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy, + VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, +{ + let flush = |x: Scalar1, y: Scalar2| { + subnormals::flush(fs(subnormals::flush_in(x), subnormals::flush_in(y))) + }; + test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| { + proptest::prop_assume!(check(x, y)); + let result_v: [ScalarResult; LANES] = fv(x.into(), y.into()).into(); + let result_s: [ScalarResult; LANES] = x + .iter() + .copied() + .zip(y.iter().copied()) + .map(|(x, y)| fs(x, y)) + .collect::>() + .try_into() + .unwrap(); + let result_sf: [ScalarResult; LANES] = x + .iter() + .copied() + .zip(y.iter().copied()) + .map(|(x, y)| flush(x, y)) + .collect::>() + .try_into() + .unwrap(); + crate::prop_assert_biteq!(result_v, result_s, result_sf); + Ok(()) + }); +} + +/// Test a unary vector function against a unary scalar function, applied elementwise. +#[inline(never)] +pub fn test_binary_mask_elementwise( + fv: &dyn Fn(Vector1, Vector2) -> Mask, + fs: &dyn Fn(Scalar1, Scalar2) -> bool, + check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool, +) where + Scalar1: Copy + core::fmt::Debug + DefaultStrategy, + Scalar2: Copy + core::fmt::Debug + DefaultStrategy, + Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy, + Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy, + Mask: Into<[bool; LANES]> + From<[bool; LANES]> + Copy, +{ + test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| { + proptest::prop_assume!(check(x, y)); + let result_v: [bool; LANES] = fv(x.into(), y.into()).into(); + let result_s: [bool; LANES] = x + .iter() + .copied() + .zip(y.iter().copied()) + .map(|(x, y)| fs(x, y)) + .collect::>() + .try_into() + .unwrap(); + crate::prop_assert_biteq!(result_v, result_s); + Ok(()) + }); +} + /// Test a binary vector-scalar function against a binary scalar function, applied elementwise. #[inline(never)] pub fn test_binary_scalar_rhs_elementwise< diff --git a/crates/test_helpers/src/subnormals.rs b/crates/test_helpers/src/subnormals.rs new file mode 100644 index 00000000000..ec0f1fb24b9 --- /dev/null +++ b/crates/test_helpers/src/subnormals.rs @@ -0,0 +1,91 @@ +pub trait FlushSubnormals: Sized { + fn flush(self) -> Self { + self + } +} + +impl FlushSubnormals for *const T {} +impl FlushSubnormals for *mut T {} + +macro_rules! impl_float { + { $($ty:ty),* } => { + $( + impl FlushSubnormals for $ty { + fn flush(self) -> Self { + let is_f32 = core::mem::size_of::() == 4; + let ppc_flush = is_f32 && cfg!(all( + any(target_arch = "powerpc", all(target_arch = "powerpc64", target_endian = "big")), + target_feature = "altivec", + not(target_feature = "vsx"), + )); + let arm_flush = is_f32 && cfg!(all(target_arch = "arm", target_feature = "neon")); + let flush = ppc_flush || arm_flush; + if flush && self.is_subnormal() { + <$ty>::copysign(0., self) + } else { + self + } + } + } + )* + } +} + +macro_rules! impl_else { + { $($ty:ty),* } => { + $( + impl FlushSubnormals for $ty {} + )* + } +} + +impl_float! { f32, f64 } +impl_else! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize } + +/// AltiVec should flush subnormal inputs to zero, but QEMU seems to only flush outputs. +/// https://gitlab.com/qemu-project/qemu/-/issues/1779 +#[cfg(all( + any(target_arch = "powerpc", target_arch = "powerpc64"), + target_feature = "altivec" +))] +fn in_buggy_qemu() -> bool { + use std::sync::OnceLock; + static BUGGY: OnceLock = OnceLock::new(); + + fn add(x: f32, y: f32) -> f32 { + #[cfg(target_arch = "powerpc")] + use core::arch::powerpc::*; + #[cfg(target_arch = "powerpc64")] + use core::arch::powerpc64::*; + + let array: [f32; 4] = + unsafe { core::mem::transmute(vec_add(vec_splats(x), vec_splats(y))) }; + array[0] + } + + *BUGGY.get_or_init(|| add(-1.0857398e-38, 0.).is_sign_negative()) +} + +#[cfg(all( + any(target_arch = "powerpc", target_arch = "powerpc64"), + target_feature = "altivec" +))] +pub fn flush_in(x: T) -> T { + if in_buggy_qemu() { + x + } else { + x.flush() + } +} + +#[cfg(not(all( + any(target_arch = "powerpc", target_arch = "powerpc64"), + target_feature = "altivec" +)))] +pub fn flush_in(x: T) -> T { + x.flush() +} + +pub fn flush(x: T) -> T { + x.flush() +}