diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1ff377fce34..ed1589be4f1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -167,40 +167,33 @@ jobs:
             RUSTFLAGS: ${{ matrix.rustflags }}
 
   cross-tests:
-    name: "${{ matrix.target }} (via cross)"
+    name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)"
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      # TODO: Sadly, we cant configure target-feature in a meaningful way
-      # because `cross` doesn't tell qemu to enable any non-default cpu
-      # features, nor does it give us a way to do so.
-      #
-      # Ultimately, we'd like to do something like [rust-lang/stdarch][stdarch].
-      # This is a lot more complex... but in practice it's likely that we can just
-      # snarf the docker config from around [here][1000-dockerfiles].
-      #
-      # [stdarch]: https://github.com/rust-lang/stdarch/blob/a5db4eaf/.github/workflows/main.yml#L67
-      # [1000-dockerfiles]: https://github.com/rust-lang/stdarch/tree/a5db4eaf/ci/docker
 
       matrix:
         target:
-          - i586-unknown-linux-gnu
-          # 32-bit arm has a few idiosyncracies like having subnormal flushing
-          # to zero on by default. Ideally we'd set
           - armv7-unknown-linux-gnueabihf
-          - aarch64-unknown-linux-gnu
-          # Note: The issue above means neither of these mips targets will use
-          # MSA (mips simd) but MIPS uses a nonstandard binary representation
-          # for NaNs which makes it worth testing on despite that.
+          - thumbv7neon-unknown-linux-gnueabihf # includes neon by default
+          - aarch64-unknown-linux-gnu           # includes neon by default
+          - powerpc-unknown-linux-gnu
+          - powerpc64le-unknown-linux-gnu       # includes altivec by default
+          - riscv64gc-unknown-linux-gnu
+          # MIPS uses a nonstandard binary representation for NaNs, which makes it worth testing,
+          # but these targets are unavailable on nightly since https://github.com/rust-lang/rust/pull/113274
           # - mips-unknown-linux-gnu
           # - mips64-unknown-linux-gnuabi64
-          - riscv64gc-unknown-linux-gnu
-          # TODO this test works, but it appears to time out
-          # - powerpc-unknown-linux-gnu
-          # TODO this test is broken, but it appears to be a problem with QEMU, not us.
-          # - powerpc64le-unknown-linux-gnu
-          # TODO enable this once a new version of cross is released
+          # Lots of errors in QEMU and no real hardware to test on. Not clear if it's QEMU or bad codegen.
           # - powerpc64-unknown-linux-gnu
+        target_feature: [default]
+        include:
+          - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
+          # Fails due to QEMU floating point errors, probably handling subnormals incorrectly.
+          # This target is somewhat redundant, since ppc64le has altivec as well.
+          # - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
+          # We should test this, but cross currently can't run it
+          # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
 
     steps:
       - uses: actions/checkout@v2
@@ -217,11 +210,27 @@ jobs:
         # being part of the tarball means we can't just use the download/latest
         # URL :(
         run: |
-          CROSS_URL=https://github.com/rust-embedded/cross/releases/download/v0.2.1/cross-v0.2.1-x86_64-unknown-linux-gnu.tar.gz
+          CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz
           mkdir -p "$HOME/.bin"
           curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin"
           echo "$HOME/.bin" >> $GITHUB_PATH
 
+      - name: Configure Emulated CPUs  # overrides cargo's test runner so QEMU emulates a specific CPU
+        run: |
+          echo "CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> "$GITHUB_ENV"
+          # echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> "$GITHUB_ENV"
+
+      - name: Configure RUSTFLAGS
+        shell: bash
+        run: |
+          # Export RUSTFLAGS for the following steps; "default" means no extra target features.
+          case "${{ matrix.target_feature }}" in
+            default)
+              echo "RUSTFLAGS=" >> "$GITHUB_ENV";;
+            *)
+              echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> "$GITHUB_ENV";;
+          esac
+
       - name: Test (debug)
         run: cross test --verbose --target=${{ matrix.target }}
 
diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs
index 501c1c5ddd3..d700011ff9c 100644
--- a/crates/core_simd/src/elements/float.rs
+++ b/crates/core_simd/src/elements/float.rs
@@ -336,7 +336,10 @@ macro_rules! impl_trait {
 
             #[inline]
             fn is_subnormal(self) -> Self::Mask {
-                self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
+                // On some architectures (e.g. armv7 and some ppc) subnormals are flushed to zero,
+                // so this comparison must be done with integers.
+                let not_zero = self.abs().to_bits().simd_ne(Self::splat(0.0).to_bits());
+                not_zero & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
             }
 
             #[inline]
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 3a02f3f01e1..f6ded66e9fc 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -6,7 +6,7 @@ macro_rules! impl_unary_op_test {
     { $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => {
         test_helpers::test_lanes! {
             fn $fn<const LANES: usize>() {
-                test_helpers::test_unary_elementwise(
+                test_helpers::test_unary_elementwise_flush_subnormals(
                     &<core_simd::simd::Simd<$scalar, LANES> as core::ops::$trait>::$fn,
                     &$scalar_fn,
                     &|_| true,
@@ -31,7 +31,7 @@ macro_rules! impl_binary_op_test {
 
             test_helpers::test_lanes! {
                 fn normal<const LANES: usize>() {
-                    test_helpers::test_binary_elementwise(
+                    test_helpers::test_binary_elementwise_flush_subnormals(
                         &<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
                         &$scalar_fn,
                         &|_, _| true,
@@ -39,7 +39,7 @@ macro_rules! impl_binary_op_test {
                 }
 
                 fn assign<const LANES: usize>() {
-                    test_helpers::test_binary_elementwise(
+                    test_helpers::test_binary_elementwise_flush_subnormals(
                         &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
                         &$scalar_fn,
                         &|_, _| true,
@@ -96,9 +96,11 @@ macro_rules! impl_common_integer_tests {
         test_helpers::test_lanes! {
             fn reduce_sum<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
+                    use test_helpers::subnormals::{flush, flush_in};
                     test_helpers::prop_assert_biteq! (
                         $vector::<LANES>::from_array(x).reduce_sum(),
                         x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
+                        flush(x.iter().copied().map(flush_in).fold(0 as $scalar, $scalar::wrapping_add)),
                     );
                     Ok(())
                 });
@@ -106,9 +108,11 @@ macro_rules! impl_common_integer_tests {
 
             fn reduce_product<const LANES: usize>() {
                 test_helpers::test_1(&|x| {
+                    use test_helpers::subnormals::{flush, flush_in};
                     test_helpers::prop_assert_biteq! (
                         $vector::<LANES>::from_array(x).reduce_product(),
                         x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
+                        flush(x.iter().copied().map(flush_in).fold(1 as $scalar, $scalar::wrapping_mul)),
                     );
                     Ok(())
                 });
@@ -433,7 +437,7 @@ macro_rules! impl_float_tests {
                 }
 
                 fn to_degrees<const LANES: usize>() {
-                    test_helpers::test_unary_elementwise(
+                    test_helpers::test_unary_elementwise_flush_subnormals(
                         &Vector::<LANES>::to_degrees,
                         &Scalar::to_degrees,
                         &|_| true,
@@ -441,7 +445,7 @@ macro_rules! impl_float_tests {
                 }
 
                 fn to_radians<const LANES: usize>() {
-                    test_helpers::test_unary_elementwise(
+                    test_helpers::test_unary_elementwise_flush_subnormals(
                         &Vector::<LANES>::to_radians,
                         &Scalar::to_radians,
                         &|_| true,
@@ -511,7 +515,12 @@ macro_rules! impl_float_tests {
                 }
 
                 fn simd_clamp<const LANES: usize>() {
+                    if cfg!(all(target_arch = "powerpc64", target_feature = "vsx")) {
+                        // https://gitlab.com/qemu-project/qemu/-/issues/1780
+                        return;
+                    }
                     test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
+                        use test_helpers::subnormals::flush_in;
                         for (min, max) in min.iter_mut().zip(max.iter_mut()) {
                             if max < min {
                                 core::mem::swap(min, max);
@@ -528,8 +537,20 @@ macro_rules! impl_float_tests {
                         for i in 0..LANES {
                             result_scalar[i] = value[i].clamp(min[i], max[i]);
                         }
+                        let mut result_scalar_flush = [Scalar::default(); LANES];
+                        for i in 0..LANES {
+                            // Comparisons flush-to-zero, but return value selection is _not_ flushed.
+                            let mut value = value[i];
+                            if flush_in(value) < flush_in(min[i]) {
+                                value = min[i];
+                            }
+                            if flush_in(value) > flush_in(max[i]) {
+                                value = max[i];
+                            }
+                            result_scalar_flush[i] = value
+                        }
                         let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array();
-                        test_helpers::prop_assert_biteq!(result_scalar, result_vector);
+                        test_helpers::prop_assert_biteq!(result_vector, result_scalar, result_scalar_flush);
                         Ok(())
                     })
                 }
diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs
index aacf7bd3bcc..191c39e2370 100644
--- a/crates/core_simd/tests/round.rs
+++ b/crates/core_simd/tests/round.rs
@@ -43,7 +43,7 @@ macro_rules! float_rounding_test {
                 }
 
                 fn fract<const LANES: usize>() {
-                    test_helpers::test_unary_elementwise(
+                    test_helpers::test_unary_elementwise_flush_subnormals(
                         &Vector::<LANES>::fract,
                         &Scalar::fract,
                         &|_| true,
diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml
index 1d2bc8b519a..23dae7c9338 100644
--- a/crates/test_helpers/Cargo.toml
+++ b/crates/test_helpers/Cargo.toml
@@ -4,10 +4,8 @@ version = "0.1.0"
 edition = "2021"
 publish = false
 
-[dependencies.proptest]
-version = "0.10"
-default-features = false
-features = ["alloc"]
+[dependencies]
+proptest = { version = "0.10", default-features = false, features = ["alloc"] }
 
 [features]
 all_lane_counts = []
diff --git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs
index 7d91260d838..cbc20cda0d6 100644
--- a/crates/test_helpers/src/biteq.rs
+++ b/crates/test_helpers/src/biteq.rs
@@ -113,6 +113,27 @@ impl<T: BitEq> core::fmt::Debug for BitEqWrapper<'_, T> {
     }
 }
 
+#[doc(hidden)]
+pub struct BitEqEitherWrapper<'a, T>(pub &'a T, pub &'a T);
+
+impl<T: BitEq> PartialEq<BitEqEitherWrapper<'_, T>> for BitEqWrapper<'_, T> {
+    fn eq(&self, other: &BitEqEitherWrapper<'_, T>) -> bool {
+        self.0.biteq(other.0) || self.0.biteq(other.1)
+    }
+}
+
+impl<T: BitEq> core::fmt::Debug for BitEqEitherWrapper<'_, T> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        if self.0.biteq(self.1) {
+            self.0.fmt(f)
+        } else {
+            self.0.fmt(f)?;
+            write!(f, " or ")?;
+            self.1.fmt(f)
+        }
+    }
+}
+
 #[macro_export]
 macro_rules! prop_assert_biteq {
     { $a:expr, $b:expr $(,)? } => {
@@ -122,5 +143,14 @@ macro_rules! prop_assert_biteq {
             let b = $b;
             proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqWrapper(&b));
         }
-    }
+    };
+    { $a:expr, $b:expr, $c:expr $(,)? } => {
+        {
+            use $crate::biteq::{BitEqWrapper, BitEqEitherWrapper};
+            let a = $a;
+            let b = $b;
+            let c = $c;
+            proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqEitherWrapper(&b, &c));
+        }
+    };
 }
diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs
index b26cdc311a2..b80c745aaf2 100644
--- a/crates/test_helpers/src/lib.rs
+++ b/crates/test_helpers/src/lib.rs
@@ -1,3 +1,5 @@
+#![feature(stdsimd, powerpc_target_feature)]
+
 pub mod array;
 
 #[cfg(target_arch = "wasm32")]
@@ -6,6 +8,9 @@ pub mod wasm;
 #[macro_use]
 pub mod biteq;
 
+pub mod subnormals;
+use subnormals::FlushSubnormals;
+
 /// Specifies the default strategy for testing a type.
 ///
 /// This strategy should be what "makes sense" to test.
@@ -151,7 +156,6 @@ pub fn test_3<
 }
 
 /// Test a unary vector function against a unary scalar function, applied elementwise.
-#[inline(never)]
 pub fn test_unary_elementwise<Scalar, ScalarResult, Vector, VectorResult, const LANES: usize>(
     fv: &dyn Fn(Vector) -> VectorResult,
     fs: &dyn Fn(Scalar) -> ScalarResult,
@@ -177,6 +181,48 @@ pub fn test_unary_elementwise<Scalar, ScalarResult, Vector, VectorResult, const
     });
 }
 
+/// Test a unary vector function against a unary scalar function, applied elementwise.
+///
+/// Accepts either the exact scalar results or the ones computed with subnormals flushed.
+pub fn test_unary_elementwise_flush_subnormals<
+    Scalar,
+    ScalarResult,
+    Vector,
+    VectorResult,
+    const LANES: usize,
+>(
+    fv: &dyn Fn(Vector) -> VectorResult,
+    fs: &dyn Fn(Scalar) -> ScalarResult,
+    check: &dyn Fn([Scalar; LANES]) -> bool,
+) where
+    Scalar: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+    ScalarResult: Copy + biteq::BitEq + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+    Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy,
+    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
+{
+    let flush = |x: Scalar| subnormals::flush(fs(subnormals::flush_in(x)));
+    test_1(&|x: [Scalar; LANES]| {
+        proptest::prop_assume!(check(x));
+        let result_v: [ScalarResult; LANES] = fv(x.into()).into();
+        let result_s: [ScalarResult; LANES] = x
+            .iter()
+            .copied()
+            .map(fs)
+            .collect::<Vec<_>>()
+            .try_into()
+            .unwrap();
+        let result_sf: [ScalarResult; LANES] = x
+            .iter()
+            .copied()
+            .map(flush)
+            .collect::<Vec<_>>()
+            .try_into()
+            .unwrap();
+        crate::prop_assert_biteq!(result_v, result_s, result_sf);
+        Ok(())
+    });
+}
+
 /// Test a unary vector function against a unary scalar function, applied elementwise.
 #[inline(never)]
 pub fn test_unary_mask_elementwise<Scalar, Vector, Mask, const LANES: usize>(
@@ -204,7 +250,6 @@ pub fn test_unary_mask_elementwise<Scalar, Vector, Mask, const LANES: usize>(
 }
 
 /// Test a binary vector function against a binary scalar function, applied elementwise.
-#[inline(never)]
 pub fn test_binary_elementwise<
     Scalar1,
     Scalar2,
@@ -241,6 +286,85 @@ pub fn test_binary_elementwise<
     });
 }
 
+/// Test a binary vector function against a binary scalar function, applied elementwise.
+///
+/// Accepts either the exact scalar results or the ones computed with subnormals flushed.
+pub fn test_binary_elementwise_flush_subnormals<
+    Scalar1,
+    Scalar2,
+    ScalarResult,
+    Vector1,
+    Vector2,
+    VectorResult,
+    const LANES: usize,
+>(
+    fv: &dyn Fn(Vector1, Vector2) -> VectorResult,
+    fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult,
+    check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool,
+) where
+    Scalar1: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+    Scalar2: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+    ScalarResult: Copy + biteq::BitEq + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+    Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
+    Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy,
+    VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
+{
+    let flush = |x: Scalar1, y: Scalar2| {
+        subnormals::flush(fs(subnormals::flush_in(x), subnormals::flush_in(y)))
+    };
+    test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| {
+        proptest::prop_assume!(check(x, y));
+        let result_v: [ScalarResult; LANES] = fv(x.into(), y.into()).into();
+        let result_s: [ScalarResult; LANES] = x
+            .iter()
+            .copied()
+            .zip(y.iter().copied())
+            .map(|(x, y)| fs(x, y))
+            .collect::<Vec<_>>()
+            .try_into()
+            .unwrap();
+        let result_sf: [ScalarResult; LANES] = x
+            .iter()
+            .copied()
+            .zip(y.iter().copied())
+            .map(|(x, y)| flush(x, y))
+            .collect::<Vec<_>>()
+            .try_into()
+            .unwrap();
+        crate::prop_assert_biteq!(result_v, result_s, result_sf);
+        Ok(())
+    });
+}
+
+/// Test a binary vector mask function against a binary scalar function, applied elementwise.
+#[inline(never)]
+pub fn test_binary_mask_elementwise<Scalar1, Scalar2, Vector1, Vector2, Mask, const LANES: usize>(
+    fv: &dyn Fn(Vector1, Vector2) -> Mask,
+    fs: &dyn Fn(Scalar1, Scalar2) -> bool,
+    check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool,
+) where
+    Scalar1: Copy + core::fmt::Debug + DefaultStrategy,
+    Scalar2: Copy + core::fmt::Debug + DefaultStrategy,
+    Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
+    Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy,
+    Mask: Into<[bool; LANES]> + From<[bool; LANES]> + Copy,
+{
+    test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| {
+        proptest::prop_assume!(check(x, y));
+        let result_v: [bool; LANES] = fv(x.into(), y.into()).into();
+        let result_s: [bool; LANES] = x
+            .iter()
+            .copied()
+            .zip(y.iter().copied())
+            .map(|(x, y)| fs(x, y))
+            .collect::<Vec<_>>()
+            .try_into()
+            .unwrap();
+        crate::prop_assert_biteq!(result_v, result_s);
+        Ok(())
+    });
+}
+
 /// Test a binary vector-scalar function against a binary scalar function, applied elementwise.
 #[inline(never)]
 pub fn test_binary_scalar_rhs_elementwise<
diff --git a/crates/test_helpers/src/subnormals.rs b/crates/test_helpers/src/subnormals.rs
new file mode 100644
index 00000000000..ec0f1fb24b9
--- /dev/null
+++ b/crates/test_helpers/src/subnormals.rs
@@ -0,0 +1,91 @@
+pub trait FlushSubnormals: Sized {
+    fn flush(self) -> Self {
+        self
+    }
+}
+
+impl<T> FlushSubnormals for *const T {}
+impl<T> FlushSubnormals for *mut T {}
+
+macro_rules! impl_float {
+    { $($ty:ty),* } => {
+        $(
+        impl FlushSubnormals for $ty {
+            fn flush(self) -> Self {
+                let is_f32 = core::mem::size_of::<Self>() == 4;
+                let ppc_flush = is_f32 && cfg!(all(
+                    any(target_arch = "powerpc", all(target_arch = "powerpc64", target_endian = "big")),
+                    target_feature = "altivec",
+                    not(target_feature = "vsx"),
+                ));
+                let arm_flush = is_f32 && cfg!(all(target_arch = "arm", target_feature = "neon"));
+                let flush = ppc_flush || arm_flush;
+                if flush && self.is_subnormal() {
+                    <$ty>::copysign(0., self)
+                } else {
+                    self
+                }
+            }
+        }
+        )*
+    }
+}
+
+macro_rules! impl_else {
+    { $($ty:ty),* } => {
+        $(
+        impl FlushSubnormals for $ty {}
+        )*
+    }
+}
+
+impl_float! { f32, f64 }
+impl_else! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize }
+
+/// AltiVec should flush subnormal inputs to zero, but QEMU seems to only flush outputs.
+/// See <https://gitlab.com/qemu-project/qemu/-/issues/1779>.
+#[cfg(all(
+    any(target_arch = "powerpc", target_arch = "powerpc64"),
+    target_feature = "altivec"
+))]
+fn in_buggy_qemu() -> bool {
+    use std::sync::OnceLock;
+    static BUGGY: OnceLock<bool> = OnceLock::new();
+
+    fn add(x: f32, y: f32) -> f32 {
+        #[cfg(target_arch = "powerpc")]
+        use core::arch::powerpc::*;
+        #[cfg(target_arch = "powerpc64")]
+        use core::arch::powerpc64::*;
+
+        let array: [f32; 4] =
+            unsafe { core::mem::transmute(vec_add(vec_splats(x), vec_splats(y))) };
+        array[0]
+    }
+
+    *BUGGY.get_or_init(|| add(-1.0857398e-38, 0.).is_sign_negative())
+}
+
+#[cfg(all(
+    any(target_arch = "powerpc", target_arch = "powerpc64"),
+    target_feature = "altivec"
+))]
+pub fn flush_in<T: FlushSubnormals>(x: T) -> T {
+    if in_buggy_qemu() {
+        x
+    } else {
+        x.flush()
+    }
+}
+
+#[cfg(not(all(
+    any(target_arch = "powerpc", target_arch = "powerpc64"),
+    target_feature = "altivec"
+)))]
+pub fn flush_in<T: FlushSubnormals>(x: T) -> T {
+    x.flush()
+}
+
+pub fn flush<T: FlushSubnormals>(x: T) -> T {
+    x.flush()
+}