From 12b1d75de6762ee208b25bc7b4f6311b5cba3683 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 9 Apr 2025 01:56:14 +0000 Subject: [PATCH 1/2] Replace calls to `core::arch` intrinsics with assembly Some backends may replace calls to `core::arch` with multiple calls to `sqrt` [1], which becomes recursive. Help mitigate this by replacing the call with assembly. Results in the same assembly as the current implementation when built with optimizations. [1]: https://github.com/rust-lang/compiler-builtins/issues/649 --- src/math/arch/i686.rs | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/math/arch/i686.rs b/src/math/arch/i686.rs index ad54d8b61..3e1d19bfa 100644 --- a/src/math/arch/i686.rs +++ b/src/math/arch/i686.rs @@ -1,22 +1,27 @@ //! Architecture-specific support for x86-32 and x86-64 with SSE2 -#[cfg(target_arch = "x86")] -use core::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use core::arch::x86_64::*; - -pub fn sqrtf(x: f32) -> f32 { +pub fn sqrtf(mut x: f32) -> f32 { + // SAFETY: `sqrtss` is part of `sse2`, which this module is gated behind. It has no memory + // access or side effects. unsafe { - let m = _mm_set_ss(x); - let m_sqrt = _mm_sqrt_ss(m); - _mm_cvtss_f32(m_sqrt) - } + core::arch::asm!( + "sqrtss {x}, {x}", + x = inout(xmm_reg) x, + options(nostack, nomem, pure), + ) + }; + x } -pub fn sqrt(x: f64) -> f64 { +pub fn sqrt(mut x: f64) -> f64 { + // SAFETY: `sqrtsd` is part of `sse2`, which this module is gated behind. It has no memory + // access or side effects. unsafe { - let m = _mm_set_sd(x); - let m_sqrt = _mm_sqrt_pd(m); - _mm_cvtsd_f64(m_sqrt) - } + core::arch::asm!( + "sqrtsd {x}, {x}", + x = inout(xmm_reg) x, + options(nostack, nomem, pure), + ) + }; + x } From e222b2700e44b181aca7f2617b064556143c2360 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 9 Apr 2025 02:22:15 +0000 Subject: [PATCH 2/2] Resolve small errors identified by recent clippy --- crates/libm-test/src/precision.rs | 1 + src/math/support/env.rs | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs index 8916b43ab..f5fb5f670 100644 --- a/crates/libm-test/src/precision.rs +++ b/crates/libm-test/src/precision.rs @@ -13,6 +13,7 @@ use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult}; pub struct SpecialCase; /// ULP allowed to differ from the results returned by a test basis. +#[allow(clippy::single_match)] pub fn default_ulp(ctx: &CheckCtx) -> u32 { // ULP compared to the infinite (MPFR) result. let mut ulp = match ctx.base_name { diff --git a/src/math/support/env.rs b/src/math/support/env.rs index 7244381da..c05890d98 100644 --- a/src/math/support/env.rs +++ b/src/math/support/env.rs @@ -70,7 +70,6 @@ impl Status { /// The default result for division is +/-inf based on operand sign. For `logB`, the default /// result is -inf. /// `x / y` when `x != 0.0` and `y == 0.0`, - #[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))] pub const DIVIDE_BY_ZERO: Self = Self(1 << 2);