
Commit 14c7066 (2 parents: 86a9a75 + 07a52ff)

Merge pull request #344 from tgross35/select-implementation

Introduce a `select_implementation` macro

15 files changed: +178 −117 lines changed

src/math/arch/intrinsics.rs

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
// Config is needed for times when this module is available but we don't call everything
#![allow(dead_code)]

pub fn ceil(x: f64) -> f64 {
    // SAFETY: safe intrinsic with no preconditions
    unsafe { core::intrinsics::ceilf64(x) }
}

pub fn ceilf(x: f32) -> f32 {
    // SAFETY: safe intrinsic with no preconditions
    unsafe { core::intrinsics::ceilf32(x) }
}

pub fn fabs(x: f64) -> f64 {
    // SAFETY: safe intrinsic with no preconditions
    unsafe { core::intrinsics::fabsf64(x) }
}

pub fn fabsf(x: f32) -> f32 {
    // SAFETY: safe intrinsic with no preconditions
    unsafe { core::intrinsics::fabsf32(x) }
}

pub fn floor(x: f64) -> f64 {
    // SAFETY: safe intrinsic with no preconditions
    unsafe { core::intrinsics::floorf64(x) }
}

pub fn floorf(x: f32) -> f32 {
    // SAFETY: safe intrinsic with no preconditions
    unsafe { core::intrinsics::floorf32(x) }
}

pub fn sqrt(x: f64) -> f64 {
    // SAFETY: safe intrinsic with no preconditions
    unsafe { core::intrinsics::sqrtf64(x) }
}

pub fn sqrtf(x: f32) -> f32 {
    // SAFETY: safe intrinsic with no preconditions
    unsafe { core::intrinsics::sqrtf32(x) }
}

pub fn trunc(x: f64) -> f64 {
    // SAFETY: safe intrinsic with no preconditions
    unsafe { core::intrinsics::truncf64(x) }
}

pub fn truncf(x: f32) -> f32 {
    // SAFETY: safe intrinsic with no preconditions
    unsafe { core::intrinsics::truncf32(x) }
}
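For orientation: each wrapper above simply forwards to a `core::intrinsics` function, which LLVM lowers to a single native instruction where one exists (for example `f64.ceil` on wasm32). A standalone sketch of the same pattern that compiles on a nightly toolchain; the function name and values here are illustrative and are not part of the libm API:

// Standalone illustration of the wrapper pattern (nightly-only: core_intrinsics
// is an unstable, internal feature). Not part of libm; purely illustrative.
#![feature(core_intrinsics)]
#![allow(internal_features)]

pub fn ceil(x: f64) -> f64 {
    // SAFETY: safe intrinsic with no preconditions
    unsafe { core::intrinsics::ceilf64(x) }
}

fn main() {
    // The wrapper should agree with the standard library method.
    assert_eq!(ceil(1.25), (1.25f64).ceil());
    println!("ceil(1.25) = {}", ceil(1.25));
}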

src/math/arch/mod.rs

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
//! Architecture-specific routines and operations.
//!
//! LLVM will already optimize calls to some of these in cases that there are hardware
//! instructions. Providing an implementation here just ensures that the faster implementation
//! is used when calling the function directly. This helps anyone who uses `libm` directly, as
//! well as improving things when these routines are called as part of other implementations.

#[cfg(intrinsics_enabled)]
pub mod intrinsics;
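The `intrinsics_enabled` cfg is not built into the compiler; as the macro documentation later in this diff notes, it is emitted by the crate's build script so call sites do not have to repeat the feature checks. The actual build.rs is not part of this diff, so the sketch below is a hypothetical reconstruction of the gating logic, using the `unstable-intrinsics` and `force-soft-floats` feature names mentioned elsewhere in the crate:

// Illustrative build.rs sketch (an assumption, not the real libm build script):
// emit `intrinsics_enabled` only when the unstable-intrinsics feature is on
// and soft floats are not being forced.
fn main() {
    println!("cargo:rerun-if-changed=build.rs");

    let unstable_intrinsics = std::env::var("CARGO_FEATURE_UNSTABLE_INTRINSICS").is_ok();
    let force_soft = std::env::var("CARGO_FEATURE_FORCE_SOFT_FLOATS").is_ok();

    if unstable_intrinsics && !force_soft {
        println!("cargo:rustc-cfg=intrinsics_enabled");
    }
}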

src/math/ceil.rs

Lines changed: 5 additions & 7 deletions
@@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON;
 /// Finds the nearest integer greater than or equal to `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn ceil(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.ceil` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::ceilf64(x) }
-        }
+    select_implementation! {
+        name: ceil,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
     {
         //use an alternative implementation on x86, because the
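The macro call replaces the old `llvm_intrinsically_optimized!` block, and the same mechanical change is repeated in the remaining call sites below. Based on the macro definition added in src/math/support/macros.rs later in this diff, the invocation in `ceil` expands to roughly the following (a hand-derived approximation for illustration, not actual compiler output):

// Approximate expansion of the macro call above. When `intrinsics_enabled`
// is set by build.rs and the target is wasm32, the function returns early
// through the arch wrapper; otherwise the cfg attributes strip this block
// and the generic implementation below runs.
#[cfg(intrinsics_enabled)]
#[cfg(target_arch = "wasm32")]
if true {
    return super::arch::intrinsics::ceil(x);
}
// ...fall through to the generic implementation of `ceil`...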

src/math/ceilf.rs

Lines changed: 5 additions & 7 deletions
@@ -5,14 +5,12 @@ use core::f32;
 /// Finds the nearest integer greater than or equal to `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn ceilf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.ceil` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::ceilf32(x) }
-        }
+    select_implementation! {
+        name: ceilf,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     let mut ui = x.to_bits();
     let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32;

src/math/fabs.rs

Lines changed: 5 additions & 7 deletions
@@ -5,14 +5,12 @@ use core::u64;
 /// by direct manipulation of the bit representation of `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fabs(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.abs` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::fabsf64(x) }
-        }
+    select_implementation! {
+        name: fabs,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     f64::from_bits(x.to_bits() & (u64::MAX / 2))
 }

src/math/fabsf.rs

Lines changed: 5 additions & 7 deletions
@@ -3,14 +3,12 @@
 /// by direct manipulation of the bit representation of `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fabsf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.abs` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::fabsf32(x) }
-        }
+    select_implementation! {
+        name: fabsf,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     f32::from_bits(x.to_bits() & 0x7fffffff)
 }

src/math/floor.rs

Lines changed: 5 additions & 7 deletions
@@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON;
 /// Finds the nearest integer less than or equal to `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn floor(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.floor` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::floorf64(x) }
-        }
+    select_implementation! {
+        name: floor,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
     {
         //use an alternative implementation on x86, because the

src/math/floorf.rs

Lines changed: 5 additions & 7 deletions
@@ -5,14 +5,12 @@ use core::f32;
 /// Finds the nearest integer less than or equal to `x`.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn floorf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.floor` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::floorf32(x) }
-        }
+    select_implementation! {
+        name: floorf,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     let mut ui = x.to_bits();
     let e = (((ui >> 23) as i32) & 0xff) - 0x7f;

src/math/mod.rs

Lines changed: 31 additions & 39 deletions
@@ -74,16 +74,37 @@ macro_rules! div {
     };
 }
 
-macro_rules! llvm_intrinsically_optimized {
-    (#[cfg($($clause:tt)*)] $e:expr) => {
-        #[cfg(all(intrinsics_enabled, not(feature = "force-soft-floats"), $($clause)*))]
-        {
-            if true { // thwart the dead code lint
-                $e
-            }
-        }
-    };
-}
+// Private modules
+#[macro_use]
+mod support;
+mod arch;
+mod expo2;
+mod fenv;
+mod k_cos;
+mod k_cosf;
+mod k_expo2;
+mod k_expo2f;
+mod k_sin;
+mod k_sinf;
+mod k_tan;
+mod k_tanf;
+mod rem_pio2;
+mod rem_pio2_large;
+mod rem_pio2f;
+
+// Private re-imports
+use self::expo2::expo2;
+use self::k_cos::k_cos;
+use self::k_cosf::k_cosf;
+use self::k_expo2::k_expo2;
+use self::k_expo2f::k_expo2f;
+use self::k_sin::k_sin;
+use self::k_sinf::k_sinf;
+use self::k_tan::k_tan;
+use self::k_tanf::k_tanf;
+use self::rem_pio2::rem_pio2;
+use self::rem_pio2_large::rem_pio2_large;
+use self::rem_pio2f::rem_pio2f;
 
 // Public modules
 mod acos;

@@ -301,35 +322,6 @@ pub use self::tgammaf::tgammaf;
 pub use self::trunc::trunc;
 pub use self::truncf::truncf;
 
-// Private modules
-mod expo2;
-mod fenv;
-mod k_cos;
-mod k_cosf;
-mod k_expo2;
-mod k_expo2f;
-mod k_sin;
-mod k_sinf;
-mod k_tan;
-mod k_tanf;
-mod rem_pio2;
-mod rem_pio2_large;
-mod rem_pio2f;
-
-// Private re-imports
-use self::expo2::expo2;
-use self::k_cos::k_cos;
-use self::k_cosf::k_cosf;
-use self::k_expo2::k_expo2;
-use self::k_expo2f::k_expo2f;
-use self::k_sin::k_sin;
-use self::k_sinf::k_sinf;
-use self::k_tan::k_tan;
-use self::k_tanf::k_tanf;
-use self::rem_pio2::rem_pio2;
-use self::rem_pio2_large::rem_pio2_large;
-use self::rem_pio2f::rem_pio2f;
-
 #[inline]
 fn get_high_word(x: f64) -> u32 {
     (x.to_bits() >> 32) as u32
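One likely reason the private module block moved above the public modules (an inference from how `macro_rules!` scoping works, not something stated in the diff): textually-scoped macros are only visible to items that come after the `#[macro_use]` module, so `mod support;` has to precede the function modules that invoke `select_implementation!`. A minimal, self-contained illustration of that ordering rule:

// Minimal illustration of macro_rules! ordering: `two()` can use the macro
// only because `#[macro_use] mod helpers;` appears before `mod user;`.
#[macro_use]
mod helpers {
    macro_rules! double {
        ($x:expr) => {
            $x * 2
        };
    }
}

mod user {
    pub fn two() -> i32 {
        double!(1)
    }
}

fn main() {
    println!("{}", user::two()); // prints 2
}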

src/math/sqrt.rs

Lines changed: 5 additions & 11 deletions
@@ -81,18 +81,12 @@ use core::f64;
 /// The square root of `x` (f64).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn sqrt(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.sqrt` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return if x < 0.0 {
-                f64::NAN
-            } else {
-                unsafe { ::core::intrinsics::sqrtf64(x) }
-            }
-        }
+    select_implementation! {
+        name: sqrt,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     #[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))]
     {
         // Note: This path is unlikely since LLVM will usually have already

src/math/sqrtf.rs

Lines changed: 5 additions & 11 deletions
@@ -16,18 +16,12 @@
 /// The square root of `x` (f32).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn sqrtf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.sqrt` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return if x < 0.0 {
-                ::core::f32::NAN
-            } else {
-                unsafe { ::core::intrinsics::sqrtf32(x) }
-            }
-        }
+    select_implementation! {
+        name: sqrtf,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     #[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))]
     {
         // Note: This path is unlikely since LLVM will usually have already
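Note that the old wasm32 paths in `sqrt` and `sqrtf` carried an explicit `x < 0.0` check before calling the intrinsic, while the new wrappers call the intrinsic directly. This is behavior-preserving because IEEE 754 already defines the square root of a negative operand as NaN, which a quick standalone check on stable Rust confirms:

// Quick check that an IEEE 754 sqrt of a negative input already yields NaN,
// which is why the explicit `x < 0.0` branch can be dropped without changing
// results.
fn main() {
    assert!((-4.0_f64).sqrt().is_nan());
    assert!((-4.0_f32).sqrt().is_nan());
    println!("sqrt of a negative operand is NaN, as required by IEEE 754");
}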

src/math/support/macros.rs

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
/// Choose among using an intrinsic (if available) and falling back to the default function body.
/// Returns directly if the intrinsic version is used, otherwise continues to the rest of the
/// function.
///
/// Use this if the intrinsic is likely to be more performant on the platform(s) specified
/// in `intrinsic_available`.
///
/// The `cfg` used here is controlled by `build.rs` so the passed meta does not need to account
/// for e.g. the `unstable-intrinsics` or `force-soft-float` features.
macro_rules! select_implementation {
    (
        name: $fname:ident,
        // Configuration meta for when to call intrinsics and let LLVM figure it out
        $( use_intrinsic: $use_intrinsic:meta, )?
        args: $($arg:ident),+ ,
    ) => {
        // FIXME: these use paths that are a pretty fragile (`super`). We should figure out
        // something better w.r.t. how this is vendored into compiler-builtins.

        // Never use intrinsics if we are forcing soft floats, and only enable with the
        // `unstable-intrinsics` feature.
        #[cfg(intrinsics_enabled)]
        select_implementation! {
            @cfg $( $use_intrinsic )?;
            if true {
                return super::arch::intrinsics::$fname( $($arg),+ );
            }
        }
    };

    // Coalesce helper to construct an expression only if a config is provided
    (@cfg ; $ex:expr) => { };
    (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex };
}
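For completeness, a sketch of what happens when the optional `use_intrinsic:` line is omitted: the recursive call then matches the `(@cfg ; $ex:expr)` arm, which discards the early return entirely, so no intrinsic path is emitted and the body after the macro always runs. The routine below is hypothetical (it is not in libm and assumes it lives inside this crate next to the other routines), purely to illustrate that arm:

// Hypothetical routine, illustrative only: with no `use_intrinsic:` entry, the
// `@cfg` helper arm with an empty meta matches and drops the early return, so
// only the fallback body below remains.
pub fn always_generic(x: f64) -> f64 {
    select_implementation! {
        name: always_generic,
        args: x,
    }

    // generic fallback implementation
    x * 1.0
}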

src/math/support/mod.rs

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
#[macro_use]
pub mod macros;

src/math/trunc.rs

Lines changed: 5 additions & 7 deletions
@@ -2,14 +2,12 @@ use core::f64;
 
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn trunc(x: f64) -> f64 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f64.trunc` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::truncf64(x) }
-        }
+    select_implementation! {
+        name: trunc,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
 
     let mut i: u64 = x.to_bits();

src/math/truncf.rs

Lines changed: 5 additions & 7 deletions
@@ -2,14 +2,12 @@ use core::f32;
 
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn truncf(x: f32) -> f32 {
-    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
-    // `f32.trunc` native instruction, so we can leverage this for both code size
-    // and speed.
-    llvm_intrinsically_optimized! {
-        #[cfg(target_arch = "wasm32")] {
-            return unsafe { ::core::intrinsics::truncf32(x) }
-        }
+    select_implementation! {
+        name: truncf,
+        use_intrinsic: target_arch = "wasm32",
+        args: x,
     }
+
     let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
 
     let mut i: u32 = x.to_bits();
