Skip to content

Commit f97eda0

Browse files
committed
Update x86 codegen
1 parent fed2a62 commit f97eda0

File tree

2 files changed

+13
-14
lines changed

2 files changed

+13
-14
lines changed

coresimd/macros.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//! Utility macros.
22
3+
#[allow(unused)]
34
macro_rules! constify_imm8 {
45
($imm8:expr, $expand:ident) => {
56
#[allow(overflowing_literals)]

coresimd/x86/sse.rs

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,18 +1144,20 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
11441144
/// ```
11451145
#[inline]
11461146
#[target_feature(enable = "sse")]
1147-
// TODO: generates MOVHPD if the CPU supports SSE2.
1148-
// #[cfg_attr(test, assert_instr(movhps))]
1149-
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movhpd))]
1150-
// 32-bit codegen does not generate `movhps` or `movhpd`, but instead
1151-
// `movsd` followed by `unpcklpd` (or `movss`/`unpcklps` if there's no SSE2).
11521147
#[cfg_attr(
1153-
all(test, target_arch = "x86", target_feature = "sse2"),
1154-
assert_instr(movlhps)
1148+
all(
1149+
test,
1150+
any(
1151+
target_arch = "x86_64",
1152+
all(target_arch = "x86", target_feature = "sse2")
1153+
)
1154+
),
1155+
assert_instr(movhpd)
11551156
)]
1157+
// FIXME: 32-bit codegen without SSE2 generates two `shufps` instead of `movhps`
11561158
#[cfg_attr(
11571159
all(test, target_arch = "x86", not(target_feature = "sse2")),
1158-
assert_instr(unpcklps)
1160+
assert_instr(shufps)
11591161
)]
11601162
// TODO: This function is actually not limited to floats, but that's what
11611163
// matches the C type most closely: (__m128, *const __m64) -> __m128
@@ -1202,20 +1204,16 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
12021204
/// ```
12031205
#[inline]
12041206
#[target_feature(enable = "sse")]
1205-
// TODO: generates MOVLPD if the CPU supports SSE2.
1206-
// #[cfg_attr(test, assert_instr(movlps))]
12071207
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movlpd))]
1208-
// On 32-bit targets with SSE2, it just generates two `movsd`.
12091208
#[cfg_attr(
12101209
all(test, target_arch = "x86", target_feature = "sse2"),
1211-
assert_instr(movsd)
1210+
assert_instr(movlpd)
12121211
)]
1213-
// It should really generate "movlps", but oh well...
1212+
// FIXME: On 32-bit targets without SSE2, it just generates two `movss`...
12141213
#[cfg_attr(
12151214
all(test, target_arch = "x86", not(target_feature = "sse2")),
12161215
assert_instr(movss)
12171216
)]
1218-
// TODO: Like _mm_loadh_pi, this also isn't limited to floats.
12191217
pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 {
12201218
let q = p as *const f32x2;
12211219
let b: f32x2 = *q;

0 commit comments

Comments
 (0)