@@ -1098,7 +1098,10 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
1098
1098
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_ps)
1099
1099
#[ inline]
1100
1100
#[ target_feature( enable = "sse" ) ]
1101
- #[ cfg_attr( test, assert_instr( movmskps) ) ]
1101
+ // FIXME: LLVM9 trunk has the following bug:
1102
+ // https://github.com/rust-lang/stdarch/issues/794
1103
+ // so we only temporarily test this on i686 and x86_64 but not on i586:
1104
+ #[ cfg_attr( all( test, target_feature = "sse2" ) , assert_instr( movmskps) ) ]
1102
1105
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1103
1106
pub unsafe fn _mm_movemask_ps ( a : __m128 ) -> i32 {
1104
1107
movmskps ( a)
@@ -1109,21 +1112,7 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
1109
1112
/// from `a`.
1110
1113
#[ inline]
1111
1114
#[ target_feature( enable = "sse" ) ]
1112
- #[ cfg_attr(
1113
- all(
1114
- test,
1115
- any(
1116
- target_arch = "x86_64" ,
1117
- all( target_arch = "x86" , target_feature = "sse2" )
1118
- )
1119
- ) ,
1120
- assert_instr( movhps)
1121
- ) ]
1122
- // FIXME: 32-bit codegen without SSE2 generates two `shufps` instead of `movhps`
1123
- #[ cfg_attr(
1124
- all( test, target_arch = "x86" , not( target_feature = "sse2" ) ) ,
1125
- assert_instr( shufps)
1126
- ) ]
1115
+ #[ cfg_attr( test, assert_instr( movhps) ) ]
1127
1116
// TODO: this function is actually not limited to floats, but that's what
1128
1117
// matches the C type most closely: `(__m128, *const __m64) -> __m128`.
1129
1118
pub unsafe fn _mm_loadh_pi ( a : __m128 , p : * const __m64 ) -> __m128 {
@@ -1138,15 +1127,7 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
1138
1127
#[ inline]
1139
1128
#[ target_feature( enable = "sse" ) ]
1140
1129
#[ cfg_attr( all( test, target_arch = "x86_64" ) , assert_instr( movlps) ) ]
1141
- #[ cfg_attr(
1142
- all( test, target_arch = "x86" , target_feature = "sse2" ) ,
1143
- assert_instr( movlps)
1144
- ) ]
1145
- // FIXME: On 32-bit targets without SSE2, it just generates two `movss`...
1146
- #[ cfg_attr(
1147
- all( test, target_arch = "x86" , not( target_feature = "sse2" ) ) ,
1148
- assert_instr( movss)
1149
- ) ]
1130
+ #[ cfg_attr( test, assert_instr( movlps) ) ]
1150
1131
pub unsafe fn _mm_loadl_pi ( a : __m128 , p : * const __m64 ) -> __m128 {
1151
1132
let q = p as * const f32x2 ;
1152
1133
let b: f32x2 = * q;
0 commit comments