@@ -1144,18 +1144,20 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
1144
1144
/// ```
1145
1145
#[ inline]
1146
1146
#[ target_feature( enable = "sse" ) ]
1147
- // TODO: generates MOVHPD if the CPU supports SSE2.
1148
- // #[cfg_attr(test, assert_instr(movhps))]
1149
- #[ cfg_attr( all( test, target_arch = "x86_64" ) , assert_instr( movhpd) ) ]
1150
- // 32-bit codegen does not generate `movhps` or `movhpd`, but instead
1151
- // `movsd` followed by `unpcklpd` (or `movss'/`unpcklps` if there's no SSE2).
1152
1147
#[ cfg_attr(
1153
- all( test, target_arch = "x86" , target_feature = "sse2" ) ,
1154
- assert_instr( movlhps)
1148
+ all(
1149
+ test,
1150
+ any(
1151
+ target_arch = "x86_64" ,
1152
+ all( target_arch = "x86" , target_feature = "sse2" )
1153
+ )
1154
+ ) ,
1155
+ assert_instr( movhpd)
1155
1156
) ]
1157
+ // FIXME: 32-bit codegen without SSE2 generates two `shufps` instead of `movhps`
1156
1158
#[ cfg_attr(
1157
1159
all( test, target_arch = "x86" , not( target_feature = "sse2" ) ) ,
1158
- assert_instr( unpcklps )
1160
+ assert_instr( shufps )
1159
1161
) ]
1160
1162
// TODO: This function is actually not limited to floats, but that's what
1161
1163
// matches the C type most closely: (__m128, *const __m64) -> __m128
@@ -1202,20 +1204,16 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
1202
1204
/// ```
1203
1205
#[ inline]
1204
1206
#[ target_feature( enable = "sse" ) ]
1205
- // TODO: generates MOVLPD if the CPU supports SSE2.
1206
- // #[cfg_attr(test, assert_instr(movlps))]
1207
1207
#[ cfg_attr( all( test, target_arch = "x86_64" ) , assert_instr( movlpd) ) ]
1208
- // On 32-bit targets with SSE2, it just generates two `movsd`.
1209
1208
#[ cfg_attr(
1210
1209
all( test, target_arch = "x86" , target_feature = "sse2" ) ,
1211
- assert_instr( movsd )
1210
+ assert_instr( movlpd )
1212
1211
) ]
1213
- // It should really generate "movlps", but oh well ...
1212
+ // FIXME: On 32-bit targets without SSE2, it just generates two `movss` ...
1214
1213
#[ cfg_attr(
1215
1214
all( test, target_arch = "x86" , not( target_feature = "sse2" ) ) ,
1216
1215
assert_instr( movss)
1217
1216
) ]
1218
- // TODO: Like _mm_loadh_pi, this also isn't limited to floats.
1219
1217
pub unsafe fn _mm_loadl_pi ( a : __m128 , p : * const __m64 ) -> __m128 {
1220
1218
let q = p as * const f32x2 ;
1221
1219
let b: f32x2 = * q;
0 commit comments