Skip to content

Commit f0b4f26

Browse files
committed
Optimize masking by always calculating it in i8 space
1 parent 34e54b4 commit f0b4f26

File tree

1 file changed: +8 -12 lines changed

crates/core_simd/src/vector.rs

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -324,7 +324,7 @@ where
324324
+ core::ops::Add<<T as SimdElement>::Mask, Output = <T as SimdElement>::Mask>,
325325
Simd<<T as SimdElement>::Mask, N>: SimdPartialOrd,
326326
Mask<<T as SimdElement>::Mask, N>: core::ops::BitAnd<Output = Mask<<T as SimdElement>::Mask, N>>
327-
+ core::convert::From<<Simd<<T as SimdElement>::Mask, N> as SimdPartialEq>::Mask>,
327+
+ core::convert::From<Mask<i8, N>>,
328328
{
329329
Self::load_or(slice, Default::default())
330330
}
@@ -339,7 +339,7 @@ where
339339
+ core::ops::Add<<T as SimdElement>::Mask, Output = <T as SimdElement>::Mask>,
340340
Simd<<T as SimdElement>::Mask, N>: SimdPartialOrd,
341341
Mask<<T as SimdElement>::Mask, N>: core::ops::BitAnd<Output = Mask<<T as SimdElement>::Mask, N>>
342-
+ core::convert::From<<Simd<<T as SimdElement>::Mask, N> as SimdPartialEq>::Mask>,
342+
+ core::convert::From<Mask<i8, N>>,
343343
{
344344
Self::load_select(slice, Mask::splat(true), or)
345345
}
@@ -355,7 +355,7 @@ where
355355
+ core::ops::Add<<T as SimdElement>::Mask, Output = <T as SimdElement>::Mask>,
356356
Simd<<T as SimdElement>::Mask, N>: SimdPartialOrd,
357357
Mask<<T as SimdElement>::Mask, N>: core::ops::BitAnd<Output = Mask<<T as SimdElement>::Mask, N>>
358-
+ core::convert::From<<Simd<<T as SimdElement>::Mask, N> as SimdPartialEq>::Mask>,
358+
+ core::convert::From<Mask<i8, N>>,
359359
{
360360
Self::load_select(slice, enable, Default::default())
361361
}
@@ -370,7 +370,7 @@ where
370370
+ core::ops::Add<<T as SimdElement>::Mask, Output = <T as SimdElement>::Mask>,
371371
Simd<<T as SimdElement>::Mask, N>: SimdPartialOrd,
372372
Mask<<T as SimdElement>::Mask, N>: core::ops::BitAnd<Output = Mask<<T as SimdElement>::Mask, N>>
373-
+ core::convert::From<<Simd<<T as SimdElement>::Mask, N> as SimdPartialEq>::Mask>,
373+
+ core::convert::From<Mask<i8, N>>,
374374
{
375375
if USE_BRANCH {
376376
if core::intrinsics::likely(enable.all() && slice.len() > N) {
@@ -599,7 +599,7 @@ where
599599
+ core::ops::Add<<T as SimdElement>::Mask, Output = <T as SimdElement>::Mask>,
600600
Simd<<T as SimdElement>::Mask, N>: SimdPartialOrd,
601601
Mask<<T as SimdElement>::Mask, N>: core::ops::BitAnd<Output = Mask<<T as SimdElement>::Mask, N>>
602-
+ core::convert::From<<Simd<<T as SimdElement>::Mask, N> as SimdPartialEq>::Mask>,
602+
+ core::convert::From<Mask<i8, N>>,
603603
{
604604
if USE_BRANCH {
605605
if core::intrinsics::likely(enable.all() && slice.len() > N) {
@@ -1143,14 +1143,10 @@ where
11431143
M: MaskElement + Default + core::convert::From<i8> + core::ops::Add<M, Output = M>,
11441144
Simd<M, N>: SimdPartialOrd,
11451145
// <Simd<M, N> as SimdPartialEq>::Mask: Mask<M, N>,
1146-
Mask<M, N>: core::ops::BitAnd<Output = Mask<M, N>>
1147-
+ core::convert::From<<Simd<M, N> as SimdPartialEq>::Mask>,
1146+
Mask<M, N>: core::ops::BitAnd<Output = Mask<M, N>> + core::convert::From<Mask<i8, N>>,
11481147
{
1149-
let index = index::<M, N>();
1150-
enable
1151-
& Mask::<M, N>::from(
1152-
index.simd_lt(Simd::splat(M::from(i8::try_from(len).unwrap_or(i8::MAX)))),
1153-
)
1148+
let index = index::<i8, N>();
1149+
enable & Mask::<M, N>::from(index.simd_lt(Simd::splat(i8::try_from(len).unwrap_or(i8::MAX))))
11541150
}
11551151

11561152
// This function matches the semantics of the `bzhi` instruction on x86 BMI2

0 commit comments

Comments
 (0)