Skip to content

Commit a06cb4c

Browse files
authored
Implement avx512bf16 intrinsics (#998)
1 parent 11fd33d commit a06cb4c

File tree

9 files changed

+1180
-4
lines changed

9 files changed

+1180
-4
lines changed

ci/docker/i586-unknown-linux-gnu/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM ubuntu:18.04
1+
FROM ubuntu:20.04
22
RUN apt-get update && apt-get install -y --no-install-recommends \
33
gcc-multilib \
44
libc6-dev \

ci/docker/i686-unknown-linux-gnu/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM ubuntu:18.04
1+
FROM ubuntu:20.04
22
RUN apt-get update && apt-get install -y --no-install-recommends \
33
gcc-multilib \
44
libc6-dev \

ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM ubuntu:18.04
1+
FROM ubuntu:20.04
22
RUN apt-get update && apt-get install -y --no-install-recommends \
33
gcc \
44
libc6-dev \
@@ -10,4 +10,5 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1010

1111
RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.35.0-2019-03-11-lin.tar.bz2
1212
RUN tar -xjf sde-external-8.35.0-2019-03-11-lin.tar.bz2
13+
ENV SKIP_TESTS="avx512bf16"
1314
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.35.0-2019-03-11-lin/sde64 -rtm_mode full --"

ci/docker/x86_64-unknown-linux-gnu/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM ubuntu:18.04
1+
FROM ubuntu:20.04
22
RUN apt-get update && apt-get install -y --no-install-recommends \
33
gcc \
44
libc6-dev \

ci/run.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ cargo_test() {
5656
;;
5757
esac
5858

59+
if [ "$SKIP_TESTS" != "" ]; then
60+
cmd="$cmd --skip "$SKIP_TESTS
61+
fi
5962
$cmd
6063
}
6164

crates/core_arch/src/x86/avx512bf16.rs

Lines changed: 1018 additions & 0 deletions
Large diffs are not rendered by default.

crates/core_arch/src/x86/mod.rs

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,37 @@ types! {
296296
/// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with
297297
/// "ps" which is used for `__m512`.
298298
pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64);
299+
300+
/// 128-bit wide set of eight 'u16' types, x86-specific
301+
///
302+
/// This type is representing a 128-bit SIMD register which internally is consisted of
303+
/// eight packed `u16` instances. It's purpose is for bf16 related intrinsic
304+
/// implementations.
305+
pub struct __m128bh(u16, u16, u16, u16, u16, u16, u16, u16);
306+
307+
/// 256-bit wide set of 16 'u16' types, x86-specific
308+
///
309+
/// This type is the same as the `__m128bh` type defined by Intel,
310+
/// representing a 256-bit SIMD register which internally is consisted of
311+
/// 16 packed `u16` instances. It's purpose is for bf16 related intrinsic
312+
/// implementations.
313+
pub struct __m256bh(
314+
u16, u16, u16, u16, u16, u16, u16, u16,
315+
u16, u16, u16, u16, u16, u16, u16, u16
316+
);
317+
318+
/// 512-bit wide set of 32 'u16' types, x86-specific
319+
///
320+
/// This type is the same as the `__m128bh` type defined by Intel,
321+
/// representing a 512-bit SIMD register which internally is consisted of
322+
/// 32 packed `u16` instances. It's purpose is for bf16 related intrinsic
323+
/// implementations.
324+
pub struct __m512bh(
325+
u16, u16, u16, u16, u16, u16, u16, u16,
326+
u16, u16, u16, u16, u16, u16, u16, u16,
327+
u16, u16, u16, u16, u16, u16, u16, u16,
328+
u16, u16, u16, u16, u16, u16, u16, u16
329+
);
299330
}
300331

301332
/// The `__mmask64` type used in AVX-512 intrinsics, a 64-bit integer
@@ -602,6 +633,105 @@ impl m512dExt for __m512d {
602633
}
603634
}
604635

636+
#[allow(non_camel_case_types)]
637+
#[unstable(feature = "stdsimd_internal", issue = "none")]
638+
pub(crate) trait m128bhExt: Sized {
639+
fn as_m128bh(self) -> __m128bh;
640+
641+
#[inline]
642+
fn as_u16x8(self) -> crate::core_arch::simd::u16x8 {
643+
unsafe { transmute(self.as_m128bh()) }
644+
}
645+
646+
#[inline]
647+
fn as_i16x8(self) -> crate::core_arch::simd::i16x8 {
648+
unsafe { transmute(self.as_m128bh()) }
649+
}
650+
651+
#[inline]
652+
fn as_u32x4(self) -> crate::core_arch::simd::u32x4 {
653+
unsafe { transmute(self.as_m128bh()) }
654+
}
655+
656+
#[inline]
657+
fn as_i32x4(self) -> crate::core_arch::simd::i32x4 {
658+
unsafe { transmute(self.as_m128bh()) }
659+
}
660+
}
661+
662+
impl m128bhExt for __m128bh {
663+
#[inline]
664+
fn as_m128bh(self) -> Self {
665+
self
666+
}
667+
}
668+
669+
#[allow(non_camel_case_types)]
670+
#[unstable(feature = "stdsimd_internal", issue = "none")]
671+
pub(crate) trait m256bhExt: Sized {
672+
fn as_m256bh(self) -> __m256bh;
673+
674+
#[inline]
675+
fn as_u16x16(self) -> crate::core_arch::simd::u16x16 {
676+
unsafe { transmute(self.as_m256bh()) }
677+
}
678+
679+
#[inline]
680+
fn as_i16x16(self) -> crate::core_arch::simd::i16x16 {
681+
unsafe { transmute(self.as_m256bh()) }
682+
}
683+
684+
#[inline]
685+
fn as_u32x8(self) -> crate::core_arch::simd::u32x8 {
686+
unsafe { transmute(self.as_m256bh()) }
687+
}
688+
689+
#[inline]
690+
fn as_i32x8(self) -> crate::core_arch::simd::i32x8 {
691+
unsafe { transmute(self.as_m256bh()) }
692+
}
693+
}
694+
695+
impl m256bhExt for __m256bh {
696+
#[inline]
697+
fn as_m256bh(self) -> Self {
698+
self
699+
}
700+
}
701+
702+
#[allow(non_camel_case_types)]
703+
#[unstable(feature = "stdsimd_internal", issue = "none")]
704+
pub(crate) trait m512bhExt: Sized {
705+
fn as_m512bh(self) -> __m512bh;
706+
707+
#[inline]
708+
fn as_u16x32(self) -> crate::core_arch::simd::u16x32 {
709+
unsafe { transmute(self.as_m512bh()) }
710+
}
711+
712+
#[inline]
713+
fn as_i16x32(self) -> crate::core_arch::simd::i16x32 {
714+
unsafe { transmute(self.as_m512bh()) }
715+
}
716+
717+
#[inline]
718+
fn as_u32x16(self) -> crate::core_arch::simd::u32x16 {
719+
unsafe { transmute(self.as_m512bh()) }
720+
}
721+
722+
#[inline]
723+
fn as_i32x16(self) -> crate::core_arch::simd::i32x16 {
724+
unsafe { transmute(self.as_m512bh()) }
725+
}
726+
}
727+
728+
impl m512bhExt for __m512bh {
729+
#[inline]
730+
fn as_m512bh(self) -> Self {
731+
self
732+
}
733+
}
734+
605735
mod eflags;
606736
pub use self::eflags::*;
607737

@@ -725,3 +855,6 @@ pub use self::rtm::*;
725855

726856
mod f16c;
727857
pub use self::f16c::*;
858+
859+
mod avx512bf16;
860+
pub use self::avx512bf16::*;

crates/stdarch-verify/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,12 +137,15 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
137137
syn::Type::Path(ref p) => match extract_path_ident(&p.path).to_string().as_ref() {
138138
// x86 ...
139139
"__m128" => quote! { &M128 },
140+
"__m128bh" => quote! { &M128BH },
140141
"__m128d" => quote! { &M128D },
141142
"__m128i" => quote! { &M128I },
142143
"__m256" => quote! { &M256 },
144+
"__m256bh" => quote! { &M256BH },
143145
"__m256d" => quote! { &M256D },
144146
"__m256i" => quote! { &M256I },
145147
"__m512" => quote! { &M512 },
148+
"__m512bh" => quote! { &M512BH },
146149
"__m512d" => quote! { &M512D },
147150
"__m512i" => quote! { &M512I },
148151
"__mmask8" => quote! { &MMASK8 },

crates/stdarch-verify/tests/x86-intel.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,15 @@ static ORDERING: Type = Type::Ordering;
4545

4646
static M64: Type = Type::M64;
4747
static M128: Type = Type::M128;
48+
static M128BH: Type = Type::M128BH;
4849
static M128I: Type = Type::M128I;
4950
static M128D: Type = Type::M128D;
5051
static M256: Type = Type::M256;
52+
static M256BH: Type = Type::M256BH;
5153
static M256I: Type = Type::M256I;
5254
static M256D: Type = Type::M256D;
5355
static M512: Type = Type::M512;
56+
static M512BH: Type = Type::M512BH;
5457
static M512I: Type = Type::M512I;
5558
static M512D: Type = Type::M512D;
5659
static MMASK8: Type = Type::MMASK8;
@@ -75,12 +78,15 @@ enum Type {
7578
ConstPtr(&'static Type),
7679
M64,
7780
M128,
81+
M128BH,
7882
M128D,
7983
M128I,
8084
M256,
85+
M256BH,
8186
M256D,
8287
M256I,
8388
M512,
89+
M512BH,
8490
M512D,
8591
M512I,
8692
MMASK8,
@@ -493,6 +499,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
493499
// The intrinsics guide calls `f16c` `fp16c` in disagreement with
494500
// Intel's architecture manuals.
495501
"fp16c" => String::from("f16c"),
502+
"avx512_bf16" => String::from("avx512bf16"),
503+
// The XML file names VNNI as "avx512_bf16", while Rust calls
504+
// it "avx512bf16".
496505
_ => cpuid,
497506
};
498507
let fixed_cpuid = fixup_cpuid(cpuid);
@@ -693,12 +702,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
693702
(&Type::PrimUnsigned(8), "unsigned char") => {}
694703
(&Type::M64, "__m64") => {}
695704
(&Type::M128, "__m128") => {}
705+
(&Type::M128BH, "__m128bh") => {}
696706
(&Type::M128I, "__m128i") => {}
697707
(&Type::M128D, "__m128d") => {}
698708
(&Type::M256, "__m256") => {}
709+
(&Type::M256BH, "__m256bh") => {}
699710
(&Type::M256I, "__m256i") => {}
700711
(&Type::M256D, "__m256d") => {}
701712
(&Type::M512, "__m512") => {}
713+
(&Type::M512BH, "__m512bh") => {}
702714
(&Type::M512I, "__m512i") => {}
703715
(&Type::M512D, "__m512d") => {}
704716
(&Type::MMASK64, "__mmask64") => {}
@@ -726,12 +738,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
726738
(&Type::MutPtr(&Type::PrimUnsigned(64)), "__mmask64*") => {}
727739
(&Type::MutPtr(&Type::M64), "__m64*") => {}
728740
(&Type::MutPtr(&Type::M128), "__m128*") => {}
741+
(&Type::MutPtr(&Type::M128BH), "__m128bh*") => {}
729742
(&Type::MutPtr(&Type::M128I), "__m128i*") => {}
730743
(&Type::MutPtr(&Type::M128D), "__m128d*") => {}
731744
(&Type::MutPtr(&Type::M256), "__m256*") => {}
745+
(&Type::MutPtr(&Type::M256BH), "__m256bh*") => {}
732746
(&Type::MutPtr(&Type::M256I), "__m256i*") => {}
733747
(&Type::MutPtr(&Type::M256D), "__m256d*") => {}
734748
(&Type::MutPtr(&Type::M512), "__m512*") => {}
749+
(&Type::MutPtr(&Type::M512BH), "__m512bh*") => {}
735750
(&Type::MutPtr(&Type::M512I), "__m512i*") => {}
736751
(&Type::MutPtr(&Type::M512D), "__m512d*") => {}
737752

@@ -754,12 +769,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
754769
(&Type::ConstPtr(&Type::PrimUnsigned(32)), "void const*") => {}
755770
(&Type::ConstPtr(&Type::M64), "__m64 const*") => {}
756771
(&Type::ConstPtr(&Type::M128), "__m128 const*") => {}
772+
(&Type::ConstPtr(&Type::M128BH), "__m128bh const*") => {}
757773
(&Type::ConstPtr(&Type::M128I), "__m128i const*") => {}
758774
(&Type::ConstPtr(&Type::M128D), "__m128d const*") => {}
759775
(&Type::ConstPtr(&Type::M256), "__m256 const*") => {}
776+
(&Type::ConstPtr(&Type::M256BH), "__m256bh const*") => {}
760777
(&Type::ConstPtr(&Type::M256I), "__m256i const*") => {}
761778
(&Type::ConstPtr(&Type::M256D), "__m256d const*") => {}
762779
(&Type::ConstPtr(&Type::M512), "__m512 const*") => {}
780+
(&Type::ConstPtr(&Type::M512BH), "__m512bh const*") => {}
763781
(&Type::ConstPtr(&Type::M512I), "__m512i const*") => {}
764782
(&Type::ConstPtr(&Type::M512D), "__m512d const*") => {}
765783
(&Type::ConstPtr(&Type::PrimUnsigned(32)), "__mmask32*") => {}

0 commit comments

Comments
 (0)