Skip to content

Commit a0defe0

Browse files
committed
Auto merge of #3072 - eduardosm:llvm.ctpop, r=saethlin
Implement `llvm.ctpop.v*` intrinsics. Tested through x86 avx512vpopcntdq and avx512bitalg functions. I picked them from rust-lang/miri#2057 (comment), which looked easy.
2 parents ce33ca0 + b075a9d commit a0defe0

File tree

3 files changed

+280
-0
lines changed

3 files changed

+280
-0
lines changed

src/tools/miri/src/shims/foreign_items.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1032,6 +1032,29 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
10321032
}
10331033
}
10341034

1035+
// Used to implement the x86 `_mm{,256,512}_popcnt_epi{8,16,32,64}` and wasm
1036+
// `{i,u}8x16_popcnt` functions.
1037+
name if name.starts_with("llvm.ctpop.v") => {
1038+
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
1039+
1040+
let (op, op_len) = this.operand_to_simd(op)?;
1041+
let (dest, dest_len) = this.place_to_simd(dest)?;
1042+
1043+
assert_eq!(dest_len, op_len);
1044+
1045+
for i in 0..dest_len {
1046+
let op = this.read_immediate(&this.project_index(&op, i)?)?;
1047+
// Use `to_uint` to get a zero-extended `u128`. Those
1048+
// extra zeros will not affect `count_ones`.
1049+
let res = op.to_scalar().to_uint(op.layout.size)?.count_ones();
1050+
1051+
this.write_scalar(
1052+
Scalar::from_uint(res, op.layout.size),
1053+
&this.project_index(&dest, i)?,
1054+
)?;
1055+
}
1056+
}
1057+
10351058
name if name.starts_with("llvm.x86.sse.") => {
10361059
return shims::x86::sse::EvalContextExt::emulate_x86_sse_intrinsic(
10371060
this, link_name, abi, args, dest,
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
// Ignore everything except x86 and x86_64
2+
// Any additional targets that are added to CI should be ignored here
3+
//@ignore-target-aarch64
4+
//@ignore-target-arm
5+
//@ignore-target-avr
6+
//@ignore-target-s390x
7+
//@ignore-target-thumbv7em
8+
//@ignore-target-wasm32
9+
//@compile-flags: -C target-feature=+avx512bitalg,+avx512f,+avx512vl
10+
11+
#![feature(avx512_target_feature)]
12+
#![feature(stdsimd)]
13+
14+
#[cfg(target_arch = "x86")]
15+
use std::arch::x86::*;
16+
#[cfg(target_arch = "x86_64")]
17+
use std::arch::x86_64::*;
18+
use std::mem::transmute;
19+
20+
fn main() {
21+
assert!(is_x86_feature_detected!("avx512bitalg"));
22+
assert!(is_x86_feature_detected!("avx512f"));
23+
assert!(is_x86_feature_detected!("avx512vl"));
24+
25+
unsafe {
26+
test_avx512bitalg();
27+
}
28+
}
29+
30+
// Some of the constants in the tests below are just bit patterns. They should not
31+
// be interpreted as integers; signedness does not make sense for them, but
32+
// __mXXXi happens to be defined in terms of signed integers.
33+
#[allow(overflowing_literals)]
34+
#[target_feature(enable = "avx512bitalg,avx512f,avx512vl")]
35+
unsafe fn test_avx512bitalg() {
36+
// Mostly copied from library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs
37+
38+
#[target_feature(enable = "avx512bitalg,avx512f")]
39+
unsafe fn test_mm512_popcnt_epi16() {
40+
let test_data = _mm512_set_epi16(
41+
0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
42+
0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
43+
1024, 2048,
44+
);
45+
let actual_result = _mm512_popcnt_epi16(test_data);
46+
let reference_result = _mm512_set_epi16(
47+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1,
48+
1, 1, 1, 1, 1, 1,
49+
);
50+
assert_eq_m512i(actual_result, reference_result);
51+
}
52+
test_mm512_popcnt_epi16();
53+
54+
#[target_feature(enable = "avx512bitalg,avx512f,avx512vl")]
55+
unsafe fn test_mm256_popcnt_epi16() {
56+
let test_data = _mm256_set_epi16(
57+
0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
58+
0x3F_FF, 0x7F_FF,
59+
);
60+
let actual_result = _mm256_popcnt_epi16(test_data);
61+
let reference_result =
62+
_mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
63+
assert_eq_m256i(actual_result, reference_result);
64+
}
65+
test_mm256_popcnt_epi16();
66+
67+
#[target_feature(enable = "avx512bitalg,avx512f,avx512vl")]
68+
unsafe fn test_mm_popcnt_epi16() {
69+
let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
70+
let actual_result = _mm_popcnt_epi16(test_data);
71+
let reference_result = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
72+
assert_eq_m128i(actual_result, reference_result);
73+
}
74+
test_mm_popcnt_epi16();
75+
76+
#[target_feature(enable = "avx512bitalg,avx512f")]
77+
unsafe fn test_mm512_popcnt_epi8() {
78+
let test_data = _mm512_set_epi8(
79+
0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
80+
217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
81+
140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
82+
225, 21, 249, 211, 155, 228, 70,
83+
);
84+
let actual_result = _mm512_popcnt_epi8(test_data);
85+
let reference_result = _mm512_set_epi8(
86+
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
87+
2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4,
88+
3, 6, 5, 5, 4, 3,
89+
);
90+
assert_eq_m512i(actual_result, reference_result);
91+
}
92+
test_mm512_popcnt_epi8();
93+
94+
#[target_feature(enable = "avx512bitalg,avx512f,avx512vl")]
95+
unsafe fn test_mm256_popcnt_epi8() {
96+
let test_data = _mm256_set_epi8(
97+
0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
98+
217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172,
99+
);
100+
let actual_result = _mm256_popcnt_epi8(test_data);
101+
let reference_result = _mm256_set_epi8(
102+
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
103+
2, 4, 4,
104+
);
105+
assert_eq_m256i(actual_result, reference_result);
106+
}
107+
test_mm256_popcnt_epi8();
108+
109+
#[target_feature(enable = "avx512bitalg,avx512f,avx512vl")]
110+
unsafe fn test_mm_popcnt_epi8() {
111+
let test_data =
112+
_mm_set_epi8(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64);
113+
let actual_result = _mm_popcnt_epi8(test_data);
114+
let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1);
115+
assert_eq_m128i(actual_result, reference_result);
116+
}
117+
test_mm_popcnt_epi8();
118+
}
119+
120+
#[track_caller]
121+
unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
122+
assert_eq!(transmute::<_, [i32; 16]>(a), transmute::<_, [i32; 16]>(b))
123+
}
124+
125+
#[track_caller]
126+
unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) {
127+
assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b))
128+
}
129+
130+
#[track_caller]
131+
unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
132+
assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))
133+
}
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
// Ignore everything except x86 and x86_64
2+
// Any additional targets that are added to CI should be ignored here
3+
//@ignore-target-aarch64
4+
//@ignore-target-arm
5+
//@ignore-target-avr
6+
//@ignore-target-s390x
7+
//@ignore-target-thumbv7em
8+
//@ignore-target-wasm32
9+
//@compile-flags: -C target-feature=+avx512vpopcntdq,+avx512f,+avx512vl
10+
11+
#![feature(avx512_target_feature)]
12+
#![feature(stdsimd)]
13+
14+
#[cfg(target_arch = "x86")]
15+
use std::arch::x86::*;
16+
#[cfg(target_arch = "x86_64")]
17+
use std::arch::x86_64::*;
18+
use std::mem::transmute;
19+
20+
fn main() {
21+
assert!(is_x86_feature_detected!("avx512vpopcntdq"));
22+
assert!(is_x86_feature_detected!("avx512f"));
23+
assert!(is_x86_feature_detected!("avx512vl"));
24+
25+
unsafe {
26+
test_avx512vpopcntdq();
27+
}
28+
}
29+
30+
#[target_feature(enable = "avx512vpopcntdq,avx512f,avx512vl")]
31+
unsafe fn test_avx512vpopcntdq() {
32+
// Mostly copied from library/stdarch/crates/core_arch/src/x86/avx512vpopcntdq.rs
33+
34+
#[target_feature(enable = "avx512vpopcntdq,avx512f")]
35+
unsafe fn test_mm512_popcnt_epi32() {
36+
let test_data = _mm512_set_epi32(
37+
0,
38+
1,
39+
-1,
40+
2,
41+
7,
42+
0xFF_FE,
43+
0x7F_FF_FF_FF,
44+
-100,
45+
0x40_00_00_00,
46+
103,
47+
371,
48+
552,
49+
432_948,
50+
818_826_998,
51+
255,
52+
256,
53+
);
54+
let actual_result = _mm512_popcnt_epi32(test_data);
55+
let reference_result =
56+
_mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 1, 5, 6, 3, 10, 17, 8, 1);
57+
assert_eq_m512i(actual_result, reference_result);
58+
}
59+
test_mm512_popcnt_epi32();
60+
61+
#[target_feature(enable = "avx512vpopcntdq,avx512f,avx512vl")]
62+
unsafe fn test_mm256_popcnt_epi32() {
63+
let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
64+
let actual_result = _mm256_popcnt_epi32(test_data);
65+
let reference_result = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28);
66+
assert_eq_m256i(actual_result, reference_result);
67+
}
68+
test_mm256_popcnt_epi32();
69+
70+
#[target_feature(enable = "avx512vpopcntdq,avx512f,avx512vl")]
71+
unsafe fn test_mm_popcnt_epi32() {
72+
let test_data = _mm_set_epi32(0, 1, -1, -100);
73+
let actual_result = _mm_popcnt_epi32(test_data);
74+
let reference_result = _mm_set_epi32(0, 1, 32, 28);
75+
assert_eq_m128i(actual_result, reference_result);
76+
}
77+
test_mm_popcnt_epi32();
78+
79+
#[target_feature(enable = "avx512vpopcntdq,avx512f")]
80+
unsafe fn test_mm512_popcnt_epi64() {
81+
let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
82+
let actual_result = _mm512_popcnt_epi64(test_data);
83+
let reference_result = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60);
84+
assert_eq_m512i(actual_result, reference_result);
85+
}
86+
test_mm512_popcnt_epi64();
87+
88+
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
89+
unsafe fn test_mm256_popcnt_epi64() {
90+
let test_data = _mm256_set_epi64x(0, 1, -1, -100);
91+
let actual_result = _mm256_popcnt_epi64(test_data);
92+
let reference_result = _mm256_set_epi64x(0, 1, 64, 60);
93+
assert_eq_m256i(actual_result, reference_result);
94+
}
95+
test_mm256_popcnt_epi64();
96+
97+
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
98+
unsafe fn test_mm_popcnt_epi64() {
99+
let test_data = _mm_set_epi64x(0, 1);
100+
let actual_result = _mm_popcnt_epi64(test_data);
101+
let reference_result = _mm_set_epi64x(0, 1);
102+
assert_eq_m128i(actual_result, reference_result);
103+
let test_data = _mm_set_epi64x(-1, -100);
104+
let actual_result = _mm_popcnt_epi64(test_data);
105+
let reference_result = _mm_set_epi64x(64, 60);
106+
assert_eq_m128i(actual_result, reference_result);
107+
}
108+
test_mm_popcnt_epi64();
109+
}
110+
111+
#[track_caller]
112+
unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
113+
assert_eq!(transmute::<_, [i32; 16]>(a), transmute::<_, [i32; 16]>(b))
114+
}
115+
116+
#[track_caller]
117+
unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) {
118+
assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b))
119+
}
120+
121+
#[track_caller]
122+
unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
123+
assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))
124+
}

0 commit comments

Comments
 (0)