|
| 1 | +// Ignore everything except x86 and x86_64 |
| 2 | +// Any additional targets that are added to CI should be ignored here |
| 3 | +//@ignore-target-aarch64 |
| 4 | +//@ignore-target-arm |
| 5 | +//@ignore-target-avr |
| 6 | +//@ignore-target-s390x |
| 7 | +//@ignore-target-thumbv7em |
| 8 | +//@ignore-target-wasm32 |
| 9 | +//@compile-flags: -C target-feature=+avx512bitalg,+avx512f,+avx512vl |
| 10 | + |
| 11 | +#![feature(avx512_target_feature)] |
| 12 | +#![feature(stdsimd)] |
| 13 | + |
| 14 | +#[cfg(target_arch = "x86")] |
| 15 | +use std::arch::x86::*; |
| 16 | +#[cfg(target_arch = "x86_64")] |
| 17 | +use std::arch::x86_64::*; |
| 18 | +use std::mem::transmute; |
| 19 | + |
| 20 | +fn main() { |
| 21 | + assert!(is_x86_feature_detected!("avx512bitalg")); |
| 22 | + assert!(is_x86_feature_detected!("avx512f")); |
| 23 | + assert!(is_x86_feature_detected!("avx512vl")); |
| 24 | + |
| 25 | + unsafe { |
| 26 | + test_avx512bitalg(); |
| 27 | + } |
| 28 | +} |
| 29 | + |
| 30 | +// Some of the constants in the tests below are just bit patterns. They should not |
| 31 | +// be interpreted as integers; signedness does not make sense for them, but |
| 32 | +// __mXXXi happens to be defined in terms of signed integers. |
| 33 | +#[allow(overflowing_literals)] |
| 34 | +#[target_feature(enable = "avx512bitalg,avx512f,avx512vl")] |
| 35 | +unsafe fn test_avx512bitalg() { |
| 36 | + // Mostly copied from library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs |
| 37 | + |
| 38 | + #[target_feature(enable = "avx512bitalg,avx512f")] |
| 39 | + unsafe fn test_mm512_popcnt_epi16() { |
| 40 | + let test_data = _mm512_set_epi16( |
| 41 | + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, |
| 42 | + 0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512, |
| 43 | + 1024, 2048, |
| 44 | + ); |
| 45 | + let actual_result = _mm512_popcnt_epi16(test_data); |
| 46 | + let reference_result = _mm512_set_epi16( |
| 47 | + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1, |
| 48 | + 1, 1, 1, 1, 1, 1, |
| 49 | + ); |
| 50 | + assert_eq_m512i(actual_result, reference_result); |
| 51 | + } |
| 52 | + test_mm512_popcnt_epi16(); |
| 53 | + |
| 54 | + #[target_feature(enable = "avx512bitalg,avx512f,avx512vl")] |
| 55 | + unsafe fn test_mm256_popcnt_epi16() { |
| 56 | + let test_data = _mm256_set_epi16( |
| 57 | + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, |
| 58 | + 0x3F_FF, 0x7F_FF, |
| 59 | + ); |
| 60 | + let actual_result = _mm256_popcnt_epi16(test_data); |
| 61 | + let reference_result = |
| 62 | + _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| 63 | + assert_eq_m256i(actual_result, reference_result); |
| 64 | + } |
| 65 | + test_mm256_popcnt_epi16(); |
| 66 | + |
| 67 | + #[target_feature(enable = "avx512bitalg,avx512f,avx512vl")] |
| 68 | + unsafe fn test_mm_popcnt_epi16() { |
| 69 | + let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F); |
| 70 | + let actual_result = _mm_popcnt_epi16(test_data); |
| 71 | + let reference_result = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| 72 | + assert_eq_m128i(actual_result, reference_result); |
| 73 | + } |
| 74 | + test_mm_popcnt_epi16(); |
| 75 | + |
| 76 | + #[target_feature(enable = "avx512bitalg,avx512f")] |
| 77 | + unsafe fn test_mm512_popcnt_epi8() { |
| 78 | + let test_data = _mm512_set_epi8( |
| 79 | + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100, |
| 80 | + 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189, |
| 81 | + 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90, |
| 82 | + 225, 21, 249, 211, 155, 228, 70, |
| 83 | + ); |
| 84 | + let actual_result = _mm512_popcnt_epi8(test_data); |
| 85 | + let reference_result = _mm512_set_epi8( |
| 86 | + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5, |
| 87 | + 2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4, |
| 88 | + 3, 6, 5, 5, 4, 3, |
| 89 | + ); |
| 90 | + assert_eq_m512i(actual_result, reference_result); |
| 91 | + } |
| 92 | + test_mm512_popcnt_epi8(); |
| 93 | + |
| 94 | + #[target_feature(enable = "avx512bitalg,avx512f,avx512vl")] |
| 95 | + unsafe fn test_mm256_popcnt_epi8() { |
| 96 | + let test_data = _mm256_set_epi8( |
| 97 | + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100, |
| 98 | + 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, |
| 99 | + ); |
| 100 | + let actual_result = _mm256_popcnt_epi8(test_data); |
| 101 | + let reference_result = _mm256_set_epi8( |
| 102 | + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5, |
| 103 | + 2, 4, 4, |
| 104 | + ); |
| 105 | + assert_eq_m256i(actual_result, reference_result); |
| 106 | + } |
| 107 | + test_mm256_popcnt_epi8(); |
| 108 | + |
| 109 | + #[target_feature(enable = "avx512bitalg,avx512f,avx512vl")] |
| 110 | + unsafe fn test_mm_popcnt_epi8() { |
| 111 | + let test_data = |
| 112 | + _mm_set_epi8(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64); |
| 113 | + let actual_result = _mm_popcnt_epi8(test_data); |
| 114 | + let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1); |
| 115 | + assert_eq_m128i(actual_result, reference_result); |
| 116 | + } |
| 117 | + test_mm_popcnt_epi8(); |
| 118 | +} |
| 119 | + |
/// Asserts that two 512-bit integer vectors hold identical bits.
///
/// Both vectors are reinterpreted as `[i32; 16]` before comparison, so a
/// failing assertion prints the contents as sixteen 32-bit lanes.
/// `#[track_caller]` makes the panic point at the calling test.
///
/// # Safety
///
/// Declared `unsafe` because the body calls `transmute`; no further caller
/// obligation is visible in this file.
#[track_caller]
unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
    assert_eq!(transmute::<_, [i32; 16]>(a), transmute::<_, [i32; 16]>(b))
}
| 124 | + |
/// Asserts that two 256-bit integer vectors hold identical bits.
///
/// Both vectors are reinterpreted as `[u64; 4]` before comparison, so a
/// failing assertion prints the contents as four 64-bit lanes.
/// `#[track_caller]` makes the panic point at the calling test.
///
/// # Safety
///
/// Declared `unsafe` because the body calls `transmute`; no further caller
/// obligation is visible in this file.
#[track_caller]
unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) {
    assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b))
}
| 129 | + |
/// Asserts that two 128-bit integer vectors hold identical bits.
///
/// Both vectors are reinterpreted as `[u64; 2]` before comparison, so a
/// failing assertion prints the contents as two 64-bit lanes.
/// `#[track_caller]` makes the panic point at the calling test.
///
/// # Safety
///
/// Declared `unsafe` because the body calls `transmute`; no further caller
/// obligation is visible in this file.
#[track_caller]
unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
    assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))
}
0 commit comments