From bb46c728c6c99bdb7a246c30aa70985240e0d4f3 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 30 Jul 2019 16:05:50 +0200 Subject: [PATCH 1/8] Migrate Azure to the rust-lang2 org --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 212ec27def..232132527b 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -5,7 +5,7 @@ resources: - repository: rustinfra type: github name: rust-lang/simpleinfra - endpoint: rust-lang + endpoint: gnzlbg trigger: ["master"] pr: ["master"] From 52271f43aac5269fe1b6ed52722d3f83b0617718 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 31 Jul 2019 15:18:49 +0200 Subject: [PATCH 2/8] LLVM9 started emitting more single precision floating-point intrinsics for double-precision operations --- crates/core_arch/src/x86/sse.rs | 6 +++--- crates/core_arch/src/x86/sse2.rs | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 3160ac57b8..0f8d6dec74 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -1117,7 +1117,7 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { all(target_arch = "x86", target_feature = "sse2") ) ), - assert_instr(movhpd) + assert_instr(movhps) )] // FIXME: 32-bit codegen without SSE2 generates two `shufps` instead of `movhps` #[cfg_attr( @@ -1137,10 +1137,10 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 { /// is copied from the upper half of `a`. #[inline] #[target_feature(enable = "sse")] -#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movlpd))] +#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movlps))] #[cfg_attr( all(test, target_arch = "x86", target_feature = "sse2"), - assert_instr(movlpd) + assert_instr(movlps) )] // FIXME: On 32-bit targets without SSE2, it just generates two `movss`... #[cfg_attr( diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index 0408c5ae07..d4eba34473 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -2553,7 +2553,7 @@ pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd) #[inline] #[target_feature(enable = "sse2")] -#[cfg_attr(test, assert_instr(movhpd))] +#[cfg_attr(test, assert_instr(movhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { _mm_setr_pd(simd_extract(a, 0), *mem_addr) @@ -2566,7 +2566,7 @@ pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd) #[inline] #[target_feature(enable = "sse2")] -#[cfg_attr(test, assert_instr(movlpd))] +#[cfg_attr(test, assert_instr(movlps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { _mm_setr_pd(*mem_addr, simd_extract(a, 1)) @@ -2675,7 +2675,7 @@ pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd) #[inline] #[target_feature(enable = "sse2")] -#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhpd))] +#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { *mem_addr = simd_extract(a, 1); @@ -2725,7 +2725,7 @@ pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd) #[inline] #[target_feature(enable = "sse2")] -#[cfg_attr(test, assert_instr(movapd))] +#[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { let a = _mm_load_pd(mem_addr); @@ -2758,7 +2758,7 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd) #[inline] #[target_feature(enable = "sse2")] -#[cfg_attr(test, assert_instr(shufpd, imm8 = 1))] +#[cfg_attr(test, assert_instr(shufps, imm8 = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { From a2a7fdd7ca3c6e8bf5169bae1051f99d1a77f025 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 1 Aug 2019 14:00:22 +0200 Subject: [PATCH 3/8] White-list new codegen for _mm_broadcastq_epi64 Opened #791 . --- crates/core_arch/src/x86/avx2.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index 84f3364b92..e137f0ce59 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -641,16 +641,14 @@ pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastq_epi64) #[inline] #[target_feature(enable = "avx2")] -#[cfg_attr(test, assert_instr(vpbroadcastq))] +// FIXME: https://github.com/rust-lang/stdarch/issues/791 +#[cfg_attr(test, assert_instr(vmovddup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { - let zero = _mm_setzero_si128().as_i64x2(); - let ret = simd_shuffle2(a.as_i64x2(), zero, [0_u32; 2]); + let ret = simd_shuffle2(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]); transmute::(ret) } -// N.B. `simd_shuffle4` with integer data types for `a` and `b` is -// often compiled to `vbroadcastsd`. /// Broadcasts the low packed 64-bit integer from `a` to all elements of /// the 256-bit returned value. /// @@ -660,8 +658,7 @@ pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vbroadcastsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i { - let zero = _mm_setzero_si128(); - let ret = simd_shuffle4(a.as_i64x2(), zero.as_i64x2(), [0_u32; 4]); + let ret = simd_shuffle4(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]); transmute::(ret) } From adf19bf334c98954a07f521d97820f92738716bc Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 1 Aug 2019 14:14:21 +0200 Subject: [PATCH 4/8] Update codegen of _mm256_set1_epi64x for x86 32-bit --- crates/core_arch/src/x86/avx.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 20f95d9497..90867e4ad7 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -2741,8 +2741,8 @@ pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi64x) #[inline] #[target_feature(enable = "avx")] -//#[cfg_attr(test, assert_instr(vmovddup))] -#[cfg_attr(test, assert_instr(vinsertf128))] +#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(vinsertf128))] +#[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i { From 4ab872d2ff8827a80874f6f523ca7659ee85423a Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 1 Aug 2019 14:29:46 +0200 Subject: [PATCH 5/8] Update LLVM9 code generation on Windows --- crates/core_arch/src/x86/sse2.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index d4eba34473..b797d9fe88 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -2758,7 +2758,8 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd) #[inline] #[target_feature(enable = "sse2")] -#[cfg_attr(test, assert_instr(shufps, imm8 = 1))] +#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(shufps, imm8 = 1))] +#[cfg_attr(all(test, target_os = "windows"), assert_instr(shufpd, imm8 = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { @@ -2777,7 +2778,8 @@ pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd) #[inline] #[target_feature(enable = "sse2")] -#[cfg_attr(test, assert_instr(movsd))] +#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movsd))] +#[cfg_attr(all(test, target_os = "windows"), assert_instr(movlps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { _mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1)) From 0df846981c112bc61b5aac749d78a6f07a4b8908 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 1 Aug 2019 15:36:43 +0200 Subject: [PATCH 6/8] formatting --- crates/core_arch/src/aarch64/crc.rs | 1 - crates/core_arch/src/aarch64/neon.rs | 1 - crates/core_arch/src/acle/dsp.rs | 1 - crates/core_arch/src/powerpc/altivec.rs | 1 - 4 files changed, 4 deletions(-) diff --git a/crates/core_arch/src/aarch64/crc.rs b/crates/core_arch/src/aarch64/crc.rs index 278a785627..32dddab811 100644 --- a/crates/core_arch/src/aarch64/crc.rs +++ b/crates/core_arch/src/aarch64/crc.rs @@ -138,5 +138,4 @@ mod tests { assert_eq!(__crc32cd(0, 0), 0); assert_eq!(__crc32cd(0, 18446744073709551615), 3293575501); } - } diff --git a/crates/core_arch/src/aarch64/neon.rs b/crates/core_arch/src/aarch64/neon.rs index 46af4567b3..2ddd97273c 100644 --- a/crates/core_arch/src/aarch64/neon.rs +++ b/crates/core_arch/src/aarch64/neon.rs @@ -1980,7 +1980,6 @@ mod tests { test_vcombine!(test_vcombine_u64 => vcombine_u64([3_u64], [13_u64])); test_vcombine!(test_vcombine_p64 => vcombine_p64([3_u64], [13_u64])); test_vcombine!(test_vcombine_f64 => vcombine_f64([-3_f64], [13_f64])); - } #[cfg(test)] diff --git a/crates/core_arch/src/acle/dsp.rs b/crates/core_arch/src/acle/dsp.rs index 03cc082697..49986fa6ef 100644 --- a/crates/core_arch/src/acle/dsp.rs +++ b/crates/core_arch/src/acle/dsp.rs @@ -381,5 +381,4 @@ mod tests { assert_eq!(super::__smlawt(a, transmute(b), c), r); } } - } diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index ce43a5725b..3b6ee2ea19 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -1832,7 +1832,6 @@ mod endian { { a.vec_mulo(b) } - } pub use self::endian::*; From c6500b910f552464f265844d64e6b4e361b0cad5 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Thu, 1 Aug 2019 18:00:42 +0200 Subject: [PATCH 7/8] Disable Game Boy Advance build job temporarily --- azure-pipelines.yml | 56 ++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 232132527b..0ae0c48d03 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -188,34 +188,34 @@ jobs: - script: cargo test --manifest-path crates/stdarch-verify/Cargo.toml displayName: Automatic verification - - job: GameBoyAdvance - dependsOn: StyleAndDocs - pool: - vmImage: ubuntu-16.04 - steps: - - template: ci/azure-install-rust.yml - - script: rustup component add rust-src - displayName: Add rust-src - - script: (test -x $HOME/.cargo/bin/cargo-xbuild || cargo install cargo-xbuild) - displayName: Add cargo-xbuild +# - job: GameBoyAdvance +# dependsOn: StyleAndDocs +# pool: +# vmImage: ubuntu-16.04 +# steps: +# - template: ci/azure-install-rust.yml +# - script: rustup component add rust-src +# displayName: Add rust-src +# - script: (test -x $HOME/.cargo/bin/cargo-xbuild || cargo install cargo-xbuild) +# displayName: Add cargo-xbuild # Obtain the devkitPro tools, using `target/` as a temp directory. This # is required because we need to use their linker. `lld` uses the `BLX` # instruction, which was not available in thumb state code until ARMv5. - - script: | - mkdir -p target - cd target - wget https://github.com/devkitPro/pacman/releases/download/devkitpro-pacman-1.0.1/devkitpro-pacman.deb - sudo dpkg -i devkitpro-pacman.deb - sudo dkp-pacman -Sy - sudo dkp-pacman -Syu - sudo dkp-pacman -S -v --noconfirm gba-tools devkitARM - export PATH="$PATH:/opt/devkitpro/devkitARM/bin" - export PATH="$PATH:/opt/devkitpro/tools/bin" - cd .. - # Pull the target spec up into the current directory and then build - mv ci/gba.json gba.json - cargo xbuild -p core_arch --target gba.json - variables: - NORUN: 1 - NOSTD: 1 - NO_DOCKER: 1 +# - script: | +# mkdir -p target +# cd target +# wget https://github.com/devkitPro/pacman/releases/download/devkitpro-pacman-1.0.1/devkitpro-pacman.deb +# sudo dpkg -i devkitpro-pacman.deb +# sudo dkp-pacman -Sy +# sudo dkp-pacman -Syu +# sudo dkp-pacman -S -v --noconfirm gba-tools devkitARM +# export PATH="$PATH:/opt/devkitpro/devkitARM/bin" +# export PATH="$PATH:/opt/devkitpro/tools/bin" +# cd .. +# # Pull the target spec up into the current directory and then build +# mv ci/gba.json gba.json +# cargo xbuild -p core_arch --target gba.json +# variables: +# NORUN: 1 +# NOSTD: 1 +# NO_DOCKER: 1 From 30c0513432b7ce142052db23d03fab434ee5dff9 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 2 Aug 2019 09:44:03 +0200 Subject: [PATCH 8/8] Update i586-unknown-linux-gnu codegen with LLVM9 bugfixes --- crates/core_arch/src/x86/sse.rs | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 0f8d6dec74..e5b1915345 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -1098,7 +1098,10 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_ps) #[inline] #[target_feature(enable = "sse")] -#[cfg_attr(test, assert_instr(movmskps))] +// FIXME: LLVM9 trunk has the following bug: +// https://github.com/rust-lang/stdarch/issues/794 +// so we only temporarily test this on i686 and x86_64 but not on i586: +#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(movmskps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { movmskps(a) @@ -1109,21 +1112,7 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { /// from `a`. #[inline] #[target_feature(enable = "sse")] -#[cfg_attr( - all( - test, - any( - target_arch = "x86_64", - all(target_arch = "x86", target_feature = "sse2") - ) - ), - assert_instr(movhps) -)] -// FIXME: 32-bit codegen without SSE2 generates two `shufps` instead of `movhps` -#[cfg_attr( - all(test, target_arch = "x86", not(target_feature = "sse2")), - assert_instr(shufps) -)] +#[cfg_attr(test, assert_instr(movhps))] // TODO: this function is actually not limited to floats, but that's what // what matches the C type most closely: `(__m128, *const __m64) -> __m128`. pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 { @@ -1137,16 +1126,7 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 { /// is copied from the upper half of `a`. #[inline] #[target_feature(enable = "sse")] -#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movlps))] -#[cfg_attr( - all(test, target_arch = "x86", target_feature = "sse2"), - assert_instr(movlps) -)] -// FIXME: On 32-bit targets without SSE2, it just generates two `movss`... -#[cfg_attr( - all(test, target_arch = "x86", not(target_feature = "sse2")), - assert_instr(movss) -)] +#[cfg_attr(test, assert_instr(movlps))] pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 { let q = p as *const f32x2; let b: f32x2 = *q;