From 21b425986655b253cfbeb2d5858793cd49b3a3c1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 1 Apr 2021 10:40:08 +0800 Subject: [PATCH 01/37] Start working on architectures appear in Rustdoc related to #1055 --- crates/core_arch/src/acle/hints.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/acle/hints.rs index 280aa00cf8..c4c72cf2eb 100644 --- a/crates/core_arch/src/acle/hints.rs +++ b/crates/core_arch/src/acle/hints.rs @@ -9,7 +9,8 @@ /// low-power state until one of a number of asynchronous events occurs. // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" -#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] +#[cfg(any(target_feature = "v6", target_arch = "aarch64", doc))] +#[doc(cfg(target_arch = "arm"))] #[inline(always)] pub unsafe fn __wfi() { hint(HINT_WFI); @@ -22,7 +23,8 @@ pub unsafe fn __wfi() { /// another processor. // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" -#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] +#[cfg(any(target_feature = "v6", target_arch = "aarch64", doc))] +#[doc(cfg(target_arch = "arm"))] #[inline(always)] pub unsafe fn __wfe() { hint(HINT_WFE); @@ -34,7 +36,8 @@ pub unsafe fn __wfe() { /// system. It is a NOP on a uniprocessor system. // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M, 7-M // LLVM says "instruction requires: armv6k" -#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] +#[cfg(any(target_feature = "v6", target_arch = "aarch64", doc))] +#[doc(cfg(target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __sev() { hint(HINT_SEV); @@ -49,7 +52,9 @@ pub unsafe fn __sev() { #[cfg(any( target_feature = "v8", // 32-bit ARMv8 target_arch = "aarch64", // AArch64 + doc, ))] +#[doc(cfg(target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __sevl() { hint(HINT_SEVL); @@ -62,7 +67,8 @@ pub unsafe fn __sevl() { /// improve overall system performance. 
// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" -#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] +#[cfg(any(target_feature = "v6", target_arch = "aarch64", doc))] +#[doc(cfg(target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __yield() { hint(HINT_YIELD); From 8c6fdc5a1e33c9ca18cc96703296223ad0758f78 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 2 Apr 2021 21:34:15 +0800 Subject: [PATCH 02/37] Don't limit cargo doc unnecessarily Suggested by https://github.com/rust-lang/stdarch/pull/1104#discussion_r605346158 The blanket inclusions mentioned in the comment are here: https://github.com/rust-lang/stdarch/blob/8a5da46643f27f14059891e6ef8e91598dc78247/crates/core_arch/src/mod.rs#L52:L52 --- crates/core_arch/src/acle/hints.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/acle/hints.rs index c4c72cf2eb..60e3395972 100644 --- a/crates/core_arch/src/acle/hints.rs +++ b/crates/core_arch/src/acle/hints.rs @@ -10,7 +10,6 @@ // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" #[cfg(any(target_feature = "v6", target_arch = "aarch64", doc))] -#[doc(cfg(target_arch = "arm"))] #[inline(always)] pub unsafe fn __wfi() { hint(HINT_WFI); @@ -24,7 +23,6 @@ pub unsafe fn __wfi() { // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" #[cfg(any(target_feature = "v6", target_arch = "aarch64", doc))] -#[doc(cfg(target_arch = "arm"))] #[inline(always)] pub unsafe fn __wfe() { hint(HINT_WFE); @@ -37,7 +35,6 @@ pub unsafe fn __wfe() { // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M, 7-M // LLVM says "instruction requires: armv6k" #[cfg(any(target_feature = "v6", target_arch = "aarch64", doc))] -#[doc(cfg(target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __sev() { hint(HINT_SEV); @@ -68,7 +65,6 @@ pub unsafe fn __sevl() { // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" #[cfg(any(target_feature = "v6", target_arch = "aarch64", doc))] -#[doc(cfg(target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __yield() { hint(HINT_YIELD); From f91d45f52bff0bcb8f0d54a1e9082e5baa7dfccf Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 2 Apr 2021 22:10:08 +0800 Subject: [PATCH 03/37] Move arm/crc module to acle/crc as it is shared between ARM and AArch64 --- crates/core_arch/src/{arm => acle}/crc.rs | 0 crates/core_arch/src/acle/mod.rs | 3 +++ crates/core_arch/src/arm/mod.rs | 4 +--- 3 files changed, 4 insertions(+), 3 deletions(-) rename crates/core_arch/src/{arm => acle}/crc.rs (100%) diff --git a/crates/core_arch/src/arm/crc.rs b/crates/core_arch/src/acle/crc.rs similarity index 100% rename from crates/core_arch/src/arm/crc.rs rename to crates/core_arch/src/acle/crc.rs diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 5f29decf5a..11cedc9123 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -119,6 +119,9 @@ mod simd32; ))] pub use self::simd32::*; +#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] +pub mod crc; + mod sealed { pub trait Dmb { unsafe fn __dmb(&self); diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index fd0cb2cf8d..2e79fa61cd 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ 
-25,9 +25,7 @@ mod neon; pub use self::neon::*; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -mod crc; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -pub use self::crc::*; +pub use crate::core_arch::acle::crc::*; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] mod crypto; From dbc0820d2e0f698ab1566d8a6b1570033ac3f32b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 3 Apr 2021 09:38:23 +0800 Subject: [PATCH 04/37] Conform moved CRC module to existing conventions; make it appear in docs --- crates/core_arch/src/acle/mod.rs | 6 ++++-- crates/core_arch/src/arm/mod.rs | 3 --- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 11cedc9123..46ede76984 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -119,8 +119,10 @@ mod simd32; ))] pub use self::simd32::*; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -pub mod crc; +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +mod crc; +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +pub use crc::*; mod sealed { pub trait Dmb { diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 2e79fa61cd..de1eb3ca88 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -24,9 +24,6 @@ mod neon; #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub use self::neon::*; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -pub use crate::core_arch::acle::crc::*; - #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] mod crypto; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] From d589823030ca897af22393a143a400bbc231b760 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 3 Apr 2021 10:56:51 +0800 Subject: [PATCH 05/37] Move arm/crypto to acle/arm_shared --- crates/core_arch/src/{arm => acle}/crypto.rs | 0 crates/core_arch/src/acle/mod.rs | 5 +++++ crates/core_arch/src/arm/mod.rs | 4 ---- 3 files changed, 5 insertions(+), 4 deletions(-) rename crates/core_arch/src/{arm => acle}/crypto.rs (100%) diff --git a/crates/core_arch/src/arm/crypto.rs b/crates/core_arch/src/acle/crypto.rs similarity index 100% rename from crates/core_arch/src/arm/crypto.rs rename to crates/core_arch/src/acle/crypto.rs diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 46ede76984..57a95fec17 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -124,6 +124,11 @@ mod crc; #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub use crc::*; +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +mod crypto; +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +pub use self::crypto::*; + mod sealed { pub trait Dmb { unsafe fn __dmb(&self); diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index de1eb3ca88..3bb3e62a6e 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -24,10 +24,6 @@ mod neon; #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub use self::neon::*; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -mod crypto; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -pub use self::crypto::*; pub use crate::core_arch::acle::*; From 553e32d854e67f4774d28188fec09c31b85e99cd Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 3 Apr 2021 11:01:21 +0800 Subject: 
[PATCH 06/37] Move acle/sat to arm/ as it is only available there --- crates/core_arch/src/acle/mod.rs | 7 ------- crates/core_arch/src/arm/mod.rs | 8 +++++++- crates/core_arch/src/{acle => arm}/sat.rs | 0 3 files changed, 7 insertions(+), 8 deletions(-) rename crates/core_arch/src/{acle => arm}/sat.rs (100%) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 57a95fec17..bec63e67ee 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -90,13 +90,6 @@ mod dsp; ))] pub use self::dsp::*; -// Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT) -#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] -mod sat; - -#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] -pub use self::sat::*; - // Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says // Section 5.4.9 of ACLE. We'll expose these for the A profile even if deprecated #[cfg(all( diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 3bb3e62a6e..adf8fe3b8c 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -14,6 +14,13 @@ pub use self::armclang::*; mod v6; pub use self::v6::*; +// Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT) +#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] +mod sat; + +#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] +pub use self::sat::*; + #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] mod v7; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] @@ -24,7 +31,6 @@ mod neon; #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub use self::neon::*; - pub use crate::core_arch::acle::*; #[cfg(test)] diff --git a/crates/core_arch/src/acle/sat.rs b/crates/core_arch/src/arm/sat.rs similarity index 100% rename from crates/core_arch/src/acle/sat.rs rename to crates/core_arch/src/arm/sat.rs From 75792de8fa08fec2eb0bdf65585d4ef8c93eeea8 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 3 Apr 2021 11:04:44 +0800 Subject: [PATCH 07/37] Make 'sat' show up in docs all the time. --- crates/core_arch/src/arm/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index adf8fe3b8c..c5bd837ddf 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -15,10 +15,10 @@ mod v6; pub use self::v6::*; // Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. 
SSAT) -#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] +#[cfg(any(all(not(target_arch = "aarch64"), target_feature = "v6",), doc))] mod sat; -#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] +#[cfg(any(all(not(target_arch = "aarch64"), target_feature = "v6",), doc))] pub use self::sat::*; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] From 74632670834798893388560a2021ca26bdca9fcb Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 3 Apr 2021 11:16:52 +0800 Subject: [PATCH 08/37] Move acle/dsp to arm; fix tests by skipping dsp arm functions --- crates/core_arch/src/acle/mod.rs | 25 ----------------------- crates/core_arch/src/{acle => arm}/dsp.rs | 0 crates/core_arch/src/arm/mod.rs | 25 +++++++++++++++++++++++ crates/stdarch-verify/tests/arm.rs | 23 ++++++++++++++++++++- 4 files changed, 47 insertions(+), 26 deletions(-) rename crates/core_arch/src/{acle => arm}/dsp.rs (100%) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index bec63e67ee..e12c233681 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -65,31 +65,6 @@ mod ex; pub use self::ex::*; -// Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD) -// We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see -// section 5.4.7) -// Here we workaround the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on -// '+v5te' rather than on '+dsp' -#[cfg(all( - not(target_arch = "aarch64"), - any( - // >= v5TE but excludes v7-M - all(target_feature = "v5te", not(target_feature = "mclass")), - // v7E-M - all(target_feature = "mclass", target_feature = "dsp"), - ) -))] -mod dsp; - -#[cfg(all( - not(target_arch = "aarch64"), - any( - all(target_feature = "v5te", not(target_feature = "mclass")), - all(target_feature = "mclass", target_feature = "dsp"), - ) -))] -pub use self::dsp::*; - // Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says // Section 5.4.9 of ACLE. We'll expose these for the A profile even if deprecated #[cfg(all( diff --git a/crates/core_arch/src/acle/dsp.rs b/crates/core_arch/src/arm/dsp.rs similarity index 100% rename from crates/core_arch/src/acle/dsp.rs rename to crates/core_arch/src/arm/dsp.rs diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index c5bd837ddf..35af58bec2 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -21,6 +21,31 @@ mod sat; #[cfg(any(all(not(target_arch = "aarch64"), target_feature = "v6",), doc))] pub use self::sat::*; +// Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. 
QADD) +// We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see +// section 5.4.7) +// Here we workaround the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on +// '+v5te' rather than on '+dsp' +#[cfg(all( + not(target_arch = "aarch64"), + any( + // >= v5TE but excludes v7-M + all(target_feature = "v5te", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp"), + ) +))] +mod dsp; + +#[cfg(all( + not(target_arch = "aarch64"), + any( + all(target_feature = "v5te", not(target_feature = "mclass")), + all(target_feature = "mclass", target_feature = "dsp"), + ) +))] +pub use self::dsp::*; + #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] mod v7; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs index 03972c0c4b..61d61b5b24 100644 --- a/crates/stdarch-verify/tests/arm.rs +++ b/crates/stdarch-verify/tests/arm.rs @@ -371,6 +371,21 @@ fn verify_all_signatures() { "vsriq_n_p8", "vsri_n_p16", "vsriq_n_p16", + "__smulbb", + "__smultb", + "__smulbt", + "__smultt", + "__smulwb", + "__smulwt", + "__qadd", + "__qsub", + "__qdbl", + "__smlabb", + "__smlabt", + "__smlatb", + "__smlatt", + "__smlawb", + "__smlawt", ]; if !skip.contains(&rust.name) { println!( @@ -396,7 +411,13 @@ fn verify_all_signatures() { } // Skip some intrinsics that are present in GCC and Clang but // are missing from the official documentation. - let skip_intrinsic_verify = ["vmov_n_p64", "vmovq_n_p64", "vreinterpret_p64_s64", "vreinterpret_f32_p64", "vreinterpretq_f32_p64"]; + let skip_intrinsic_verify = [ + "vmov_n_p64", + "vmovq_n_p64", + "vreinterpret_p64_s64", + "vreinterpret_f32_p64", + "vreinterpretq_f32_p64", + ]; let arm = match map.get(rust.name) { Some(i) => i, None => { From 00a0bbd1e6c153ceae9c176e09698542d2cf1cd5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 3 Apr 2021 11:19:25 +0800 Subject: [PATCH 09/37] Make arm/dsp functions show up in docs --- crates/core_arch/src/arm/mod.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 35af58bec2..48ceb2496d 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -26,7 +26,7 @@ pub use self::sat::*; // section 5.4.7) // Here we workaround the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on // '+v5te' rather than on '+dsp' -#[cfg(all( +#[cfg(any(all( not(target_arch = "aarch64"), any( // >= v5TE but excludes v7-M @@ -34,15 +34,18 @@ pub use self::sat::*; // v7E-M all(target_feature = "mclass", target_feature = "dsp"), ) -))] +), doc))] mod dsp; -#[cfg(all( - not(target_arch = "aarch64"), - any( - all(target_feature = "v5te", not(target_feature = "mclass")), - all(target_feature = "mclass", target_feature = "dsp"), - ) +#[cfg(any( + all( + not(target_arch = "aarch64"), + any( + all(target_feature = "v5te", not(target_feature = "mclass")), + all(target_feature = "mclass", target_feature = "dsp"), + ) + ), + doc ))] pub use self::dsp::*; From 0610befe443f0779c7babd4f2750d26c5f038edf Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 3 Apr 2021 11:28:39 +0800 Subject: [PATCH 10/37] Fix use of 'dsp' module within simd32 (it was moved to /arm) --- crates/core_arch/src/acle/simd32.rs | 2 +- crates/core_arch/src/arm/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/acle/simd32.rs index 04183d4b88..5cae2fc2aa 100644 --- a/crates/core_arch/src/acle/simd32.rs +++ b/crates/core_arch/src/acle/simd32.rs @@ -65,7 +65,7 @@ #[cfg(test)] use stdarch_test::assert_instr; -use crate::{core_arch::acle::dsp::int16x2_t, mem::transmute}; +use crate::{core_arch::arm::dsp::int16x2_t, mem::transmute}; types! { /// ARM-specific 32-bit wide vector of four packed `i8`. diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 48ceb2496d..06bfb11269 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -35,7 +35,7 @@ pub use self::sat::*; all(target_feature = "mclass", target_feature = "dsp"), ) ), doc))] -mod dsp; +pub(crate) mod dsp; #[cfg(any( all( From 4a101b82cbde7c648404cf8bf83133a17b8111c2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 3 Apr 2021 11:41:51 +0800 Subject: [PATCH 11/37] Move acle/simd32 to arm/simd32; fix tests suite --- crates/core_arch/src/acle/mod.rs | 22 ----------------- crates/core_arch/src/arm/mod.rs | 22 +++++++++++++++++ crates/core_arch/src/{acle => arm}/simd32.rs | 0 crates/stdarch-verify/src/lib.rs | 1 + crates/stdarch-verify/tests/arm.rs | 26 ++++++++++++++++++++ 5 files changed, 49 insertions(+), 22 deletions(-) rename crates/core_arch/src/{acle => arm}/simd32.rs (100%) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index e12c233681..a9094adffc 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -65,28 +65,6 @@ mod ex; pub use self::ex::*; -// Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says -// Section 5.4.9 of ACLE. We'll expose these for the A profile even if deprecated -#[cfg(all( - not(target_arch = "aarch64"), - any( - // v7-A, v7-R - all(target_feature = "v6", not(target_feature = "mclass")), - // v7E-M - all(target_feature = "mclass", target_feature = "dsp") - ) -))] -mod simd32; - -#[cfg(all( - not(target_arch = "aarch64"), - any( - all(target_feature = "v6", not(target_feature = "mclass")), - all(target_feature = "mclass", target_feature = "dsp") - ) -))] -pub use self::simd32::*; - #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] mod crc; #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 06bfb11269..3afa2e900e 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -49,6 +49,28 @@ pub(crate) mod dsp; ))] pub use self::dsp::*; +// Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says +// Section 5.4.9 of ACLE. 
We'll expose these for the A profile even if deprecated +#[cfg(all( + not(target_arch = "aarch64"), + any( + // v7-A, v7-R + all(target_feature = "v6", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp") + ) +))] +mod simd32; + +#[cfg(all( + not(target_arch = "aarch64"), + any( + all(target_feature = "v6", not(target_feature = "mclass")), + all(target_feature = "mclass", target_feature = "dsp") + ) +))] +pub use self::simd32::*; + #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] mod v7; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] diff --git a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/arm/simd32.rs similarity index 100% rename from crates/core_arch/src/acle/simd32.rs rename to crates/core_arch/src/arm/simd32.rs diff --git a/crates/stdarch-verify/src/lib.rs b/crates/stdarch-verify/src/lib.rs index 48b402737f..3aa3b7e62d 100644 --- a/crates/stdarch-verify/src/lib.rs +++ b/crates/stdarch-verify/src/lib.rs @@ -225,6 +225,7 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream { "int64x1_t" => quote! { &I64X1 }, "int64x2_t" => quote! { &I64X2 }, "uint8x8_t" => quote! { &U8X8 }, + "uint8x4_t" => quote! { &U8X4 }, "uint8x8x2_t" => quote! { &U8X8X2 }, "uint8x16x2_t" => quote! { &U8X16X2 }, "uint8x16x3_t" => quote! { &U8X16X3 }, diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs index 61d61b5b24..890a077d54 100644 --- a/crates/stdarch-verify/tests/arm.rs +++ b/crates/stdarch-verify/tests/arm.rs @@ -149,6 +149,7 @@ static U8X16X2: Type = Type::U(8, 16, 2); static U8X16X3: Type = Type::U(8, 16, 3); static U8X16X4: Type = Type::U(8, 16, 4); static U8X8: Type = Type::U(8, 8, 1); +static U8X4: Type = Type::U(8, 4, 1); static U8X8X2: Type = Type::U(8, 8, 2); static U8X8X3: Type = Type::U(8, 8, 3); static U8X8X4: Type = Type::U(8, 8, 4); @@ -386,6 +387,30 @@ fn verify_all_signatures() { "__smlatt", "__smlawb", "__smlawt", + "__qadd8", + "__qsub8", + "__qsub16", + "__qadd16", + "__qasx", + "__qsax", + "__sadd16", + "__sadd8", + "__smlad", + "__smlsd", + "__sasx", + "__sel", + "__shadd8", + "__shadd16", + "__shsub8", + "__usub8", + "__ssub8", + "__shsub16", + "__smuad", + "__smuadx", + "__smusd", + "__smusdx", + "__usad8", + "__usada8", ]; if !skip.contains(&rust.name) { println!( @@ -427,6 +452,7 @@ fn verify_all_signatures() { // TODO: we still need to verify these intrinsics or find a // reference for them, need to figure out where though! 
if !rust.file.ends_with("dsp.rs\"") + && !rust.file.ends_with("simd32.rs\"") && !rust.file.ends_with("cmsis.rs\"") && !rust.file.ends_with("v6.rs\"") && !rust.file.ends_with("v7.rs\"") From 3263581feb2ff83d2cec2dcb3561155f6ebab65e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 4 Apr 2021 20:50:21 +0800 Subject: [PATCH 12/37] Move arm/neon to acle/neon (soon arm_shared) --- crates/core_arch/src/aarch64/neon/mod.rs | 6 ++--- crates/core_arch/src/acle/mod.rs | 8 ++++++ .../src/{arm => acle}/neon/generated.rs | 0 .../src/{arm => acle}/neon/load_tests.rs | 0 .../core_arch/src/{arm => acle}/neon/mod.rs | 0 .../neon/shift_and_insert_tests.rs | 0 .../{arm => acle}/neon/table_lookup_tests.rs | 0 crates/core_arch/src/arm/mod.rs | 6 ----- crates/stdarch-verify/tests/arm.rs | 25 +++++++++++++++++++ 9 files changed, 36 insertions(+), 9 deletions(-) rename crates/core_arch/src/{arm => acle}/neon/generated.rs (100%) rename crates/core_arch/src/{arm => acle}/neon/load_tests.rs (100%) rename crates/core_arch/src/{arm => acle}/neon/mod.rs (100%) rename crates/core_arch/src/{arm => acle}/neon/shift_and_insert_tests.rs (100%) rename crates/core_arch/src/{arm => acle}/neon/table_lookup_tests.rs (100%) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 719aa2ed60..2c35abfb9f 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -4219,13 +4219,13 @@ mod tests { #[cfg(test)] #[cfg(target_endian = "little")] -#[path = "../../arm/neon/table_lookup_tests.rs"] +#[path = "../../acle/neon/table_lookup_tests.rs"] mod table_lookup_tests; #[cfg(test)] -#[path = "../../arm/neon/shift_and_insert_tests.rs"] +#[path = "../../acle/neon/shift_and_insert_tests.rs"] mod shift_and_insert_tests; #[cfg(test)] -#[path = "../../arm/neon/load_tests.rs"] +#[path = "../../acle/neon/load_tests.rs"] mod load_tests; diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index a9094adffc..9375e9a3da 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -47,6 +47,9 @@ //! //! - [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest) +// Only for 'neon' submodule +#![allow(non_camel_case_types)] + // 8, 7 and 6-M are supported via dedicated instructions like DMB. All other arches are supported // via CP15 instructions. 
See Section 10.1 of ACLE mod barrier; @@ -75,6 +78,11 @@ mod crypto; #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub use self::crypto::*; +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +mod neon; +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +pub use self::neon::*; + mod sealed { pub trait Dmb { unsafe fn __dmb(&self); diff --git a/crates/core_arch/src/arm/neon/generated.rs b/crates/core_arch/src/acle/neon/generated.rs similarity index 100% rename from crates/core_arch/src/arm/neon/generated.rs rename to crates/core_arch/src/acle/neon/generated.rs diff --git a/crates/core_arch/src/arm/neon/load_tests.rs b/crates/core_arch/src/acle/neon/load_tests.rs similarity index 100% rename from crates/core_arch/src/arm/neon/load_tests.rs rename to crates/core_arch/src/acle/neon/load_tests.rs diff --git a/crates/core_arch/src/arm/neon/mod.rs b/crates/core_arch/src/acle/neon/mod.rs similarity index 100% rename from crates/core_arch/src/arm/neon/mod.rs rename to crates/core_arch/src/acle/neon/mod.rs diff --git a/crates/core_arch/src/arm/neon/shift_and_insert_tests.rs b/crates/core_arch/src/acle/neon/shift_and_insert_tests.rs similarity index 100% rename from crates/core_arch/src/arm/neon/shift_and_insert_tests.rs rename to crates/core_arch/src/acle/neon/shift_and_insert_tests.rs diff --git a/crates/core_arch/src/arm/neon/table_lookup_tests.rs b/crates/core_arch/src/acle/neon/table_lookup_tests.rs similarity index 100% rename from crates/core_arch/src/arm/neon/table_lookup_tests.rs rename to crates/core_arch/src/acle/neon/table_lookup_tests.rs diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 3afa2e900e..ef4045e5e7 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -5,7 +5,6 @@ //! //! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf //! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics -#![allow(non_camel_case_types)] mod armclang; @@ -76,11 +75,6 @@ mod v7; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] pub use self::v7::*; -#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] -mod neon; -#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] -pub use self::neon::*; - pub use crate::core_arch::acle::*; #[cfg(test)] diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs index 890a077d54..f4b95e5a2d 100644 --- a/crates/stdarch-verify/tests/arm.rs +++ b/crates/stdarch-verify/tests/arm.rs @@ -411,6 +411,31 @@ fn verify_all_signatures() { "__smusdx", "__usad8", "__usada8", + "vld1_s8", + "vld1q_s8", + "vld1q_s8", + "vld1_s16", + "vld1q_s16", + "vld1_s32", + "vld1q_s32", + "vld1_s64", + "vld1q_s64", + "vld1_u8", + "vld1q_u8", + "vld1_u16", + "vld1q_u16", + "vld1_u32", + "vld1q_u32", + "vld1_u64", + "vld1q_u64", + "vld1_p8", + "vld1q_p8", + "vld1_p16", + "vld1q_p16", + "vld1_f32", + "vld1q_f32", + "vld1_f64", + "vld1q_f64", ]; if !skip.contains(&rust.name) { println!( From 4ae11b350b9c592f3d0a8c9883b82f1b0ff8c560 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 4 Apr 2021 21:29:34 +0800 Subject: [PATCH 13/37] The code generator now places output in acle. 
--- crates/stdarch-gen/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index 3251b9f6bf..8dd6faaf3f 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -1429,7 +1429,7 @@ mod test { let arm_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap()) .join("src") - .join("arm") + .join("acle") .join("neon"); std::fs::create_dir_all(&arm_out_path)?; From 02de8486155903411f6943f955dedab02e9a6f01 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 7 Apr 2021 12:52:30 +0800 Subject: [PATCH 14/37] =?UTF-8?q?Move=20ARM=20specific=20neon=20functions?= =?UTF-8?q?=20in=20acle/neon/=E2=80=A6=20moved=20to=20arm/neon.rs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/core_arch/src/acle/crypto.rs | 2 +- crates/core_arch/src/acle/mod.rs | 2 +- crates/core_arch/src/acle/neon/mod.rs | 1390 +------------------------ crates/core_arch/src/arm/mod.rs | 3 + crates/core_arch/src/arm/neon.rs | 1383 ++++++++++++++++++++++++ crates/stdarch-verify/tests/arm.rs | 12 + 6 files changed, 1412 insertions(+), 1380 deletions(-) create mode 100644 crates/core_arch/src/arm/neon.rs diff --git a/crates/core_arch/src/acle/crypto.rs b/crates/core_arch/src/acle/crypto.rs index 8361e39646..40f18c3793 100644 --- a/crates/core_arch/src/acle/crypto.rs +++ b/crates/core_arch/src/acle/crypto.rs @@ -1,4 +1,4 @@ -use crate::core_arch::arm::{uint32x4_t, uint8x16_t}; +use crate::core_arch::acle::{uint32x4_t, uint8x16_t}; #[allow(improper_ctypes)] extern "C" { diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 9375e9a3da..6f45c0f27b 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -79,7 +79,7 @@ mod crypto; pub use self::crypto::*; #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] -mod neon; +pub(crate) mod neon; #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub use self::neon::*; diff --git a/crates/core_arch/src/acle/neon/mod.rs b/crates/core_arch/src/acle/neon/mod.rs index cdb0f82c9f..11e0113bb9 100644 --- a/crates/core_arch/src/acle/neon/mod.rs +++ b/crates/core_arch/src/acle/neon/mod.rs @@ -5,8 +5,6 @@ mod generated; #[rustfmt::skip] pub use self::generated::*; -#[cfg(target_arch = "arm")] -use crate::mem::align_of; use crate::{ core_arch::simd::*, core_arch::simd_llvm::*, hint::unreachable_unchecked, mem::transmute, }; @@ -213,74 +211,74 @@ extern "C" { target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8" )] - fn vpaddl_s8_(a: int8x8_t) -> int16x4_t; + pub(crate) fn vpaddl_s8_(a: int8x8_t) -> int16x4_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16" )] - fn vpaddl_s16_(a: int16x4_t) -> int32x2_t; + pub(crate) fn vpaddl_s16_(a: int16x4_t) -> int32x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32" )] - fn vpaddl_s32_(a: int32x2_t) -> int64x1_t; + pub(crate) fn vpaddl_s32_(a: int32x2_t) -> int64x1_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8" )] - fn vpaddlq_s8_(a: int8x16_t) -> int16x8_t; + pub(crate) fn vpaddlq_s8_(a: int8x16_t) -> int16x8_t; 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16" )] - fn vpaddlq_s16_(a: int16x8_t) -> int32x4_t; + pub(crate) fn vpaddlq_s16_(a: int16x8_t) -> int32x4_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32" )] - fn vpaddlq_s32_(a: int32x4_t) -> int64x2_t; + pub(crate) fn vpaddlq_s32_(a: int32x4_t) -> int64x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8" )] - fn vpaddl_u8_(a: uint8x8_t) -> uint16x4_t; + pub(crate) fn vpaddl_u8_(a: uint8x8_t) -> uint16x4_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16" )] - fn vpaddl_u16_(a: uint16x4_t) -> uint32x2_t; + pub(crate) fn vpaddl_u16_(a: uint16x4_t) -> uint32x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32" )] - fn vpaddl_u32_(a: uint32x2_t) -> uint64x1_t; + pub(crate) fn vpaddl_u32_(a: uint32x2_t) -> uint64x1_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8" )] - fn vpaddlq_u8_(a: uint8x16_t) -> uint16x8_t; + pub(crate) fn vpaddlq_u8_(a: uint8x16_t) -> uint16x8_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16" )] - fn vpaddlq_u16_(a: uint16x8_t) -> uint32x4_t; + pub(crate) fn vpaddlq_u16_(a: uint16x8_t) -> uint32x4_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32" )] - fn vpaddlq_u32_(a: uint32x4_t) -> uint64x2_t; + pub(crate) fn vpaddlq_u32_(a: uint32x4_t) -> uint64x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.ctpop.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctpop.v8i8")] @@ -309,301 +307,6 @@ extern "C" { fn vclzq_s32_(a: int32x4_t) -> int32x4_t; } -#[cfg(target_arch = "arm")] -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.arm.neon.vbsl.v8i8"] - fn vbsl_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vbsl.v16i8"] - fn vbslq_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; - #[link_name = "llvm.arm.neon.vpadals.v4i16.v8i8"] - fn vpadal_s8_(a: int16x4_t, b: int8x8_t) -> int16x4_t; - #[link_name = "llvm.arm.neon.vpadals.v2i32.v4i16"] - fn vpadal_s16_(a: int32x2_t, b: int16x4_t) -> int32x2_t; - #[link_name = "llvm.arm.neon.vpadals.v1i64.v2i32"] - fn vpadal_s32_(a: int64x1_t, b: int32x2_t) -> int64x1_t; - #[link_name = "llvm.arm.neon.vpadals.v8i16.v16i8"] - fn vpadalq_s8_(a: int16x8_t, b: int8x16_t) -> int16x8_t; - #[link_name = "llvm.arm.neon.vpadals.v4i32.v8i16"] - fn vpadalq_s16_(a: int32x4_t, b: int16x8_t) -> int32x4_t; - #[link_name = "llvm.arm.neon.vpadals.v2i64.v4i32"] - fn vpadalq_s32_(a: int64x2_t, b: int32x4_t) -> int64x2_t; - - #[link_name = "llvm.arm.neon.vpadalu.v4i16.v8i8"] - fn vpadal_u8_(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t; - #[link_name = 
"llvm.arm.neon.vpadalu.v2i32.v4i16"] - fn vpadal_u16_(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t; - #[link_name = "llvm.arm.neon.vpadalu.v1i64.v2i32"] - fn vpadal_u32_(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t; - #[link_name = "llvm.arm.neon.vpadalu.v8i16.v16i8"] - fn vpadalq_u8_(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t; - #[link_name = "llvm.arm.neon.vpadalu.v4i32.v8i16"] - fn vpadalq_u16_(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t; - #[link_name = "llvm.arm.neon.vpadalu.v2i64.v4i32"] - fn vpadalq_u32_(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t; - - #[link_name = "llvm.arm.neon.vtbl1"] - fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbl2"] - fn vtbl2(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbl3"] - fn vtbl3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbl4"] - fn vtbl4(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; - - #[link_name = "llvm.arm.neon.vtbx1"] - fn vtbx1(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbx2"] - fn vtbx2(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbx3"] - fn vtbx3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbx4"] - fn vtbx4( - a: int8x8_t, - b: int8x8_t, - b: int8x8_t, - c: int8x8_t, - d: int8x8_t, - e: int8x8_t, - ) -> int8x8_t; - - #[link_name = "llvm.arm.neon.vshiftins.v8i8"] - fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, shift: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vshiftins.v16i8"] - fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, shift: int8x16_t) -> int8x16_t; - #[link_name = "llvm.arm.neon.vshiftins.v4i16"] - fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, shift: int16x4_t) -> int16x4_t; - #[link_name = "llvm.arm.neon.vshiftins.v8i16"] - fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, shift: int16x8_t) -> int16x8_t; - #[link_name = "llvm.arm.neon.vshiftins.v2i32"] - fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, shift: int32x2_t) -> int32x2_t; - #[link_name = "llvm.arm.neon.vshiftins.v4i32"] - fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, shift: int32x4_t) -> int32x4_t; - #[link_name = "llvm.arm.neon.vshiftins.v1i64"] - fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, shift: int64x1_t) -> int64x1_t; - #[link_name = "llvm.arm.neon.vshiftins.v2i64"] - fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, shift: int64x2_t) -> int64x2_t; - - #[link_name = "llvm.arm.neon.vld1.v8i8.p0i8"] - fn vld1_v8i8(addr: *const i8, align: i32) -> int8x8_t; - #[link_name = "llvm.arm.neon.vld1.v16i8.p0i8"] - fn vld1q_v16i8(addr: *const i8, align: i32) -> int8x16_t; - #[link_name = "llvm.arm.neon.vld1.v4i16.p0i8"] - fn vld1_v4i16(addr: *const i8, align: i32) -> int16x4_t; - #[link_name = "llvm.arm.neon.vld1.v8i16.p0i8"] - fn vld1q_v8i16(addr: *const i8, align: i32) -> int16x8_t; - #[link_name = "llvm.arm.neon.vld1.v2i32.p0i8"] - fn vld1_v2i32(addr: *const i8, align: i32) -> int32x2_t; - #[link_name = "llvm.arm.neon.vld1.v4i32.p0i8"] - fn vld1q_v4i32(addr: *const i8, align: i32) -> int32x4_t; - #[link_name = "llvm.arm.neon.vld1.v1i64.p0i8"] - fn vld1_v1i64(addr: *const i8, align: i32) -> int64x1_t; - #[link_name = "llvm.arm.neon.vld1.v2i64.p0i8"] - fn vld1q_v2i64(addr: *const i8, align: i32) -> int64x2_t; - #[link_name = "llvm.arm.neon.vld1.v2f32.p0i8"] - fn vld1_v2f32(addr: *const i8, align: i32) -> float32x2_t; - #[link_name = 
"llvm.arm.neon.vld1.v4f32.p0i8"] - fn vld1q_v4f32(addr: *const i8, align: i32) -> float32x4_t; -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.8"))] -pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { - vld1_v8i8(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.8"))] -pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { - vld1q_v16i8(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.16"))] -pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { - vld1_v4i16(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.16"))] -pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { - vld1q_v8i16(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vldr))] -pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { - vld1_v2i32(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.32"))] -pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { - vld1q_v4i32(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vldr))] -pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { - vld1_v1i64(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.64"))] -pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { - vld1q_v2i64(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.8"))] -pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { - transmute(vld1_v8i8(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.8"))] -pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { - transmute(vld1q_v16i8(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. 
-#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.16"))] -pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { - transmute(vld1_v4i16(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.16"))] -pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { - transmute(vld1q_v8i16(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vldr))] -pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { - transmute(vld1_v2i32(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.32"))] -pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { - transmute(vld1q_v4i32(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vldr))] -pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { - transmute(vld1_v1i64(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.64"))] -pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { - transmute(vld1q_v2i64(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.8"))] -pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { - transmute(vld1_v8i8(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.8"))] -pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { - transmute(vld1q_v16i8(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.16"))] -pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { - transmute(vld1_v4i16(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.16"))] -pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { - transmute(vld1q_v8i16(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. 
-#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vldr))] -pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { - vld1_v2f32(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.32"))] -pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { - vld1q_v4f32(ptr as *const i8, align_of::() as i32) -} - /// Load one single-element structure to one lane of one register. #[inline] #[target_feature(enable = "neon")] @@ -2157,210 +1860,6 @@ pub unsafe fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t { vpaddlq_u32_(a) } -/// Signed Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { - #[cfg(target_arch = "arm")] - { - vpadal_s8_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_s8_(b), a) - } -} - -/// Signed Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { - #[cfg(target_arch = "arm")] - { - vpadal_s16_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_s16_(b), a) - } -} - -/// Signed Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { - #[cfg(target_arch = "arm")] - { - vpadal_s32_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_s32_(b), a) - } -} - -/// Signed Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { - #[cfg(target_arch = "arm")] - { - vpadalq_s8_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_s8_(b), a) - } -} - -/// Signed Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { - #[cfg(target_arch = "arm")] - { - vpadalq_s16_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_s16_(b), a) - } -} - -/// Signed Add and Accumulate Long Pairwise. 
-#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { - #[cfg(target_arch = "arm")] - { - vpadalq_s32_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_s32_(b), a) - } -} - -/// Unsigned Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { - #[cfg(target_arch = "arm")] - { - vpadal_u8_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_u8_(b), a) - } -} - -/// Unsigned Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { - #[cfg(target_arch = "arm")] - { - vpadal_u16_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_u16_(b), a) - } -} - -/// Unsigned Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { - #[cfg(target_arch = "arm")] - { - vpadal_u32_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_u32_(b), a) - } -} - -/// Unsigned Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { - #[cfg(target_arch = "arm")] - { - vpadalq_u8_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_u8_(b), a) - } -} - -/// Unsigned Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { - #[cfg(target_arch = "arm")] - { - vpadalq_u16_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_u16_(b), a) - } -} - -/// Unsigned Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { - #[cfg(target_arch = "arm")] - { - vpadalq_u32_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_u32_(b), a) - } -} - /// Vector narrow integer. 
#[inline] #[target_feature(enable = "neon")] @@ -3367,304 +2866,6 @@ pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { vpmaxf_v2f32(a, b) } -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - vtbl1(a, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - transmute(vtbl1(transmute(a), transmute(b))) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { - transmute(vtbl1(transmute(a), transmute(b))) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { - vtbl2(a.0, a.1, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { - transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { - transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { - vtbl3(a.0, a.1, a.2, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { - transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { - transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { - vtbl4(a.0, a.1, a.2, a.3, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { - transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] 
-#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { - transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - vtbx1(a, b, c) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { - transmute(vtbx1(transmute(a), transmute(b), transmute(c))) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { - transmute(vtbx1(transmute(a), transmute(b), transmute(c))) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { - vtbx2(a, b.0, b.1, c) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { - transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { - transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { - vtbx3(a, b.0, b.1, b.2, c) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { - transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { - transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { - vtbx4(a, b.0, b.1, b.2, b.3, c) -} - 
-/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { - transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { - transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )) -} - /// Move vector element to general-purpose register #[inline] #[target_feature(enable = "neon")] @@ -4503,29 +3704,6 @@ pub unsafe fn vext_u64(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_ a } -// These float-to-int implementations have undefined behaviour when `a` overflows -// the destination type. Clang has the same problem: https://llvm.org/PR47510 - -/// Floating-point Convert to Signed fixed-point, rounding toward Zero (vector) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon")] -#[target_feature(enable = "v7")] -#[cfg_attr(test, assert_instr("vcvt.s32.f32"))] -pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t { - transmute(simd_cast::<_, i32x4>(transmute::<_, f32x4>(a))) -} - -/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon")] -#[target_feature(enable = "v7")] -#[cfg_attr(test, assert_instr("vcvt.u32.f32"))] -pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { - transmute(simd_cast::<_, u32x4>(transmute::<_, f32x4>(a))) -} - /// Population count per byte. 
#[inline] #[target_feature(enable = "neon")] @@ -4581,532 +3759,6 @@ pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { transmute(vcntq_s8_(transmute(a))) } -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert_imm3!(N); - let n = N as i8; - vshiftins_v8i8(a, b, int8x8_t(n, n, n, n, n, n, n, n)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert_imm3!(N); - let n = N as i8; - vshiftins_v16i8( - a, - b, - int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), - ) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_imm4!(N); - let n = N as i16; - vshiftins_v4i16(a, b, int16x4_t(n, n, n, n)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_imm4!(N); - let n = N as i16; - vshiftins_v8i16(a, b, int16x8_t(n, n, n, n, n, n, n, n)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N: i32 where N >= 0 && N <= 31); - vshiftins_v2i32(a, b, int32x2_t(N, N)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N: i32 where N >= 0 && N <= 31); - vshiftins_v4i32(a, b, int32x4_t(N, N, N, N)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N : i32 where 0 <= N && N <= 63); - vshiftins_v1i64(a, b, int64x1_t(N as i64)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N : i32 where 0 <= N && N <= 63); - vshiftins_v2i64(a, b, int64x2_t(N as i64, N as i64)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert_imm3!(N); - let n = N as i8; - transmute(vshiftins_v8i8( - transmute(a), 
- transmute(b), - int8x8_t(n, n, n, n, n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert_imm3!(N); - let n = N as i8; - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_imm4!(N); - let n = N as i16; - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t(n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_imm4!(N); - let n = N as i16; - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t(n, n, n, n, n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N: i32 where N >= 0 && N <= 31); - transmute(vshiftins_v2i32(transmute(a), transmute(b), int32x2_t(N, N))) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N: i32 where N >= 0 && N <= 31); - transmute(vshiftins_v4i32( - transmute(a), - transmute(b), - int32x4_t(N, N, N, N), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N : i32 where 0 <= N && N <= 63); - transmute(vshiftins_v1i64( - transmute(a), - transmute(b), - int64x1_t(N as i64), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N : i32 where 0 <= N && N <= 63); - transmute(vshiftins_v2i64( - transmute(a), - transmute(b), - int64x2_t(N as i64, N as i64), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert_imm3!(N); - let n = N as i8; - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t(n, n, n, n, n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = 
"neon,v7")] -#[cfg_attr(test, assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert_imm3!(N); - let n = N as i8; - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert_imm4!(N); - let n = N as i16; - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t(n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert_imm4!(N); - let n = N as i16; - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t(n, n, n, n, n, n, n, n), - )) -} - -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert!(N : i32 where 1 <= N && N <= 8); - let n = -N as i8; - vshiftins_v8i8(a, b, int8x8_t(n, n, n, n, n, n, n, n)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert!(N : i32 where 1 <= N && N <= 8); - let n = -N as i8; - vshiftins_v16i8( - a, - b, - int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), - ) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert!(N : i32 where 1 <= N && N <= 16); - let n = -N as i16; - vshiftins_v4i16(a, b, int16x4_t(n, n, n, n)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert!(N : i32 where 1 <= N && N <= 16); - let n = -N as i16; - vshiftins_v8i16(a, b, int16x8_t(n, n, n, n, n, n, n, n)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N : i32 where 1 <= N && N <= 32); - vshiftins_v2i32(a, b, int32x2_t(-N, -N)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N : i32 where 1 <= N && N <= 32); - vshiftins_v4i32(a, 
b, int32x4_t(-N, -N, -N, -N)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N : i32 where 1 <= N && N <= 64); - vshiftins_v1i64(a, b, int64x1_t(-N as i64)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N : i32 where 1 <= N && N <= 64); - vshiftins_v2i64(a, b, int64x2_t(-N as i64, -N as i64)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert!(N : i32 where 1 <= N && N <= 8); - let n = -N as i8; - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t(n, n, n, n, n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert!(N : i32 where 1 <= N && N <= 8); - let n = -N as i8; - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert!(N : i32 where 1 <= N && N <= 16); - let n = -N as i16; - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t(n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert!(N : i32 where 1 <= N && N <= 16); - let n = -N as i16; - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t(n, n, n, n, n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N : i32 where 1 <= N && N <= 32); - transmute(vshiftins_v2i32( - transmute(a), - transmute(b), - int32x2_t(-N, -N), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N : i32 where 1 <= N && N <= 32); - transmute(vshiftins_v4i32( - transmute(a), - transmute(b), - int32x4_t(-N, -N, -N, -N), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] 
-#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N : i32 where 1 <= N && N <= 64); - transmute(vshiftins_v1i64( - transmute(a), - transmute(b), - int64x1_t(-N as i64), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N : i32 where 1 <= N && N <= 64); - transmute(vshiftins_v2i64( - transmute(a), - transmute(b), - int64x2_t(-N as i64, -N as i64), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert!(N : i32 where 1 <= N && N <= 8); - let n = -N as i8; - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t(n, n, n, n, n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert!(N : i32 where 1 <= N && N <= 8); - let n = -N as i8; - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert!(N : i32 where 1 <= N && N <= 16); - let n = -N as i16; - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t(n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert!(N : i32 where 1 <= N && N <= 16); - let n = -N as i16; - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t(n, n, n, n, n, n, n, n), - )) -} - /// Reversing vector elements (swap endianness) #[inline] #[target_feature(enable = "neon")] @@ -5855,24 +4507,6 @@ mod tests { assert_eq!(r, e) } - #[cfg(target_arch = "arm")] - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_s32_f32() { - let f = f32x4::new(-1., 2., 3., 4.); - let e = i32x4::new(-1, 2, 3, 4); - let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f))); - assert_eq!(r, e); - } - - #[cfg(target_arch = "arm")] - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_u32_f32() { - let f = f32x4::new(1., 2., 3., 4.); - let e = u32x4::new(1, 2, 3, 4); - let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vget_lane_u8() { let v = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index ef4045e5e7..61260044f0 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -80,6 
+80,9 @@ pub use crate::core_arch::acle::*; #[cfg(test)] use stdarch_test::assert_instr; +mod neon; +pub use neon::*; + /// Generates the trap instruction `UDF` #[cfg(target_arch = "arm")] #[cfg_attr(test, assert_instr(udf))] diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs new file mode 100644 index 0000000000..38cb48f7e4 --- /dev/null +++ b/crates/core_arch/src/arm/neon.rs @@ -0,0 +1,1383 @@ +#[rustfmt::skip] +use crate::core_arch::acle::neon::*; +use crate::core_arch::simd_llvm::*; +#[allow(unused_imports)] +use crate::mem::transmute; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(non_camel_case_types)] +pub(crate) type p8 = u8; +#[allow(non_camel_case_types)] +pub(crate) type p16 = u16; + +#[cfg(target_arch = "arm")] +use crate::mem::align_of; + +#[cfg(target_arch = "arm")] +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.arm.neon.vbsl.v8i8"] + fn vbsl_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vbsl.v16i8"] + fn vbslq_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + #[link_name = "llvm.arm.neon.vpadals.v4i16.v8i8"] + fn vpadal_s8_(a: int16x4_t, b: int8x8_t) -> int16x4_t; + #[link_name = "llvm.arm.neon.vpadals.v2i32.v4i16"] + fn vpadal_s16_(a: int32x2_t, b: int16x4_t) -> int32x2_t; + #[link_name = "llvm.arm.neon.vpadals.v1i64.v2i32"] + fn vpadal_s32_(a: int64x1_t, b: int32x2_t) -> int64x1_t; + #[link_name = "llvm.arm.neon.vpadals.v8i16.v16i8"] + fn vpadalq_s8_(a: int16x8_t, b: int8x16_t) -> int16x8_t; + #[link_name = "llvm.arm.neon.vpadals.v4i32.v8i16"] + fn vpadalq_s16_(a: int32x4_t, b: int16x8_t) -> int32x4_t; + #[link_name = "llvm.arm.neon.vpadals.v2i64.v4i32"] + fn vpadalq_s32_(a: int64x2_t, b: int32x4_t) -> int64x2_t; + + #[link_name = "llvm.arm.neon.vpadalu.v4i16.v8i8"] + fn vpadal_u8_(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t; + #[link_name = "llvm.arm.neon.vpadalu.v2i32.v4i16"] + fn vpadal_u16_(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t; + #[link_name = "llvm.arm.neon.vpadalu.v1i64.v2i32"] + fn vpadal_u32_(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t; + #[link_name = "llvm.arm.neon.vpadalu.v8i16.v16i8"] + fn vpadalq_u8_(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t; + #[link_name = "llvm.arm.neon.vpadalu.v4i32.v8i16"] + fn vpadalq_u16_(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t; + #[link_name = "llvm.arm.neon.vpadalu.v2i64.v4i32"] + fn vpadalq_u32_(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t; + + #[link_name = "llvm.arm.neon.vtbl1"] + fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbl2"] + fn vtbl2(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbl3"] + fn vtbl3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbl4"] + fn vtbl4(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; + + #[link_name = "llvm.arm.neon.vtbx1"] + fn vtbx1(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbx2"] + fn vtbx2(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbx3"] + fn vtbx3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbx4"] + fn vtbx4( + a: int8x8_t, + b: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + e: int8x8_t, + ) -> int8x8_t; + + #[link_name = "llvm.arm.neon.vshiftins.v8i8"] + fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, shift: int8x8_t) -> int8x8_t; + #[link_name = 
"llvm.arm.neon.vshiftins.v16i8"] + fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, shift: int8x16_t) -> int8x16_t; + #[link_name = "llvm.arm.neon.vshiftins.v4i16"] + fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, shift: int16x4_t) -> int16x4_t; + #[link_name = "llvm.arm.neon.vshiftins.v8i16"] + fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, shift: int16x8_t) -> int16x8_t; + #[link_name = "llvm.arm.neon.vshiftins.v2i32"] + fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, shift: int32x2_t) -> int32x2_t; + #[link_name = "llvm.arm.neon.vshiftins.v4i32"] + fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, shift: int32x4_t) -> int32x4_t; + #[link_name = "llvm.arm.neon.vshiftins.v1i64"] + fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, shift: int64x1_t) -> int64x1_t; + #[link_name = "llvm.arm.neon.vshiftins.v2i64"] + fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, shift: int64x2_t) -> int64x2_t; + + #[link_name = "llvm.arm.neon.vld1.v8i8.p0i8"] + fn vld1_v8i8(addr: *const i8, align: i32) -> int8x8_t; + #[link_name = "llvm.arm.neon.vld1.v16i8.p0i8"] + fn vld1q_v16i8(addr: *const i8, align: i32) -> int8x16_t; + #[link_name = "llvm.arm.neon.vld1.v4i16.p0i8"] + fn vld1_v4i16(addr: *const i8, align: i32) -> int16x4_t; + #[link_name = "llvm.arm.neon.vld1.v8i16.p0i8"] + fn vld1q_v8i16(addr: *const i8, align: i32) -> int16x8_t; + #[link_name = "llvm.arm.neon.vld1.v2i32.p0i8"] + fn vld1_v2i32(addr: *const i8, align: i32) -> int32x2_t; + #[link_name = "llvm.arm.neon.vld1.v4i32.p0i8"] + fn vld1q_v4i32(addr: *const i8, align: i32) -> int32x4_t; + #[link_name = "llvm.arm.neon.vld1.v1i64.p0i8"] + fn vld1_v1i64(addr: *const i8, align: i32) -> int64x1_t; + #[link_name = "llvm.arm.neon.vld1.v2i64.p0i8"] + fn vld1q_v2i64(addr: *const i8, align: i32) -> int64x2_t; + #[link_name = "llvm.arm.neon.vld1.v2f32.p0i8"] + fn vld1_v2f32(addr: *const i8, align: i32) -> float32x2_t; + #[link_name = "llvm.arm.neon.vld1.v4f32.p0i8"] + fn vld1q_v4f32(addr: *const i8, align: i32) -> float32x4_t; +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.8"))] +pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { + vld1_v8i8(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.8"))] +pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { + vld1q_v16i8(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.16"))] +pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { + vld1_v4i16(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.16"))] +pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { + vld1q_v8i16(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. 
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vldr))]
+pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
+    vld1_v2i32(ptr as *const i8, align_of::<i32>() as i32)
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.32"))]
+pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
+    vld1q_v4i32(ptr as *const i8, align_of::<i32>() as i32)
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vldr))]
+pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
+    vld1_v1i64(ptr as *const i8, align_of::<i64>() as i32)
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.64"))]
+pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
+    vld1q_v2i64(ptr as *const i8, align_of::<i64>() as i32)
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.8"))]
+pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
+    transmute(vld1_v8i8(ptr as *const i8, align_of::<u8>() as i32))
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.8"))]
+pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
+    transmute(vld1q_v16i8(ptr as *const i8, align_of::<u8>() as i32))
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.16"))]
+pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
+    transmute(vld1_v4i16(ptr as *const i8, align_of::<u16>() as i32))
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.16"))]
+pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
+    transmute(vld1q_v8i16(ptr as *const i8, align_of::<u16>() as i32))
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vldr))]
+pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
+    transmute(vld1_v2i32(ptr as *const i8, align_of::<u32>() as i32))
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.32"))]
+pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
+    transmute(vld1q_v4i32(ptr as *const i8, align_of::<u32>() as i32))
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vldr))]
+pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
+    transmute(vld1_v1i64(ptr as *const i8, align_of::<u64>() as i32))
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.64"))]
+pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
+    transmute(vld1q_v2i64(ptr as *const i8, align_of::<u64>() as i32))
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.8"))]
+pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
+    transmute(vld1_v8i8(ptr as *const i8, align_of::<p8>() as i32))
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.8"))]
+pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
+    transmute(vld1q_v16i8(ptr as *const i8, align_of::<p8>() as i32))
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.16"))]
+pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
+    transmute(vld1_v4i16(ptr as *const i8, align_of::<p16>() as i32))
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.16"))]
+pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
+    transmute(vld1q_v8i16(ptr as *const i8, align_of::<p16>() as i32))
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vldr))]
+pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
+    vld1_v2f32(ptr as *const i8, align_of::<f32>() as i32)
+}
+
+/// Load multiple single-element structures to one, two, three, or four registers.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr("vld1.32"))]
+pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
+    vld1q_v4f32(ptr as *const i8, align_of::<f32>() as i32)
+}
+
+/// Signed Add and Accumulate Long Pairwise.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))]
+pub unsafe fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
+    #[cfg(target_arch = "arm")]
+    {
+        vpadal_s8_(a, b)
+    }
+    #[cfg(target_arch = "aarch64")]
+    {
+        simd_add(vpaddl_s8_(b), a)
+    }
+}
+
+/// Signed Add and Accumulate Long Pairwise.
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { + #[cfg(target_arch = "arm")] + { + vpadal_s16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_s16_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { + #[cfg(target_arch = "arm")] + { + vpadal_s32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_s32_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + #[cfg(target_arch = "arm")] + { + vpadalq_s8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_s8_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + #[cfg(target_arch = "arm")] + { + vpadalq_s16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_s16_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + #[cfg(target_arch = "arm")] + { + vpadalq_s32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_s32_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { + #[cfg(target_arch = "arm")] + { + vpadal_u8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_u8_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { + #[cfg(target_arch = "arm")] + { + vpadal_u16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_u16_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { + #[cfg(target_arch = "arm")] + { + vpadal_u32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_u32_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + #[cfg(target_arch = "arm")] + { + vpadalq_u8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_u8_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + #[cfg(target_arch = "arm")] + { + vpadalq_u16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_u16_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + #[cfg(target_arch = "arm")] + { + vpadalq_u32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_u32_(b), a) + } +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + vtbl1(a, b) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + transmute(vtbl1(transmute(a), transmute(b))) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { + transmute(vtbl1(transmute(a), transmute(b))) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { + vtbl2(a.0, a.1, b) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { + transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, 
assert_instr(vtbl))] +pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { + transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { + vtbl3(a.0, a.1, a.2, b) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { + transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { + transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { + vtbl4(a.0, a.1, a.2, a.3, b) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { + transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { + transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + vtbx1(a, b, c) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + transmute(vtbx1(transmute(a), transmute(b), transmute(c))) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { + transmute(vtbx1(transmute(a), transmute(b), transmute(c))) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { + vtbx2(a, b.0, b.1, c) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: 
uint8x8_t) -> uint8x8_t { + transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { + transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { + vtbx3(a, b.0, b.1, b.2, c) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { + transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { + transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { + vtbx4(a, b.0, b.1, b.2, b.3, c) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { + transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { + transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + transmute(c), + )) +} + +// These float-to-int implementations have undefined behaviour when `a` overflows +// the destination type. 
Clang has the same problem: https://llvm.org/PR47510 + +/// Floating-point Convert to Signed fixed-point, rounding toward Zero (vector) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon")] +#[target_feature(enable = "v7")] +#[cfg_attr(test, assert_instr("vcvt.s32.f32"))] +pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t { + transmute(simd_cast::<_, i32x4>(transmute::<_, f32x4>(a))) +} + +/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon")] +#[target_feature(enable = "v7")] +#[cfg_attr(test, assert_instr("vcvt.u32.f32"))] +pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { + transmute(simd_cast::<_, u32x4>(transmute::<_, f32x4>(a))) +} + +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_imm3!(N); + let n = N as i8; + vshiftins_v8i8(a, b, int8x8_t(n, n, n, n, n, n, n, n)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_imm3!(N); + let n = N as i8; + vshiftins_v16i8( + a, + b, + int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), + ) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_imm4!(N); + let n = N as i16; + vshiftins_v4i16(a, b, int16x4_t(n, n, n, n)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_imm4!(N); + let n = N as i16; + vshiftins_v8i16(a, b, int16x8_t(n, n, n, n, n, n, n, n)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N: i32 where N >= 0 && N <= 31); + vshiftins_v2i32(a, b, int32x2_t(N, N)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N: i32 where N >= 0 && N <= 31); + vshiftins_v4i32(a, b, int32x4_t(N, N, N, N)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where 0 <= N && N <= 63); + vshiftins_v1i64(a, b, int64x1_t(N as i64)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = 
"neon,v7")] +#[cfg_attr(test, assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N : i32 where 0 <= N && N <= 63); + vshiftins_v2i64(a, b, int64x2_t(N as i64, N as i64)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + let n = N as i8; + transmute(vshiftins_v8i8( + transmute(a), + transmute(b), + int8x8_t(n, n, n, n, n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_imm3!(N); + let n = N as i8; + transmute(vshiftins_v16i8( + transmute(a), + transmute(b), + int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_imm4!(N); + let n = N as i16; + transmute(vshiftins_v4i16( + transmute(a), + transmute(b), + int16x4_t(n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + let n = N as i16; + transmute(vshiftins_v8i16( + transmute(a), + transmute(b), + int16x8_t(n, n, n, n, n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N: i32 where N >= 0 && N <= 31); + transmute(vshiftins_v2i32(transmute(a), transmute(b), int32x2_t(N, N))) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N: i32 where N >= 0 && N <= 31); + transmute(vshiftins_v4i32( + transmute(a), + transmute(b), + int32x4_t(N, N, N, N), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where 0 <= N && N <= 63); + transmute(vshiftins_v1i64( + transmute(a), + transmute(b), + int64x1_t(N as i64), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where 0 <= N && N 
<= 63); + transmute(vshiftins_v2i64( + transmute(a), + transmute(b), + int64x2_t(N as i64, N as i64), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_imm3!(N); + let n = N as i8; + transmute(vshiftins_v8i8( + transmute(a), + transmute(b), + int8x8_t(n, n, n, n, n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_imm3!(N); + let n = N as i8; + transmute(vshiftins_v16i8( + transmute(a), + transmute(b), + int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_imm4!(N); + let n = N as i16; + transmute(vshiftins_v4i16( + transmute(a), + transmute(b), + int16x4_t(n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_imm4!(N); + let n = N as i16; + transmute(vshiftins_v8i16( + transmute(a), + transmute(b), + int16x8_t(n, n, n, n, n, n, n, n), + )) +} + +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N : i32 where 1 <= N && N <= 8); + let n = -N as i8; + vshiftins_v8i8(a, b, int8x8_t(n, n, n, n, n, n, n, n)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N : i32 where 1 <= N && N <= 8); + let n = -N as i8; + vshiftins_v16i8( + a, + b, + int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), + ) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N : i32 where 1 <= N && N <= 16); + let n = -N as i16; + vshiftins_v4i16(a, b, int16x4_t(n, n, n, n)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N : i32 where 1 <= N && N <= 16); + let n = -N as i16; + vshiftins_v8i16(a, b, int16x8_t(n, n, n, n, n, n, n, n)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] 
+#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N : i32 where 1 <= N && N <= 32); + vshiftins_v2i32(a, b, int32x2_t(-N, -N)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N : i32 where 1 <= N && N <= 32); + vshiftins_v4i32(a, b, int32x4_t(-N, -N, -N, -N)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where 1 <= N && N <= 64); + vshiftins_v1i64(a, b, int64x1_t(-N as i64)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N : i32 where 1 <= N && N <= 64); + vshiftins_v2i64(a, b, int64x2_t(-N as i64, -N as i64)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N : i32 where 1 <= N && N <= 8); + let n = -N as i8; + transmute(vshiftins_v8i8( + transmute(a), + transmute(b), + int8x8_t(n, n, n, n, n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N : i32 where 1 <= N && N <= 8); + let n = -N as i8; + transmute(vshiftins_v16i8( + transmute(a), + transmute(b), + int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N : i32 where 1 <= N && N <= 16); + let n = -N as i16; + transmute(vshiftins_v4i16( + transmute(a), + transmute(b), + int16x4_t(n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N : i32 where 1 <= N && N <= 16); + let n = -N as i16; + transmute(vshiftins_v8i16( + transmute(a), + transmute(b), + int16x8_t(n, n, n, n, n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 
where 1 <= N && N <= 32); + transmute(vshiftins_v2i32( + transmute(a), + transmute(b), + int32x2_t(-N, -N), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N : i32 where 1 <= N && N <= 32); + transmute(vshiftins_v4i32( + transmute(a), + transmute(b), + int32x4_t(-N, -N, -N, -N), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where 1 <= N && N <= 64); + transmute(vshiftins_v1i64( + transmute(a), + transmute(b), + int64x1_t(-N as i64), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where 1 <= N && N <= 64); + transmute(vshiftins_v2i64( + transmute(a), + transmute(b), + int64x2_t(-N as i64, -N as i64), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert!(N : i32 where 1 <= N && N <= 8); + let n = -N as i8; + transmute(vshiftins_v8i8( + transmute(a), + transmute(b), + int8x8_t(n, n, n, n, n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert!(N : i32 where 1 <= N && N <= 8); + let n = -N as i8; + transmute(vshiftins_v16i8( + transmute(a), + transmute(b), + int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert!(N : i32 where 1 <= N && N <= 16); + let n = -N as i16; + transmute(vshiftins_v4i16( + transmute(a), + transmute(b), + int16x4_t(n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert!(N : i32 where 1 <= N && N <= 16); + let n = -N as i16; + transmute(vshiftins_v8i16( + transmute(a), + transmute(b), + int16x8_t(n, n, n, n, n, n, n, n), + )) +} + +#[cfg(test)] +mod tests { + #[cfg(target_arch = "arm")] + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_s32_f32() { + let f = f32x4::new(-1., 2., 3., 4.); + let e = i32x4::new(-1, 2, 3, 4); + let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f))); + assert_eq!(r, e); + } + + #[cfg(target_arch = "arm")] + #[simd_test(enable = 
"neon")] + unsafe fn test_vcvtq_u32_f32() { + let f = f32x4::new(1., 2., 3., 4.); + let e = u32x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f))); + assert_eq!(r, e); + } +} diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs index f4b95e5a2d..ed6b311a38 100644 --- a/crates/stdarch-verify/tests/arm.rs +++ b/crates/stdarch-verify/tests/arm.rs @@ -436,6 +436,18 @@ fn verify_all_signatures() { "vld1q_f32", "vld1_f64", "vld1q_f64", + "vpadal_s8", + "vpadal_s16", + "vpadal_s32", + "vpadalq_s8", + "vpadalq_s16", + "vpadalq_s32", + "vpadal_u8", + "vpadal_u16", + "vpadal_u32", + "vpadalq_u8", + "vpadalq_u16", + "vpadalq_u32", ]; if !skip.contains(&rust.name) { println!( From 2c48570385235b8bc4ec0783ac394dbbf4df9945 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 9 Apr 2021 10:44:08 +0800 Subject: [PATCH 15/37] Move aarch64/arm neon functions to acle/arm_shared --- crates/core_arch/src/acle/neon/mod.rs | 204 ++++++++++++++++++++++++++ crates/core_arch/src/arm/neon.rs | 204 -------------------------- 2 files changed, 204 insertions(+), 204 deletions(-) diff --git a/crates/core_arch/src/acle/neon/mod.rs b/crates/core_arch/src/acle/neon/mod.rs index 11e0113bb9..b89e724c96 100644 --- a/crates/core_arch/src/acle/neon/mod.rs +++ b/crates/core_arch/src/acle/neon/mod.rs @@ -4119,6 +4119,210 @@ pub unsafe fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { + #[cfg(target_arch = "arm")] + { + vpadal_s8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_s8_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { + #[cfg(target_arch = "arm")] + { + vpadal_s16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_s16_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { + #[cfg(target_arch = "arm")] + { + vpadal_s32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_s32_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + #[cfg(target_arch = "arm")] + { + vpadalq_s8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_s8_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + #[cfg(target_arch = "arm")] + { + vpadalq_s16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_s16_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + #[cfg(target_arch = "arm")] + { + vpadalq_s32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_s32_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { + #[cfg(target_arch = "arm")] + { + vpadal_u8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_u8_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { + #[cfg(target_arch = "arm")] + { + vpadal_u16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_u16_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { + #[cfg(target_arch = "arm")] + { + vpadal_u32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_u32_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + #[cfg(target_arch = "arm")] + { + vpadalq_u8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_u8_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + #[cfg(target_arch = "arm")] + { + vpadalq_u16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_u16_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + #[cfg(target_arch = "arm")] + { + vpadalq_u32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_u32_(b), a) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs index 38cb48f7e4..6ee49e1b68 100644 --- a/crates/core_arch/src/arm/neon.rs +++ b/crates/core_arch/src/arm/neon.rs @@ -310,210 +310,6 @@ pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { vld1q_v4f32(ptr as *const i8, align_of::() as i32) } -/// Signed Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { - #[cfg(target_arch = "arm")] - { - vpadal_s8_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_s8_(b), a) - } -} - -/// Signed Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { - #[cfg(target_arch = "arm")] - { - vpadal_s16_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_s16_(b), a) - } -} - -/// Signed Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { - #[cfg(target_arch = "arm")] - { - vpadal_s32_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_s32_(b), a) - } -} - -/// Signed Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { - #[cfg(target_arch = "arm")] - { - vpadalq_s8_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_s8_(b), a) - } -} - -/// Signed Add and Accumulate Long Pairwise. 
-#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { - #[cfg(target_arch = "arm")] - { - vpadalq_s16_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_s16_(b), a) - } -} - -/// Signed Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { - #[cfg(target_arch = "arm")] - { - vpadalq_s32_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_s32_(b), a) - } -} - -/// Unsigned Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { - #[cfg(target_arch = "arm")] - { - vpadal_u8_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_u8_(b), a) - } -} - -/// Unsigned Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { - #[cfg(target_arch = "arm")] - { - vpadal_u16_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_u16_(b), a) - } -} - -/// Unsigned Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { - #[cfg(target_arch = "arm")] - { - vpadal_u32_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_u32_(b), a) - } -} - -/// Unsigned Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { - #[cfg(target_arch = "arm")] - { - vpadalq_u8_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_u8_(b), a) - } -} - -/// Unsigned Add and Accumulate Long Pairwise. 
-#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { - #[cfg(target_arch = "arm")] - { - vpadalq_u16_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_u16_(b), a) - } -} - -/// Unsigned Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { - #[cfg(target_arch = "arm")] - { - vpadalq_u32_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_u32_(b), a) - } -} - /// Table look-up #[inline] #[cfg(target_arch = "arm")] From e22224d2cb30fcd0d090864b8d37d5098877e33a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 9 Apr 2021 15:52:50 +0800 Subject: [PATCH 16/37] Fix build errors --- crates/core_arch/src/arm/neon.rs | 1 - crates/core_arch/src/macros.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs index 6ee49e1b68..4d3d5f3ff3 100644 --- a/crates/core_arch/src/arm/neon.rs +++ b/crates/core_arch/src/arm/neon.rs @@ -1,4 +1,3 @@ -#[rustfmt::skip] use crate::core_arch::acle::neon::*; use crate::core_arch::simd_llvm::*; #[allow(unused_imports)] diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index b8cda93d5a..534d17de91 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -89,6 +89,6 @@ macro_rules! types { #[allow(non_camel_case_types)] #[repr(simd)] #[allow(clippy::missing_inline_in_public_items)] - pub struct $name($($fields)*); + pub struct $name(pub(crate) $($fields)*); )*) } From df00775ccecf9b2f05032eddb84bd890c4df5d4c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 9 Apr 2021 16:27:26 +0800 Subject: [PATCH 17/37] pub(crate) fields to allow arm/neon to access types declared in acle/mod --- crates/core_arch/src/acle/neon/mod.rs | 54 +++++++++++++-------------- crates/core_arch/src/macros.rs | 2 +- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/crates/core_arch/src/acle/neon/mod.rs b/crates/core_arch/src/acle/neon/mod.rs index 48cc76c706..93be123666 100644 --- a/crates/core_arch/src/acle/neon/mod.rs +++ b/crates/core_arch/src/acle/neon/mod.rs @@ -19,67 +19,67 @@ pub(crate) type p128 = u128; types! { /// ARM-specific 64-bit wide vector of eight packed `i8`. - pub struct int8x8_t(i8, i8, i8, i8, i8, i8, i8, i8); + pub struct int8x8_t(pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8); /// ARM-specific 64-bit wide vector of eight packed `u8`. - pub struct uint8x8_t(u8, u8, u8, u8, u8, u8, u8, u8); + pub struct uint8x8_t(pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8); /// ARM-specific 64-bit wide polynomial vector of eight packed `p8`. 
- pub struct poly8x8_t(p8, p8, p8, p8, p8, p8, p8, p8); + pub struct poly8x8_t(pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8); /// ARM-specific 64-bit wide vector of four packed `i16`. - pub struct int16x4_t(i16, i16, i16, i16); + pub struct int16x4_t(pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16); /// ARM-specific 64-bit wide vector of four packed `u16`. - pub struct uint16x4_t(u16, u16, u16, u16); + pub struct uint16x4_t(pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16); // FIXME: ARM-specific 64-bit wide vector of four packed `f16`. // pub struct float16x4_t(f16, f16, f16, f16); /// ARM-specific 64-bit wide vector of four packed `p16`. - pub struct poly16x4_t(p16, p16, p16, p16); + pub struct poly16x4_t(pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16); /// ARM-specific 64-bit wide vector of two packed `i32`. - pub struct int32x2_t(i32, i32); + pub struct int32x2_t(pub(crate) i32, pub(crate) i32); /// ARM-specific 64-bit wide vector of two packed `u32`. - pub struct uint32x2_t(u32, u32); + pub struct uint32x2_t(pub(crate) u32, pub(crate) u32); /// ARM-specific 64-bit wide vector of two packed `f32`. - pub struct float32x2_t(f32, f32); + pub struct float32x2_t(pub(crate) f32, pub(crate) f32); /// ARM-specific 64-bit wide vector of one packed `i64`. - pub struct int64x1_t(i64); + pub struct int64x1_t(pub(crate) i64); /// ARM-specific 64-bit wide vector of one packed `u64`. - pub struct uint64x1_t(u64); + pub struct uint64x1_t(pub(crate) u64); /// ARM-specific 64-bit wide vector of one packed `p64`. - pub struct poly64x1_t(p64); + pub struct poly64x1_t(pub(crate) p64); /// ARM-specific 128-bit wide vector of sixteen packed `i8`. pub struct int8x16_t( - i8, i8, i8, i8, i8, i8 ,i8, i8, - i8, i8, i8, i8, i8, i8 ,i8, i8, + pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8 , pub(crate) i8, pub(crate) i8, + pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8 , pub(crate) i8, pub(crate) i8, ); /// ARM-specific 128-bit wide vector of sixteen packed `u8`. pub struct uint8x16_t( - u8, u8 ,u8, u8, u8, u8 ,u8, u8, - u8, u8 ,u8, u8, u8, u8 ,u8, u8, + pub(crate) u8, pub(crate) u8 , pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8 , pub(crate) u8, pub(crate) u8, + pub(crate) u8, pub(crate) u8 , pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8 , pub(crate) u8, pub(crate) u8, ); /// ARM-specific 128-bit wide vector of sixteen packed `p8`. pub struct poly8x16_t( - p8, p8, p8, p8, p8, p8, p8, p8, - p8, p8, p8, p8, p8, p8, p8, p8, + pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, + pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, ); /// ARM-specific 128-bit wide vector of eight packed `i16`. - pub struct int16x8_t(i16, i16, i16, i16, i16, i16, i16, i16); + pub struct int16x8_t(pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16); /// ARM-specific 128-bit wide vector of eight packed `u16`. - pub struct uint16x8_t(u16, u16, u16, u16, u16, u16, u16, u16); + pub struct uint16x8_t(pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16); // FIXME: ARM-specific 128-bit wide vector of eight packed `f16`. 
// pub struct float16x8_t(f16, f16, f16, f16, f16, f16, f16); /// ARM-specific 128-bit wide vector of eight packed `p16`. - pub struct poly16x8_t(p16, p16, p16, p16, p16, p16, p16, p16); + pub struct poly16x8_t(pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16); /// ARM-specific 128-bit wide vector of four packed `i32`. - pub struct int32x4_t(i32, i32, i32, i32); + pub struct int32x4_t(pub(crate) i32, pub(crate) i32, pub(crate) i32, pub(crate) i32); /// ARM-specific 128-bit wide vector of four packed `u32`. - pub struct uint32x4_t(u32, u32, u32, u32); + pub struct uint32x4_t(pub(crate) u32, pub(crate) u32, pub(crate) u32, pub(crate) u32); /// ARM-specific 128-bit wide vector of four packed `f32`. - pub struct float32x4_t(f32, f32, f32, f32); + pub struct float32x4_t(pub(crate) f32, pub(crate) f32, pub(crate) f32, pub(crate) f32); /// ARM-specific 128-bit wide vector of two packed `i64`. - pub struct int64x2_t(i64, i64); + pub struct int64x2_t(pub(crate) i64, pub(crate) i64); /// ARM-specific 128-bit wide vector of two packed `u64`. - pub struct uint64x2_t(u64, u64); + pub struct uint64x2_t(pub(crate) u64, pub(crate) u64); /// ARM-specific 128-bit wide vector of two packed `p64`. - pub struct poly64x2_t(p64, p64); + pub struct poly64x2_t(pub(crate) p64, pub(crate) p64); } /// ARM-specific type containing two `int8x8_t` vectors. diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index 534d17de91..b8cda93d5a 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -89,6 +89,6 @@ macro_rules! types { #[allow(non_camel_case_types)] #[repr(simd)] #[allow(clippy::missing_inline_in_public_items)] - pub struct $name(pub(crate) $($fields)*); + pub struct $name($($fields)*); )*) } From 3b3cabaf8933ac9480fe0db70d4ed8b0f62d2619 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 9 Apr 2021 17:21:04 +0800 Subject: [PATCH 18/37] Add missing use statements for types --- crates/core_arch/src/arm/neon.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs index 4d3d5f3ff3..5d8a3fdf52 100644 --- a/crates/core_arch/src/arm/neon.rs +++ b/crates/core_arch/src/arm/neon.rs @@ -1,6 +1,10 @@ +#[allow(unused_imports)] use crate::core_arch::acle::neon::*; +#[allow(unused_imports)] use crate::core_arch::simd_llvm::*; #[allow(unused_imports)] +use crate::core_arch::simd::{i32x4, u32x4, f32x4}; +#[allow(unused_imports)] use crate::mem::transmute; #[cfg(test)] From f14ce234a767f434786db6d44cdf98cecdb34939 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 9 Apr 2021 17:42:50 +0800 Subject: [PATCH 19/37] Fix build on ARM --- crates/core_arch/src/acle/crypto.rs | 1 + crates/core_arch/src/acle/mod.rs | 10 +++--- crates/core_arch/src/acle/neon/mod.rs | 48 +++++++++++++++++---------- crates/core_arch/src/arm/mod.rs | 3 +- crates/core_arch/src/arm/neon.rs | 29 +++++++++------- 5 files changed, 54 insertions(+), 37 deletions(-) diff --git a/crates/core_arch/src/acle/crypto.rs b/crates/core_arch/src/acle/crypto.rs index 40f18c3793..111d00a77d 100644 --- a/crates/core_arch/src/acle/crypto.rs +++ b/crates/core_arch/src/acle/crypto.rs @@ -191,6 +191,7 @@ pub unsafe fn vsha256su1q_u32( #[cfg(test)] mod tests { + use super::*; use crate::core_arch::{arm::*, simd::*}; use std::mem; use stdarch_test::simd_test; diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 6f45c0f27b..7a3e30fafe 100644 
--- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -68,19 +68,17 @@ mod ex; pub use self::ex::*; -#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +#[cfg(any(target_feature = "v7", doc))] mod crc; -#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +#[cfg(any(target_feature = "v7", doc))] pub use crc::*; -#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +#[cfg(any(target_feature = "v7", doc))] mod crypto; -#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +#[cfg(any(arget_feature = "v7", doc))] pub use self::crypto::*; -#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub(crate) mod neon; -#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub use self::neon::*; mod sealed { diff --git a/crates/core_arch/src/acle/neon/mod.rs b/crates/core_arch/src/acle/neon/mod.rs index 93be123666..558ee78fa0 100644 --- a/crates/core_arch/src/acle/neon/mod.rs +++ b/crates/core_arch/src/acle/neon/mod.rs @@ -646,8 +646,13 @@ pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t { #[cfg(target_arch = "aarch64")] - use crate::core_arch::aarch64::vld1_s64; - vld1_s64(ptr) + { + crate::core_arch::aarch64::vld1_s64(ptr) + } + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::vld1_s64(ptr) + } } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -735,8 +740,13 @@ pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t { #[cfg(target_arch = "aarch64")] - use crate::core_arch::aarch64::vld1_u64; - vld1_u64(ptr) + { + crate::core_arch::aarch64::vld1_u64(ptr) + } + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::vld1_u64(ptr) + } } /// Load one single-element structure and Replicate to all lanes (of one register). 
@@ -4065,7 +4075,7 @@ pub unsafe fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { pub unsafe fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { #[cfg(target_arch = "arm")] { - vpadal_s8_(a, b) + crate::core_arch::arm::neon::vpadal_s8_(a, b) } #[cfg(target_arch = "aarch64")] { @@ -4082,7 +4092,7 @@ pub unsafe fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { pub unsafe fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { #[cfg(target_arch = "arm")] { - vpadal_s16_(a, b) + crate::core_arch::arm::neon::vpadal_s16_(a, b) } #[cfg(target_arch = "aarch64")] { @@ -4099,7 +4109,7 @@ pub unsafe fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { pub unsafe fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { #[cfg(target_arch = "arm")] { - vpadal_s32_(a, b) + crate::core_arch::arm::neon::vpadal_s32_(a, b) } #[cfg(target_arch = "aarch64")] { @@ -4116,7 +4126,7 @@ pub unsafe fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { pub unsafe fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { #[cfg(target_arch = "arm")] { - vpadalq_s8_(a, b) + crate::core_arch::arm::neon::vpadalq_s8_(a, b) } #[cfg(target_arch = "aarch64")] { @@ -4133,7 +4143,7 @@ pub unsafe fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { pub unsafe fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { #[cfg(target_arch = "arm")] { - vpadalq_s16_(a, b) + crate::core_arch::arm::neon::vpadalq_s16_(a, b) } #[cfg(target_arch = "aarch64")] { @@ -4150,7 +4160,7 @@ pub unsafe fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { pub unsafe fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { #[cfg(target_arch = "arm")] { - vpadalq_s32_(a, b) + crate::core_arch::arm::neon::vpadalq_s32_(a, b) } #[cfg(target_arch = "aarch64")] { @@ -4167,7 +4177,7 @@ pub unsafe fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { pub unsafe fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { #[cfg(target_arch = "arm")] { - vpadal_u8_(a, b) + crate::core_arch::arm::neon::vpadal_u8_(a, b) } #[cfg(target_arch = "aarch64")] { @@ -4184,7 +4194,7 @@ pub unsafe fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { pub unsafe fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { #[cfg(target_arch = "arm")] { - vpadal_u16_(a, b) + crate::core_arch::arm::neon::vpadal_u16_(a, b) } #[cfg(target_arch = "aarch64")] { @@ -4201,7 +4211,7 @@ pub unsafe fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { pub unsafe fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { #[cfg(target_arch = "arm")] { - vpadal_u32_(a, b) + crate::core_arch::arm::neon::vpadal_u32_(a, b) } #[cfg(target_arch = "aarch64")] { @@ -4218,7 +4228,7 @@ pub unsafe fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { pub unsafe fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { #[cfg(target_arch = "arm")] { - vpadalq_u8_(a, b) + crate::core_arch::arm::neon::vpadalq_u8_(a, b) } #[cfg(target_arch = "aarch64")] { @@ -4235,7 +4245,7 @@ pub unsafe fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { pub unsafe fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { #[cfg(target_arch = "arm")] { - vpadalq_u16_(a, b) + crate::core_arch::arm::neon::vpadalq_u16_(a, b) } #[cfg(target_arch = "aarch64")] { @@ -4252,7 +4262,7 @@ pub unsafe fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { #[cfg(target_arch = "arm")] { - vpadalq_u32_(a, b) + crate::core_arch::arm::neon::vpadalq_u32_(a, b) } #[cfg(target_arch = "aarch64")] { @@ -4264,7 
+4274,11 @@ pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { mod tests { use super::*; use crate::core_arch::arm::test_support::*; - use crate::core_arch::{arm::*, simd::*}; + use crate::core_arch::simd::*; + #[cfg(target_arch = "arm")] + use crate::core_arch::arm::*; + #[cfg(target_arch = "aarch64")] + use crate::core_arch::aarch64::*; use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec}; use stdarch_test::simd_test; diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 61260044f0..378fe55b6e 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -80,7 +80,7 @@ pub use crate::core_arch::acle::*; #[cfg(test)] use stdarch_test::assert_instr; -mod neon; +pub(crate) mod neon; pub use neon::*; /// Generates the trap instruction `UDF` @@ -92,5 +92,4 @@ pub unsafe fn udf() -> ! { } #[cfg(test)] -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] pub(crate) mod test_support; diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs index 5d8a3fdf52..c4a2cbeb6f 100644 --- a/crates/core_arch/src/arm/neon.rs +++ b/crates/core_arch/src/arm/neon.rs @@ -26,30 +26,30 @@ extern "C" { #[link_name = "llvm.arm.neon.vbsl.v16i8"] fn vbslq_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; #[link_name = "llvm.arm.neon.vpadals.v4i16.v8i8"] - fn vpadal_s8_(a: int16x4_t, b: int8x8_t) -> int16x4_t; + pub(crate) fn vpadal_s8_(a: int16x4_t, b: int8x8_t) -> int16x4_t; #[link_name = "llvm.arm.neon.vpadals.v2i32.v4i16"] - fn vpadal_s16_(a: int32x2_t, b: int16x4_t) -> int32x2_t; + pub(crate) fn vpadal_s16_(a: int32x2_t, b: int16x4_t) -> int32x2_t; #[link_name = "llvm.arm.neon.vpadals.v1i64.v2i32"] - fn vpadal_s32_(a: int64x1_t, b: int32x2_t) -> int64x1_t; + pub(crate) fn vpadal_s32_(a: int64x1_t, b: int32x2_t) -> int64x1_t; #[link_name = "llvm.arm.neon.vpadals.v8i16.v16i8"] - fn vpadalq_s8_(a: int16x8_t, b: int8x16_t) -> int16x8_t; + pub(crate) fn vpadalq_s8_(a: int16x8_t, b: int8x16_t) -> int16x8_t; #[link_name = "llvm.arm.neon.vpadals.v4i32.v8i16"] - fn vpadalq_s16_(a: int32x4_t, b: int16x8_t) -> int32x4_t; + pub(crate) fn vpadalq_s16_(a: int32x4_t, b: int16x8_t) -> int32x4_t; #[link_name = "llvm.arm.neon.vpadals.v2i64.v4i32"] - fn vpadalq_s32_(a: int64x2_t, b: int32x4_t) -> int64x2_t; + pub(crate) fn vpadalq_s32_(a: int64x2_t, b: int32x4_t) -> int64x2_t; #[link_name = "llvm.arm.neon.vpadalu.v4i16.v8i8"] - fn vpadal_u8_(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t; + pub(crate) fn vpadal_u8_(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t; #[link_name = "llvm.arm.neon.vpadalu.v2i32.v4i16"] - fn vpadal_u16_(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t; + pub(crate) fn vpadal_u16_(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t; #[link_name = "llvm.arm.neon.vpadalu.v1i64.v2i32"] - fn vpadal_u32_(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t; + pub(crate) fn vpadal_u32_(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t; #[link_name = "llvm.arm.neon.vpadalu.v8i16.v16i8"] - fn vpadalq_u8_(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t; + pub(crate) fn vpadalq_u8_(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t; #[link_name = "llvm.arm.neon.vpadalu.v4i32.v8i16"] - fn vpadalq_u16_(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t; + pub(crate) fn vpadalq_u16_(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t; #[link_name = "llvm.arm.neon.vpadalu.v2i64.v4i32"] - fn vpadalq_u32_(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t; + pub(crate) fn vpadalq_u32_(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t; #[link_name = 
"llvm.arm.neon.vtbl1"] fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; @@ -1162,6 +1162,11 @@ pub unsafe fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x #[cfg(test)] mod tests { + use super::*; + use crate::core_arch::{arm::*, simd::*}; + use stdarch_test::simd_test; + use crate::mem::transmute; + #[cfg(target_arch = "arm")] #[simd_test(enable = "neon")] unsafe fn test_vcvtq_s32_f32() { From ff8935528ac50ca49797068dcacf5a596d43cda1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 16 Apr 2021 09:06:17 +0800 Subject: [PATCH 20/37] Apply cargo fmt --- crates/core_arch/src/acle/neon/mod.rs | 6 +++--- crates/core_arch/src/arm/neon.rs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/core_arch/src/acle/neon/mod.rs b/crates/core_arch/src/acle/neon/mod.rs index c232ffbf16..75aa7af913 100644 --- a/crates/core_arch/src/acle/neon/mod.rs +++ b/crates/core_arch/src/acle/neon/mod.rs @@ -4273,12 +4273,12 @@ pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { #[cfg(test)] mod tests { use super::*; + #[cfg(target_arch = "aarch64")] + use crate::core_arch::aarch64::*; use crate::core_arch::arm::test_support::*; - use crate::core_arch::simd::*; #[cfg(target_arch = "arm")] use crate::core_arch::arm::*; - #[cfg(target_arch = "aarch64")] - use crate::core_arch::aarch64::*; + use crate::core_arch::simd::*; use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec}; use stdarch_test::simd_test; diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs index c4a2cbeb6f..c86bbae711 100644 --- a/crates/core_arch/src/arm/neon.rs +++ b/crates/core_arch/src/arm/neon.rs @@ -1,9 +1,9 @@ #[allow(unused_imports)] use crate::core_arch::acle::neon::*; #[allow(unused_imports)] -use crate::core_arch::simd_llvm::*; +use crate::core_arch::simd::{f32x4, i32x4, u32x4}; #[allow(unused_imports)] -use crate::core_arch::simd::{i32x4, u32x4, f32x4}; +use crate::core_arch::simd_llvm::*; #[allow(unused_imports)] use crate::mem::transmute; @@ -1164,8 +1164,8 @@ pub unsafe fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x mod tests { use super::*; use crate::core_arch::{arm::*, simd::*}; - use stdarch_test::simd_test; use crate::mem::transmute; + use stdarch_test::simd_test; #[cfg(target_arch = "arm")] #[simd_test(enable = "neon")] From e201bf4c70b762275c39b186d3e9e532a0a09576 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 16 Apr 2021 18:21:25 +0800 Subject: [PATCH 21/37] Apply suggestions from code review This should fix all build errors. 
Co-authored-by: Amanieu d'Antras --- crates/core_arch/src/acle/hints.rs | 1 - crates/core_arch/src/acle/mod.rs | 6 ++- crates/core_arch/src/arm/mod.rs | 66 ++++++++++++++---------------- 3 files changed, 35 insertions(+), 38 deletions(-) diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/acle/hints.rs index 60e3395972..3f6e48bbfb 100644 --- a/crates/core_arch/src/acle/hints.rs +++ b/crates/core_arch/src/acle/hints.rs @@ -51,7 +51,6 @@ pub unsafe fn __sev() { target_arch = "aarch64", // AArch64 doc, ))] -#[doc(cfg(target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __sevl() { hint(HINT_SEVL); diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 7a3e30fafe..6b9b4907d8 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -68,9 +68,9 @@ mod ex; pub use self::ex::*; -#[cfg(any(target_feature = "v7", doc))] +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] mod crc; -#[cfg(any(target_feature = "v7", doc))] +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub use crc::*; #[cfg(any(target_feature = "v7", doc))] @@ -78,7 +78,9 @@ mod crypto; #[cfg(any(arget_feature = "v7", doc))] pub use self::crypto::*; +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub(crate) mod neon; +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub use self::neon::*; mod sealed { diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 378fe55b6e..82210638b9 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -14,10 +14,10 @@ mod v6; pub use self::v6::*; // Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT) -#[cfg(any(all(not(target_arch = "aarch64"), target_feature = "v6",), doc))] +#[cfg(any(target_feature = "v6", doc))] mod sat; -#[cfg(any(all(not(target_arch = "aarch64"), target_feature = "v6",), doc))] +#[cfg(any(target_feature = "v6", doc))] pub use self::sat::*; // Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD) @@ -25,54 +25,47 @@ pub use self::sat::*; // section 5.4.7) // Here we workaround the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on // '+v5te' rather than on '+dsp' -#[cfg(any(all( - not(target_arch = "aarch64"), - any( - // >= v5TE but excludes v7-M - all(target_feature = "v5te", not(target_feature = "mclass")), - // v7E-M - all(target_feature = "mclass", target_feature = "dsp"), - ) -), doc))] +#[cfg(any( + // >= v5TE but excludes v7-M + all(target_feature = "v5te", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp"), + doc, +))] pub(crate) mod dsp; #[cfg(any( - all( - not(target_arch = "aarch64"), - any( - all(target_feature = "v5te", not(target_feature = "mclass")), - all(target_feature = "mclass", target_feature = "dsp"), - ) - ), - doc + // >= v5TE but excludes v7-M + all(target_feature = "v5te", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp"), + doc, ))] pub use self::dsp::*; // Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says // Section 5.4.9 of ACLE. 
We'll expose these for the A profile even if deprecated -#[cfg(all( - not(target_arch = "aarch64"), - any( - // v7-A, v7-R - all(target_feature = "v6", not(target_feature = "mclass")), - // v7E-M - all(target_feature = "mclass", target_feature = "dsp") - ) +#[cfg(any( + // v7-A, v7-R + all(target_feature = "v6", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp"), + doc, ))] mod simd32; -#[cfg(all( - not(target_arch = "aarch64"), - any( - all(target_feature = "v6", not(target_feature = "mclass")), - all(target_feature = "mclass", target_feature = "dsp") - ) +#[cfg(any( + // v7-A, v7-R + all(target_feature = "v6", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp"), + doc, ))] pub use self::simd32::*; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] +#[cfg(any(target_feature = "v7", doc))] mod v7; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] +#[cfg(any(target_feature = "v7", doc))] pub use self::v7::*; pub use crate::core_arch::acle::*; @@ -80,7 +73,9 @@ pub use crate::core_arch::acle::*; #[cfg(test)] use stdarch_test::assert_instr; +#[cfg(any(target_feature = "v7", doc))] pub(crate) mod neon; +#[cfg(any(target_feature = "v7", doc))] pub use neon::*; /// Generates the trap instruction `UDF` @@ -92,4 +87,5 @@ pub unsafe fn udf() -> ! { } #[cfg(test)] +#[cfg(any(target_feature = "v7", doc))] pub(crate) mod test_support; From 0f186e3478d75616c9b7c02a8eb51484c86613ef Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 16 Apr 2021 18:22:34 +0800 Subject: [PATCH 22/37] Apply review suggestions It seems this one was missed in the batch. Co-authored-by: Amanieu d'Antras --- crates/core_arch/src/acle/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 6b9b4907d8..6f45c0f27b 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -73,9 +73,9 @@ mod crc; #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub use crc::*; -#[cfg(any(target_feature = "v7", doc))] +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] mod crypto; -#[cfg(any(arget_feature = "v7", doc))] +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub use self::crypto::*; #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] From bd42df8c7943e571a2c559c70fde2ba56acb8d87 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 17 Apr 2021 11:21:28 +0800 Subject: [PATCH 23/37] move arm::test_support to acle::test_support As suggested in the PR comment. 
--- crates/core_arch/src/aarch64/neon/mod.rs | 2 +- crates/core_arch/src/acle/mod.rs | 4 ++++ crates/core_arch/src/acle/neon/mod.rs | 2 +- crates/core_arch/src/{arm => acle}/test_support.rs | 0 crates/core_arch/src/arm/mod.rs | 4 ---- 5 files changed, 6 insertions(+), 6 deletions(-) rename crates/core_arch/src/{arm => acle}/test_support.rs (100%) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index c02cd973b0..dd88afe852 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -2812,7 +2812,7 @@ pub unsafe fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x #[cfg(test)] mod tests { use crate::core_arch::aarch64::test_support::*; - use crate::core_arch::arm::test_support::*; + use crate::core_arch::acle::test_support::*; use crate::core_arch::{aarch64::neon::*, aarch64::*, simd::*}; use std::mem::transmute; use stdarch_test::simd_test; diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/acle/mod.rs index 6f45c0f27b..5e630f8fbe 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/acle/mod.rs @@ -83,6 +83,10 @@ pub(crate) mod neon; #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] pub use self::neon::*; +#[cfg(test)] +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +pub(crate) mod test_support; + mod sealed { pub trait Dmb { unsafe fn __dmb(&self); diff --git a/crates/core_arch/src/acle/neon/mod.rs b/crates/core_arch/src/acle/neon/mod.rs index 75aa7af913..7e2ed97708 100644 --- a/crates/core_arch/src/acle/neon/mod.rs +++ b/crates/core_arch/src/acle/neon/mod.rs @@ -4275,7 +4275,7 @@ mod tests { use super::*; #[cfg(target_arch = "aarch64")] use crate::core_arch::aarch64::*; - use crate::core_arch::arm::test_support::*; + use crate::core_arch::acle::test_support::*; #[cfg(target_arch = "arm")] use crate::core_arch::arm::*; use crate::core_arch::simd::*; diff --git a/crates/core_arch/src/arm/test_support.rs b/crates/core_arch/src/acle/test_support.rs similarity index 100% rename from crates/core_arch/src/arm/test_support.rs rename to crates/core_arch/src/acle/test_support.rs diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 82210638b9..4300cf2000 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -85,7 +85,3 @@ pub use neon::*; pub unsafe fn udf() -> ! 
{ crate::intrinsics::abort() } - -#[cfg(test)] -#[cfg(any(target_feature = "v7", doc))] -pub(crate) mod test_support; From f141aa6e486b27780cbf42a6f5635d3c29b90d32 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 17 Apr 2021 14:05:34 +0800 Subject: [PATCH 24/37] Don't ignore unused imports anymore --- crates/core_arch/src/arm/neon.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs index c86bbae711..fc7ac22402 100644 --- a/crates/core_arch/src/arm/neon.rs +++ b/crates/core_arch/src/arm/neon.rs @@ -1,10 +1,6 @@ -#[allow(unused_imports)] use crate::core_arch::acle::neon::*; -#[allow(unused_imports)] use crate::core_arch::simd::{f32x4, i32x4, u32x4}; -#[allow(unused_imports)] use crate::core_arch::simd_llvm::*; -#[allow(unused_imports)] use crate::mem::transmute; #[cfg(test)] From f2c4eac78dcd62f39dfcfd71c1f539ec6f8aac20 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 17 Apr 2021 14:11:02 +0800 Subject: [PATCH 25/37] Remove all `#[cfg(target_arch = "arm")]` as it's redundant --- crates/core_arch/src/arm/mod.rs | 2 +- crates/core_arch/src/arm/neon.rs | 92 -------------------------------- 2 files changed, 1 insertion(+), 93 deletions(-) diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 4300cf2000..5e36d53e4f 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -32,7 +32,7 @@ pub use self::sat::*; all(target_feature = "mclass", target_feature = "dsp"), doc, ))] -pub(crate) mod dsp; +pub mod dsp; #[cfg(any( // >= v5TE but excludes v7-M diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs index fc7ac22402..15fda87048 100644 --- a/crates/core_arch/src/arm/neon.rs +++ b/crates/core_arch/src/arm/neon.rs @@ -11,10 +11,8 @@ pub(crate) type p8 = u8; #[allow(non_camel_case_types)] pub(crate) type p16 = u16; -#[cfg(target_arch = "arm")] use crate::mem::align_of; -#[cfg(target_arch = "arm")] #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.arm.neon.vbsl.v8i8"] @@ -113,7 +111,6 @@ extern "C" { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.8"))] pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { @@ -122,7 +119,6 @@ pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.8"))] pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { @@ -131,7 +127,6 @@ pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.16"))] pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { @@ -140,7 +135,6 @@ pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.16"))] pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { @@ -149,7 +143,6 @@ pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { /// Load multiple single-element structures to one, two, three, or four registers. 
#[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vldr))] pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { @@ -158,7 +151,6 @@ pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.32"))] pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { @@ -167,7 +159,6 @@ pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vldr))] pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { @@ -176,7 +167,6 @@ pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.64"))] pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { @@ -185,7 +175,6 @@ pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.8"))] pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { @@ -194,7 +183,6 @@ pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.8"))] pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { @@ -203,7 +191,6 @@ pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.16"))] pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { @@ -212,7 +199,6 @@ pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.16"))] pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { @@ -221,7 +207,6 @@ pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vldr))] pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { @@ -230,7 +215,6 @@ pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.32"))] pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { @@ -239,7 +223,6 @@ pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { /// Load multiple single-element structures to one, two, three, or four registers. 
#[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vldr))] pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { @@ -248,7 +231,6 @@ pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.64"))] pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { @@ -257,7 +239,6 @@ pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.8"))] pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { @@ -266,7 +247,6 @@ pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.8"))] pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { @@ -275,7 +255,6 @@ pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.16"))] pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { @@ -284,7 +263,6 @@ pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.16"))] pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { @@ -293,7 +271,6 @@ pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { /// Load multiple single-element structures to one, two, three, or four registers. #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vldr))] pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { @@ -302,7 +279,6 @@ pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { /// Load multiple single-element structures to one, two, three, or four registers. 
#[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vld1.32"))] pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { @@ -311,7 +287,6 @@ pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { /// Table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] @@ -321,7 +296,6 @@ pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { /// Table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] @@ -331,7 +305,6 @@ pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { /// Table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] @@ -341,7 +314,6 @@ pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { /// Table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] @@ -351,7 +323,6 @@ pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { /// Table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] @@ -361,7 +332,6 @@ pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { /// Table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] @@ -371,7 +341,6 @@ pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { /// Table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] @@ -381,7 +350,6 @@ pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { /// Table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] @@ -396,7 +364,6 @@ pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { /// Table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] @@ -411,7 +378,6 @@ pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { /// Table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] @@ -421,7 +387,6 @@ pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { /// Table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] @@ -437,7 +402,6 @@ pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { /// Table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] @@ -453,7 +417,6 @@ pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { /// Extended table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] @@ -463,7 +426,6 @@ pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t 
{ /// Extended table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] @@ -473,7 +435,6 @@ pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { /// Extended table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] @@ -483,7 +444,6 @@ pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { /// Extended table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] @@ -493,7 +453,6 @@ pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { /// Extended table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] @@ -508,7 +467,6 @@ pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t /// Extended table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] @@ -523,7 +481,6 @@ pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t /// Extended table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] @@ -533,7 +490,6 @@ pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { /// Extended table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] @@ -549,7 +505,6 @@ pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t /// Extended table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] @@ -565,7 +520,6 @@ pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t /// Extended table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] @@ -575,7 +529,6 @@ pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { /// Extended table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] @@ -592,7 +545,6 @@ pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t /// Extended table look-up #[inline] -#[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] @@ -612,7 +564,6 @@ pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t /// Floating-point Convert to Signed fixed-point, rounding toward Zero (vector) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon")] #[target_feature(enable = "v7")] #[cfg_attr(test, assert_instr("vcvt.s32.f32"))] @@ -622,7 +573,6 @@ pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t { /// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon")] #[target_feature(enable = "v7")] #[cfg_attr(test, 
assert_instr("vcvt.u32.f32"))] @@ -632,7 +582,6 @@ pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.8", N = 1))] #[rustc_legacy_const_generics(2)] @@ -643,7 +592,6 @@ pub unsafe fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.8", N = 1))] #[rustc_legacy_const_generics(2)] @@ -658,7 +606,6 @@ pub unsafe fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.16", N = 1))] #[rustc_legacy_const_generics(2)] @@ -669,7 +616,6 @@ pub unsafe fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.16", N = 1))] #[rustc_legacy_const_generics(2)] @@ -680,7 +626,6 @@ pub unsafe fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.32", N = 1))] #[rustc_legacy_const_generics(2)] @@ -690,7 +635,6 @@ pub unsafe fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.32", N = 1))] #[rustc_legacy_const_generics(2)] @@ -700,7 +644,6 @@ pub unsafe fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.64", N = 1))] #[rustc_legacy_const_generics(2)] @@ -710,7 +653,6 @@ pub unsafe fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.64", N = 1))] #[rustc_legacy_const_generics(2)] @@ -720,7 +662,6 @@ pub unsafe fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.8", N = 1))] #[rustc_legacy_const_generics(2)] @@ -735,7 +676,6 @@ pub unsafe fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.8", N = 1))] #[rustc_legacy_const_generics(2)] @@ -750,7 +690,6 @@ pub unsafe fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16 } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.16", N = 1))] #[rustc_legacy_const_generics(2)] @@ -765,7 +704,6 @@ pub unsafe fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4 } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.16", N = 1))] #[rustc_legacy_const_generics(2)] @@ -780,7 +718,6 @@ pub unsafe fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x 
} /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.32", N = 1))] #[rustc_legacy_const_generics(2)] @@ -790,7 +727,6 @@ pub unsafe fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2 } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.32", N = 1))] #[rustc_legacy_const_generics(2)] @@ -804,7 +740,6 @@ pub unsafe fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.64", N = 1))] #[rustc_legacy_const_generics(2)] @@ -818,7 +753,6 @@ pub unsafe fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1 } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.64", N = 1))] #[rustc_legacy_const_generics(2)] @@ -832,7 +766,6 @@ pub unsafe fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.8", N = 1))] #[rustc_legacy_const_generics(2)] @@ -847,7 +780,6 @@ pub unsafe fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.8", N = 1))] #[rustc_legacy_const_generics(2)] @@ -862,7 +794,6 @@ pub unsafe fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16 } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.16", N = 1))] #[rustc_legacy_const_generics(2)] @@ -877,7 +808,6 @@ pub unsafe fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4 } /// Shift Left and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsli.16", N = 1))] #[rustc_legacy_const_generics(2)] @@ -893,7 +823,6 @@ pub unsafe fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.8", N = 1))] #[rustc_legacy_const_generics(2)] @@ -904,7 +833,6 @@ pub unsafe fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.8", N = 1))] #[rustc_legacy_const_generics(2)] @@ -919,7 +847,6 @@ pub unsafe fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.16", N = 1))] #[rustc_legacy_const_generics(2)] @@ -930,7 +857,6 @@ pub unsafe fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.16", N = 1))] #[rustc_legacy_const_generics(2)] @@ -941,7 +867,6 @@ pub unsafe fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = 
"neon,v7")] #[cfg_attr(test, assert_instr("vsri.32", N = 1))] #[rustc_legacy_const_generics(2)] @@ -951,7 +876,6 @@ pub unsafe fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.32", N = 1))] #[rustc_legacy_const_generics(2)] @@ -961,7 +885,6 @@ pub unsafe fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.64", N = 1))] #[rustc_legacy_const_generics(2)] @@ -971,7 +894,6 @@ pub unsafe fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.64", N = 1))] #[rustc_legacy_const_generics(2)] @@ -981,7 +903,6 @@ pub unsafe fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.8", N = 1))] #[rustc_legacy_const_generics(2)] @@ -996,7 +917,6 @@ pub unsafe fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.8", N = 1))] #[rustc_legacy_const_generics(2)] @@ -1011,7 +931,6 @@ pub unsafe fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16 } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.16", N = 1))] #[rustc_legacy_const_generics(2)] @@ -1026,7 +945,6 @@ pub unsafe fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4 } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.16", N = 1))] #[rustc_legacy_const_generics(2)] @@ -1041,7 +959,6 @@ pub unsafe fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.32", N = 1))] #[rustc_legacy_const_generics(2)] @@ -1055,7 +972,6 @@ pub unsafe fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2 } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.32", N = 1))] #[rustc_legacy_const_generics(2)] @@ -1069,7 +985,6 @@ pub unsafe fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.64", N = 1))] #[rustc_legacy_const_generics(2)] @@ -1083,7 +998,6 @@ pub unsafe fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1 } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.64", N = 1))] #[rustc_legacy_const_generics(2)] @@ -1097,7 +1011,6 @@ pub unsafe fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.8", N = 1))] 
#[rustc_legacy_const_generics(2)] @@ -1112,7 +1025,6 @@ pub unsafe fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.8", N = 1))] #[rustc_legacy_const_generics(2)] @@ -1127,7 +1039,6 @@ pub unsafe fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16 } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.16", N = 1))] #[rustc_legacy_const_generics(2)] @@ -1142,7 +1053,6 @@ pub unsafe fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4 } /// Shift Right and Insert (immediate) #[inline] -#[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr("vsri.16", N = 1))] #[rustc_legacy_const_generics(2)] @@ -1163,7 +1073,6 @@ mod tests { use crate::mem::transmute; use stdarch_test::simd_test; - #[cfg(target_arch = "arm")] #[simd_test(enable = "neon")] unsafe fn test_vcvtq_s32_f32() { let f = f32x4::new(-1., 2., 3., 4.); @@ -1172,7 +1081,6 @@ mod tests { assert_eq!(r, e); } - #[cfg(target_arch = "arm")] #[simd_test(enable = "neon")] unsafe fn test_vcvtq_u32_f32() { let f = f32x4::new(1., 2., 3., 4.); From 8794cae20edbbfac367f13f3d18e0d0b977ddb98 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 17 Apr 2021 14:19:13 +0800 Subject: [PATCH 26/37] rename 'acle' to 'arm_shared' --- crates/core_arch/src/aarch64/mod.rs | 2 +- crates/core_arch/src/aarch64/neon/mod.rs | 8 ++++---- crates/core_arch/src/arm/mod.rs | 2 +- crates/core_arch/src/arm/neon.rs | 2 +- .../core_arch/src/{acle => arm_shared}/barrier/common.rs | 0 crates/core_arch/src/{acle => arm_shared}/barrier/cp15.rs | 0 crates/core_arch/src/{acle => arm_shared}/barrier/mod.rs | 0 .../src/{acle => arm_shared}/barrier/not_mclass.rs | 0 crates/core_arch/src/{acle => arm_shared}/barrier/v8.rs | 0 crates/core_arch/src/{acle => arm_shared}/crc.rs | 0 crates/core_arch/src/{acle => arm_shared}/crypto.rs | 2 +- crates/core_arch/src/{acle => arm_shared}/ex.rs | 0 crates/core_arch/src/{acle => arm_shared}/hints.rs | 0 crates/core_arch/src/{acle => arm_shared}/mod.rs | 0 .../core_arch/src/{acle => arm_shared}/neon/generated.rs | 0 .../core_arch/src/{acle => arm_shared}/neon/load_tests.rs | 0 crates/core_arch/src/{acle => arm_shared}/neon/mod.rs | 2 +- .../{acle => arm_shared}/neon/shift_and_insert_tests.rs | 0 .../src/{acle => arm_shared}/neon/table_lookup_tests.rs | 0 .../src/{acle => arm_shared}/registers/aarch32.rs | 0 .../core_arch/src/{acle => arm_shared}/registers/mod.rs | 0 .../core_arch/src/{acle => arm_shared}/registers/v6m.rs | 0 .../core_arch/src/{acle => arm_shared}/registers/v7m.rs | 0 crates/core_arch/src/{acle => arm_shared}/test_support.rs | 0 crates/core_arch/src/mod.rs | 2 +- crates/stdarch-gen/src/main.rs | 2 +- crates/stdarch-test/src/lib.rs | 2 +- 27 files changed, 12 insertions(+), 12 deletions(-) rename crates/core_arch/src/{acle => arm_shared}/barrier/common.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/barrier/cp15.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/barrier/mod.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/barrier/not_mclass.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/barrier/v8.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/crc.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/crypto.rs (99%) rename crates/core_arch/src/{acle => arm_shared}/ex.rs 
(100%) rename crates/core_arch/src/{acle => arm_shared}/hints.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/mod.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/neon/generated.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/neon/load_tests.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/neon/mod.rs (99%) rename crates/core_arch/src/{acle => arm_shared}/neon/shift_and_insert_tests.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/neon/table_lookup_tests.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/registers/aarch32.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/registers/mod.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/registers/v6m.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/registers/v7m.rs (100%) rename crates/core_arch/src/{acle => arm_shared}/test_support.rs (100%) diff --git a/crates/core_arch/src/aarch64/mod.rs b/crates/core_arch/src/aarch64/mod.rs index f6d0fc9dbd..dbb1fd8419 100644 --- a/crates/core_arch/src/aarch64/mod.rs +++ b/crates/core_arch/src/aarch64/mod.rs @@ -21,7 +21,7 @@ pub use self::crc::*; mod prefetch; pub use self::prefetch::*; -pub use super::acle::*; +pub use super::arm_shared::*; #[cfg(test)] use stdarch_test::assert_instr; diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index dd88afe852..7ae7fd7d0f 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -2812,7 +2812,7 @@ pub unsafe fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x #[cfg(test)] mod tests { use crate::core_arch::aarch64::test_support::*; - use crate::core_arch::acle::test_support::*; + use crate::core_arch::arm_shared::test_support::*; use crate::core_arch::{aarch64::neon::*, aarch64::*, simd::*}; use std::mem::transmute; use stdarch_test::simd_test; @@ -4261,13 +4261,13 @@ mod tests { #[cfg(test)] #[cfg(target_endian = "little")] -#[path = "../../acle/neon/table_lookup_tests.rs"] +#[path = "../../arm_shared/neon/table_lookup_tests.rs"] mod table_lookup_tests; #[cfg(test)] -#[path = "../../acle/neon/shift_and_insert_tests.rs"] +#[path = "../../arm_shared/neon/shift_and_insert_tests.rs"] mod shift_and_insert_tests; #[cfg(test)] -#[path = "../../acle/neon/load_tests.rs"] +#[path = "../../arm_shared/neon/load_tests.rs"] mod load_tests; diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 5e36d53e4f..69b898e464 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -68,7 +68,7 @@ mod v7; #[cfg(any(target_feature = "v7", doc))] pub use self::v7::*; -pub use crate::core_arch::acle::*; +pub use crate::core_arch::arm_shared::*; #[cfg(test)] use stdarch_test::assert_instr; diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs index 15fda87048..6bb1d0bfd4 100644 --- a/crates/core_arch/src/arm/neon.rs +++ b/crates/core_arch/src/arm/neon.rs @@ -1,4 +1,4 @@ -use crate::core_arch::acle::neon::*; +use crate::core_arch::arm_shared::neon::*; use crate::core_arch::simd::{f32x4, i32x4, u32x4}; use crate::core_arch::simd_llvm::*; use crate::mem::transmute; diff --git a/crates/core_arch/src/acle/barrier/common.rs b/crates/core_arch/src/arm_shared/barrier/common.rs similarity index 100% rename from crates/core_arch/src/acle/barrier/common.rs rename to crates/core_arch/src/arm_shared/barrier/common.rs diff --git a/crates/core_arch/src/acle/barrier/cp15.rs b/crates/core_arch/src/arm_shared/barrier/cp15.rs similarity index 
100% rename from crates/core_arch/src/acle/barrier/cp15.rs rename to crates/core_arch/src/arm_shared/barrier/cp15.rs diff --git a/crates/core_arch/src/acle/barrier/mod.rs b/crates/core_arch/src/arm_shared/barrier/mod.rs similarity index 100% rename from crates/core_arch/src/acle/barrier/mod.rs rename to crates/core_arch/src/arm_shared/barrier/mod.rs diff --git a/crates/core_arch/src/acle/barrier/not_mclass.rs b/crates/core_arch/src/arm_shared/barrier/not_mclass.rs similarity index 100% rename from crates/core_arch/src/acle/barrier/not_mclass.rs rename to crates/core_arch/src/arm_shared/barrier/not_mclass.rs diff --git a/crates/core_arch/src/acle/barrier/v8.rs b/crates/core_arch/src/arm_shared/barrier/v8.rs similarity index 100% rename from crates/core_arch/src/acle/barrier/v8.rs rename to crates/core_arch/src/arm_shared/barrier/v8.rs diff --git a/crates/core_arch/src/acle/crc.rs b/crates/core_arch/src/arm_shared/crc.rs similarity index 100% rename from crates/core_arch/src/acle/crc.rs rename to crates/core_arch/src/arm_shared/crc.rs diff --git a/crates/core_arch/src/acle/crypto.rs b/crates/core_arch/src/arm_shared/crypto.rs similarity index 99% rename from crates/core_arch/src/acle/crypto.rs rename to crates/core_arch/src/arm_shared/crypto.rs index 111d00a77d..02f8c1d0c1 100644 --- a/crates/core_arch/src/acle/crypto.rs +++ b/crates/core_arch/src/arm_shared/crypto.rs @@ -1,4 +1,4 @@ -use crate::core_arch::acle::{uint32x4_t, uint8x16_t}; +use crate::core_arch::arm_shared::{uint32x4_t, uint8x16_t}; #[allow(improper_ctypes)] extern "C" { diff --git a/crates/core_arch/src/acle/ex.rs b/crates/core_arch/src/arm_shared/ex.rs similarity index 100% rename from crates/core_arch/src/acle/ex.rs rename to crates/core_arch/src/arm_shared/ex.rs diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/arm_shared/hints.rs similarity index 100% rename from crates/core_arch/src/acle/hints.rs rename to crates/core_arch/src/arm_shared/hints.rs diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/arm_shared/mod.rs similarity index 100% rename from crates/core_arch/src/acle/mod.rs rename to crates/core_arch/src/arm_shared/mod.rs diff --git a/crates/core_arch/src/acle/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs similarity index 100% rename from crates/core_arch/src/acle/neon/generated.rs rename to crates/core_arch/src/arm_shared/neon/generated.rs diff --git a/crates/core_arch/src/acle/neon/load_tests.rs b/crates/core_arch/src/arm_shared/neon/load_tests.rs similarity index 100% rename from crates/core_arch/src/acle/neon/load_tests.rs rename to crates/core_arch/src/arm_shared/neon/load_tests.rs diff --git a/crates/core_arch/src/acle/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs similarity index 99% rename from crates/core_arch/src/acle/neon/mod.rs rename to crates/core_arch/src/arm_shared/neon/mod.rs index 7e2ed97708..3c87862166 100644 --- a/crates/core_arch/src/acle/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -4275,9 +4275,9 @@ mod tests { use super::*; #[cfg(target_arch = "aarch64")] use crate::core_arch::aarch64::*; - use crate::core_arch::acle::test_support::*; #[cfg(target_arch = "arm")] use crate::core_arch::arm::*; + use crate::core_arch::arm_shared::test_support::*; use crate::core_arch::simd::*; use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec}; use stdarch_test::simd_test; diff --git a/crates/core_arch/src/acle/neon/shift_and_insert_tests.rs b/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs 
similarity index 100% rename from crates/core_arch/src/acle/neon/shift_and_insert_tests.rs rename to crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs diff --git a/crates/core_arch/src/acle/neon/table_lookup_tests.rs b/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs similarity index 100% rename from crates/core_arch/src/acle/neon/table_lookup_tests.rs rename to crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs diff --git a/crates/core_arch/src/acle/registers/aarch32.rs b/crates/core_arch/src/arm_shared/registers/aarch32.rs similarity index 100% rename from crates/core_arch/src/acle/registers/aarch32.rs rename to crates/core_arch/src/arm_shared/registers/aarch32.rs diff --git a/crates/core_arch/src/acle/registers/mod.rs b/crates/core_arch/src/arm_shared/registers/mod.rs similarity index 100% rename from crates/core_arch/src/acle/registers/mod.rs rename to crates/core_arch/src/arm_shared/registers/mod.rs diff --git a/crates/core_arch/src/acle/registers/v6m.rs b/crates/core_arch/src/arm_shared/registers/v6m.rs similarity index 100% rename from crates/core_arch/src/acle/registers/v6m.rs rename to crates/core_arch/src/arm_shared/registers/v6m.rs diff --git a/crates/core_arch/src/acle/registers/v7m.rs b/crates/core_arch/src/arm_shared/registers/v7m.rs similarity index 100% rename from crates/core_arch/src/acle/registers/v7m.rs rename to crates/core_arch/src/arm_shared/registers/v7m.rs diff --git a/crates/core_arch/src/acle/test_support.rs b/crates/core_arch/src/arm_shared/test_support.rs similarity index 100% rename from crates/core_arch/src/acle/test_support.rs rename to crates/core_arch/src/arm_shared/test_support.rs diff --git a/crates/core_arch/src/mod.rs b/crates/core_arch/src/mod.rs index 5b25687c3d..c5ff21c428 100644 --- a/crates/core_arch/src/mod.rs +++ b/crates/core_arch/src/mod.rs @@ -5,7 +5,7 @@ mod macros; #[cfg(any(target_arch = "arm", target_arch = "aarch64", doc))] -mod acle; +mod arm_shared; mod simd; diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index c37986c6c5..1babd33744 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -2253,7 +2253,7 @@ mod test { let arm_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap()) .join("src") - .join("acle") + .join("arm_shared") .join("neon"); std::fs::create_dir_all(&arm_out_path)?; diff --git a/crates/stdarch-test/src/lib.rs b/crates/stdarch-test/src/lib.rs index 70797e17c8..408d7190e9 100644 --- a/crates/stdarch-test/src/lib.rs +++ b/crates/stdarch-test/src/lib.rs @@ -121,7 +121,7 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) { // in some cases exceed the limit. 
"cvtpi2ps" => 25, - // core_arch/src/acle/simd32 + // core_arch/src/arm_shared/simd32 "usad8" => 27, "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29, From 213aaa94a695aa0f7ed951e222cc2ed0371d14ce Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 17 Apr 2021 15:31:14 +0800 Subject: [PATCH 27/37] Move __dbg intrinsic to 'arm' (from arm_shared) --- crates/core_arch/src/arm/mod.rs | 20 +++++++++++++++++++- crates/core_arch/src/arm_shared/hints.rs | 21 --------------------- crates/stdarch-verify/tests/arm.rs | 2 ++ 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 69b898e464..435bcb77d3 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -79,9 +79,27 @@ pub(crate) mod neon; pub use neon::*; /// Generates the trap instruction `UDF` -#[cfg(target_arch = "arm")] #[cfg_attr(test, assert_instr(udf))] #[inline] pub unsafe fn udf() -> ! { crate::intrinsics::abort() } + +/// Generates a DBG instruction. +/// +/// This provides a hint to debugging and related systems. The argument must be +/// a constant integer from 0 to 15 inclusive. See implementation documentation +/// for the effect (if any) of this instruction and the meaning of the +/// argument. This is available only when compliling for AArch32. +// Section 10.1 of ACLE says that the supported arches are: 7, 7-M +// "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and +// executes as a NOP instruction in ARMv6K and ARMv6T2." - ARM Architecture Reference Manual ARMv7-A +// and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2 +// "Thumb instruction set support" +#[cfg(any(target_feature = "v7", doc))] +#[inline(always)] +#[rustc_legacy_const_generics(0)] +pub unsafe fn __dbg() { + static_assert_imm4!(IMM4); + dbg(IMM4); +} diff --git a/crates/core_arch/src/arm_shared/hints.rs b/crates/core_arch/src/arm_shared/hints.rs index 3f6e48bbfb..160a7ab207 100644 --- a/crates/core_arch/src/arm_shared/hints.rs +++ b/crates/core_arch/src/arm_shared/hints.rs @@ -69,27 +69,6 @@ pub unsafe fn __yield() { hint(HINT_YIELD); } -/// Generates a DBG instruction. -/// -/// This provides a hint to debugging and related systems. The argument must be -/// a constant integer from 0 to 15 inclusive. See implementation documentation -/// for the effect (if any) of this instruction and the meaning of the -/// argument. This is available only when compliling for AArch32. -// Section 10.1 of ACLE says that the supported arches are: 7, 7-M -// "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and -// executes as a NOP instruction in ARMv6K and ARMv6T2." - ARM Architecture Reference Manual ARMv7-A -// and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2 -// "Thumb instruction set support" -#[cfg(target_feature = "v7")] -#[cfg(any(target_arch = "arm", doc))] -#[doc(cfg(target_arch = "arm"))] -#[inline(always)] -#[rustc_legacy_const_generics(0)] -pub unsafe fn __dbg() { - static_assert_imm4!(IMM4); - dbg(IMM4); -} - /// Generates an unspecified no-op instruction. /// /// Note that not all architectures provide a distinguished NOP instruction. 
On diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs index ed6b311a38..6c26c280f7 100644 --- a/crates/stdarch-verify/tests/arm.rs +++ b/crates/stdarch-verify/tests/arm.rs @@ -448,6 +448,7 @@ fn verify_all_signatures() { "vpadalq_u8", "vpadalq_u16", "vpadalq_u32", + "__dbg", ]; if !skip.contains(&rust.name) { println!( @@ -479,6 +480,7 @@ fn verify_all_signatures() { "vreinterpret_p64_s64", "vreinterpret_f32_p64", "vreinterpretq_f32_p64", + "__dbg", ]; let arm = match map.get(rust.name) { Some(i) => i, From bb176aa2fd5c3dab77d4423b9dd039754aec35b3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 17 Apr 2021 15:48:35 +0800 Subject: [PATCH 28/37] Add target_arch = "aarch64" and doc configs for each function individually I thought it would/should have worked to put it on the top of the module then, at least the target_arch one --- crates/core_arch/src/arm_shared/ex.rs | 30 ++++++++++++++++++++------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/crates/core_arch/src/arm_shared/ex.rs b/crates/core_arch/src/arm_shared/ex.rs index 0426c65186..a812cc4414 100644 --- a/crates/core_arch/src/arm_shared/ex.rs +++ b/crates/core_arch/src/arm_shared/ex.rs @@ -5,9 +5,11 @@ // Not supported: v5, v6-M // NOTE: there's no dedicated CLREX instruction in v6 ( u8 { extern "C" { #[link_name = "llvm.arm.ldrex.p0i8"] @@ -36,9 +40,11 @@ pub unsafe fn __ldrexb(p: *const u8) -> u8 { /// Executes a exclusive LDR instruction for 16 bit value. // Supported: v6K, v7-M, v7-A, v7-R, v8 // Not supported: v5, v6, v6-M -#[cfg( +#[cfg(target_feature = "aarch64")] +#[cfg(any( target_feature = "v6k", // includes v7-M but excludes v6-M -)] + doc +))] pub unsafe fn __ldrexh(p: *const u16) -> u16 { extern "C" { #[link_name = "llvm.arm.ldrex.p0i16"] @@ -51,9 +57,11 @@ pub unsafe fn __ldrexh(p: *const u16) -> u16 { /// Executes a exclusive LDR instruction for 32 bit value. 
// Supported: v6, v7-M, v6K, v7-A, v7-R, v8 // Not supported: v5, v6-M +#[cfg(target_feature = "aarch64")] #[cfg(any( all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M all(target_feature = "v7", target_feature = "mclass"), // v7-M + doc ))] pub unsafe fn __ldrex(p: *const u32) -> u32 { extern "C" { @@ -69,9 +77,11 @@ pub unsafe fn __ldrex(p: *const u32) -> u32 { /// Returns `0` if the operation succeeded, or `1` if it failed // supported: v6K, v7-M, v7-A, v7-R // Not supported: v5, v6, v6-M -#[cfg( +#[cfg(target_feature = "aarch64")] +#[cfg(any( target_feature = "v6k", // includes v7-M but excludes v6-M -)] + doc +))] pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 { extern "C" { #[link_name = "llvm.arm.strex.p0i8"] @@ -86,9 +96,11 @@ pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 { /// Returns `0` if the operation succeeded, or `1` if it failed // Supported: v6K, v7-M, v7-A, v7-R, v8 // Not supported: v5, v6, v6-M -#[cfg( +#[cfg(target_feature = "aarch64")] +#[cfg(any( target_feature = "v6k", // includes v7-M but excludes v6-M -)] + doc +))] pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 { extern "C" { #[link_name = "llvm.arm.strex.p0i16"] @@ -103,9 +115,11 @@ pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 { /// Returns `0` if the operation succeeded, or `1` if it failed // Supported: v6, v7-M, v6K, v7-A, v7-R, v8 // Not supported: v5, v6-M +#[cfg(target_feature = "aarch64")] #[cfg(any( all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M all(target_feature = "v7", target_feature = "mclass"), // v7-M + doc ))] pub unsafe fn __strex(value: u32, addr: *mut u32) -> u32 { extern "C" { From 230f0bef1cc808b3ebd3c5697c57a91c4796b943 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 17 Apr 2021 16:01:43 +0800 Subject: [PATCH 29/37] Fix build? --- crates/core_arch/src/arm/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 435bcb77d3..d978879f4b 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -97,6 +97,8 @@ pub unsafe fn udf() -> ! { // and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2 // "Thumb instruction set support" #[cfg(any(target_feature = "v7", doc))] +#[cfg(any(target_arch = "arm", doc))] +#[doc(cfg(target_arch = "arm"))] #[inline(always)] #[rustc_legacy_const_generics(0)] pub unsafe fn __dbg() { From dc373ed210a9870bf76aae03efbe297827277033 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 17 Apr 2021 16:20:34 +0800 Subject: [PATCH 30/37] Split aarch64 parts in arm/armclang.rs into aarch64/armclang.rs --- crates/core_arch/src/aarch64/armclang.rs | 24 +++++++++++++++++++++++ crates/core_arch/src/aarch64/mod.rs | 4 ++++ crates/core_arch/src/arm/armclang.rs | 25 ------------------------ 3 files changed, 28 insertions(+), 25 deletions(-) create mode 100644 crates/core_arch/src/aarch64/armclang.rs diff --git a/crates/core_arch/src/aarch64/armclang.rs b/crates/core_arch/src/aarch64/armclang.rs new file mode 100644 index 0000000000..3afa241307 --- /dev/null +++ b/crates/core_arch/src/aarch64/armclang.rs @@ -0,0 +1,24 @@ +//! ARM compiler specific intrinsics +//! +//! # References +//! +//! - [ARM Compiler v 6.10 - armclang Reference Guide][arm_comp_ref] +//! +//! [arm_comp_ref]: https://developer.arm.com/docs/100067/0610 + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Inserts a breakpoint instruction. 
+/// +/// `VAL` is a compile-time constant integer in range `[0, 65535]`. +/// +/// The breakpoint instruction inserted is `BRK` on A64. +#[cfg(all(target_arch = "aarch64", not(doc)))] +#[cfg_attr(test, assert_instr(brk, VAL = 0))] +#[inline(always)] +#[rustc_legacy_const_generics(0)] +pub unsafe fn __breakpoint() { + static_assert_imm16!(VAL); + asm!("brk {}", const VAL); +} diff --git a/crates/core_arch/src/aarch64/mod.rs b/crates/core_arch/src/aarch64/mod.rs index dbb1fd8419..0411fc1068 100644 --- a/crates/core_arch/src/aarch64/mod.rs +++ b/crates/core_arch/src/aarch64/mod.rs @@ -23,6 +23,10 @@ pub use self::prefetch::*; pub use super::arm_shared::*; +mod armclang; + +pub use self::armclang::*; + #[cfg(test)] use stdarch_test::assert_instr; diff --git a/crates/core_arch/src/arm/armclang.rs b/crates/core_arch/src/arm/armclang.rs index aa4bab49f1..41d0138165 100644 --- a/crates/core_arch/src/arm/armclang.rs +++ b/crates/core_arch/src/arm/armclang.rs @@ -1,28 +1,3 @@ -//! ARM compiler specific intrinsics -//! -//! # References -//! -//! - [ARM Compiler v 6.10 - armclang Reference Guide][arm_comp_ref] -//! -//! [arm_comp_ref]: https://developer.arm.com/docs/100067/0610 - -#[cfg(test)] -use stdarch_test::assert_instr; - -/// Inserts a breakpoint instruction. -/// -/// `VAL` is a compile-time constant integer in range `[0, 65535]`. -/// -/// The breakpoint instruction inserted is `BRK` on A64. -#[cfg(all(target_arch = "aarch64", not(doc)))] -#[cfg_attr(test, assert_instr(brk, VAL = 0))] -#[inline(always)] -#[rustc_legacy_const_generics(0)] -pub unsafe fn __breakpoint() { - static_assert_imm16!(VAL); - asm!("brk {}", const VAL); -} - /// Inserts a breakpoint instruction. /// /// `VAL` is a compile-time constant integer in range `[0, 255]`. From 52eaa8e7af57512c8e7f2889bd615ec99ac4d53c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 17 Apr 2021 16:24:28 +0800 Subject: [PATCH 31/37] Revert "Move __dbg intrinsic to 'arm' (from arm_shared)" This reverts commit 213aaa94a695aa0f7ed951e222cc2ed0371d14ce. --- crates/core_arch/src/arm/mod.rs | 22 +--------------------- crates/core_arch/src/arm_shared/hints.rs | 21 +++++++++++++++++++++ crates/stdarch-verify/tests/arm.rs | 2 -- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index d978879f4b..69b898e464 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -79,29 +79,9 @@ pub(crate) mod neon; pub use neon::*; /// Generates the trap instruction `UDF` +#[cfg(target_arch = "arm")] #[cfg_attr(test, assert_instr(udf))] #[inline] pub unsafe fn udf() -> ! { crate::intrinsics::abort() } - -/// Generates a DBG instruction. -/// -/// This provides a hint to debugging and related systems. The argument must be -/// a constant integer from 0 to 15 inclusive. See implementation documentation -/// for the effect (if any) of this instruction and the meaning of the -/// argument. This is available only when compliling for AArch32. -// Section 10.1 of ACLE says that the supported arches are: 7, 7-M -// "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and -// executes as a NOP instruction in ARMv6K and ARMv6T2." 
- ARM Architecture Reference Manual ARMv7-A -// and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2 -// "Thumb instruction set support" -#[cfg(any(target_feature = "v7", doc))] -#[cfg(any(target_arch = "arm", doc))] -#[doc(cfg(target_arch = "arm"))] -#[inline(always)] -#[rustc_legacy_const_generics(0)] -pub unsafe fn __dbg() { - static_assert_imm4!(IMM4); - dbg(IMM4); -} diff --git a/crates/core_arch/src/arm_shared/hints.rs b/crates/core_arch/src/arm_shared/hints.rs index 160a7ab207..3f6e48bbfb 100644 --- a/crates/core_arch/src/arm_shared/hints.rs +++ b/crates/core_arch/src/arm_shared/hints.rs @@ -69,6 +69,27 @@ pub unsafe fn __yield() { hint(HINT_YIELD); } +/// Generates a DBG instruction. +/// +/// This provides a hint to debugging and related systems. The argument must be +/// a constant integer from 0 to 15 inclusive. See implementation documentation +/// for the effect (if any) of this instruction and the meaning of the +/// argument. This is available only when compliling for AArch32. +// Section 10.1 of ACLE says that the supported arches are: 7, 7-M +// "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and +// executes as a NOP instruction in ARMv6K and ARMv6T2." - ARM Architecture Reference Manual ARMv7-A +// and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2 +// "Thumb instruction set support" +#[cfg(target_feature = "v7")] +#[cfg(any(target_arch = "arm", doc))] +#[doc(cfg(target_arch = "arm"))] +#[inline(always)] +#[rustc_legacy_const_generics(0)] +pub unsafe fn __dbg() { + static_assert_imm4!(IMM4); + dbg(IMM4); +} + /// Generates an unspecified no-op instruction. /// /// Note that not all architectures provide a distinguished NOP instruction. On diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs index 6c26c280f7..ed6b311a38 100644 --- a/crates/stdarch-verify/tests/arm.rs +++ b/crates/stdarch-verify/tests/arm.rs @@ -448,7 +448,6 @@ fn verify_all_signatures() { "vpadalq_u8", "vpadalq_u16", "vpadalq_u32", - "__dbg", ]; if !skip.contains(&rust.name) { println!( @@ -480,7 +479,6 @@ fn verify_all_signatures() { "vreinterpret_p64_s64", "vreinterpret_f32_p64", "vreinterpretq_f32_p64", - "__dbg", ]; let arm = match map.get(rust.name) { Some(i) => i, From 58ced634dbaf4f5a8378a9825329842238ff4207 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 17 Apr 2021 16:29:32 +0800 Subject: [PATCH 32/37] Fix build --- crates/core_arch/src/arm/armclang.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/crates/core_arch/src/arm/armclang.rs b/crates/core_arch/src/arm/armclang.rs index 41d0138165..def60afefc 100644 --- a/crates/core_arch/src/arm/armclang.rs +++ b/crates/core_arch/src/arm/armclang.rs @@ -1,3 +1,14 @@ +//! ARM compiler specific intrinsics +//! +//! # References +//! +//! - [ARM Compiler v 6.10 - armclang Reference Guide][arm_comp_ref] +//! +//! [arm_comp_ref]: https://developer.arm.com/docs/100067/0610 + +#[cfg(test)] +use stdarch_test::assert_instr; + /// Inserts a breakpoint instruction. /// /// `VAL` is a compile-time constant integer in range `[0, 255]`. 
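For context on the `__breakpoint` split in the last three patches: after this series the A32 intrinsic emits `BKPT` with an immediate in `[0, 255]`, while the A64 one emits `BRK` with an immediate in `[0, 65535]`. A minimal caller sketch follows; it is an illustration only, and the `stdsimd` feature gate and the `debug_trap` helper name are assumptions, not part of these patches.

    #![feature(stdsimd)] // assumed nightly feature gate for core::arch::{arm, aarch64}

    /// Hypothetical helper: raise a debugger breakpoint on either architecture.
    pub fn debug_trap() {
        #[cfg(target_arch = "arm")]
        unsafe {
            // A32/T32: emits `BKPT #0`; the immediate must fit in 8 bits.
            core::arch::arm::__breakpoint::<0>();
        }
        #[cfg(target_arch = "aarch64")]
        unsafe {
            // A64: emits `BRK #0`; the immediate must fit in 16 bits.
            core::arch::aarch64::__breakpoint::<0>();
        }
    }
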
From 90ccef5578998386d8819404531317c9fb5b6b34 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 17 Apr 2021 11:56:47 +0100 Subject: [PATCH 33/37] More fixes --- crates/core_arch/src/aarch64/armclang.rs | 1 - crates/core_arch/src/arm/armclang.rs | 2 -- .../core_arch/src/{arm_shared => arm}/ex.rs | 6 ----- crates/core_arch/src/arm/mod.rs | 27 +++++++++++++++++++ crates/core_arch/src/arm_shared/hints.rs | 25 ----------------- crates/core_arch/src/arm_shared/mod.rs | 6 ----- 6 files changed, 27 insertions(+), 40 deletions(-) rename crates/core_arch/src/{arm_shared => arm}/ex.rs (94%) diff --git a/crates/core_arch/src/aarch64/armclang.rs b/crates/core_arch/src/aarch64/armclang.rs index 3afa241307..54847be7b1 100644 --- a/crates/core_arch/src/aarch64/armclang.rs +++ b/crates/core_arch/src/aarch64/armclang.rs @@ -14,7 +14,6 @@ use stdarch_test::assert_instr; /// `VAL` is a compile-time constant integer in range `[0, 65535]`. /// /// The breakpoint instruction inserted is `BRK` on A64. -#[cfg(all(target_arch = "aarch64", not(doc)))] #[cfg_attr(test, assert_instr(brk, VAL = 0))] #[inline(always)] #[rustc_legacy_const_generics(0)] diff --git a/crates/core_arch/src/arm/armclang.rs b/crates/core_arch/src/arm/armclang.rs index def60afefc..6b332e82c0 100644 --- a/crates/core_arch/src/arm/armclang.rs +++ b/crates/core_arch/src/arm/armclang.rs @@ -26,8 +26,6 @@ use stdarch_test::assert_instr; /// The current implementation only accepts values in range `[0, 255]`. /// /// [arm_docs]: https://developer.arm.com/docs/100067/latest/compiler-specific-intrinsics/__breakpoint-intrinsic -#[cfg(any(target_arch = "arm", doc))] -#[doc(cfg(target_arch = "arm"))] #[cfg_attr(test, assert_instr(bkpt, VAL = 0))] #[inline(always)] #[rustc_legacy_const_generics(0)] diff --git a/crates/core_arch/src/arm_shared/ex.rs b/crates/core_arch/src/arm/ex.rs similarity index 94% rename from crates/core_arch/src/arm_shared/ex.rs rename to crates/core_arch/src/arm/ex.rs index a812cc4414..b9d5047a05 100644 --- a/crates/core_arch/src/arm_shared/ex.rs +++ b/crates/core_arch/src/arm/ex.rs @@ -5,7 +5,6 @@ // Not supported: v5, v6-M // NOTE: there's no dedicated CLREX instruction in v6 ( u8 { /// Executes a exclusive LDR instruction for 16 bit value. // Supported: v6K, v7-M, v7-A, v7-R, v8 // Not supported: v5, v6, v6-M -#[cfg(target_feature = "aarch64")] #[cfg(any( target_feature = "v6k", // includes v7-M but excludes v6-M doc @@ -57,7 +54,6 @@ pub unsafe fn __ldrexh(p: *const u16) -> u16 { /// Executes a exclusive LDR instruction for 32 bit value. 
// Supported: v6, v7-M, v6K, v7-A, v7-R, v8 // Not supported: v5, v6-M -#[cfg(target_feature = "aarch64")] #[cfg(any( all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M all(target_feature = "v7", target_feature = "mclass"), // v7-M @@ -77,7 +73,6 @@ pub unsafe fn __ldrex(p: *const u32) -> u32 { /// Returns `0` if the operation succeeded, or `1` if it failed // supported: v6K, v7-M, v7-A, v7-R // Not supported: v5, v6, v6-M -#[cfg(target_feature = "aarch64")] #[cfg(any( target_feature = "v6k", // includes v7-M but excludes v6-M doc @@ -115,7 +110,6 @@ pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 { /// Returns `0` if the operation succeeded, or `1` if it failed // Supported: v6, v7-M, v6K, v7-A, v7-R, v8 // Not supported: v5, v6-M -#[cfg(target_feature = "aarch64")] #[cfg(any( all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M all(target_feature = "v7", target_feature = "mclass"), // v7-M diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 69b898e464..940f95d31a 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -68,6 +68,9 @@ mod v7; #[cfg(any(target_feature = "v7", doc))] pub use self::v7::*; +mod ex; +pub use self::ex::*; + pub use crate::core_arch::arm_shared::*; #[cfg(test)] @@ -85,3 +88,27 @@ pub use neon::*; pub unsafe fn udf() -> ! { crate::intrinsics::abort() } + +/// Generates a DBG instruction. +/// +/// This provides a hint to debugging and related systems. The argument must be +/// a constant integer from 0 to 15 inclusive. See implementation documentation +/// for the effect (if any) of this instruction and the meaning of the +/// argument. This is available only when compliling for AArch32. +// Section 10.1 of ACLE says that the supported arches are: 7, 7-M +// "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and +// executes as a NOP instruction in ARMv6K and ARMv6T2." - ARM Architecture Reference Manual ARMv7-A +// and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2 +// "Thumb instruction set support" +#[cfg(any(target_feature = "v7", doc))] +#[inline(always)] +#[rustc_legacy_const_generics(0)] +pub unsafe fn __dbg() { + static_assert_imm4!(IMM4); + dbg(IMM4); +} + +extern "C" { + #[link_name = "llvm.arm.dbg"] + fn dbg(_: i32); +} diff --git a/crates/core_arch/src/arm_shared/hints.rs b/crates/core_arch/src/arm_shared/hints.rs index 3f6e48bbfb..3145cde8d5 100644 --- a/crates/core_arch/src/arm_shared/hints.rs +++ b/crates/core_arch/src/arm_shared/hints.rs @@ -69,27 +69,6 @@ pub unsafe fn __yield() { hint(HINT_YIELD); } -/// Generates a DBG instruction. -/// -/// This provides a hint to debugging and related systems. The argument must be -/// a constant integer from 0 to 15 inclusive. See implementation documentation -/// for the effect (if any) of this instruction and the meaning of the -/// argument. This is available only when compliling for AArch32. -// Section 10.1 of ACLE says that the supported arches are: 7, 7-M -// "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and -// executes as a NOP instruction in ARMv6K and ARMv6T2." 
- ARM Architecture Reference Manual ARMv7-A -// and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2 -// "Thumb instruction set support" -#[cfg(target_feature = "v7")] -#[cfg(any(target_arch = "arm", doc))] -#[doc(cfg(target_arch = "arm"))] -#[inline(always)] -#[rustc_legacy_const_generics(0)] -pub unsafe fn __dbg() { - static_assert_imm4!(IMM4); - dbg(IMM4); -} - /// Generates an unspecified no-op instruction. /// /// Note that not all architectures provide a distinguished NOP instruction. On @@ -105,10 +84,6 @@ extern "C" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.hint")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")] fn hint(_: i32); - - #[cfg(target_arch = "arm")] - #[link_name = "llvm.arm.dbg"] - fn dbg(_: i32); } // from LLVM 7.0.1's lib/Target/ARM/{ARMInstrThumb,ARMInstrInfo,ARMInstrThumb2}.td diff --git a/crates/core_arch/src/arm_shared/mod.rs b/crates/core_arch/src/arm_shared/mod.rs index 5e630f8fbe..4c8d19854e 100644 --- a/crates/core_arch/src/arm_shared/mod.rs +++ b/crates/core_arch/src/arm_shared/mod.rs @@ -57,17 +57,11 @@ mod barrier; pub use self::barrier::*; mod hints; - pub use self::hints::*; mod registers; - pub use self::registers::*; -mod ex; - -pub use self::ex::*; - #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] mod crc; #[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] From e1b481022b3ce10c35e12ebd47a4d342975653be Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Sat, 17 Apr 2021 12:16:18 +0100 Subject: [PATCH 34/37] More fixes --- crates/core_arch/src/aarch64/neon/mod.rs | 2 +- crates/core_arch/src/aarch64/test_support.rs | 2 +- crates/core_arch/src/arm/mod.rs | 1 - crates/core_arch/src/arm_shared/crc.rs | 2 +- crates/core_arch/src/arm_shared/crypto.rs | 2 +- crates/core_arch/src/arm_shared/test_support.rs | 8 +++++++- crates/core_arch/src/mod.rs | 6 +++--- 7 files changed, 14 insertions(+), 9 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 7ae7fd7d0f..71e6b83a63 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -10,7 +10,7 @@ pub use self::generated::*; // FIXME: replace neon with asimd use crate::{ - core_arch::{arm::*, simd::*, simd_llvm::*}, + core_arch::{arm_shared::*, simd::*, simd_llvm::*}, hint::unreachable_unchecked, mem::{transmute, zeroed}, }; diff --git a/crates/core_arch/src/aarch64/test_support.rs b/crates/core_arch/src/aarch64/test_support.rs index e08c39a545..9c5994b150 100644 --- a/crates/core_arch/src/aarch64/test_support.rs +++ b/crates/core_arch/src/aarch64/test_support.rs @@ -1,4 +1,4 @@ -use crate::core_arch::{aarch64::neon::*, arm::*, simd::*}; +use crate::core_arch::{aarch64::neon::*, arm_shared::*, simd::*}; use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec}; macro_rules! V_u64 { diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index 940f95d31a..d6b12b8292 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -7,7 +7,6 @@ //! 
 //! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
 
 mod armclang;
-
 pub use self::armclang::*;
 
 mod v6;
diff --git a/crates/core_arch/src/arm_shared/crc.rs b/crates/core_arch/src/arm_shared/crc.rs
index ffce20fe22..b1cfbb381b 100644
--- a/crates/core_arch/src/arm_shared/crc.rs
+++ b/crates/core_arch/src/arm_shared/crc.rs
@@ -79,7 +79,7 @@ pub unsafe fn __crc32cw(crc: u32, data: u32) -> u32 {
 
 #[cfg(test)]
 mod tests {
-    use crate::core_arch::{arm::*, simd::*};
+    use crate::core_arch::{arm_shared::*, simd::*};
     use std::mem;
     use stdarch_test::simd_test;
 
diff --git a/crates/core_arch/src/arm_shared/crypto.rs b/crates/core_arch/src/arm_shared/crypto.rs
index 02f8c1d0c1..b4d5b2978f 100644
--- a/crates/core_arch/src/arm_shared/crypto.rs
+++ b/crates/core_arch/src/arm_shared/crypto.rs
@@ -192,7 +192,7 @@ pub unsafe fn vsha256su1q_u32(
 
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::core_arch::{arm::*, simd::*};
+    use crate::core_arch::{arm_shared::*, simd::*};
     use std::mem;
     use stdarch_test::simd_test;
diff --git a/crates/core_arch/src/arm_shared/test_support.rs b/crates/core_arch/src/arm_shared/test_support.rs
index 337a270e40..ff752f25b3 100644
--- a/crates/core_arch/src/arm_shared/test_support.rs
+++ b/crates/core_arch/src/arm_shared/test_support.rs
@@ -1,4 +1,10 @@
-use crate::core_arch::{arm::*, simd::*};
+#[cfg(target_arch = "arm")]
+use crate::core_arch::arm::*;
+
+#[cfg(target_arch = "aarch64")]
+use crate::core_arch::aarch64::*;
+
+use crate::core_arch::simd::*;
 use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec};
 
 macro_rules! V_u8 {
diff --git a/crates/core_arch/src/mod.rs b/crates/core_arch/src/mod.rs
index c5ff21c428..4e6dcb7dc4 100644
--- a/crates/core_arch/src/mod.rs
+++ b/crates/core_arch/src/mod.rs
@@ -53,7 +53,7 @@ pub mod arch {
     #[doc(cfg(target_arch = "aarch64"))]
     #[unstable(feature = "stdsimd", issue = "27731")]
     pub mod aarch64 {
-        pub use crate::core_arch::{aarch64::*, arm::*};
+        pub use crate::core_arch::aarch64::*;
     }
 
     /// Platform-specific intrinsics for the `wasm32` platform.
@@ -234,8 +234,8 @@ mod x86_64;
 #[cfg(any(target_arch = "aarch64", doc))]
 #[doc(cfg(target_arch = "aarch64"))]
 mod aarch64;
-#[cfg(any(target_arch = "arm", target_arch = "aarch64", doc))]
-#[doc(cfg(any(target_arch = "arm", target_arch = "aarch64")))]
+#[cfg(any(target_arch = "arm", doc))]
+#[doc(cfg(any(target_arch = "arm")))]
 mod arm;
 
 #[cfg(any(target_arch = "wasm32", doc))]

From 749ee371d7b04d3b81a4f791478407eb4035347d Mon Sep 17 00:00:00 2001
From: Amanieu d'Antras
Date: Sat, 17 Apr 2021 12:23:16 +0100
Subject: [PATCH 35/37] Ignore some intrinsic tests

---
 crates/stdarch-verify/tests/arm.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs
index ed6b311a38..971893f205 100644
--- a/crates/stdarch-verify/tests/arm.rs
+++ b/crates/stdarch-verify/tests/arm.rs
@@ -448,6 +448,14 @@ fn verify_all_signatures() {
             "vpadalq_u8",
             "vpadalq_u16",
             "vpadalq_u32",
+            "__ldrex",
+            "__strex",
+            "__ldrexb",
+            "__strexb",
+            "__ldrexh",
+            "__strexh",
+            "__clrex",
+            "__dbg",
         ];
         if !skip.contains(&rust.name) {
             println!(

From de7ce7f1f60f3c70803c5492274f128a5edeee5c Mon Sep 17 00:00:00 2001
From: Amanieu d'Antras
Date: Sat, 17 Apr 2021 12:33:42 +0100
Subject: [PATCH 36/37] More fixes

---
 crates/stdarch-verify/tests/arm.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs
index 971893f205..f946991cbb 100644
--- a/crates/stdarch-verify/tests/arm.rs
+++ b/crates/stdarch-verify/tests/arm.rs
@@ -503,6 +503,7 @@ fn verify_all_signatures() {
             && !rust.file.ends_with("v7.rs\"")
             && !rust.file.ends_with("v8.rs\"")
             && !rust.file.ends_with("tme.rs\"")
+            && !rust.file.ends_with("ex.rs\"")
             && !skip_intrinsic_verify.contains(&rust.name)
         {
             println!(

From 7abb5d9ad808da2743dcbfc42e670dfd71d24ece Mon Sep 17 00:00:00 2001
From: Amanieu d'Antras
Date: Sat, 17 Apr 2021 12:39:27 +0100
Subject: [PATCH 37/37] More fixes

---
 crates/stdarch-verify/tests/arm.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs
index f946991cbb..8c2cf47855 100644
--- a/crates/stdarch-verify/tests/arm.rs
+++ b/crates/stdarch-verify/tests/arm.rs
@@ -487,6 +487,7 @@ fn verify_all_signatures() {
             "vreinterpret_p64_s64",
             "vreinterpret_f32_p64",
             "vreinterpretq_f32_p64",
+            "__dbg",
         ];
         let arm = match map.get(rust.name) {
             Some(i) => i,