Skip to content

Commit daae8f8

Browse files
authored
add neon instruction vmaxnm_f* vpmaxnm_f* vminnm_f* vpminnm_f* (#1105)
1 parent 15babf5 commit daae8f8

File tree

4 files changed

+416
-1
lines changed

4 files changed

+416
-1
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2538,6 +2538,71 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
25382538
vmaxq_f64_(a, b)
25392539
}
25402540

2541+
/// Floating-point Maximum Number (vector)
2542+
#[inline]
2543+
#[target_feature(enable = "neon")]
2544+
#[cfg_attr(test, assert_instr(fmaxnm))]
2545+
pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
2546+
#[allow(improper_ctypes)]
2547+
extern "C" {
2548+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v1f64")]
2549+
fn vmaxnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
2550+
}
2551+
vmaxnm_f64_(a, b)
2552+
}
2553+
2554+
/// Floating-point Maximum Number (vector)
2555+
#[inline]
2556+
#[target_feature(enable = "neon")]
2557+
#[cfg_attr(test, assert_instr(fmaxnm))]
2558+
pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
2559+
#[allow(improper_ctypes)]
2560+
extern "C" {
2561+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f64")]
2562+
fn vmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
2563+
}
2564+
vmaxnmq_f64_(a, b)
2565+
}
2566+
2567+
/// Floating-point Maximum Number Pairwise (vector).
2568+
#[inline]
2569+
#[target_feature(enable = "neon")]
2570+
#[cfg_attr(test, assert_instr(fmaxnmp))]
2571+
pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
2572+
#[allow(improper_ctypes)]
2573+
extern "C" {
2574+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f32")]
2575+
fn vpmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
2576+
}
2577+
vpmaxnm_f32_(a, b)
2578+
}
2579+
2580+
/// Floating-point Maximum Number Pairwise (vector).
2581+
#[inline]
2582+
#[target_feature(enable = "neon")]
2583+
#[cfg_attr(test, assert_instr(fmaxnmp))]
2584+
pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
2585+
#[allow(improper_ctypes)]
2586+
extern "C" {
2587+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f64")]
2588+
fn vpmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
2589+
}
2590+
vpmaxnmq_f64_(a, b)
2591+
}
2592+
2593+
/// Floating-point Maximum Number Pairwise (vector).
2594+
#[inline]
2595+
#[target_feature(enable = "neon")]
2596+
#[cfg_attr(test, assert_instr(fmaxnmp))]
2597+
pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
2598+
#[allow(improper_ctypes)]
2599+
extern "C" {
2600+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v4f32")]
2601+
fn vpmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
2602+
}
2603+
vpmaxnmq_f32_(a, b)
2604+
}
2605+
25412606
/// Minimum (vector)
25422607
#[inline]
25432608
#[target_feature(enable = "neon")]
@@ -2564,6 +2629,71 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
25642629
vminq_f64_(a, b)
25652630
}
25662631

2632+
/// Floating-point Minimum Number (vector)
2633+
#[inline]
2634+
#[target_feature(enable = "neon")]
2635+
#[cfg_attr(test, assert_instr(fminnm))]
2636+
pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
2637+
#[allow(improper_ctypes)]
2638+
extern "C" {
2639+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v1f64")]
2640+
fn vminnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
2641+
}
2642+
vminnm_f64_(a, b)
2643+
}
2644+
2645+
/// Floating-point Minimum Number (vector)
2646+
#[inline]
2647+
#[target_feature(enable = "neon")]
2648+
#[cfg_attr(test, assert_instr(fminnm))]
2649+
pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
2650+
#[allow(improper_ctypes)]
2651+
extern "C" {
2652+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f64")]
2653+
fn vminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
2654+
}
2655+
vminnmq_f64_(a, b)
2656+
}
2657+
2658+
/// Floating-point Minimum Number Pairwise (vector).
2659+
#[inline]
2660+
#[target_feature(enable = "neon")]
2661+
#[cfg_attr(test, assert_instr(fminnmp))]
2662+
pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
2663+
#[allow(improper_ctypes)]
2664+
extern "C" {
2665+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f32")]
2666+
fn vpminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
2667+
}
2668+
vpminnm_f32_(a, b)
2669+
}
2670+
2671+
/// Floating-point Minimum Number Pairwise (vector).
2672+
#[inline]
2673+
#[target_feature(enable = "neon")]
2674+
#[cfg_attr(test, assert_instr(fminnmp))]
2675+
pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
2676+
#[allow(improper_ctypes)]
2677+
extern "C" {
2678+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f64")]
2679+
fn vpminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
2680+
}
2681+
vpminnmq_f64_(a, b)
2682+
}
2683+
2684+
/// Floating-point Minimum Number Pairwise (vector).
2685+
#[inline]
2686+
#[target_feature(enable = "neon")]
2687+
#[cfg_attr(test, assert_instr(fminnmp))]
2688+
pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
2689+
#[allow(improper_ctypes)]
2690+
extern "C" {
2691+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v4f32")]
2692+
fn vpminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
2693+
}
2694+
vpminnmq_f32_(a, b)
2695+
}
2696+
25672697
/// Calculates the square root of each lane.
25682698
#[inline]
25692699
#[target_feature(enable = "neon")]
@@ -6347,6 +6477,51 @@ mod test {
63476477
assert_eq!(r, e);
63486478
}
63496479

6480+
#[simd_test(enable = "neon")]
6481+
unsafe fn test_vmaxnm_f64() {
6482+
let a: f64 = 1.0;
6483+
let b: f64 = 8.0;
6484+
let e: f64 = 8.0;
6485+
let r: f64 = transmute(vmaxnm_f64(transmute(a), transmute(b)));
6486+
assert_eq!(r, e);
6487+
}
6488+
6489+
#[simd_test(enable = "neon")]
6490+
unsafe fn test_vmaxnmq_f64() {
6491+
let a: f64x2 = f64x2::new(1.0, 2.0);
6492+
let b: f64x2 = f64x2::new(8.0, 16.0);
6493+
let e: f64x2 = f64x2::new(8.0, 16.0);
6494+
let r: f64x2 = transmute(vmaxnmq_f64(transmute(a), transmute(b)));
6495+
assert_eq!(r, e);
6496+
}
6497+
6498+
#[simd_test(enable = "neon")]
6499+
unsafe fn test_vpmaxnm_f32() {
6500+
let a: f32x2 = f32x2::new(1.0, 2.0);
6501+
let b: f32x2 = f32x2::new(6.0, -3.0);
6502+
let e: f32x2 = f32x2::new(2.0, 6.0);
6503+
let r: f32x2 = transmute(vpmaxnm_f32(transmute(a), transmute(b)));
6504+
assert_eq!(r, e);
6505+
}
6506+
6507+
#[simd_test(enable = "neon")]
6508+
unsafe fn test_vpmaxnmq_f64() {
6509+
let a: f64x2 = f64x2::new(1.0, 2.0);
6510+
let b: f64x2 = f64x2::new(6.0, -3.0);
6511+
let e: f64x2 = f64x2::new(2.0, 6.0);
6512+
let r: f64x2 = transmute(vpmaxnmq_f64(transmute(a), transmute(b)));
6513+
assert_eq!(r, e);
6514+
}
6515+
6516+
#[simd_test(enable = "neon")]
6517+
unsafe fn test_vpmaxnmq_f32() {
6518+
let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
6519+
let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
6520+
let e: f32x4 = f32x4::new(2.0, 3.0, 16.0, 6.0);
6521+
let r: f32x4 = transmute(vpmaxnmq_f32(transmute(a), transmute(b)));
6522+
assert_eq!(r, e);
6523+
}
6524+
63506525
#[simd_test(enable = "neon")]
63516526
unsafe fn test_vmin_f64() {
63526527
let a: f64 = 1.0;
@@ -6365,6 +6540,51 @@ mod test {
63656540
assert_eq!(r, e);
63666541
}
63676542

6543+
#[simd_test(enable = "neon")]
6544+
unsafe fn test_vminnm_f64() {
6545+
let a: f64 = 1.0;
6546+
let b: f64 = 8.0;
6547+
let e: f64 = 1.0;
6548+
let r: f64 = transmute(vminnm_f64(transmute(a), transmute(b)));
6549+
assert_eq!(r, e);
6550+
}
6551+
6552+
#[simd_test(enable = "neon")]
6553+
unsafe fn test_vminnmq_f64() {
6554+
let a: f64x2 = f64x2::new(1.0, 2.0);
6555+
let b: f64x2 = f64x2::new(8.0, 16.0);
6556+
let e: f64x2 = f64x2::new(1.0, 2.0);
6557+
let r: f64x2 = transmute(vminnmq_f64(transmute(a), transmute(b)));
6558+
assert_eq!(r, e);
6559+
}
6560+
6561+
#[simd_test(enable = "neon")]
6562+
unsafe fn test_vpminnm_f32() {
6563+
let a: f32x2 = f32x2::new(1.0, 2.0);
6564+
let b: f32x2 = f32x2::new(6.0, -3.0);
6565+
let e: f32x2 = f32x2::new(1.0, -3.0);
6566+
let r: f32x2 = transmute(vpminnm_f32(transmute(a), transmute(b)));
6567+
assert_eq!(r, e);
6568+
}
6569+
6570+
#[simd_test(enable = "neon")]
6571+
unsafe fn test_vpminnmq_f64() {
6572+
let a: f64x2 = f64x2::new(1.0, 2.0);
6573+
let b: f64x2 = f64x2::new(6.0, -3.0);
6574+
let e: f64x2 = f64x2::new(1.0, -3.0);
6575+
let r: f64x2 = transmute(vpminnmq_f64(transmute(a), transmute(b)));
6576+
assert_eq!(r, e);
6577+
}
6578+
6579+
#[simd_test(enable = "neon")]
6580+
unsafe fn test_vpminnmq_f32() {
6581+
let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
6582+
let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
6583+
let e: f32x4 = f32x4::new(1.0, -4.0, 8.0, -1.0);
6584+
let r: f32x4 = transmute(vpminnmq_f32(transmute(a), transmute(b)));
6585+
assert_eq!(r, e);
6586+
}
6587+
63686588
#[simd_test(enable = "neon")]
63696589
unsafe fn test_vsqrt_f32() {
63706590
let a: f32x2 = f32x2::new(4.0, 9.0);

crates/core_arch/src/arm/neon/generated.rs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4906,6 +4906,38 @@ pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
49064906
vmaxq_f32_(a, b)
49074907
}
49084908

4909+
/// Floating-point Maximum Number (vector)
4910+
#[inline]
4911+
#[target_feature(enable = "neon")]
4912+
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
4913+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
4914+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))]
4915+
pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
4916+
#[allow(improper_ctypes)]
4917+
extern "C" {
4918+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")]
4919+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f32")]
4920+
fn vmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
4921+
}
4922+
vmaxnm_f32_(a, b)
4923+
}
4924+
4925+
/// Floating-point Maximum Number (vector)
4926+
#[inline]
4927+
#[target_feature(enable = "neon")]
4928+
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
4929+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
4930+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))]
4931+
pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
4932+
#[allow(improper_ctypes)]
4933+
extern "C" {
4934+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")]
4935+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v4f32")]
4936+
fn vmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
4937+
}
4938+
vmaxnmq_f32_(a, b)
4939+
}
4940+
49094941
/// Minimum (vector)
49104942
#[inline]
49114943
#[target_feature(enable = "neon")]
@@ -5130,6 +5162,38 @@ pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
51305162
vminq_f32_(a, b)
51315163
}
51325164

5165+
/// Floating-point Minimum Number (vector)
5166+
#[inline]
5167+
#[target_feature(enable = "neon")]
5168+
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
5169+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
5170+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))]
5171+
pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
5172+
#[allow(improper_ctypes)]
5173+
extern "C" {
5174+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")]
5175+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f32")]
5176+
fn vminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
5177+
}
5178+
vminnm_f32_(a, b)
5179+
}
5180+
5181+
/// Floating-point Minimum Number (vector)
5182+
#[inline]
5183+
#[target_feature(enable = "neon")]
5184+
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
5185+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
5186+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))]
5187+
pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
5188+
#[allow(improper_ctypes)]
5189+
extern "C" {
5190+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")]
5191+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v4f32")]
5192+
fn vminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
5193+
}
5194+
vminnmq_f32_(a, b)
5195+
}
5196+
51335197
/// Reciprocal square-root estimate.
51345198
#[inline]
51355199
#[target_feature(enable = "neon")]
@@ -11292,6 +11356,24 @@ mod test {
1129211356
assert_eq!(r, e);
1129311357
}
1129411358

11359+
#[simd_test(enable = "neon")]
11360+
unsafe fn test_vmaxnm_f32() {
11361+
let a: f32x2 = f32x2::new(1.0, 2.0);
11362+
let b: f32x2 = f32x2::new(8.0, 16.0);
11363+
let e: f32x2 = f32x2::new(8.0, 16.0);
11364+
let r: f32x2 = transmute(vmaxnm_f32(transmute(a), transmute(b)));
11365+
assert_eq!(r, e);
11366+
}
11367+
11368+
#[simd_test(enable = "neon")]
11369+
unsafe fn test_vmaxnmq_f32() {
11370+
let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
11371+
let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
11372+
let e: f32x4 = f32x4::new(8.0, 16.0, 3.0, 6.0);
11373+
let r: f32x4 = transmute(vmaxnmq_f32(transmute(a), transmute(b)));
11374+
assert_eq!(r, e);
11375+
}
11376+
1129511377
#[simd_test(enable = "neon")]
1129611378
unsafe fn test_vmin_s8() {
1129711379
let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
@@ -11418,6 +11500,24 @@ mod test {
1141811500
assert_eq!(r, e);
1141911501
}
1142011502

11503+
#[simd_test(enable = "neon")]
11504+
unsafe fn test_vminnm_f32() {
11505+
let a: f32x2 = f32x2::new(1.0, 2.0);
11506+
let b: f32x2 = f32x2::new(8.0, 16.0);
11507+
let e: f32x2 = f32x2::new(1.0, 2.0);
11508+
let r: f32x2 = transmute(vminnm_f32(transmute(a), transmute(b)));
11509+
assert_eq!(r, e);
11510+
}
11511+
11512+
#[simd_test(enable = "neon")]
11513+
unsafe fn test_vminnmq_f32() {
11514+
let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
11515+
let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
11516+
let e: f32x4 = f32x4::new(1.0, 2.0, -1.0, -4.0);
11517+
let r: f32x4 = transmute(vminnmq_f32(transmute(a), transmute(b)));
11518+
assert_eq!(r, e);
11519+
}
11520+
1142111521
#[simd_test(enable = "neon")]
1142211522
unsafe fn test_vrsqrte_f32() {
1142311523
let a: f32x2 = f32x2::new(1.0, 2.0);

0 commit comments

Comments
 (0)