Skip to content

Commit 2e562a7

Browse files
TheIronBorngnzlbg
authored andcommitted
add swap_bytes/to_le/to_be (#517)
* add large shuffle intrinsics * add swap_bytes/to_le * add to_be * more tests * improve swap_bytes tests
1 parent 1f336fc commit 2e562a7

File tree

6 files changed

+281
-3
lines changed

6 files changed

+281
-3
lines changed

coresimd/ppsv/api/masks_reductions.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ macro_rules! impl_mask_reductions {
99
pub fn all(self) -> bool {
1010
unsafe { super::codegen::masks_reductions::All::all(self) }
1111
}
12-
/// Is `any` vector lanes `true`?
12+
/// Is `any` vector lane `true`?
1313
#[inline]
1414
pub fn any(self) -> bool {
1515
unsafe { super::codegen::masks_reductions::Any::any(self) }

coresimd/ppsv/api/mod.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ mod masks_select;
7272
mod scalar_shifts;
7373
#[macro_use]
7474
mod shifts;
75+
#[macro_use]
76+
mod swap_bytes;
7577

7678
/// Sealed trait used for constraining select implementations.
7779
pub trait Lanes<A> {}
@@ -143,7 +145,8 @@ macro_rules! simd_i_ty {
143145
[impl_eq, $id],
144146
[impl_partial_eq, $id],
145147
[impl_default, $id, $elem_ty],
146-
[impl_int_minmax_ops, $id]
148+
[impl_int_minmax_ops, $id],
149+
[impl_swap_bytes, $id]
147150
);
148151

149152
$test_macro!(
@@ -197,7 +200,8 @@ macro_rules! simd_u_ty {
197200
[impl_eq, $id],
198201
[impl_partial_eq, $id],
199202
[impl_default, $id, $elem_ty],
200-
[impl_int_minmax_ops, $id]
203+
[impl_int_minmax_ops, $id],
204+
[impl_swap_bytes, $id]
201205
);
202206

203207
$test_macro!(
@@ -221,6 +225,7 @@ macro_rules! simd_u_ty {
221225
test_default!($id, $elem_ty);
222226
test_mask_select!($mask_ty, $id, $elem_ty);
223227
test_int_minmax_ops!($id, $elem_ty);
228+
test_swap_bytes!($id, $elem_ty);
224229
}
225230
);
226231
}

coresimd/ppsv/api/swap_bytes.rs

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
//! Horizontal byte swapping and endianness conversions (`swap_bytes`, `to_le`, `to_be`).
2+
3+
macro_rules! impl_swap_bytes {
    ($id:ident) => {
        impl $id {
            /// Returns the vector with its byte order reversed.
            ///
            /// The work is delegated to the `SwapBytes` implementation in
            /// the `codegen::swap_bytes` module.
            #[inline]
            pub fn swap_bytes(self) -> Self {
                use super::codegen::swap_bytes::SwapBytes;

                // `SwapBytes::swap_bytes` is declared as an `unsafe fn`,
                // hence the `unsafe` block.
                unsafe { SwapBytes::swap_bytes(self) }
            }

            /// Converts `self` from the target's endianness to little
            /// endian.
            ///
            /// This is the identity on little endian targets; on every
            /// other target the bytes are swapped.
            #[inline]
            pub fn to_le(self) -> Self {
                if cfg!(target_endian = "little") {
                    self
                } else {
                    self.swap_bytes()
                }
            }

            /// Converts `self` from the target's endianness to big endian.
            ///
            /// This is the identity on big endian targets; on every other
            /// target the bytes are swapped.
            #[inline]
            pub fn to_be(self) -> Self {
                if cfg!(target_endian = "big") {
                    self
                } else {
                    self.swap_bytes()
                }
            }
        }
    };
}
48+
49+
/// Generates the `swap_bytes`, `to_le` and `to_be` tests for the vector type
/// `$id` whose lanes have type `$elem_ty`.
///
/// Each test fills a vector from the leading bytes of `BYTES`, applies the
/// method under test, stores the result back, and compares the leading
/// `size_of::<$id>()` bytes against either the reversed or the untouched
/// input.
#[cfg(test)]
macro_rules! test_swap_bytes {
    ($id:ident, $elem_ty:ty) => {
        use coresimd::simd::$id;
        use std::{mem, slice};

        const BYTES: [u8; 64] = [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
            35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
        ];

        /// Byte buffer over-aligned to 64 bytes.
        ///
        /// A plain `[u8; 64]` only has alignment 1, so viewing it as a slice
        /// of wider lanes would create a misaligned reference (undefined
        /// behaviour). The explicit alignment makes that view sound.
        #[repr(align(64))]
        struct AlignedBytes([u8; 64]);

        // Loads a vector from `BYTES`, applies `$func` to it, stores the
        // result back and evaluates to the resulting 64-byte buffer.
        macro_rules! swap {
            ($func: ident) => {{
                // catch possible future >512 vectors
                assert!(mem::size_of::<$id>() <= 64);

                let mut actual = AlignedBytes(BYTES);
                {
                    // SAFETY: the buffer is 64-byte aligned, which is
                    // assumed to be at least the alignment of any integer
                    // lane type, and the assertion above bounds the vector
                    // size by the buffer size, so the `lanes()` elements are
                    // expected to lie inside the buffer.
                    let elems: &mut [$elem_ty] = unsafe {
                        slice::from_raw_parts_mut(
                            actual.0.as_mut_ptr() as *mut $elem_ty,
                            $id::lanes(),
                        )
                    };

                    let vec = $id::load_unaligned(elems);
                    vec.$func().store_unaligned(elems);
                }

                actual.0
            }};
        }

        // Asserts that `$func` reverses the bytes of the whole vector.
        macro_rules! test_swap {
            ($func: ident) => {{
                let actual = swap!($func);
                let len = mem::size_of::<$id>();

                assert!(
                    actual[..len].iter().eq(BYTES[..len].iter().rev()),
                    "`{}` did not reverse the {} vector bytes",
                    stringify!($func),
                    len
                );
            }};
        }

        // Asserts that `$func` leaves the bytes of the vector untouched.
        macro_rules! test_no_swap {
            ($func: ident) => {{
                let actual = swap!($func);
                let len = mem::size_of::<$id>();

                assert!(
                    actual[..len].iter().eq(BYTES[..len].iter()),
                    "`{}` unexpectedly changed the {} vector bytes",
                    stringify!($func),
                    len
                );
            }};
        }

        #[test]
        fn swap_bytes() {
            test_swap!(swap_bytes);
        }

        #[test]
        fn to_le() {
            #[cfg(target_endian = "little")]
            {
                test_no_swap!(to_le);
            }
            #[cfg(not(target_endian = "little"))]
            {
                test_swap!(to_le);
            }
        }

        #[test]
        fn to_be() {
            #[cfg(target_endian = "big")]
            {
                test_no_swap!(to_be);
            }
            #[cfg(not(target_endian = "big"))]
            {
                test_swap!(to_be);
            }
        }
    };
}

coresimd/ppsv/codegen/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
pub mod wrapping;
55

66
pub mod masks_reductions;
7+
pub mod swap_bytes;
78

89
pub mod abs;
910
pub mod cos;

coresimd/ppsv/codegen/swap_bytes.rs

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
//! Horizontal byte swapping.
2+
3+
#![allow(unused)]
4+
5+
use coresimd::simd::*;
6+
7+
/// Code generation hook for reversing the byte order of a vector.
///
/// Implemented in this module through `impl_swap_bytes!` for the vector
/// types listed in the `vector_impl!` invocation.
pub trait SwapBytes {
8+
/// Returns `self` with its bytes in reverse order.
///
/// Declared `unsafe` because the implementations in this module are built
/// on the `simd_shuffle*` platform intrinsics.
unsafe fn swap_bytes(self) -> Self;
9+
}
10+
11+
// TODO: switch to shuffle API once it lands
12+
// TODO: investigate `llvm.bswap`
13+
/// Implements `SwapBytes` for the listed vector types.
///
/// The first argument selects the total vector width (`v16` … `v512`); the
/// remaining arguments are the vector types of that width, each followed by
/// a comma.
macro_rules! impl_swap_bytes {
    // 16-bit vectors: the two lanes are shuffled directly, without first
    // reinterpreting the vector as bytes.
    (v16, $($id:ident,)+) => {$(
        impl SwapBytes for $id {
            #[inline]
            unsafe fn swap_bytes(self) -> Self {
                use coresimd::simd_llvm::simd_shuffle2;

                const REVERSED: [u32; 2] = [1, 0];
                simd_shuffle2(self, self, REVERSED)
            }
        }
    )+};
    (v32, $($id:ident,)+) => {
        impl_swap_bytes!(@reverse simd_shuffle4, u8x4, 4, [3, 2, 1, 0], $($id,)+);
    };
    (v64, $($id:ident,)+) => {
        impl_swap_bytes!(
            @reverse simd_shuffle8, u8x8, 8,
            [7, 6, 5, 4, 3, 2, 1, 0],
            $($id,)+
        );
    };
    (v128, $($id:ident,)+) => {
        impl_swap_bytes!(
            @reverse simd_shuffle16, u8x16, 16,
            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
            $($id,)+
        );
    };
    (v256, $($id:ident,)+) => {
        impl_swap_bytes!(
            @reverse simd_shuffle32, u8x32, 32,
            [
                31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
            ],
            $($id,)+
        );
    };
    (v512, $($id:ident,)+) => {
        impl_swap_bytes!(
            @reverse simd_shuffle64, u8x64, 64,
            [
                63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48,
                47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
                31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
            ],
            $($id,)+
        );
    };
    // Internal rule shared by the 32- to 512-bit arms: reinterpret the vector
    // as `$bytes` (a `u8` vector of the same width), reverse its `$len` lanes
    // with `$shuffle`, and reinterpret the result as the original type.
    // The index table is taken as one token tree so that it can be reused
    // for every `$id` in the repetition.
    (@reverse $shuffle:ident, $bytes:ident, $len:expr, $indices:tt, $($id:ident,)+) => {$(
        impl SwapBytes for $id {
            #[inline]
            unsafe fn swap_bytes(self) -> Self {
                use coresimd::simd_llvm::$shuffle;

                const REVERSED: [u32; $len] = $indices;
                let bytes = $bytes::from_bits(self);
                let swapped: $bytes = $shuffle(bytes, bytes, REVERSED);
                $id::from_bits(swapped)
            }
        }
    )+};
}
99+
100+
// Implement `SwapBytes` for the integer vector types below, grouped by the
// total width of the vector in bits.
vector_impl!(
    [impl_swap_bytes, v16, u8x2, i8x2,],
    [impl_swap_bytes, v32, u8x4, i8x4, u16x2, i16x2,],
    [impl_swap_bytes, v64, u8x8, i8x8, u16x4, i16x4, u32x2, i32x2,],
    [impl_swap_bytes, v128, u8x16, i8x16, u16x8, i16x8, u32x4, i32x4, u64x2, i64x2,],
    [impl_swap_bytes, v256, u8x32, i8x32, u16x16, i16x16, u32x8, i32x8, u64x4, i64x4,],
    [impl_swap_bytes, v512, u8x64, i8x64, u16x32, i16x32, u32x16, i32x16, u64x8, i64x8,]
);

coresimd/simd_llvm.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ extern "platform-intrinsic" {
1515
pub fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U;
1616
pub fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
1717
pub fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U;
18+
pub fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U;
19+
pub fn simd_shuffle128<T, U>(x: T, y: T, idx: [u32; 128]) -> U;
1820

1921
pub fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
2022
pub fn simd_extract<T, U>(x: T, idx: u32) -> U;

0 commit comments

Comments
 (0)