Skip to content

Commit 5f6ccfc

Browse files
committed
Support WebAssembly Relaxed SIMD
This makes use of the [WebAssembly Relaxed SIMD](https://github.com/WebAssembly/relaxed-simd) instructions, which trade off consistent results across architectures in certain edge cases for better performance. These differing edge-case behaviors already exist in the native equivalents used by the SIMD implementation, so this does not regress correctness.
1 parent 6b4e8c7 commit 5f6ccfc

File tree

6 files changed

+35
-12
lines changed

6 files changed

+35
-12
lines changed

.cargo/config.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
[target.wasm32-wasi]
1+
[target.wasm32-wasip1]
22
runner = "wasmtime run --dir ."

.github/workflows/main.yml

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,23 +66,28 @@ jobs:
6666
uses: dtolnay/rust-toolchain@master
6767
with:
6868
toolchain: stable
69-
target: wasm32-wasi
69+
target: wasm32-wasip1
7070

7171
- name: Install wasmtime
7272
run: |
7373
curl https://wasmtime.dev/install.sh -sSf | bash
7474
echo "$HOME/.wasmtime/bin" >> $GITHUB_PATH
7575
7676
- name: Build with minimal features (no_std)
77-
run: cargo build --target wasm32-wasi --verbose --no-default-features --features no-std-float
77+
run: cargo build --target wasm32-wasip1 --verbose --no-default-features --features no-std-float
7878

7979
- name: Run tests without SIMD
80-
run: cargo test --target wasm32-wasi --verbose --no-default-features --features png-format
80+
run: cargo test --target wasm32-wasip1 --verbose --no-default-features --features png-format
8181

8282
- name: Run tests with SIMD128
8383
env:
8484
RUSTFLAGS: -Ctarget-feature=+simd128,+bulk-memory,+nontrapping-fptoint,+sign-ext
85-
run: cargo test --target wasm32-wasi
85+
run: cargo test --target wasm32-wasip1
86+
87+
- name: Run tests with Relaxed SIMD
88+
env:
89+
RUSTFLAGS: -Ctarget-feature=+simd128,+relaxed-simd,+bulk-memory,+nontrapping-fptoint,+sign-ext
90+
run: cargo test --target wasm32-wasip1
8691

8792
aarch64:
8893
runs-on: ubuntu-20.04

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ default = ["std", "simd", "png-format"]
3131
std = ["tiny-skia-path/std"]
3232
no-std-float = ["tiny-skia-path/no-std-float"]
3333

34-
# Enables SIMD instructions on x86 (from SSE up to AVX2), WebAssembly (SIMD128)
35-
# and AArch64 (Neon).
34+
# Enables SIMD instructions on x86 (from SSE up to AVX2), WebAssembly (SIMD128,
35+
# Relaxed SIMD) and AArch64 (Neon).
3636
# Has no effect on other targets. Present mainly for testing.
3737
simd = []
3838

src/wide/f32x4_t.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ impl f32x4 {
9090
cfg_if::cfg_if! {
9191
if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
9292
Self(unsafe { _mm_max_ps(self.0, rhs.0) })
93+
} else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
94+
Self(f32x4_relaxed_max(self.0, rhs.0))
9395
} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
9496
Self(f32x4_pmax(self.0, rhs.0))
9597
} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
@@ -111,6 +113,8 @@ impl f32x4 {
111113
cfg_if::cfg_if! {
112114
if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
113115
Self(unsafe { _mm_min_ps(self.0, rhs.0) })
116+
} else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
117+
Self(f32x4_relaxed_min(self.0, rhs.0))
114118
} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
115119
Self(f32x4_pmin(self.0, rhs.0))
116120
} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
@@ -245,6 +249,8 @@ impl f32x4 {
245249
cfg_if::cfg_if! {
246250
if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
247251
Self(unsafe { _mm_blendv_ps(f.0, t.0, self.0) })
252+
} else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
253+
Self(i32x4_relaxed_laneselect(t.0, f.0, self.0))
248254
} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
249255
Self(v128_bitselect(t.0, f.0, self.0))
250256
} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
@@ -302,6 +308,8 @@ impl f32x4 {
302308
cfg_if::cfg_if! {
303309
if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
304310
i32x4(unsafe { _mm_cvtps_epi32(self.0) })
311+
} else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
312+
i32x4(i32x4_relaxed_trunc_f32x4(self.round().0))
305313
} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
306314
i32x4(i32x4_trunc_sat_f32x4(self.round().0))
307315
} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {
@@ -325,6 +333,8 @@ impl f32x4 {
325333
cfg_if::cfg_if! {
326334
if #[cfg(all(feature = "simd", target_feature = "sse2"))] {
327335
i32x4(unsafe { _mm_cvttps_epi32(self.0) })
336+
} else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
337+
i32x4(i32x4_relaxed_trunc_f32x4(self.0))
328338
} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
329339
i32x4(i32x4_trunc_sat_f32x4(self.0))
330340
} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {

src/wide/f32x8_t.rs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,17 @@ impl f32x8 {
4343
}
4444

4545
pub fn floor(self) -> Self {
46-
let roundtrip: f32x8 = cast(self.trunc_int().to_f32x8());
47-
roundtrip
48-
- roundtrip
49-
.cmp_gt(self)
50-
.blend(f32x8::splat(1.0), f32x8::default())
46+
cfg_if::cfg_if! {
47+
if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
48+
Self(self.0.floor(), self.1.floor())
49+
} else {
50+
let roundtrip: f32x8 = cast(self.trunc_int().to_f32x8());
51+
roundtrip
52+
- roundtrip
53+
.cmp_gt(self)
54+
.blend(f32x8::splat(1.0), f32x8::default())
55+
}
56+
}
5157
}
5258

5359
pub fn fract(self) -> Self {

src/wide/i32x4_t.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ impl i32x4 {
5656
cfg_if::cfg_if! {
5757
if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] {
5858
Self(unsafe { _mm_blendv_epi8(f.0, t.0, self.0) })
59+
} else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] {
60+
Self(i32x4_relaxed_laneselect(t.0, f.0, self.0))
5961
} else if #[cfg(all(feature = "simd", target_feature = "simd128"))] {
6062
Self(v128_bitselect(t.0, f.0, self.0))
6163
} else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {

0 commit comments

Comments
 (0)