1
1
//! SIMD (Single Instruction; Multiple Data) convenience functions.
2
2
//!
3
3
//! May offer a potential boost in performance on some targets by performing
4
- //! the same operations on multiple elements at once.
4
+ //! the same operation on multiple elements at once.
5
5
//!
6
6
//! Some functions are known to not work on MIPS.
7
7
@@ -10,7 +10,6 @@ const builtin = @import("builtin");
10
10
11
11
pub fn suggestVectorLengthForCpu (comptime T : type , comptime cpu : std.Target.Cpu ) ? comptime_int {
12
12
// This is guesswork; if you have better suggestions, add them or edit the current ones here.
13
- // This can run in comptime only, but stage 1 fails at it, stage 2 can understand it
14
13
const element_bit_size = @max (8 , std .math .ceilPowerOfTwo (u16 , @bitSizeOf (T )) catch unreachable );
15
14
const vector_bit_size : u16 = blk : {
16
15
if (cpu .arch .isX86 ()) {
@@ -37,8 +36,37 @@ pub fn suggestVectorLengthForCpu(comptime T: type, comptime cpu: std.Target.Cpu)
37
36
// the 2048 bits or using just 64 per vector or something in between
38
37
if (std .Target .mips .featureSetHas (cpu .features , std .Target .mips .Feature .mips3d )) break :blk 64 ;
39
38
} else if (cpu .arch .isRISCV ()) {
40
- // in risc-v the Vector Extension allows configurable vector sizes, but a standard size of 128 is a safe estimate
41
- if (std .Target .riscv .featureSetHas (cpu .features , .v )) break :blk 128 ;
39
+ // In RISC-V, vector registers are length-agnostic, so there's no good way to determine the best size.
40
+ // The usual vector length in most RISC-V CPUs is 256 bits; however, it can reach multiple kB.
41
+ if (std .Target .riscv .featureSetHas (cpu .features , .v )) {
42
+ var vec_bit_length : u32 = 256 ;
43
+ if (std .Target .riscv .featureSetHas (cpu .features , .zvl32b )) {
44
+ vec_bit_length = 32 ;
45
+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl64b )) {
46
+ vec_bit_length = 64 ;
47
+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl128b )) {
48
+ vec_bit_length = 128 ;
49
+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl256b )) {
50
+ vec_bit_length = 256 ;
51
+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl512b )) {
52
+ vec_bit_length = 512 ;
53
+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl1024b )) {
54
+ vec_bit_length = 1024 ;
55
+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl2048b )) {
56
+ vec_bit_length = 2048 ;
57
+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl4096b )) {
58
+ vec_bit_length = 4096 ;
59
+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl8192b )) {
60
+ vec_bit_length = 8192 ;
61
+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl16384b )) {
62
+ vec_bit_length = 16384 ;
63
+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl32768b )) {
64
+ vec_bit_length = 32768 ;
65
+ } else if (std .Target .riscv .featureSetHas (cpu .features , .zvl65536b )) {
66
+ vec_bit_length = 65536 ;
67
+ }
68
+ break :blk vec_bit_length ;
69
+ }
42
70
} else if (cpu .arch .isSPARC ()) {
43
71
// TODO: Test Sparc capability to handle bigger vectors
44
72
// In theory Sparc have 32 registers of 64 bits which can use in parallel
0 commit comments