Skip to content

Commit 1de242c

Browse files
authored
Merge pull request #17416 from jacobly0/x86_64
x86_64: implement more `f80` operations and other features
2 parents a9b37ac + b5dedd7 commit 1de242c

15 files changed

+1606
-805
lines changed

src/arch/x86_64/CodeGen.zig

Lines changed: 1497 additions & 736 deletions
Large diffs are not rendered by default.

src/arch/x86_64/Encoding.zig

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ pub const Mnemonic = enum {
260260
ud2,
261261
xadd, xchg, xor,
262262
// X87
263-
fisttp, fld,
263+
fabs, fchs, ffree, fisttp, fld, fst, fstp,
264264
// MMX
265265
movd, movq,
266266
packssdw, packsswb, packuswb,
@@ -316,6 +316,8 @@ pub const Mnemonic = enum {
316316
xorpd,
317317
// SSE3
318318
movddup, movshdup, movsldup,
319+
// SSSE3
320+
pabsb, pabsd, pabsw,
319321
// SSE4.1
320322
blendpd, blendps, blendvpd, blendvps,
321323
extractps,
@@ -353,6 +355,7 @@ pub const Mnemonic = enum {
353355
vmovupd, vmovups,
354356
vmulpd, vmulps, vmulsd, vmulss,
355357
vorpd, vorps,
358+
vpabsb, vpabsd, vpabsw,
356359
vpackssdw, vpacksswb, vpackusdw, vpackuswb,
357360
vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw,
358361
vpand, vpandn,
@@ -750,6 +753,7 @@ pub const Feature = enum {
750753
sse2,
751754
sse3,
752755
sse4_1,
756+
ssse3,
753757
x87,
754758
};
755759

src/arch/x86_64/Mir.zig

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -342,14 +342,10 @@ pub const Inst = struct {
342342
div,
343343
///
344344
int3,
345-
/// Store integer with truncation
346-
istt,
347345
/// Conditional jump
348346
j,
349347
/// Jump
350348
jmp,
351-
/// Load floating-point value
352-
ld,
353349
/// Load effective address
354350
lea,
355351
/// Load string
@@ -446,6 +442,19 @@ pub const Inst = struct {
446442
/// Bitwise logical xor of packed double-precision floating-point values
447443
xor,
448444

445+
/// Absolute value
446+
abs,
447+
/// Change sign
448+
chs,
449+
/// Free floating-point register
450+
free,
451+
/// Store integer with truncation
452+
istt,
453+
/// Load floating-point value
454+
ld,
455+
/// Store floating-point value
456+
st,
457+
449458
/// Pack with signed saturation
450459
ackssw,
451460
/// Pack with signed saturation

src/arch/x86_64/abi.zig

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ pub fn classifyWindows(ty: Type, mod: *Module) Class {
6464
}
6565
}
6666

67-
pub const Context = enum { ret, arg, other };
67+
pub const Context = enum { ret, arg, field, other };
6868

6969
/// There are a maximum of 8 possible return slots. Returned values are in
7070
/// the beginning of the array; unused slots are filled with .none.
@@ -120,7 +120,7 @@ pub fn classifySystemV(ty: Type, mod: *Module, ctx: Context) [8]Class {
120120
},
121121
.Float => switch (ty.floatBits(target)) {
122122
16 => {
123-
if (ctx == .other) {
123+
if (ctx == .field) {
124124
result[0] = .memory;
125125
} else {
126126
// TODO clang doesn't allow __fp16 as .ret or .arg
@@ -140,7 +140,7 @@ pub fn classifySystemV(ty: Type, mod: *Module, ctx: Context) [8]Class {
140140
// "Arguments of types __float128, _Decimal128 and __m128 are
141141
// split into two halves. The least significant ones belong
142142
// to class SSE, the most significant one to class SSEUP."
143-
if (ctx == .other) {
143+
if (ctx == .field) {
144144
result[0] = .memory;
145145
return result;
146146
}
@@ -229,7 +229,7 @@ pub fn classifySystemV(ty: Type, mod: *Module, ctx: Context) [8]Class {
229229
if (field_align != .none and field_align.compare(.lt, field_ty.abiAlignment(mod)))
230230
return memory_class;
231231
const field_size = field_ty.abiSize(mod);
232-
const field_class_array = classifySystemV(field_ty, mod, .other);
232+
const field_class_array = classifySystemV(field_ty, mod, .field);
233233
const field_class = std.mem.sliceTo(&field_class_array, .none);
234234
if (byte_i + field_size <= 8) {
235235
// Combine this field with the previous one.
@@ -347,7 +347,7 @@ pub fn classifySystemV(ty: Type, mod: *Module, ctx: Context) [8]Class {
347347
return memory_class;
348348
}
349349
// Combine this field with the previous one.
350-
const field_class = classifySystemV(field_ty.toType(), mod, .other);
350+
const field_class = classifySystemV(field_ty.toType(), mod, .field);
351351
for (&result, 0..) |*result_item, i| {
352352
const field_item = field_class[i];
353353
// "If both classes are equal, this is the resulting class."
@@ -444,7 +444,7 @@ pub const SysV = struct {
444444
/// These registers need to be preserved (saved on the stack) and restored by the caller before
445445
/// the caller relinquishes control to a subroutine via call instruction (or similar).
446446
/// In other words, these registers are free to use by the callee.
447-
pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 } ++ sse_avx_regs;
447+
pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 } ++ x87_regs ++ sse_avx_regs;
448448

449449
pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 };
450450
pub const c_abi_sse_param_regs = sse_avx_regs[0..8].*;
@@ -459,7 +459,7 @@ pub const Win64 = struct {
459459
/// These registers need to be preserved (saved on the stack) and restored by the caller before
460460
/// the caller relinquishes control to a subroutine via call instruction (or similar).
461461
/// In other words, these registers are free to use by the callee.
462-
pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .r8, .r9, .r10, .r11 } ++ sse_avx_regs;
462+
pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .r8, .r9, .r10, .r11 } ++ x87_regs ++ sse_avx_regs;
463463

464464
pub const c_abi_int_param_regs = [_]Register{ .rcx, .rdx, .r8, .r9 };
465465
pub const c_abi_sse_param_regs = sse_avx_regs[0..4].*;
@@ -531,30 +531,32 @@ pub fn getCAbiSseReturnRegs(cc: std.builtin.CallingConvention) []const Register
531531
const gp_regs = [_]Register{
532532
.rax, .rcx, .rdx, .rbx, .rsi, .rdi, .r8, .r9, .r10, .r11, .r12, .r13, .r14, .r15,
533533
};
534+
const x87_regs = [_]Register{
535+
.st0, .st1, .st2, .st3, .st4, .st5, .st6, .st7,
536+
};
534537
const sse_avx_regs = [_]Register{
535538
.ymm0, .ymm1, .ymm2, .ymm3, .ymm4, .ymm5, .ymm6, .ymm7,
536539
.ymm8, .ymm9, .ymm10, .ymm11, .ymm12, .ymm13, .ymm14, .ymm15,
537540
};
538-
const allocatable_regs = gp_regs ++ sse_avx_regs;
539-
pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_regs);
541+
const allocatable_regs = gp_regs ++ x87_regs[0 .. x87_regs.len - 1] ++ sse_avx_regs;
542+
pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, allocatable_regs);
540543

541544
// Register classes
542545
const RegisterBitSet = RegisterManager.RegisterBitSet;
543546
pub const RegisterClass = struct {
544547
pub const gp: RegisterBitSet = blk: {
545548
var set = RegisterBitSet.initEmpty();
546-
set.setRangeValue(.{
547-
.start = 0,
548-
.end = gp_regs.len,
549-
}, true);
549+
for (allocatable_regs, 0..) |reg, index| if (reg.class() == .general_purpose) set.set(index);
550+
break :blk set;
551+
};
552+
pub const x87: RegisterBitSet = blk: {
553+
var set = RegisterBitSet.initEmpty();
554+
for (allocatable_regs, 0..) |reg, index| if (reg.class() == .x87) set.set(index);
550555
break :blk set;
551556
};
552557
pub const sse: RegisterBitSet = blk: {
553558
var set = RegisterBitSet.initEmpty();
554-
set.setRangeValue(.{
555-
.start = gp_regs.len,
556-
.end = allocatable_regs.len,
557-
}, true);
559+
for (allocatable_regs, 0..) |reg, index| if (reg.class() == .sse) set.set(index);
558560
break :blk set;
559561
};
560562
};

src/arch/x86_64/encodings.zig

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -829,13 +829,28 @@ pub const table = [_]Entry{
829829
.{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none },
830830

831831
// X87
832+
.{ .fabs, .np, &.{}, &.{ 0xd9, 0xe1 }, 0, .none, .x87 },
833+
834+
.{ .fchs, .np, &.{}, &.{ 0xd9, 0xe0 }, 0, .none, .x87 },
835+
836+
.{ .ffree, .o, &.{ .st }, &.{ 0xdd, 0xc0 }, 0, .none, .x87 },
837+
832838
.{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 },
833839
.{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 },
834840
.{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .none, .x87 },
835841

836-
.{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 },
837-
.{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 },
838-
.{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 },
842+
.{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 },
843+
.{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 },
844+
.{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 },
845+
.{ .fld, .o, &.{ .st }, &.{ 0xd9, 0xc0 }, 0, .none, .x87 },
846+
847+
.{ .fst, .m, &.{ .m32 }, &.{ 0xd9 }, 2, .none, .x87 },
848+
.{ .fst, .m, &.{ .m64 }, &.{ 0xdd }, 2, .none, .x87 },
849+
.{ .fst, .o, &.{ .st }, &.{ 0xdd, 0xd0 }, 0, .none, .x87 },
850+
.{ .fstp, .m, &.{ .m32 }, &.{ 0xd9 }, 3, .none, .x87 },
851+
.{ .fstp, .m, &.{ .m64 }, &.{ 0xdd }, 3, .none, .x87 },
852+
.{ .fstp, .m, &.{ .m80 }, &.{ 0xdb }, 7, .none, .x87 },
853+
.{ .fstp, .o, &.{ .st }, &.{ 0xdd, 0xd8 }, 0, .none, .x87 },
839854

840855
// SSE
841856
.{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse },
@@ -1093,6 +1108,14 @@ pub const table = [_]Entry{
10931108

10941109
.{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 },
10951110

1111+
// SSSE3
1112+
.{ .pabsb, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1c }, 0, .none, .ssse3 },
1113+
.{ .pabsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .none, .ssse3 },
1114+
.{ .pabsd, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1e }, 0, .none, .ssse3 },
1115+
.{ .pabsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .none, .ssse3 },
1116+
.{ .pabsw, .rm, &.{ .mm, .mm_m64 }, &.{ 0x0f, 0x38, 0x1d }, 0, .none, .ssse3 },
1117+
.{ .pabsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .none, .ssse3 },
1118+
10961119
// SSE4.1
10971120
.{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 },
10981121

@@ -1353,6 +1376,10 @@ pub const table = [_]Entry{
13531376
.{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx },
13541377
.{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx },
13551378

1379+
.{ .vpabsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_128_wig, .avx },
1380+
.{ .vpabsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_128_wig, .avx },
1381+
.{ .vpabsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_128_wig, .avx },
1382+
13561383
.{ .vpacksswb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x63 }, 0, .vex_128_wig, .avx },
13571384
.{ .vpackssdw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6b }, 0, .vex_128_wig, .avx },
13581385

@@ -1522,6 +1549,10 @@ pub const table = [_]Entry{
15221549
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
15231550
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
15241551

1552+
.{ .vpabsb, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_256_wig, .avx2 },
1553+
.{ .vpabsd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_256_wig, .avx2 },
1554+
.{ .vpabsw, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_256_wig, .avx2 },
1555+
15251556
.{ .vpacksswb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x63 }, 0, .vex_256_wig, .avx2 },
15261557
.{ .vpackssdw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6b }, 0, .vex_256_wig, .avx2 },
15271558

src/register_manager.zig

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,13 @@ pub fn RegisterManager(
9191
return null;
9292
}
9393

94-
pub fn indexOfRegIntoTracked(reg: Register) ?RegisterBitSet.ShiftInt {
94+
pub fn indexOfRegIntoTracked(
95+
reg: Register,
96+
) ?std.math.IntFittingRange(0, tracked_registers.len) {
9597
return indexOfReg(tracked_registers, reg);
9698
}
9799

98-
pub fn regAtTrackedIndex(index: RegisterBitSet.ShiftInt) Register {
100+
pub fn regAtTrackedIndex(index: std.math.IntFittingRange(0, tracked_registers.len)) Register {
99101
return tracked_registers[index];
100102
}
101103

test/behavior/abs.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ test "@abs floats" {
9595
try comptime testAbsFloats(f64);
9696
try testAbsFloats(f64);
9797
try comptime testAbsFloats(f80);
98-
if (builtin.zig_backend != .stage2_x86_64 and builtin.zig_backend != .stage2_wasm) try testAbsFloats(f80);
98+
if (builtin.zig_backend != .stage2_wasm) try testAbsFloats(f80);
9999
try comptime testAbsFloats(f128);
100100
if (builtin.zig_backend != .stage2_wasm) try testAbsFloats(f128);
101101
}
@@ -280,7 +280,7 @@ test "@abs float vectors" {
280280
try testAbsFloatVectors(f16, 16);
281281
try comptime testAbsFloatVectors(f16, 17);
282282

283-
try testAbsFloatVectors(f32, 17);
283+
try testAbsFloatVectors(f32, 1);
284284
try comptime testAbsFloatVectors(f32, 1);
285285
try testAbsFloatVectors(f32, 1);
286286
try comptime testAbsFloatVectors(f32, 2);

test/behavior/asm.zig

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ test "module level assembly" {
4242

4343
test "output constraint modifiers" {
4444
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
45-
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
4645
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
4746
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
4847
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -66,7 +65,6 @@ test "output constraint modifiers" {
6665

6766
test "alternative constraints" {
6867
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
69-
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
7068
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
7169
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
7270
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
@@ -163,8 +161,8 @@ export fn derp() i32 {
163161
}
164162

165163
test "rw constraint (x86_64)" {
166-
if (builtin.target.cpu.arch != .x86_64 or builtin.zig_backend != .stage2_llvm)
167-
return error.SkipZigTest;
164+
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
165+
if (builtin.target.cpu.arch != .x86_64) return error.SkipZigTest;
168166

169167
var res: i32 = 5;
170168
asm ("addl %[b], %[a]"

test/behavior/cast.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,11 @@ test "@floatFromInt" {
118118

119119
test "@floatFromInt(f80)" {
120120
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
121-
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
122121
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
123122
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
124123
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
125124
if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArmOrThumb()) return error.SkipZigTest;
125+
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
126126

127127
const S = struct {
128128
fn doTheTest(comptime Int: type) !void {
@@ -1476,9 +1476,9 @@ test "pointer to empty struct literal to mutable slice" {
14761476
test "coerce between pointers of compatible differently-named floats" {
14771477
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
14781478
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
1479-
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
14801479
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
14811480
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
1481+
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
14821482

14831483
if (builtin.os.tag == .windows) {
14841484
// https://github.com/ziglang/zig/issues/12396

0 commit comments

Comments
 (0)