
Commit f4bb8be

Merge pull request #6654 from joachimschmidt557/stage2-arm
stage2 ARM: more stuff
2 parents aaff66b + 7b4f3c7

File tree

2 files changed: +360 -49 lines changed

src/codegen.zig: +142 -41

@@ -573,25 +573,54 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             // sub sp, sp, #reloc
             mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.push(.al, .{ .fp, .lr }).toU32());
             mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, .fp, Instruction.Operand.reg(.sp, Instruction.Operand.Shift.none)).toU32());
-            // TODO: prepare stack for local variables
-            // const backpatch_reloc = try self.code.addManyAsArray(4);
+            const backpatch_reloc = self.code.items.len;
+            try self.code.resize(backpatch_reloc + 4);
 
             try self.dbgSetPrologueEnd();
 
             try self.genBody(self.mod_fn.analysis.success);
 
             // Backpatch stack offset
-            // const stack_end = self.max_end_stack;
-            // const aligned_stack_end = mem.alignForward(stack_end, self.stack_align);
-            // mem.writeIntLittle(u32, backpatch_reloc, Instruction.sub(.al, .sp, .sp, Instruction.Operand.imm()));
+            const stack_end = self.max_end_stack;
+            const aligned_stack_end = mem.alignForward(stack_end, self.stack_align);
+            if (Instruction.Operand.fromU32(@intCast(u32, aligned_stack_end))) |op| {
+                mem.writeIntLittle(u32, self.code.items[backpatch_reloc..][0..4], Instruction.sub(.al, .sp, .sp, op).toU32());
+            } else {
+                return self.fail(self.src, "TODO ARM: allow larger stacks", .{});
+            }
 
             try self.dbgSetEpilogueBegin();
 
+            // exitlude jumps
+            if (self.exitlude_jump_relocs.items.len == 1) {
+                // There is only one relocation. Hence,
+                // this relocation must be at the end of
+                // the code. Therefore, we can just delete
+                // the space initially reserved for the
+                // jump
+                self.code.items.len -= 4;
+            } else for (self.exitlude_jump_relocs.items) |jmp_reloc| {
+                const amt = self.code.items.len - (jmp_reloc + 4);
+                if (amt == 0) {
+                    // This return is at the end of the
+                    // code block. We can't just delete
+                    // the space because there may be
+                    // other jumps we already relocated to
+                    // the address. Instead, insert a nop
+                    mem.writeIntLittle(u32, self.code.items[jmp_reloc..][0..4], Instruction.nop().toU32());
+                } else {
+                    if (math.cast(i26, amt)) |offset| {
+                        mem.writeIntLittle(u32, self.code.items[jmp_reloc..][0..4], Instruction.b(.al, offset).toU32());
+                    } else |err| {
+                        return self.fail(self.src, "exitlude jump is too large", .{});
+                    }
+                }
+            }
+
             // mov sp, fp
             // pop {fp, pc}
-            // TODO: return by jumping to this code, use relocations
-            // mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, .sp, Instruction.Operand.reg(.fp, Instruction.Operand.Shift.none)).toU32());
-            // mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.pop(.al, .{ .fp, .pc }).toU32());
+            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, .sp, Instruction.Operand.reg(.fp, Instruction.Operand.Shift.none)).toU32());
+            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.pop(.al, .{ .fp, .pc }).toU32());
         } else {
             try self.dbgSetPrologueEnd();
             try self.genBody(self.mod_fn.analysis.success);
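
The prologue above no longer emits the `sub sp, sp, #frame` eagerly: it remembers the offset of a reserved 4-byte slot and only fills it in after `genBody` has run, once `max_end_stack` is known. A minimal sketch of that reserve-then-backpatch pattern, using the same std calls as the surrounding code (circa Zig 0.7); the raw word 0xe24dd000 is the standard ARM encoding of `sub sp, sp, #imm` with a zero immediate, used here instead of the tree's Instruction type:

    const std = @import("std");
    const mem = std.mem;

    pub fn main() !void {
        var code = std.ArrayList(u8).init(std.heap.page_allocator);
        defer code.deinit();

        // Prologue: note where the stack adjustment will live, then
        // reserve its 4 bytes without knowing the frame size yet.
        const backpatch_reloc = code.items.len;
        try code.resize(backpatch_reloc + 4);

        // ... generate the body; pretend it ended up needing 24 bytes ...
        const aligned_stack_end: u32 = 24;

        // Backpatch: fill the reserved slot now that the size is known
        // (24 fits the 8-bit immediate field, so a plain OR is enough here).
        mem.writeIntLittle(u32, code.items[backpatch_reloc..][0..4], 0xe24dd000 | aligned_stack_end);

        std.debug.print("patched word: 0x{x}\n", .{mem.readIntLittle(u32, code.items[backpatch_reloc..][0..4])});
    }
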
@@ -1661,12 +1690,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.jalr(.zero, 0, .ra).toU32());
             },
             .arm => {
-                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, .sp, Instruction.Operand.reg(.fp, Instruction.Operand.Shift.none)).toU32());
-                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.pop(.al, .{ .fp, .pc }).toU32());
-                // TODO: jump to the end with relocation
-                // // Just add space for an instruction, patch this later
-                // try self.code.resize(self.code.items.len + 4);
-                // try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4);
+                // Just add space for an instruction, patch this later
+                try self.code.resize(self.code.items.len + 4);
+                try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4);
             },
             else => return self.fail(src, "TODO implement return for {}", .{self.target.cpu.arch}),
         }
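
With this change a `ret` no longer expands the epilogue inline; it reserves a single 4-byte slot and records its offset in `exitlude_jump_relocs`, and the shared epilogue in `gen` later patches every slot with a branch, a nop, or drops it entirely (the cases shown in the first hunk). A toy simulation of that bookkeeping, with illustrative instruction words (0xea000000 is `b` with a zeroed offset field, 0xe320f000 the architectural nop) rather than the tree's Instruction helpers:

    const std = @import("std");
    const mem = std.mem;

    pub fn main() !void {
        const gpa = std.heap.page_allocator;
        var code = std.ArrayList(u8).init(gpa);
        defer code.deinit();
        var exitlude_jump_relocs = std.ArrayList(usize).init(gpa);
        defer exitlude_jump_relocs.deinit();

        // Two `ret`s in the body: each one just reserves a slot and records it.
        var i: usize = 0;
        while (i < 2) : (i += 1) {
            try code.resize(code.items.len + 4);
            try exitlude_jump_relocs.append(code.items.len - 4);
        }

        // Epilogue patching: a slot that sits flush against the epilogue
        // becomes a nop (or, when it is the only slot, is simply dropped);
        // every other slot becomes a forward branch to the epilogue.
        for (exitlude_jump_relocs.items) |jmp_reloc| {
            const amt = code.items.len - (jmp_reloc + 4);
            const word: u32 = if (amt == 0) 0xe320f000 else 0xea000000; // offset encoding elided
            mem.writeIntLittle(u32, code.items[jmp_reloc..][0..4], word);
        }
    }
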
@@ -1932,6 +1958,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     mem.writeIntLittle(i32, self.code.addManyAsArrayAssumeCapacity(4), delta);
                 }
             },
+            .arm => {
+                if (math.cast(i26, @intCast(i32, index) - @intCast(i32, self.code.items.len))) |delta| {
+                    mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.b(.al, delta).toU32());
+                } else |err| {
+                    return self.fail(src, "TODO: enable larger branch offset", .{});
+                }
+            },
             else => return self.fail(src, "TODO implement jump for {}", .{self.target.cpu.arch}),
         }
     }
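
The `math.cast(i26, ...)` guard above matches the reach of an ARM `b`: the instruction carries a signed 24-bit word offset, i.e. about ±32 MiB of byte distance, and the delta is measured in bytes before encoding. A sketch of the textbook encoding arithmetic, in the same Zig dialect as the diff; whether the tree's Instruction.b subtracts the 8-byte PC prefetch itself is an assumption here:

    const std = @import("std");

    // Encode `b` (condition AL) for a byte delta measured from the start of
    // the branch itself: the top byte 0xea is cond+opcode, the low 24 bits
    // are the signed word offset after accounting for PC reading 8 bytes ahead.
    fn encodeBranch(delta_bytes: i32) u32 {
        const word_offset = @divExact(delta_bytes - 8, 4);
        return 0xea000000 | @as(u32, @truncate(u24, @bitCast(u32, word_offset)));
    }

    pub fn main() void {
        // A branch to itself is the canonical 0xeafffffe.
        std.debug.print("b . => 0x{x}\n", .{encodeBranch(0)});
    }
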
@@ -2167,6 +2200,58 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
 
         fn genSetStack(self: *Self, src: usize, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void {
             switch (arch) {
+                .arm => switch (mcv) {
+                    .dead => unreachable,
+                    .ptr_stack_offset => unreachable,
+                    .ptr_embedded_in_code => unreachable,
+                    .unreach, .none => return, // Nothing to do.
+                    .undef => {
+                        if (!self.wantSafety())
+                            return; // The already existing value will do just fine.
+                        // TODO Upgrade this to a memset call when we have that available.
+                        switch (ty.abiSize(self.target.*)) {
+                            1 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaa }),
+                            2 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaa }),
+                            4 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa }),
+                            8 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
+                            else => return self.fail(src, "TODO implement memset", .{}),
+                        }
+                    },
+                    .compare_flags_unsigned => |op| {
+                        return self.fail(src, "TODO implement set stack variable with compare flags value (unsigned)", .{});
+                    },
+                    .compare_flags_signed => |op| {
+                        return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{});
+                    },
+                    .immediate => {
+                        const reg = try self.copyToTmpRegister(src, mcv);
+                        return self.genSetStack(src, ty, stack_offset, MCValue{ .register = reg });
+                    },
+                    .embedded_in_code => |code_offset| {
+                        return self.fail(src, "TODO implement set stack variable from embedded_in_code", .{});
+                    },
+                    .register => |reg| {
+                        // TODO: strb, strh
+                        if (stack_offset <= math.maxInt(u12)) {
+                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.str(.al, reg, .fp, .{
+                                .offset = Instruction.Offset.imm(@intCast(u12, stack_offset)),
+                                .positive = false,
+                            }).toU32());
+                        } else {
+                            return self.fail(src, "TODO genSetStack with larger offsets", .{});
+                        }
+                    },
+                    .memory => |vaddr| {
+                        return self.fail(src, "TODO implement set stack variable from memory vaddr", .{});
+                    },
+                    .stack_offset => |off| {
+                        if (stack_offset == off)
+                            return; // Copy stack variable to itself; nothing to do.
+
+                        const reg = try self.copyToTmpRegister(src, mcv);
+                        return self.genSetStack(src, ty, stack_offset, MCValue{ .register = reg });
+                    },
+                },
                 .x86_64 => switch (mcv) {
                     .dead => unreachable,
                     .ptr_stack_offset => unreachable,
@@ -2274,35 +2359,39 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         return self.genSetReg(src, reg, .{ .immediate = 0xaaaaaaaa });
                     },
                     .immediate => |x| {
-                        // TODO better analysis of x to determine the
-                        // least amount of necessary instructions (use
-                        // more intelligent rotating)
-                        if (x <= math.maxInt(u8)) {
-                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
-                            return;
-                        } else if (x <= math.maxInt(u16)) {
-                            // TODO Use movw Note: Not supported on
-                            // all ARM targets!
-                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
-                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
-                        } else if (x <= math.maxInt(u32)) {
-                            // TODO Use movw and movt Note: Not
-                            // supported on all ARM targets! Also TODO
-                            // write constant to code and load
-                            // relative to pc
+                        if (x > math.maxInt(u32)) return self.fail(src, "ARM registers are 32-bit wide", .{});
 
-                            // immediate: 0xaabbccdd
-                            // mov reg, #0xaa
-                            // orr reg, reg, #0xbb, 24
-                            // orr reg, reg, #0xcc, 16
-                            // orr reg, reg, #0xdd, 8
-                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
-                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
-                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 16), 8)).toU32());
-                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 24), 4)).toU32());
-                            return;
+                        if (Instruction.Operand.fromU32(@intCast(u32, x))) |op| {
+                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, op).toU32());
+                        } else if (Instruction.Operand.fromU32(~@intCast(u32, x))) |op| {
+                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mvn(.al, reg, op).toU32());
+                        } else if (x <= math.maxInt(u16)) {
+                            if (Target.arm.featureSetHas(self.target.cpu.features, .has_v7)) {
+                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.movw(.al, reg, @intCast(u16, x)).toU32());
+                            } else {
+                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
+                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
+                            }
                         } else {
-                            return self.fail(src, "ARM registers are 32-bit wide", .{});
+                            // TODO write constant to code and load
+                            // relative to pc
+                            if (Target.arm.featureSetHas(self.target.cpu.features, .has_v7)) {
+                                // immediate: 0xaaaabbbb
+                                // movw reg, #0xbbbb
+                                // movt reg, #0xaaaa
+                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.movw(.al, reg, @truncate(u16, x)).toU32());
+                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.movt(.al, reg, @truncate(u16, x >> 16)).toU32());
+                            } else {
+                                // immediate: 0xaabbccdd
+                                // mov reg, #0xaa
+                                // orr reg, reg, #0xbb, 24
+                                // orr reg, reg, #0xcc, 16
+                                // orr reg, reg, #0xdd, 8
+                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.mov(.al, reg, Instruction.Operand.imm(@truncate(u8, x), 0)).toU32());
+                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 8), 12)).toU32());
+                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 16), 8)).toU32());
+                                mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.orr(.al, reg, reg, Instruction.Operand.imm(@truncate(u8, x >> 24), 4)).toU32());
+                            }
                         }
                     },
                     .register => |src_reg| {
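
`Instruction.Operand.fromU32`, used both here and for the prologue's stack adjust, is assumed to succeed exactly when the value fits ARM's data-processing immediate form: an 8-bit constant rotated right by an even amount. When that fails, the code above next tries the bitwise complement via `mvn`, then `movw`/`movt` on ARMv7+, and finally the four-instruction `mov`+`orr` chain. A small sketch of the fit check (not the fromU32 in the tree):

    const std = @import("std");

    // True if `x` can be written as an 8-bit value rotated right by an even
    // amount (0, 2, ..., 30), the only immediates mov/orr/sub accept directly.
    fn fitsArmImmediate(x: u32) bool {
        var rot: u6 = 0;
        while (rot < 32) : (rot += 2) {
            // Rotating left by `rot` undoes a right rotation by `rot`.
            if (std.math.rotl(u32, x, rot) <= 0xff) return true;
        }
        return false;
    }

    pub fn main() void {
        std.debug.print("0x000000ff: {}\n", .{fitsArmImmediate(0x000000ff)}); // true, rotation 0
        std.debug.print("0xff000000: {}\n", .{fitsArmImmediate(0xff000000)}); // true, rotation 8
        std.debug.print("0xaabbccdd: {}\n", .{fitsArmImmediate(0xaabbccdd)}); // false, needs the fallbacks
    }
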
@@ -2319,6 +2408,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         try self.genSetReg(src, reg, .{ .immediate = addr });
                         mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(.al, reg, reg, .{ .offset = Instruction.Offset.none }).toU32());
                     },
+                    .stack_offset => |unadjusted_off| {
+                        // TODO: ldrb, ldrh
+                        // TODO: maybe addressing from sp instead of fp
+                        if (unadjusted_off <= math.maxInt(u12)) {
+                            mem.writeIntLittle(u32, try self.code.addManyAsArray(4), Instruction.ldr(.al, reg, .fp, .{
+                                .offset = Instruction.Offset.imm(@intCast(u12, unadjusted_off)),
+                                .positive = false,
+                            }).toU32());
+                        } else {
+                            return self.fail(src, "TODO genSetReg with larger stack offset", .{});
+                        }
+                    },
                     else => return self.fail(src, "TODO implement getSetReg for arm {}", .{mcv}),
                 },
                 .riscv64 => switch (mcv) {
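
Both the `str` in genSetStack and this `ldr` address locals at negative offsets from the frame pointer (`positive = false`), which is why anything past the 12-bit immediate field falls back to a TODO error. A sketch of the two word encodings (illustrative helpers, not the tree's Instruction.str/ldr): offset addressing from fp (r11) with the U bit clear so the offset is subtracted, and the load form differing from the store form only in the L bit.

    const std = @import("std");

    // str rt, [fp, #-off]: cond=AL, offset addressing (P=1, W=0), U=0
    // (subtract), B=0 (word), L=0 (store), Rn=fp (r11), Rd=rt, imm12=off.
    fn strFpNegative(rt: u4, off: u12) u32 {
        return 0xe50b0000 | (@as(u32, rt) << 12) | off;
    }

    // ldr rt, [fp, #-off]: identical except L=1 (bit 20).
    fn ldrFpNegative(rt: u4, off: u12) u32 {
        return 0xe51b0000 | (@as(u32, rt) << 12) | off;
    }

    pub fn main() void {
        std.debug.print("str r0, [fp, #-4] => 0x{x}\n", .{strFpNegative(0, 4)});
        std.debug.print("ldr r0, [fp, #-4] => 0x{x}\n", .{ldrFpNegative(0, 4)});
    }
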
