Skip to content

Commit 2ac3a31

Browse files
committed
std.process.Child: use clone3 on x86 and x86_64
1 parent 8899d66 commit 2ac3a31

File tree

4 files changed

+113
-10
lines changed

4 files changed

+113
-10
lines changed

lib/std/os/linux.zig

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,40 @@ pub fn clone(
8787
) callconv(.C) usize, @ptrCast(&syscall_bits.clone))(func, stack, flags, arg, ptid, tp, ctid);
8888
}
8989

90+
/// Argument block for the `clone3` syscall; mirrors the kernel's
/// `struct clone_args` (see clone(2)). Every member is 64 bits wide.
///
/// All fields default to zero so that callers can write
/// `.{ .flags = ..., .exit_signal = ... }` and leave the members they do not
/// use cleared.
pub const clone_args = extern struct {
    /// Bit mask of `CLONE.*` flags.
    flags: u64 = 0,
    /// Address where the kernel stores a PID file descriptor.
    pidfd: u64 = 0,
    /// Address where the child's TID is stored, in the child's memory.
    child_tid: u64 = 0,
    /// Address where the child's TID is stored, in the parent's memory.
    parent_tid: u64 = 0,
    /// Signal delivered to the parent when the child terminates.
    exit_signal: u64 = 0,
    /// Address of the lowest byte of the child's stack.
    stack: u64 = 0,
    /// Size of the child's stack in bytes.
    stack_size: u64 = 0,
    /// Location of the new thread-local storage.
    tls: u64 = 0,
    /// Address of an array of PIDs to assign to the child.
    set_tid: u64 = 0,
    /// Number of elements in the `set_tid` array.
    set_tid_size: u64 = 0,
    /// File descriptor of the cgroup to place the child in.
    cgroup: u64 = 0,
};
103+
104+
/// Invokes the architecture-specific `clone3` trampoline when one is provided
/// by `syscall_bits`.
///
/// `cl_args` and `size` are forwarded to the trampoline unchanged, together
/// with `func` and `arg`. On architectures whose `syscall_bits` has no
/// `clone3` declaration, the negated `E.NOSYS` value is returned instead.
pub fn clone3(
    cl_args: *const clone_args,
    size: usize,
    func: *const fn (arg: usize) callconv(.C) u8,
    arg: usize,
) usize {
    // TODO: write asm for other arch.
    if (!@hasDecl(syscall_bits, "clone3")) {
        const enosys: isize = @intFromEnum(E.NOSYS);
        return @bitCast(-enosys);
    } else {
        // A naked function cannot be called directly, so view the
        // trampoline through a C calling convention pointer type first.
        const Trampoline = *const fn (
            cl_args: *const clone_args,
            size: usize,
            func: *const fn (arg: usize) callconv(.C) u8,
            arg: usize,
        ) callconv(.C) usize;
        const trampoline: Trampoline = @ptrCast(&syscall_bits.clone3);
        return trampoline(cl_args, size, func, arg);
    }
}
123+
90124
pub const ARCH = arch_bits.ARCH;
91125
pub const Elf_Symndx = arch_bits.Elf_Symndx;
92126
pub const F = arch_bits.F;

lib/std/os/linux/x86.zig

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,37 @@ pub fn clone() callconv(.Naked) usize {
167167
);
168168
}
169169

170+
/// Raw `clone3` trampoline for 32-bit x86.
///
/// This function is naked and is meant to be reached through a C calling
/// convention function pointer of the shape
/// `fn (cl_args, size, func, arg) usize`, so on entry the four arguments are
/// read from the stack at 4, 8, 12 and 16 bytes above `%esp`.
///
/// `%ebx` and `%esi` are saved before being loaded with `cl_args` and `arg`,
/// and restored on the path that returns. When the syscall result is
/// non-zero it is returned in `%eax`. When it is zero, the frame pointer is
/// cleared, `%esp` is aligned down to 16 bytes, `func` is called with `arg`
/// as its single stack argument, and its return value is passed to
/// `SYS_exit`; that path never returns.
pub fn clone3() callconv(.Naked) usize {
    asm volatile (
        \\ movl 4(%%esp),%%ecx
        \\ movl 12(%%esp),%%edx
        \\ pushl %%ebx
        \\ pushl %%esi
        \\ movl 24(%%esp),%%esi
        \\ movl %%ecx,%%ebx
        \\ movl 16(%%esp),%%ecx
        \\ movl $435,%%eax // SYS_clone3
        \\ int $128
        \\ testl %%eax,%%eax
        \\ jz 1f
        \\ popl %%esi
        \\ popl %%ebx
        \\ retl
        \\
        \\1:
        \\ .cfi_undefined %%eip
        \\ xorl %%ebp,%%ebp
        \\
        \\ andl $-16,%%esp
        \\ subl $12,%%esp
        \\ pushl %%esi
        \\ calll *%%edx
        \\ movl %%eax,%%ebx
        \\ movl $1,%%eax // SYS_exit
        \\ int $128
    );
}
200+
170201
pub fn restore() callconv(.Naked) noreturn {
171202
switch (@import("builtin").zig_backend) {
172203
.stage2_c => asm volatile (

lib/std/os/linux/x86_64.zig

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,27 @@ pub fn clone() callconv(.Naked) usize {
129129
);
130130
}
131131

132+
/// Raw `clone3` trampoline for x86_64.
///
/// This function is naked and is meant to be reached through a C calling
/// convention function pointer of the shape
/// `fn (cl_args, size, func, arg) usize`: `%rdi` and `%rsi` are left
/// untouched for the syscall, `func` is taken from `%rdx`, and `arg` is
/// copied from `%rcx` into `%r8` before the `syscall` instruction.
///
/// When the syscall result is non-zero it is returned in `%rax`. When it is
/// zero, the frame pointer is cleared, `%rsp` is aligned down to 16 bytes,
/// `func(arg)` is called, and its return value is passed to `SYS_exit`; that
/// path never returns.
pub fn clone3() callconv(.Naked) usize {
    asm volatile (
        \\      movl $435,%%eax // SYS_clone3
        \\      movq %%rcx,%%r8
        \\      syscall
        \\      testq %%rax,%%rax
        \\      jz 1f
        \\      retq
        \\
        \\1:    .cfi_undefined %%rip
        \\      xorl %%ebp,%%ebp
        \\
        \\      andq $-16,%%rsp // the ABI requires a 16-byte aligned stack at the call
        \\      movq %%r8,%%rdi
        \\      callq *%%rdx
        \\      movl %%eax,%%edi
        \\      movl $60,%%eax // SYS_exit
        \\      syscall
        \\
    );
}
152+
132153
pub const restore = restore_rt;
133154

134155
pub fn restore_rt() callconv(.Naked) noreturn {

lib/std/process/Child.zig

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -724,10 +724,6 @@ fn spawnPosix(self: *ChildProcess) SpawnError!void {
724724
immediateExit(spawnPosixChildHelper(@intFromPtr(&child_arg)));
725725
}
726726
} else {
727-
var old_mask: posix.sigset_t = undefined;
728-
posix.sigprocmask(posix.SIG.SETMASK, &linux.all_mask, &old_mask);
729-
defer posix.sigprocmask(posix.SIG.SETMASK, &old_mask, null);
730-
child_arg.sigmask = &old_mask;
731727
child_arg.ret_err = null;
732728
// Although the stack is fixed sized, we alloc it here,
733729
// because stack-smashing protection may have higher overhead than allocation.
@@ -737,13 +733,34 @@ fn spawnPosix(self: *ChildProcess) SpawnError!void {
737733
// For simplicity, we just align it to page boundary here.
738734
const stack = try self.allocator.alignedAlloc(u8, mem.page_size, stack_size);
739735
defer self.allocator.free(stack);
740-
const rc = linux.clone(spawnPosixChildHelper, @intFromPtr(stack.ptr) + stack_size, linux.CLONE.VM | linux.CLONE.VFORK | linux.SIG.CHLD, @intFromPtr(&child_arg), null, 0, null);
741-
pid_result = switch (posix.errno(rc)) {
742-
.SUCCESS => @intCast(rc),
743-
.AGAIN => return error.SystemResources,
744-
.NOMEM => return error.SystemResources,
736+
737+
var clone_args = mem.zeroes(linux.clone_args);
738+
clone_args.flags = linux.CLONE.VM | linux.CLONE.VFORK | linux.CLONE.CLEAR_SIGHAND;
739+
clone_args.exit_signal = linux.SIG.CHLD;
740+
clone_args.stack = @intFromPtr(stack.ptr);
741+
clone_args.stack_size = stack_size;
742+
var rc = linux.clone3(&clone_args, @sizeOf(linux.clone_args), spawnPosixChildHelper, @intFromPtr(&child_arg));
743+
switch (posix.errno(rc)) {
744+
.SUCCESS => {},
745+
.AGAIN, .NOMEM => return error.SystemResources,
746+
.INVAL, .NOSYS => {
747+
// Fall back to clone() when clone3 reports INVAL or NOSYS.
748+
// We need to block signals here because we share VM with child before exec.
749+
// Signal handlers may mess up our memory.
750+
var old_mask: posix.sigset_t = undefined;
751+
posix.sigprocmask(posix.SIG.SETMASK, &linux.all_mask, &old_mask);
752+
defer posix.sigprocmask(posix.SIG.SETMASK, &old_mask, null);
753+
child_arg.sigmask = &old_mask;
754+
rc = linux.clone(spawnPosixChildHelper, @intFromPtr(stack.ptr) + stack_size, linux.CLONE.VM | linux.CLONE.VFORK | linux.SIG.CHLD, @intFromPtr(&child_arg), null, 0, null);
755+
switch (posix.errno(rc)) {
756+
.SUCCESS => {},
757+
.AGAIN, .NOMEM => return error.SystemResources,
758+
else => |err| return posix.unexpectedErrno(err),
759+
}
760+
},
745761
else => |err| return posix.unexpectedErrno(err),
746-
};
762+
}
763+
pid_result = @intCast(rc);
747764
if (child_arg.ret_err) |err| {
748765
return err;
749766
}

0 commit comments

Comments
 (0)