Skip to content

Commit 9726435

Browse files
committed
std.process.Child: use clone3 on x86 and x86_64
1 parent bf7f827 commit 9726435

File tree

4 files changed

+112
-10
lines changed

4 files changed

+112
-10
lines changed

lib/std/os/linux.zig

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,40 @@ pub fn clone(
8787
) callconv(.C) usize, @ptrCast(&syscall_bits.clone))(func, stack, flags, arg, ptid, tp, ctid);
8888
}
8989

90+
/// Argument block for the `clone3` system call; mirrors the kernel's
/// `struct clone_args` (11 consecutive 64-bit fields, 88 bytes).
///
/// The kernel requires every field that is not relevant to the requested
/// `flags` to be zero, so all fields default to 0. This lets callers write
/// `.{ .flags = ..., .exit_signal = ... }` instead of zeroing the struct by
/// hand; `std.mem.zeroes(clone_args)` keeps working as before.
pub const clone_args = extern struct {
    /// Bit mask of `CLONE.*` flags.
    flags: u64 = 0,
    /// Address where the new pidfd is stored (used with `CLONE.PIDFD`).
    pidfd: u64 = 0,
    /// Address where the child's TID is stored in the child's memory
    /// (used with `CLONE.CHILD_SETTID` / `CLONE.CHILD_CLEARTID`).
    child_tid: u64 = 0,
    /// Address where the child's TID is stored in the parent's memory
    /// (used with `CLONE.PARENT_SETTID`).
    parent_tid: u64 = 0,
    /// Signal delivered to the parent when the child terminates.
    exit_signal: u64 = 0,
    /// Lowest address of the child's stack area (not the top of the stack).
    stack: u64 = 0,
    /// Size of the stack area in bytes.
    stack_size: u64 = 0,
    /// Location of the new thread-local storage (used with `CLONE.SETTLS`).
    tls: u64 = 0,
    /// Address of an array of PIDs to assign to the child, one per PID namespace.
    set_tid: u64 = 0,
    /// Number of elements in the `set_tid` array.
    set_tid_size: u64 = 0,
    /// File descriptor of the target cgroup (used with `CLONE.INTO_CGROUP`).
    cgroup: u64 = 0,
};
103+
104+
/// Invokes the architecture-specific `clone3` trampoline, if one exists.
///
/// `cl_args` and `size` are handed to the kernel as the `clone3` argument
/// block and its byte size. `func` and `arg` are forwarded to the trampoline,
/// which is expected to run `func(arg)` in the newly created child.
///
/// Returns the raw syscall result (decode it with `E.init`). On architectures
/// whose syscall bits do not provide a `clone3` implementation the result is
/// the encoded `-ENOSYS`, so callers can fall back to `clone`.
pub fn clone3(
    cl_args: *const clone_args,
    size: usize,
    func: *const fn (arg: usize) callconv(.C) u8,
    arg: usize,
) usize {
    // TODO: provide assembly trampolines for the remaining architectures.
    if (!@hasDecl(syscall_bits, "clone3")) {
        // No trampoline for this target: report "not implemented" exactly
        // like the kernel would for an unknown syscall.
        const enosys: isize = @intFromEnum(E.NOSYS);
        return @bitCast(-enosys);
    } else {
        // A naked function cannot be called directly, so view it through a
        // pointer that uses the C calling convention.
        const Trampoline = *const fn (
            cl_args: *const clone_args,
            size: usize,
            func: *const fn (arg: usize) callconv(.C) u8,
            arg: usize,
        ) callconv(.C) usize;
        const trampoline: Trampoline = @ptrCast(&syscall_bits.clone3);
        return trampoline(cl_args, size, func, arg);
    }
}
123+
90124
pub const ARCH = arch_bits.ARCH;
91125
pub const Elf_Symndx = arch_bits.Elf_Symndx;
92126
pub const F = arch_bits.F;

lib/std/os/linux/x86.zig

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,36 @@ pub fn clone() callconv(.Naked) usize {
167167
);
168168
}
169169

170+
/// Naked `clone3` trampoline for 32-bit x86.
///
/// Must be called through a C-calling-convention function pointer with the
/// stack arguments (cl_args, size, func, arg). In the parent the raw syscall
/// result is returned in eax. In the child, `func(arg)` is called on a
/// 16-byte aligned stack and its return value is passed to `exit`; the child
/// never returns to the caller.
pub fn clone3() callconv(.Naked) usize {
    asm volatile (
        \\ pushl %%ebx
        \\ pushl %%esi
        \\ movl $435,%%eax // SYS_clone3
        \\ movl 24(%%esp),%%esi // arg, kept for the child
        \\ movl 20(%%esp),%%edx // func, kept for the child
        \\ movl 16(%%esp),%%ecx // size
        \\ movl 12(%%esp),%%ebx // cl_args
        \\ int $0x80
        \\ testl %%eax,%%eax
        \\ jz 1f
        \\ popl %%esi
        \\ popl %%ebx
        \\ retl
        \\
        \\1:
        \\ .cfi_undefined %%eip
        \\ xorl %%ebp,%%ebp
        \\ andl $-16,%%esp
        \\ subl $12,%%esp
        \\ pushl %%esi
        \\ calll *%%edx
        \\ movl %%eax,%%ebx
        \\ movl $1,%%eax // SYS_exit
        \\ int $0x80
    );
}
199+
170200
pub fn restore() callconv(.Naked) noreturn {
171201
switch (@import("builtin").zig_backend) {
172202
.stage2_c => asm volatile (

lib/std/os/linux/x86_64.zig

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,27 @@ pub fn clone() callconv(.Naked) usize {
129129
);
130130
}
131131

132+
/// Naked `clone3` trampoline for x86_64.
///
/// Must be called through a C-calling-convention function pointer with the
/// arguments (cl_args, size, func, arg), which arrive in rdi, rsi, rdx and
/// rcx. In the parent the raw syscall result is returned in rax. In the
/// child, `func(arg)` is called and its return value is passed to `exit`;
/// the child never returns to the caller.
///
/// The child's stack pointer is rounded down to a 16-byte boundary before
/// `func` is called, matching the 32-bit x86 trampoline. Without this, a
/// stack whose top is not 16-byte aligned (or a child that inherits the
/// parent's stack pointer because no stack was supplied) would enter `func`
/// with a stack alignment that violates the System V ABI.
pub fn clone3() callconv(.Naked) usize {
    asm volatile (
        \\ movl $435,%%eax // SYS_clone3
        \\ movq %%rcx,%%r8 // syscall clobbers rcx, so keep arg in r8
        \\ syscall
        \\ testq %%rax,%%rax
        \\ jz 1f
        \\ retq
        \\
        \\1: .cfi_undefined %%rip
        \\ xorl %%ebp,%%ebp
        \\
        \\ andq $-16,%%rsp // ABI stack alignment before the call
        \\ movq %%r8,%%rdi
        \\ callq *%%rdx
        \\ movl %%eax,%%edi
        \\ movl $60,%%eax // SYS_exit
        \\ syscall
    );
}
152+
132153
pub const restore = restore_rt;
133154

134155
pub fn restore_rt() callconv(.Naked) noreturn {

lib/std/process/Child.zig

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -727,24 +727,41 @@ fn spawnPosix(self: *ChildProcess) SpawnError!void {
727727
immediateExit(spawnPosixChildHelper(@intFromPtr(&child_arg)));
728728
}
729729
} else {
730-
var old_mask: posix.sigset_t = undefined;
731-
posix.sigprocmask(posix.SIG.SETMASK, &linux.all_mask, &old_mask);
732-
defer posix.sigprocmask(posix.SIG.SETMASK, &old_mask, null);
733-
child_arg.sigmask = &old_mask;
734730
child_arg.ret_err = null;
735731
// Although the stack is fixed sized, we alloc it here,
736732
// because stack-smashing protection may have higher overhead than allocation.
737733
const stack_size = 0x8000;
738734
// On aarch64, stack address must be a multiple of 16.
739735
const stack = try self.allocator.alignedAlloc(u8, 16, stack_size);
740736
defer self.allocator.free(stack);
741-
const rc = linux.clone(spawnPosixChildHelper, @intFromPtr(stack.ptr) + stack_size, linux.CLONE.VM | linux.CLONE.VFORK | linux.SIG.CHLD, @intFromPtr(&child_arg), null, 0, null);
742-
pid_result = switch (posix.errno(rc)) {
743-
.SUCCESS => @intCast(rc),
744-
.AGAIN => return error.SystemResources,
745-
.NOMEM => return error.SystemResources,
737+
738+
var clone_args = mem.zeroes(linux.clone_args);
739+
clone_args.flags = linux.CLONE.VM | linux.CLONE.VFORK | linux.CLONE.CLEAR_SIGHAND;
740+
clone_args.exit_signal = linux.SIG.CHLD;
741+
clone_args.stack = @intFromPtr(stack.ptr);
742+
clone_args.stack_size = stack_size;
743+
var rc = linux.clone3(&clone_args, @sizeOf(linux.clone_args), spawnPosixChildHelper, @intFromPtr(&child_arg));
744+
switch (linux.E.init(rc)) {
745+
.SUCCESS => {},
746+
.AGAIN, .NOMEM => return error.SystemResources,
747+
.INVAL, .NOSYS => {
748+
// Fall back to clone().
749+
// We need to block signals here because we share VM with child before exec.
750+
// Signal handlers may mess up our memory.
751+
var old_mask: posix.sigset_t = undefined;
752+
posix.sigprocmask(posix.SIG.SETMASK, &linux.all_mask, &old_mask);
753+
defer posix.sigprocmask(posix.SIG.SETMASK, &old_mask, null);
754+
child_arg.sigmask = &old_mask;
755+
rc = linux.clone(spawnPosixChildHelper, @intFromPtr(stack.ptr) + stack_size, linux.CLONE.VM | linux.CLONE.VFORK | linux.SIG.CHLD, @intFromPtr(&child_arg), null, 0, null);
756+
switch (linux.E.init(rc)) {
757+
.SUCCESS => {},
758+
.AGAIN, .NOMEM => return error.SystemResources,
759+
else => |err| return posix.unexpectedErrno(err),
760+
}
761+
},
746762
else => |err| return posix.unexpectedErrno(err),
747-
};
763+
}
764+
pid_result = @intCast(rc);
748765
if (child_arg.ret_err) |err| {
749766
return err;
750767
}

0 commit comments

Comments
 (0)