Skip to content

Commit 57ee91d

Browse files
committed
std.process.Child: use clone() instead of fork()
1 parent 021289a commit 57ee91d

File tree

1 file changed

+102
-64
lines changed

1 file changed

+102
-64
lines changed

lib/std/process/Child.zig

Lines changed: 102 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ cwd: ?[]const u8,
7373
/// Once that is done, `cwd` will be deprecated in favor of this field.
7474
cwd_dir: ?fs.Dir = null,
7575

76-
err_pipe: ?if (native_os == .windows) void else [2]posix.fd_t,
76+
err_pipe: ?if (native_os == .windows or native_os == .linux) void else [2]posix.fd_t,
7777

7878
expand_arg0: Arg0Expand,
7979

@@ -488,27 +488,10 @@ fn cleanupStreams(self: *ChildProcess) void {
488488
}
489489

490490
fn cleanupAfterWait(self: *ChildProcess, status: u32) !Term {
491-
if (self.err_pipe) |err_pipe| {
492-
defer destroyPipe(err_pipe);
493-
494-
if (native_os == .linux) {
495-
var fd = [1]posix.pollfd{posix.pollfd{
496-
.fd = err_pipe[0],
497-
.events = posix.POLL.IN,
498-
.revents = undefined,
499-
}};
500-
501-
// Check if the eventfd buffer stores a non-zero value by polling
502-
// it, that's the error code returned by the child process.
503-
_ = posix.poll(&fd, 0) catch unreachable;
504-
505-
// According to eventfd(2) the descriptor is readable if the counter
506-
// has a value greater than 0
507-
if ((fd[0].revents & posix.POLL.IN) != 0) {
508-
const err_int = try readIntFd(err_pipe[0]);
509-
return @as(SpawnError, @errorCast(@errorFromInt(err_int)));
510-
}
511-
} else {
491+
if (native_os != .linux) {
492+
if (self.err_pipe) |err_pipe| {
493+
defer destroyPipe(err_pipe);
494+
512495
// Write maxInt(ErrInt) to the write end of the err_pipe. This is after
513496
// waitpid, so this write is guaranteed to be after the child
514497
// pid potentially wrote an error. This way we can do a blocking
@@ -538,6 +521,57 @@ fn statusToTerm(status: u32) Term {
538521
Term{ .Unknown = status };
539522
}
540523

524+
/// Channel the child side uses to report a failure that happens before exec.
/// On Linux this is an optional error value that `forkChildErrReport` stores
/// into directly; on every other OS it is a file descriptor that
/// `forkChildErrReport` writes the error integer to.
const RetErr = if (native_os == .linux) ?anyerror else posix.fd_t;
525+
526+
/// Bundle of everything `spawnPosixChildHelper` needs. It is handed to the
/// helper as a single pointer-sized integer and cast back on the other side.
const ChildArg = struct {
    /// The child process description being spawned; the helper reads its
    /// stdio behaviors, `cwd`/`cwd_dir`, `gid`, `uid`, `pgid` and `expand_arg0`.
    self: *ChildProcess,

    /// Descriptor passed to `setUpChildIo` for the child's stdin.
    stdin_pipe_0: posix.fd_t,

    /// Descriptor passed to `setUpChildIo` for the child's stdout.
    stdout_pipe_1: posix.fd_t,

    /// Descriptor passed to `setUpChildIo` for the child's stderr.
    stderr_pipe_1: posix.fd_t,

    /// Duplicated onto file descriptor 3 in the child unless it is -1.
    prog_pipe_1: posix.fd_t,

    /// Forwarded to every `setUpChildIo` call.
    /// NOTE(review): presumably an open handle on /dev/null used for ignored
    /// streams -- confirm against `setUpChildIo`.
    dev_null_fd: posix.fd_t,

    /// Null-terminated argument vector handed to the exec call; element 0 is
    /// also used as the program to execute.
    argv_buf: [:null]?[*:0]const u8,

    /// Null-terminated environment handed to the exec call.
    envp: [*:null]const ?[*:0]const u8,

    /// Where the helper reports a failure; see `RetErr`.
    ret_err: RetErr,
};
537+
538+
/// Child-side half of spawning: wires up stdio, changes directory, installs
/// the progress pipe, applies gid/uid/pgid and finally execs the program.
///
/// `arg` is the address of a `ChildArg`. The function only returns when one
/// of the steps (or the exec itself) fails; in that case the error has been
/// handed to `forkChildErrReport` and its return value is passed on as the
/// exit code.
fn spawnPosixChildHelper(arg: usize) callconv(.c) u8 {
    const ctx: *ChildArg = @ptrFromInt(arg);
    const child = ctx.self;
    const report = &ctx.ret_err;
    const null_fd = ctx.dev_null_fd;
    const prog_fileno = 3;

    // Standard streams first: stdin, stdout, stderr.
    setUpChildIo(child.stdin_behavior, ctx.stdin_pipe_0, posix.STDIN_FILENO, null_fd) catch |e| {
        return forkChildErrReport(report, e);
    };
    setUpChildIo(child.stdout_behavior, ctx.stdout_pipe_1, posix.STDOUT_FILENO, null_fd) catch |e| {
        return forkChildErrReport(report, e);
    };
    setUpChildIo(child.stderr_behavior, ctx.stderr_pipe_1, posix.STDERR_FILENO, null_fd) catch |e| {
        return forkChildErrReport(report, e);
    };

    // A directory handle takes precedence over a path.
    if (child.cwd_dir) |dir| {
        posix.fchdir(dir.fd) catch |e| {
            return forkChildErrReport(report, e);
        };
    } else if (child.cwd) |path| {
        posix.chdir(path) catch |e| {
            return forkChildErrReport(report, e);
        };
    }

    // Must happen after fchdir above, the cwd file descriptor might be
    // equal to prog_fileno and be clobbered by this dup2 call.
    if (ctx.prog_pipe_1 != -1) {
        posix.dup2(ctx.prog_pipe_1, prog_fileno) catch |e| {
            return forkChildErrReport(report, e);
        };
    }

    if (child.gid) |gid| {
        posix.setregid(gid, gid) catch |e| {
            return forkChildErrReport(report, e);
        };
    }

    if (child.uid) |uid| {
        posix.setreuid(uid, uid) catch |e| {
            return forkChildErrReport(report, e);
        };
    }

    if (child.pgid) |pgid| {
        posix.setpgid(0, pgid) catch |e| {
            return forkChildErrReport(report, e);
        };
    }

    // The expansion mode is a comptime parameter of the exec wrapper, so the
    // runtime value has to be dispatched through a switch.
    const exec_err = switch (child.expand_arg0) {
        .no_expand => posix.execvpeZ_expandArg0(.no_expand, ctx.argv_buf.ptr[0].?, ctx.argv_buf.ptr, ctx.envp),
        .expand => posix.execvpeZ_expandArg0(.expand, ctx.argv_buf.ptr[0].?, ctx.argv_buf.ptr, ctx.envp),
    };
    return forkChildErrReport(report, exec_err);
}
574+
541575
fn spawnPosix(self: *ChildProcess) SpawnError!void {
542576
// The child process does need to access (one end of) these pipes. However,
543577
// we must initially set CLOEXEC to avoid a race condition. If another thread
@@ -639,51 +673,47 @@ fn spawnPosix(self: *ChildProcess) SpawnError!void {
639673
// This pipe is used to communicate errors between the time of fork
640674
// and execve from the child process to the parent process.
641675
const err_pipe = blk: {
642-
if (native_os == .linux) {
643-
const fd = try posix.eventfd(0, linux.EFD.CLOEXEC);
644-
// There's no distinction between the readable and the writeable
645-
// end with eventfd
646-
break :blk [2]posix.fd_t{ fd, fd };
647-
} else {
676+
if (native_os != .linux) {
648677
break :blk try posix.pipe2(.{ .CLOEXEC = true });
678+
} else {
679+
break :blk undefined;
649680
}
650681
};
651682
errdefer destroyPipe(err_pipe);
652683

653-
const pid_result = try posix.fork();
654-
if (pid_result == 0) {
655-
// we are the child
656-
setUpChildIo(self.stdin_behavior, stdin_pipe[0], posix.STDIN_FILENO, dev_null_fd) catch |err| forkChildErrReport(err_pipe[1], err);
657-
setUpChildIo(self.stdout_behavior, stdout_pipe[1], posix.STDOUT_FILENO, dev_null_fd) catch |err| forkChildErrReport(err_pipe[1], err);
658-
setUpChildIo(self.stderr_behavior, stderr_pipe[1], posix.STDERR_FILENO, dev_null_fd) catch |err| forkChildErrReport(err_pipe[1], err);
684+
var child_arg = ChildArg {
685+
.self = self,
686+
.stdin_pipe_0 = stdin_pipe[0],
687+
.stdout_pipe_1 = stdout_pipe[1],
688+
.stderr_pipe_1 = stderr_pipe[1],
689+
.prog_pipe_1 = prog_pipe[1],
690+
.dev_null_fd = dev_null_fd,
691+
.argv_buf = argv_buf,
692+
.envp = envp,
693+
.ret_err = undefined,
694+
};
659695

660-
if (self.cwd_dir) |cwd| {
661-
posix.fchdir(cwd.fd) catch |err| forkChildErrReport(err_pipe[1], err);
662-
} else if (self.cwd) |cwd| {
663-
posix.chdir(cwd) catch |err| forkChildErrReport(err_pipe[1], err);
696+
var pid_result: posix.pid_t = undefined;
697+
if (native_os != .linux) {
698+
child_arg.ret_err = err_pipe[1];
699+
pid_result = try posix.fork();
700+
if (pid_result == 0) {
701+
immediateExit(spawnPosixChildHelper(@intFromPtr(&child_arg)));
664702
}
665-
666-
// Must happen after fchdir above, the cwd file descriptor might be
667-
// equal to prog_fileno and be clobbered by this dup2 call.
668-
if (prog_pipe[1] != -1) posix.dup2(prog_pipe[1], prog_fileno) catch |err| forkChildErrReport(err_pipe[1], err);
669-
670-
if (self.gid) |gid| {
671-
posix.setregid(gid, gid) catch |err| forkChildErrReport(err_pipe[1], err);
703+
} else {
704+
child_arg.ret_err = null;
705+
const stack_size = 0x1000;
706+
var stack: [stack_size]u8 = undefined;
707+
const rc = linux.clone(spawnPosixChildHelper, @intFromPtr(&stack) + stack_size, linux.CLONE.VM | linux.CLONE.VFORK | linux.SIG.CHLD, @intFromPtr(&child_arg), null, 0, null);
708+
switch (posix.errno(rc)) {
709+
.AGAIN => return error.SystemResources,
710+
.NOMEM => return error.SystemResources,
711+
else => |err| return posix.unexpectedErrno(err),
672712
}
673-
674-
if (self.uid) |uid| {
675-
posix.setreuid(uid, uid) catch |err| forkChildErrReport(err_pipe[1], err);
713+
pid_result = @intCast(rc);
714+
if (child_arg.ret_err) |err| {
715+
return err;
676716
}
677-
678-
if (self.pgid) |pid| {
679-
posix.setpgid(0, pid) catch |err| forkChildErrReport(err_pipe[1], err);
680-
}
681-
682-
const err = switch (self.expand_arg0) {
683-
.expand => posix.execvpeZ_expandArg0(.expand, argv_buf.ptr[0].?, argv_buf.ptr, envp),
684-
.no_expand => posix.execvpeZ_expandArg0(.no_expand, argv_buf.ptr[0].?, argv_buf.ptr, envp),
685-
};
686-
forkChildErrReport(err_pipe[1], err);
687717
}
688718

689719
// we are the parent
@@ -1012,19 +1042,27 @@ fn destroyPipe(pipe: [2]posix.fd_t) void {
10121042
if (pipe[0] != pipe[1]) posix.close(pipe[1]);
10131043
}
10141044

1015-
// Child of fork calls this to report an error to the fork parent.
1016-
// Then the child exits.
1017-
fn forkChildErrReport(fd: i32, err: ChildProcess.SpawnError) noreturn {
1018-
writeIntFd(fd, @as(ErrInt, @intFromError(err))) catch {};
1045+
/// Terminates the current process with `exitcode` without returning.
fn immediateExit(exitcode: u8) noreturn {
    if (builtin.link_libc) {
        // When libc is linked, applications may have registered atexit
        // handlers, and those must not run in the fork child: LLVM was caught
        // doing this and it deadlocked instead of performing the exit syscall.
        // _exit(2) does nothing but make the exit syscall, unlike exit(3).
        std.c._exit(exitcode);
    } else {
        posix.exit(exitcode);
    }
}
1056+
1057+
// Called on the child side to hand a pre-exec failure back to the parent.
// On Linux the error is stored straight into `ret_err`; elsewhere `ret_err`
// holds a file descriptor and the error's integer value is written to it
// (a failed write is deliberately ignored -- there is nobody left to tell).
// Always returns 1, the exit code the child should terminate with.
fn forkChildErrReport(ret_err: *RetErr, err: ChildProcess.SpawnError) u8 {
    if (native_os == .linux) {
        ret_err.* = err;
    } else {
        writeIntFd(ret_err.*, @as(ErrInt, @intFromError(err))) catch {};
    }
    return 1;
}
10291067

10301068
fn writeIntFd(fd: i32, value: ErrInt) !void {

0 commit comments

Comments
 (0)