Skip to content

Commit 76a1954

Browse files
authored
Merge pull request #6516 from LemonBoy/fastfilecopy
std: Make file copy ops use zero-copy mechanisms
2 parents b02341d + 03762da commit 76a1954

File tree

3 files changed

+76
-12
lines changed

3 files changed

+76
-12
lines changed

lib/std/c/darwin.zig

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@ pub extern "c" fn _dyld_get_image_header(image_index: u32) ?*mach_header;
1818
pub extern "c" fn _dyld_get_image_vmaddr_slide(image_index: u32) usize;
1919
pub extern "c" fn _dyld_get_image_name(image_index: u32) [*:0]const u8;
2020

21+
// Bit flags for the `flags` argument of `fcopyfile`. Each constant occupies
// its own bit.
// NOTE(review): the per-flag descriptions marked "presumably" are inferred
// from the names only; confirm against copyfile(3).

/// Presumably selects copying of access control lists.
pub const COPYFILE_ACL = 1 << 0;
22+
/// Presumably selects copying of stat metadata.
pub const COPYFILE_STAT = 1 << 1;
23+
/// Presumably selects copying of extended attributes.
pub const COPYFILE_XATTR = 1 << 2;
24+
/// Selects copying of the file data itself.
pub const COPYFILE_DATA = 1 << 3;
25+
26+
/// Pointer to an opaque type, used as the type of the `state` argument of
/// `fcopyfile`.
pub const copyfile_state_t = *@Type(.Opaque);
27+
/// libc binding that copies from the descriptor `from` to the descriptor `to`.
/// `state` is optional and may be null; `flags` is a combination of the
/// `COPYFILE_*` bits above.
/// NOTE(review): the return value is presumably 0 on success with the failure
/// reason reported through errno; confirm against copyfile(3).
pub extern "c" fn fcopyfile(from: fd_t, to: fd_t, state: ?copyfile_state_t, flags: u32) c_int;
28+
2129
pub extern "c" fn @"realpath$DARWIN_EXTSN"(noalias file_name: [*:0]const u8, noalias resolved_name: [*]u8) ?[*:0]u8;
2230

2331
pub extern "c" fn __getdirentries64(fd: c_int, buf_ptr: [*]u8, buf_len: usize, basep: *i64) isize;

lib/std/fs.zig

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1823,7 +1823,7 @@ pub const Dir = struct {
18231823
var atomic_file = try dest_dir.atomicFile(dest_path, .{ .mode = mode });
18241824
defer atomic_file.deinit();
18251825

1826-
try atomic_file.file.writeFileAll(in_file, .{ .in_len = size });
1826+
try copy_file(in_file.handle, atomic_file.file.handle);
18271827
return atomic_file.finish();
18281828
}
18291829

@@ -2271,6 +2271,53 @@ pub fn realpathAlloc(allocator: *Allocator, pathname: []const u8) ![]u8 {
22712271
return allocator.dupe(u8, try os.realpath(pathname, &buf));
22722272
}
22732273

2274+
const CopyFileError = error{SystemResources} || os.CopyFileRangeError || os.SendFileError;
2275+
2276+
// Copies the whole contents of `fd_in` into `fd_out` using the cheapest
// mechanism the target OS provides.
// The copy starts at offset 0; the initial offsets are preserved.
// Only file data is transferred, never metadata.
fn copy_file(fd_in: os.fd_t, fd_out: os.fd_t) CopyFileError!void {
    // Darwin: hand the job to fcopyfile, asking for the data only.
    if (comptime std.Target.current.isDarwin()) {
        const status = os.system.fcopyfile(fd_in, fd_out, null, os.system.COPYFILE_DATA);
        switch (os.errno(status)) {
            0 => return,
            // The source file is not a directory, symbolic link, or regular
            // file. Fall through to the generic path below before giving up.
            os.ENOTSUP => {},
            os.ENOMEM => return error.SystemResources,
            os.EINVAL => unreachable,
            else => |errno_value| return os.unexpectedErrno(errno_value),
        }
    }

    // Linux: prefer copy_file_range, which works at the filesystem level and
    // is the most efficient method when available.
    if (std.Target.current.os.tag == .linux) {
        var position: u64 = 0;
        while (true) {
            // The kernel checks the u64 sum `offset + count` for overflow.
            // Requesting at most a 32 bit count keeps the syscall from
            // returning EINVAL except for impossibly large files
            // (> 2^64-1 - 2^32-1).
            const copied = try os.copy_file_range(fd_in, position, fd_out, position, math.maxInt(u32), 0);
            // Nothing copied means there is nothing left to transfer.
            if (copied == 0) break;
            position += copied;
        }
        return;
    }

    // Generic path: sendfile is zero-copy iff the OS supports it; otherwise
    // its fallback code copies the contents chunk by chunk.
    const no_iovecs = [0]os.iovec_const{};
    var cursor: u64 = 0;
    while (true) {
        const sent = try os.sendfile(fd_out, fd_in, cursor, 0, &no_iovecs, &no_iovecs, 0);
        // Nothing sent means there is nothing left to transfer.
        if (sent == 0) break;
        cursor += sent;
    }
}
2320+
22742321
test "" {
22752322
if (builtin.os.tag != .wasi) {
22762323
_ = makeDirAbsolute;

lib/std/os.zig

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4945,6 +4945,9 @@ pub fn sendfile(
49454945
pub const CopyFileRangeError = error{
49464946
FileTooBig,
49474947
InputOutput,
4948+
/// `fd_in` is not open for reading; or `fd_out` is not open for writing;
4949+
/// or the `O_APPEND` flag is set for `fd_out`.
4950+
FilesOpenedWithWrongFlags,
49484951
IsDir,
49494952
OutOfMemory,
49504953
NoSpaceLeft,
@@ -4953,6 +4956,11 @@ pub const CopyFileRangeError = error{
49534956
FileBusy,
49544957
} || PReadError || PWriteError || UnexpectedError;
49554958

4959+
/// Atomic flag read by `copy_file_range` to decide whether issuing the raw
/// syscall is worth attempting; it is cleared once the kernel answers ENOSYS.
/// Initialized from the target's OS version check against Linux 4.5; when
/// that check yields `null` the syscall is assumed to be present.
var has_copy_file_range_syscall = std.atomic.Int(bool).init(
    std.Target.current.os.isAtLeast(.linux, .{ .major = 4, .minor = 5 }) orelse true,
);
4963+
49564964
/// Transfer data between file descriptors at specified offsets.
49574965
/// Returns the number of bytes written, which can be less than requested.
49584966
///
@@ -4981,22 +4989,18 @@ pub const CopyFileRangeError = error{
49814989
pub fn copy_file_range(fd_in: fd_t, off_in: u64, fd_out: fd_t, off_out: u64, len: usize, flags: u32) CopyFileRangeError!usize {
49824990
const use_c = std.c.versionCheck(.{ .major = 2, .minor = 27, .patch = 0 }).ok;
49834991

4984-
// TODO support for other systems than linux
4985-
const try_syscall = comptime std.Target.current.os.isAtLeast(.linux, .{ .major = 4, .minor = 5 }) != false;
4986-
4987-
if (use_c or try_syscall) {
4992+
if (std.Target.current.os.tag == .linux and
4993+
(use_c or has_copy_file_range_syscall.get()))
4994+
{
49884995
const sys = if (use_c) std.c else linux;
49894996

49904997
var off_in_copy = @bitCast(i64, off_in);
49914998
var off_out_copy = @bitCast(i64, off_out);
49924999

49935000
const rc = sys.copy_file_range(fd_in, &off_in_copy, fd_out, &off_out_copy, len, flags);
4994-
4995-
// TODO avoid wasting a syscall every time if kernel is too old and returns ENOSYS https://github.com/ziglang/zig/issues/1018
4996-
49975001
switch (sys.getErrno(rc)) {
49985002
0 => return @intCast(usize, rc),
4999-
EBADF => unreachable,
5003+
EBADF => return error.FilesOpenedWithWrongFlags,
50005004
EFBIG => return error.FileTooBig,
50015005
EIO => return error.InputOutput,
50025006
EISDIR => return error.IsDir,
@@ -5005,9 +5009,14 @@ pub fn copy_file_range(fd_in: fd_t, off_in: u64, fd_out: fd_t, off_out: u64, len
50055009
EOVERFLOW => return error.Unseekable,
50065010
EPERM => return error.PermissionDenied,
50075011
ETXTBSY => return error.FileBusy,
5008-
EINVAL => {}, // these may not be regular files, try fallback
5009-
EXDEV => {}, // support for cross-filesystem copy added in Linux 5.3, use fallback
5010-
ENOSYS => {}, // syscall added in Linux 4.5, use fallback
5012+
// these may not be regular files, try fallback
5013+
EINVAL => {},
5014+
// support for cross-filesystem copy added in Linux 5.3, use fallback
5015+
EXDEV => {},
5016+
// syscall added in Linux 4.5, use fallback
5017+
ENOSYS => {
5018+
has_copy_file_range_syscall.set(false);
5019+
},
50115020
else => |err| return unexpectedErrno(err),
50125021
}
50135022
}

0 commit comments

Comments
 (0)