Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

std: Make file copy ops use zero-copy mechanisms #6516

Merged
merged 5 commits into from
Oct 9, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions lib/std/c/darwin.zig
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ pub extern "c" fn _dyld_get_image_header(image_index: u32) ?*mach_header;
pub extern "c" fn _dyld_get_image_vmaddr_slide(image_index: u32) usize;
pub extern "c" fn _dyld_get_image_name(image_index: u32) [*:0]const u8;

// Flag bits accepted by the `flags` argument of `fcopyfile`. Each constant is a
// distinct single bit so they can be OR-ed together. The per-flag descriptions
// below follow the names used by Darwin's copyfile(3); confirm against the man
// page before relying on finer details.

/// Bit 0: request copying of access control lists (presumably, per copyfile(3)).
pub const COPYFILE_ACL = 1 << 0;
/// Bit 1: request copying of stat-style metadata (presumably, per copyfile(3)).
pub const COPYFILE_STAT = 1 << 1;
/// Bit 2: request copying of extended attributes (presumably, per copyfile(3)).
pub const COPYFILE_XATTR = 1 << 2;
/// Bit 3: request copying of the file data itself.
pub const COPYFILE_DATA = 1 << 3;

/// Pointer to an opaque copyfile state object; its layout is not visible to Zig.
pub const copyfile_state_t = *@Type(.Opaque);
/// C binding that returns a copyfile state object.
/// NOTE(review): presumably paired with `copyfile_state_free` — confirm.
pub extern "c" fn copyfile_state_alloc() copyfile_state_t;
/// C binding that takes a copyfile state object and returns a C int status.
pub extern "c" fn copyfile_state_free(state: copyfile_state_t) c_int;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but the alloc/free are now unused and can get the boot before/after merging.

/// C binding for Darwin's `fcopyfile`: operates on two already-open file
/// descriptors, `from` and `to`, according to `flags` (a combination of the
/// `COPYFILE_*` bits). `state` is optional and may be null. The C int result is
/// meant to be decoded as an errno value by the caller.
pub extern "c" fn fcopyfile(from: fd_t, to: fd_t, state: ?copyfile_state_t, flags: u32) c_int;

pub extern "c" fn @"realpath$DARWIN_EXTSN"(noalias file_name: [*:0]const u8, noalias resolved_name: [*]u8) ?[*:0]u8;

pub extern "c" fn __getdirentries64(fd: c_int, buf_ptr: [*]u8, buf_len: usize, basep: *i64) isize;
Expand Down
48 changes: 47 additions & 1 deletion lib/std/fs.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1823,7 +1823,7 @@ pub const Dir = struct {
var atomic_file = try dest_dir.atomicFile(dest_path, .{ .mode = mode });
defer atomic_file.deinit();

try atomic_file.file.writeFileAll(in_file, .{ .in_len = size });
try copy_file(in_file.handle, atomic_file.file.handle);
return atomic_file.finish();
}

Expand Down Expand Up @@ -2263,6 +2263,52 @@ pub fn realpathAlloc(allocator: *Allocator, pathname: []const u8) ![]u8 {
return allocator.dupe(u8, try os.realpath(pathname, &buf));
}

/// Everything `copy_file` can fail with: the errors of the two generic copy
/// primitives plus `SystemResources`, which only the Darwin fast path produces.
const CopyFileError = error{SystemResources} || os.CopyFileRangeError || os.SendFileError;

/// Copies the entire content readable from `fd_in` into `fd_out`, using the
/// cheapest mechanism available on the target:
///   * Darwin: `fcopyfile` with `COPYFILE_DATA`,
///   * Linux: repeated `copy_file_range` calls,
///   * anything else (and Darwin when `fcopyfile` reports ENOTSUP): repeated
///     `sendfile` calls.
/// Only file data is copied; no metadata is carried over.
fn copy_file(fd_in: os.fd_t, fd_out: os.fd_t) CopyFileError!void {
    if (comptime std.Target.current.isDarwin()) {
        const status = os.system.fcopyfile(fd_in, fd_out, null, os.system.COPYFILE_DATA);
        switch (os.errno(status)) {
            0 => return,
            os.EINVAL => unreachable,
            os.ENOMEM => return error.SystemResources,
            // The source file is not a directory, symbolic link, or regular
            // file: do not give up yet, let the generic path below have a go.
            os.ENOTSUP => {},
            else => |code| return os.unexpectedErrno(code),
        }
    }

    if (std.Target.current.os.tag == .linux) {
        // copy_file_range operates at the filesystem level, which makes it the
        // most efficient option whenever it is available.
        var cfr_pos: u64 = 0;
        while (true) {
            // The kernel rejects requests whose u64 `offset+count` overflows.
            // Asking for at most a 32 bit count keeps EINVAL away for anything
            // but impossibly large files (> 2^64-1 - 2^32-1).
            const copied = try os.copy_file_range(fd_in, cfr_pos, fd_out, cfr_pos, math.maxInt(u32), 0);
            // Nothing copied means the whole input has been consumed.
            if (copied == 0) return;
            cfr_pos += copied;
        }
    }

    // sendfile is zero-copy only where the OS supports it; elsewhere its
    // fallback implementation moves the data chunk by chunk.
    const no_iovecs = [0]os.iovec_const{};
    var sendfile_pos: u64 = 0;
    while (true) {
        const sent = try os.sendfile(fd_out, fd_in, sendfile_pos, 0, &no_iovecs, &no_iovecs, 0);
        // Nothing sent means the whole input has been consumed.
        if (sent == 0) return;
        sendfile_pos += sent;
    }
}

test "" {
if (builtin.os.tag != .wasi) {
_ = makeDirAbsolute;
Expand Down
54 changes: 36 additions & 18 deletions lib/std/os.zig
Original file line number Diff line number Diff line change
Expand Up @@ -4945,6 +4945,7 @@ pub fn sendfile(
pub const CopyFileRangeError = error{
FileTooBig,
InputOutput,
InvalidFileDescriptor,
IsDir,
OutOfMemory,
NoSpaceLeft,
Expand All @@ -4953,6 +4954,11 @@ pub const CopyFileRangeError = error{
FileBusy,
} || PReadError || PWriteError || UnexpectedError;

/// Process-wide atomic flag consulted by `copy_file_range` before it attempts
/// the raw syscall. The initial value comes from the compile-time target
/// information: the result of the "at least Linux 4.5" check, or `true` when
/// that check yields null. `copy_file_range` clears it once the kernel answers
/// ENOSYS so later calls go straight to the fallback.
var has_copy_file_range_syscall = std.atomic.Int(bool).init(
    comptime std.Target.current.os.isAtLeast(.linux, .{ .major = 4, .minor = 5 }) orelse true,
);

/// Transfer data between file descriptors at specified offsets.
/// Returns the number of bytes written, which can be less than requested.
///
Expand Down Expand Up @@ -4981,22 +4987,18 @@ pub const CopyFileRangeError = error{
pub fn copy_file_range(fd_in: fd_t, off_in: u64, fd_out: fd_t, off_out: u64, len: usize, flags: u32) CopyFileRangeError!usize {
const use_c = std.c.versionCheck(.{ .major = 2, .minor = 27, .patch = 0 }).ok;

// TODO support for other systems than linux
const try_syscall = comptime std.Target.current.os.isAtLeast(.linux, .{ .major = 4, .minor = 5 }) != false;

if (use_c or try_syscall) {
if (std.Target.current.os.tag == .linux and
(use_c or has_copy_file_range_syscall.get()))
{
const sys = if (use_c) std.c else linux;

var off_in_copy = @bitCast(i64, off_in);
var off_out_copy = @bitCast(i64, off_out);

const rc = sys.copy_file_range(fd_in, &off_in_copy, fd_out, &off_out_copy, len, flags);

// TODO avoid wasting a syscall every time if kernel is too old and returns ENOSYS https://github.com/ziglang/zig/issues/1018

switch (sys.getErrno(rc)) {
0 => return @intCast(usize, rc),
EBADF => unreachable,
EBADF => return error.InvalidFileDescriptor,
EFBIG => return error.FileTooBig,
EIO => return error.InputOutput,
EISDIR => return error.IsDir,
Expand All @@ -5005,20 +5007,36 @@ pub fn copy_file_range(fd_in: fd_t, off_in: u64, fd_out: fd_t, off_out: u64, len
EOVERFLOW => return error.Unseekable,
EPERM => return error.PermissionDenied,
ETXTBSY => return error.FileBusy,
EINVAL => {}, // these may not be regular files, try fallback
EXDEV => {}, // support for cross-filesystem copy added in Linux 5.3, use fallback
ENOSYS => {}, // syscall added in Linux 4.5, use fallback
// these may not be regular files, try fallback
EINVAL => {},
// support for cross-filesystem copy added in Linux 5.3, use fallback
EXDEV => {},
// syscall added in Linux 4.5, use fallback
ENOSYS => {
has_copy_file_range_syscall.set(false);
},
else => |err| return unexpectedErrno(err),
}
}

var buf: [8 * 4096]u8 = undefined;
const adjusted_count = math.min(buf.len, len);
const amt_read = try pread(fd_in, buf[0..adjusted_count], off_in);
// TODO without @as the line below fails to compile for wasm32-wasi:
// error: integer value 0 cannot be coerced to type 'os.PWriteError!usize'
if (amt_read == 0) return @as(usize, 0);
return pwrite(fd_out, buf[0..amt_read], off_out);
var buf: [2 * 4096]u8 = undefined;

var total_copied: usize = 0;
var read_off = off_in;
var write_off = off_out;
while (total_copied < len) {
const adjusted_count = math.min(buf.len, len - total_copied);
const amt_read = try pread(fd_in, buf[0..adjusted_count], read_off);
if (amt_read == 0) break;
const amt_written = try pwrite(fd_out, buf[0..amt_read], write_off);
// pwrite may write less than the specified amount, handle the remaining
// chunk of data in the next iteration
read_off += amt_written;
write_off += amt_written;
total_copied += amt_written;
}

return total_copied;
}

pub const PollError = error{
Expand Down