Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

copy_file_range linux syscall #6010

Merged
merged 2 commits into from
Aug 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions lib/std/builtin.zig
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,14 @@ pub const Version = struct {
if (self.max.order(ver) == .lt) return false;
return true;
}

/// Decides, from the range bounds alone, whether the system version is at least `ver`.
///
/// * `true`  - `min` is not older than `ver`, so every version in the range qualifies.
/// * `false` - `max` is older than `ver`, so no version in the range qualifies.
/// * `null`  - neither holds; the answer can only be determined with a runtime check.
pub fn isAtLeast(self: Range, ver: Version) ?bool {
    const min_is_older = self.min.order(ver) == .lt;
    if (!min_is_older) return true;

    // `max` is only consulted once the lower bound has been ruled out.
    const max_is_older = self.max.order(ver) == .lt;
    if (max_is_older) return false;

    return null;
}
};

pub fn order(lhs: Version, rhs: Version) std.math.Order {
Expand Down
2 changes: 2 additions & 0 deletions lib/std/c/linux.zig
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ pub extern "c" fn sendfile(
count: usize,
) isize;

/// Declaration of the `copy_file_range` function provided by the linked C library.
/// `off_in` and `off_out` are optional pointers to signed 64-bit offsets; the result is
/// returned as a signed size.
pub extern "c" fn copy_file_range(fd_in: fd_t, off_in: ?*i64, fd_out: fd_t, off_out: ?*i64, len: usize, flags: c_uint) isize;

pub const pthread_attr_t = extern struct {
__size: [56]u8,
__align: c_long,
Expand Down
9 changes: 2 additions & 7 deletions lib/std/fs/file.zig
Original file line number Diff line number Diff line change
Expand Up @@ -607,15 +607,10 @@ pub const File = struct {
}
}

pub const CopyRangeError = PWriteError || PReadError;
pub const CopyRangeError = os.CopyFileRangeError;

pub fn copyRange(in: File, in_offset: u64, out: File, out_offset: u64, len: usize) CopyRangeError!usize {
// TODO take advantage of copy_file_range OS APIs
var buf: [8 * 4096]u8 = undefined;
const adjusted_count = math.min(buf.len, len);
const amt_read = try in.pread(buf[0..adjusted_count], in_offset);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the fallback path should remain here for systems that don't support os.copy_file_range (rather than being moved to os.zig)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

os.zig should be about what the OS supports: papering over differences and making abstractions for files should be in file.zig.

Copy link
Member

@andrewrk andrewrk Aug 11, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

os.zig is to be renamed to posix.zig: #5019
it is its job to paper over differences in that layer. there's already precedent for this with all the other read/write functions. Even if the proposal you are making were to be accepted it would not apply to this PR, it would be a separate proposal.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

copy_file_range isn't posix either; it's a linux-specific syscall.

normal reading/writing does have standard posix interfaces.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

zig's posix api layer is bespoke. It's not strictly conforming to posix. It cannot be; posix is specific to C. So it's "zig flavored posix" and whatever code has the same logical abstraction level as other posix functions go there. copy_file_range is clearly in that same layer.

if (amt_read == 0) return @as(usize, 0);
return out.pwrite(buf[0..amt_read], out_offset);
return os.copy_file_range(in.handle, in_offset, out.handle, out_offset, len, 0);
}

/// Returns the number of bytes copied. If the number read is smaller than `buffer.len`, it
Expand Down
26 changes: 26 additions & 0 deletions lib/std/fs/test.zig
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,32 @@ test "sendfile" {
testing.expect(mem.eql(u8, written_buf[0..amt], "header1\nsecond header\nine1\nsecontrailer1\nsecond trailer\n"));
}

test "copyRangeAll" {
    // Scratch directory tree; everything below lives inside it and is removed on exit.
    var tmp = tmpDir(.{});
    defer tmp.cleanup();

    try tmp.dir.makePath("os_test_tmp");
    defer tmp.dir.deleteTree("os_test_tmp") catch {};

    var work_dir = try tmp.dir.openDir("os_test_tmp", .{});
    defer work_dir.close();

    const payload = "u6wj+JmdF3qHsFPE BUlH2g4gJCmEz0PP";

    // Source file, pre-filled with the payload.
    var source = try work_dir.createFile("file1.txt", .{ .read = true });
    defer source.close();
    try source.writeAll(payload);

    // Destination file, initially empty.
    var target = try work_dir.createFile("file2.txt", .{ .read = true });
    defer target.close();

    // Copy the whole payload from offset 0 of the source to offset 0 of the destination.
    _ = try source.copyRangeAll(0, target, 0, payload.len);

    // Read the destination back and compare it with what was written to the source.
    var readback: [100]u8 = undefined;
    const n = try target.preadAll(&readback, 0);
    testing.expect(mem.eql(u8, readback[0..n], payload));
}

test "fs.copyFile" {
const data = "u6wj+JmdF3qHsFPE BUlH2g4gJCmEz0PP";
const src_file = "tmp_test_copy_file.txt";
Expand Down
79 changes: 79 additions & 0 deletions lib/std/os.zig
Original file line number Diff line number Diff line change
Expand Up @@ -4932,6 +4932,85 @@ pub fn sendfile(
return total_written;
}

/// Error set of `copy_file_range`: the errors named here, merged with the error sets of
/// `pread` and `pwrite` (used by the fallback path) and `UnexpectedError`.
pub const CopyFileRangeError = error{
    /// Returned when the underlying call reports `EFBIG`.
    FileTooBig,
    /// Returned when the underlying call reports `EIO`.
    InputOutput,
    /// Returned when the underlying call reports `EISDIR`.
    IsDir,
    /// Returned when the underlying call reports `ENOMEM`.
    OutOfMemory,
    /// Returned when the underlying call reports `ENOSPC`.
    NoSpaceLeft,
    /// Returned when the underlying call reports `EOVERFLOW`.
    Unseekable,
    /// Returned when the underlying call reports `EPERM`.
    PermissionDenied,
    /// Returned when the underlying call reports `ETXTBSY`.
    FileBusy,
} || PReadError || PWriteError || UnexpectedError;

/// Copy data from one file descriptor to another at explicit offsets.
/// Returns the number of bytes written, which can be less than requested.
///
/// `copy_file_range` copies up to `len` bytes from `fd_in` (starting at `off_in`) to `fd_out`
/// (starting at `off_out`). When possible the copy happens inside the operating system kernel,
/// which can perform better than moving the data from kernel to user space and back, as a
/// `pread` followed by a `pwrite` does.
///
/// `fd_in` must be a file descriptor opened for reading, and `fd_out` must be a file descriptor
/// opened for writing. Any kind of file descriptor is accepted; however, if `fd_in` is not a
/// regular file system file, this function may fall back to calling `pread` and `pwrite`, in
/// which case atomicity guarantees no longer apply.
///
/// If `fd_in` and `fd_out` are the same, source and target ranges must not overlap.
/// The file descriptor seek positions are ignored and not updated.
/// When `off_in` is past the end of the input file, the call successfully reads 0 bytes.
///
/// `flags` has different meanings per operating system; refer to the respective man pages.
///
/// These systems support in-kernel data copying:
/// * Linux 4.5 (cross-filesystem 5.3)
///
/// Other systems fall back to calling `pread` / `pwrite`.
///
/// Maximum offsets on Linux are `math.maxInt(i64)`.
pub fn copy_file_range(fd_in: fd_t, off_in: u64, fd_out: fd_t, off_out: u64, len: usize, flags: u32) CopyFileRangeError!usize {
    // TODO support for other systems than linux
    const kernel_may_have_it = comptime std.Target.current.os.isAtLeast(.linux, .{ .major = 4, .minor = 5 }) != false;
    const libc_has_it = std.c.versionCheck(.{ .major = 2, .minor = 27, .patch = 0 }).ok;

    if (libc_has_it or kernel_may_have_it) {
        // Prefer the libc wrapper when it is available, otherwise issue the raw syscall.
        const impl = if (libc_has_it) std.c else linux;

        // The underlying call takes pointers to signed offsets, so work on local copies.
        var src_off = @bitCast(i64, off_in);
        var dst_off = @bitCast(i64, off_out);

        const rc = impl.copy_file_range(fd_in, &src_off, fd_out, &dst_off, len, flags);

        // TODO avoid wasting a syscall every time if kernel is too old and returns ENOSYS https://github.com/ziglang/zig/issues/1018

        switch (impl.getErrno(rc)) {
            0 => return @intCast(usize, rc),
            EBADF => unreachable,
            EFBIG => return error.FileTooBig,
            EIO => return error.InputOutput,
            EISDIR => return error.IsDir,
            ENOMEM => return error.OutOfMemory,
            ENOSPC => return error.NoSpaceLeft,
            EOVERFLOW => return error.Unseekable,
            EPERM => return error.PermissionDenied,
            ETXTBSY => return error.FileBusy,
            // These three fall through to the pread/pwrite fallback below:
            // * EINVAL - these may not be regular files
            // * EXDEV  - support for cross-filesystem copy added in Linux 5.3
            // * ENOSYS - syscall added in Linux 4.5
            EINVAL, EXDEV, ENOSYS => {},
            else => |err| return unexpectedErrno(err),
        }
    }

    // Fallback: a single bounded pread into a stack buffer followed by a single pwrite.
    var bounce: [8 * 4096]u8 = undefined;
    const chunk = bounce[0..math.min(bounce.len, len)];
    const n_read = try pread(fd_in, chunk, off_in);
    // TODO without @as the line below fails to compile for wasm32-wasi:
    // error: integer value 0 cannot be coerced to type 'os.PWriteError!usize'
    if (n_read == 0) return @as(usize, 0);
    return pwrite(fd_out, chunk[0..n_read], off_out);
}

pub const PollError = error{
/// The kernel had no space to allocate file descriptor tables.
SystemResources,
Expand Down
12 changes: 12 additions & 0 deletions lib/std/os/linux.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1210,6 +1210,18 @@ pub fn signalfd4(fd: fd_t, mask: *const sigset_t, flags: i32) usize {
);
}

/// Issues the `copy_file_range` system call through `syscall6` and returns its raw result.
/// File descriptors are sign-extended to `isize` and reinterpreted as `usize`; the optional
/// offset pointers are passed as their integer addresses.
pub fn copy_file_range(fd_in: fd_t, off_in: ?*i64, fd_out: fd_t, off_out: ?*i64, len: usize, flags: u32) usize {
    const src_fd = @bitCast(usize, @as(isize, fd_in));
    const src_off_addr = @ptrToInt(off_in);
    const dst_fd = @bitCast(usize, @as(isize, fd_out));
    const dst_off_addr = @ptrToInt(off_out);

    return syscall6(.copy_file_range, src_fd, src_off_addr, dst_fd, dst_off_addr, len, flags);
}

test "" {
if (builtin.os.tag == .linux) {
_ = @import("linux/test.zig");
Expand Down
28 changes: 28 additions & 0 deletions lib/std/target.zig
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ pub const Target = struct {
/// Returns `true` when `ver` lies between `min` and `max` (both inclusive), comparing the
/// integer values of the enum tags.
pub fn includesVersion(self: Range, ver: WindowsVersion) bool {
    const wanted = @enumToInt(ver);
    const not_below_min = wanted >= @enumToInt(self.min);
    const not_above_max = wanted <= @enumToInt(self.max);
    return not_below_min and not_above_max;
}

/// Decides, from the range bounds alone, whether the system version is at least `ver`.
/// Versions are compared by the integer values of their enum tags.
///
/// * `true`  - `min` is already `ver` or newer.
/// * `false` - `max` is older than `ver`.
/// * `null`  - neither holds; a runtime check is required.
pub fn isAtLeast(self: Range, ver: WindowsVersion) ?bool {
    const wanted = @enumToInt(ver);
    if (@enumToInt(self.min) >= wanted) return true;
    if (@enumToInt(self.max) < wanted) return false;
    return null;
}
};

/// This function is defined to serialize a Zig source code representation of this
Expand Down Expand Up @@ -135,6 +143,12 @@ pub const Target = struct {
/// Forwards to `includesVersion` of the `range` field.
pub fn includesVersion(self: LinuxVersionRange, ver: Version) bool {
    const kernel_range = self.range;
    return kernel_range.includesVersion(ver);
}

/// Decides whether the system version is at least `ver` by forwarding to `isAtLeast` of the
/// `range` field. A `null` result means a runtime check is required.
pub fn isAtLeast(self: LinuxVersionRange, ver: Version) ?bool {
    const kernel_range = self.range;
    return kernel_range.isAtLeast(ver);
}
};

/// The version ranges here represent the minimum OS version to be supported
Expand All @@ -158,6 +172,8 @@ pub const Target = struct {
///
/// Binaries built with a given maximum version will continue to function on newer operating system
/// versions. However, such a binary may not take full advantage of the newer operating system APIs.
///
/// See `Os.isAtLeast`.
pub const VersionRange = union {
none: void,
semver: Version.Range,
Expand Down Expand Up @@ -273,6 +289,18 @@ pub const Target = struct {
};
}

/// Decides whether this OS is `tag` at version `version` or newer.
///
/// Returns `false` when this OS has a different tag. Otherwise the question is forwarded to
/// the version range selected by `tag` (`linux`, `windows`, or `semver` for every other tag),
/// whose `isAtLeast` returns `null` when a runtime check is required.
pub fn isAtLeast(self: Os, comptime tag: Tag, version: anytype) ?bool {
    if (self.tag == tag) {
        // `tag` is comptime-known, so only the matching prong has to accept `version`.
        switch (tag) {
            .linux => return self.version_range.linux.isAtLeast(version),
            .windows => return self.version_range.windows.isAtLeast(version),
            else => return self.version_range.semver.isAtLeast(version),
        }
    }
    return false;
}

pub fn requiresLibC(os: Os) bool {
return switch (os.tag) {
.freebsd,
Expand Down