From 3cf3309ed67e329eb99eb4e16b9d5840f2317aa8 Mon Sep 17 00:00:00 2001 From: meowjesty <43983236+meowjesty@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:57:47 -0300 Subject: [PATCH] Fixes go syscallN being broken for mmap (and adds a few more file system sycalls for go). (#2114) * Fixes go syscallN being broken for mmap. // Adds the syscalls crate so we dont have to roll our own syscall handler. * Add some hooks for go (pwrite64, pread64, fsync, fdatasync). * changelog --- Cargo.lock | 11 +++ changelog.d/2099.fixed.md | 1 + mirrord/layer/Cargo.toml | 1 + mirrord/layer/src/file/hooks.rs | 63 ++++++++++---- mirrord/layer/src/file/ops.rs | 19 +++-- mirrord/layer/src/go/linux_aarch64.rs | 25 ------ mirrord/layer/src/go/linux_x64.rs | 114 ++++++++------------------ mirrord/layer/src/go/mod.rs | 81 +++++++++++++++--- 8 files changed, 180 insertions(+), 135 deletions(-) create mode 100644 changelog.d/2099.fixed.md diff --git a/Cargo.lock b/Cargo.lock index 9b2f8e701c9..9c687e0223f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3980,6 +3980,7 @@ dependencies = [ "serde_json", "socket2 0.5.5", "streammap-ext", + "syscalls", "test-cdylib", "tests", "thiserror", @@ -6098,6 +6099,16 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "syscalls" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56b389b38331a454883a34fd19f25cbd1510b3510ff7aa28cb8d6de85d888439" +dependencies = [ + "serde", + "serde_repr", +] + [[package]] name = "system-configuration" version = "0.5.1" diff --git a/changelog.d/2099.fixed.md b/changelog.d/2099.fixed.md new file mode 100644 index 00000000000..c3030a5454b --- /dev/null +++ b/changelog.d/2099.fixed.md @@ -0,0 +1 @@ +Uses the `syscalls` crate to handle calling the syscalls for go. And adds `pwrite64`, `pread64`, `fsync` and `fdatasync` hooks for go. diff --git a/mirrord/layer/Cargo.toml b/mirrord/layer/Cargo.toml index 46b838449de..5d9b1a375b9 100644 --- a/mirrord/layer/Cargo.toml +++ b/mirrord/layer/Cargo.toml @@ -52,6 +52,7 @@ bimap.workspace = true dashmap = "5.4" hashbrown = "0.14" exec.workspace = true +syscalls = { version = "0.6", features = ["full"] } [target.'cfg(target_os = "macos")'.dependencies] mirrord-sip = { path = "../sip" } diff --git a/mirrord/layer/src/file/hooks.rs b/mirrord/layer/src/file/hooks.rs index d9c68ff1a81..f8fe49a3e89 100644 --- a/mirrord/layer/src/file/hooks.rs +++ b/mirrord/layer/src/file/hooks.rs @@ -521,21 +521,37 @@ pub(crate) unsafe extern "C" fn _pread_nocancel_detour( .unwrap_or_bypass_with(|_| FN__PREAD_NOCANCEL(fd, out_buffer, amount_to_read, offset)) } -#[hook_guard_fn] -pub(crate) unsafe extern "C" fn pwrite_detour( +/// Common code between the `pwrite` detours. +/// +/// Handle the `.unwrap_or_bypass` in their respective functions though. +unsafe fn pwrite_logic( fd: RawFd, in_buffer: *const c_void, amount_to_write: size_t, offset: off_t, -) -> ssize_t { +) -> Detour { // Convert the given buffer into a u8 slice, upto the amount to write. let casted_in_buffer: &[u8] = slice::from_raw_parts(in_buffer.cast(), amount_to_write); - pwrite(fd, casted_in_buffer, offset as u64) - .map(|write_response| { - let WriteFileResponse { written_amount } = write_response; - written_amount as ssize_t - }) + pwrite(fd, casted_in_buffer, offset as u64).map(|write_response| { + let WriteFileResponse { written_amount } = write_response; + written_amount as ssize_t + }) +} + +/// ## Note on go hook for this +/// +/// On linux, this is `pwrite64`, but if you try to hook it and call as `FN_PWRITE64`, you won't +/// find it, resulting in an `unwrap on None` error when the detour is called. So, even though +/// golang receives a syscall of [`libc::SYS_pwrite64`], hooking it like this is what works. +#[hook_guard_fn] +pub(crate) unsafe extern "C" fn pwrite_detour( + fd: RawFd, + in_buffer: *const c_void, + amount_to_write: size_t, + offset: off_t, +) -> ssize_t { + pwrite_logic(fd, in_buffer, amount_to_write, offset) .unwrap_or_bypass_with(|_| FN_PWRITE(fd, in_buffer, amount_to_write, offset)) } @@ -546,14 +562,7 @@ pub(crate) unsafe extern "C" fn _pwrite_nocancel_detour( amount_to_write: size_t, offset: off_t, ) -> ssize_t { - // Convert the given buffer into a u8 slice, upto the amount to write. - let casted_in_buffer: &[u8] = slice::from_raw_parts(in_buffer.cast(), amount_to_write); - - pwrite(fd, casted_in_buffer, offset as u64) - .map(|write_response| { - let WriteFileResponse { written_amount } = write_response; - written_amount as ssize_t - }) + pwrite_logic(fd, in_buffer, amount_to_write, offset) .unwrap_or_bypass_with(|_| FN__PWRITE_NOCANCEL(fd, in_buffer, amount_to_write, offset)) } @@ -637,6 +646,18 @@ pub(crate) unsafe extern "C" fn faccessat_detour( } } +/// Hook for `libc::fsync`. +#[hook_guard_fn] +pub(crate) unsafe extern "C" fn fsync_detour(fd: RawFd) -> c_int { + fsync(fd).unwrap_or_bypass_with(|_| FN_FSYNC(fd)) +} + +/// Hook for `libc::fdatasync`. +#[hook_guard_fn] +pub(crate) unsafe extern "C" fn fdatasync_detour(fd: RawFd) -> c_int { + fsync(fd).unwrap_or_bypass_with(|_| FN_FDATASYNC(fd)) +} + /// Tries to convert input to type O, if it fails it returns the max value of O. /// For example, if you put u32::MAX into a u8, it will return u8::MAX. fn best_effort_cast + Bounded>(input: I) -> O { @@ -974,6 +995,16 @@ pub(crate) unsafe fn enable_file_hooks(hook_manager: &mut HookManager) { FnFaccessat, FN_FACCESSAT ); + + replace!(hook_manager, "fsync", fsync_detour, FnFsync, FN_FSYNC); + replace!( + hook_manager, + "fdatasync", + fdatasync_detour, + FnFdatasync, + FN_FDATASYNC + ); + // this requires newer libc which we don't link with to support old libc.. // leaving this in code so we can enable it when libc is updated. // #[cfg(target_os = "linux")] diff --git a/mirrord/layer/src/file/ops.rs b/mirrord/layer/src/file/ops.rs index 17d5154d995..e9ec865d013 100644 --- a/mirrord/layer/src/file/ops.rs +++ b/mirrord/layer/src/file/ops.rs @@ -116,7 +116,7 @@ fn get_remote_fd(local_fd: RawFd) -> Detour { } /// Create temporary local file to get a valid local fd. -#[tracing::instrument(level = "trace")] +#[tracing::instrument(level = "trace", ret)] fn create_local_fake_file(remote_fd: u64) -> Detour { let random_string = Alphanumeric.sample_string(&mut rand::thread_rng(), 16); let file_name = format!("{remote_fd}-{random_string}"); @@ -135,7 +135,7 @@ fn create_local_fake_file(remote_fd: u64) -> Detour { } /// Close the remote file if the call to [`libc::shm_open`] failed and we have an invalid local fd. -#[tracing::instrument(level = "trace")] +#[tracing::instrument(level = "trace", ret)] fn close_remote_file_on_failure(fd: u64) -> Result<()> { error!("Creating a temporary local file resulted in an error, closing the file remotely!"); RemoteFile::remote_close(fd) @@ -152,7 +152,7 @@ fn close_remote_file_on_failure(fd: u64) -> Result<()> { /// [`open`] is also used by other _open-ish_ functions, and it takes care of **creating** the /// _local_ and _remote_ file association, plus **inserting** it into the storage for /// [`OPEN_FILES`]. -#[tracing::instrument(level = "trace")] +#[tracing::instrument(level = "trace", ret)] pub(crate) fn open(path: Detour, open_options: OpenOptionsInternal) -> Detour { let path = path?; @@ -208,7 +208,7 @@ pub(crate) fn fdopen(fd: RawFd, rawish_mode: Option<&CStr>) -> Detour<*mut FILE> } /// creates a directory stream for the `remote_fd` in the agent -#[tracing::instrument(level = "trace")] +#[tracing::instrument(level = "trace", ret)] pub(crate) fn fdopendir(fd: RawFd) -> Detour { // usize == ptr size // we don't return a pointer to an address that contains DIR @@ -231,7 +231,7 @@ pub(crate) fn fdopendir(fd: RawFd) -> Detour { Detour::Success(local_dir_fd as usize) } -#[tracing::instrument(level = "trace")] +#[tracing::instrument(level = "trace", ret)] pub(crate) fn openat( fd: RawFd, path: Detour, @@ -294,6 +294,7 @@ pub(crate) fn pread(local_fd: RawFd, buffer_size: u64, offset: u64) -> Detour Detour { let remote_fd = get_remote_fd(local_fd)?; + trace!("pwrite: local_fd {local_fd}"); let writing_file = WriteLimitedFileRequest { remote_fd, @@ -369,6 +370,14 @@ pub(crate) fn access(path: Detour, mode: u8) -> Detour { Detour::Success(0) } +/// Original function _flushes_ data from `fd` to disk, but we don't really do any of this +/// for our managed fds, so we just return `0` which means success. +#[tracing::instrument(level = "trace", ret)] +pub(crate) fn fsync(fd: RawFd) -> Detour { + get_remote_fd(fd)?; + Detour::Success(0) +} + /// General stat function that can be used for lstat, fstat, stat and fstatat. /// Note: We treat cases of `AT_SYMLINK_NOFOLLOW_ANY` as `AT_SYMLINK_NOFOLLOW` because even Go does /// that. diff --git a/mirrord/layer/src/go/linux_aarch64.rs b/mirrord/layer/src/go/linux_aarch64.rs index 14cc0be79c6..b7ccdcf7103 100644 --- a/mirrord/layer/src/go/linux_aarch64.rs +++ b/mirrord/layer/src/go/linux_aarch64.rs @@ -7,31 +7,6 @@ use crate::{go::c_abi_syscall6_handler, macros::hook_symbol, HookManager}; type VoidFn = unsafe extern "C" fn() -> (); static mut FN_ASMCGOCALL: Option = None; -/// [Naked function] 6 param version, used by Rawsyscall & Syscall -#[naked] -pub(crate) unsafe extern "C" fn syscall_6( - syscall: i64, - param1: i64, - param2: i64, - param3: i64, - param4: i64, - param5: i64, - param6: i64, -) -> i64 { - asm!( - "mov x8, x0", - "mov x0, x1", - "mov x1, x2", - "mov x2, x3", - "mov x3, x4", - "mov x4, x5", - "mov x5, x6", - "svc 0x0", - "ret", - options(noreturn) - ) -} - /// asmcgocall can only pass a pointer argument, so it sends us the SP /// which layouts to this struct. #[repr(C)] diff --git a/mirrord/layer/src/go/linux_x64.rs b/mirrord/layer/src/go/linux_x64.rs index 8030e570158..67c865a7e7a 100644 --- a/mirrord/layer/src/go/linux_x64.rs +++ b/mirrord/layer/src/go/linux_x64.rs @@ -319,7 +319,7 @@ unsafe extern "C" fn c_abi_syscall_handler( param2, param3 ); - let res = match syscall { + let syscall_result = match syscall { libc::SYS_socket => socket_detour(param1 as _, param2 as _, param3 as _) as i64, libc::SYS_bind => bind_detour(param1 as _, param2 as _, param3 as _) as i64, libc::SYS_listen => listen_detour(param1 as _, param2 as _) as i64, @@ -346,90 +346,46 @@ unsafe extern "C" fn c_abi_syscall_handler( libc::SYS_fstat => fstat_detour(param1 as _, param2 as _) as i64, libc::SYS_getdents64 => getdents64_detour(param1 as _, param2 as _, param3 as _) as i64, _ => { - let syscall_res = syscall_3(syscall, param1, param2, param3); - return syscall_res; + let (Ok(result) | Err(result)) = syscalls::syscall!( + syscalls::Sysno::from(syscall as i32), + param1, + param2, + param3 + ) + .map(|success| success as i64) + .map_err(|fail| { + let raw_errno = fail.into_raw(); + errno::set_errno(errno::Errno(raw_errno)); + + -(raw_errno as i64) + }); + result } }, _ => { - let syscall_res = syscall_3(syscall, param1, param2, param3); - return syscall_res; + let (Ok(result) | Err(result)) = syscalls::syscall!( + syscalls::Sysno::from(syscall as i32), + param1, + param2, + param3 + ) + .map(|success| success as i64) + .map_err(|fail| { + let raw_errno = fail.into_raw(); + errno::set_errno(errno::Errno(raw_errno)); + + -(raw_errno as i64) + }); + result } }; - match res { - -1 => -errno().0 as i64, - _ => res, - } -} - -/// [Naked function] 3 param version (Syscall6) for making the syscall, libc's syscall is not -/// used here as it doesn't return the value that go expects (it does translation) -#[naked] -unsafe extern "C" fn syscall_3(syscall: i64, param1: i64, param2: i64, param3: i64) -> i64 { - asm!( - "mov rax, rdi", - "mov rdi, rsi", - "mov rsi, rdx", - "mov rdx, rcx", - "syscall", - "ret", - options(noreturn) - ) -} - -/// [Naked function] 6 param version, used by Rawsyscall & Syscall -#[naked] -pub(crate) unsafe extern "C" fn syscall_6( - syscall: i64, - param1: i64, - param2: i64, - param3: i64, - param4: i64, - param5: i64, - param6: i64, -) -> i64 { - asm!( - "mov rax, rdi", - "mov rdi, rsi", - "mov rsi, rdx", - "mov rdx, rcx", - "mov r10, r8", - "mov r8, r9", - "mov r9, QWORD PTR[rsp]", - "syscall", - "ret", - options(noreturn) - ) -} -/// Clobbers rax, rcx -#[no_mangle] -#[naked] -unsafe extern "C" fn enter_syscall() { - asm!( - "mov rax, qword ptr [r14 + 0x30]", // get mp - "inc qword ptr [ rax + 0x130 ]", // inc cgocall - "inc qword ptr [ rax + 0x138 ]", // inc cgo - "mov rcx, qword ptr [ rax + 0x140 ]", - "mov qword ptr [rcx], 0x0", // reset traceback - "mov byte ptr [ RAX + 0x118], 0x1", // incgo = true - "ret", - options(noreturn) - ); -} - -/// clobbers xmm15, r14, rax -#[no_mangle] -#[naked] -unsafe extern "C" fn exit_syscall() { - asm!( - "xorps xmm15, xmm15", - "mov r14, qword ptr FS:[0xfffffff8]", - "mov rax, qword ptr [r14 + 0x30]", - "dec qword ptr [ rax + 0x138 ]", // dec cgo - "mov byte ptr [ RAX + 0x118], 0x0", // incgo = false - "ret", - options(noreturn) - ); + if syscall_result.is_negative() { + // Might not be an exact mapping, but it should be good enough. + -errno().0 as i64 + } else { + syscall_result + } } /// Detour for Go >= 1.19 diff --git a/mirrord/layer/src/go/mod.rs b/mirrord/layer/src/go/mod.rs index 90f1acb4d3c..e329431f420 100644 --- a/mirrord/layer/src/go/mod.rs +++ b/mirrord/layer/src/go/mod.rs @@ -17,7 +17,6 @@ use crate::{close_detour, file::hooks::*, socket::hooks::*}; )] pub(crate) mod go_hooks; -use go_hooks::syscall_6; /// Syscall & Syscall6 handler - supports upto 6 params, mainly used for /// accept4 Note: Depending on success/failure Syscall may or may not call this handler #[no_mangle] @@ -34,7 +33,7 @@ unsafe extern "C" fn c_abi_syscall6_handler( "c_abi_syscall6_handler: syscall={} param1={} param2={} param3={} param4={} param5={} param6={}", syscall, param1, param2, param3, param4, param5, param6 ); - let res = match syscall { + let syscall_result = match syscall { libc::SYS_accept4 => { accept4_detour(param1 as _, param2 as _, param3 as _, param4 as _) as i64 } @@ -48,7 +47,13 @@ unsafe extern "C" fn c_abi_syscall6_handler( _ if crate::setup().fs_config().is_active() => { match syscall { libc::SYS_read => read_detour(param1 as _, param2 as _, param3 as _) as i64, + libc::SYS_pread64 => { + pread_detour(param1 as _, param2 as _, param3 as _, param4 as _) as i64 + } libc::SYS_write => write_detour(param1 as _, param2 as _, param3 as _) as i64, + libc::SYS_pwrite64 => { + pwrite_detour(param1 as _, param2 as _, param3 as _, param4 as _) as i64 + } libc::SYS_lseek => lseek_detour(param1 as _, param2 as _, param3 as _), // Note(syscall_linux.go) // if flags == 0 { @@ -75,23 +80,79 @@ unsafe extern "C" fn c_abi_syscall6_handler( libc::SYS_newfstatat => { fstatat_logic(param1 as _, param2 as _, param3 as _, param4 as _) .unwrap_or_bypass_with(|_| { - syscall_6(syscall, param1, param2, param3, param4, param5, param6) - .try_into() - .unwrap() + let (Ok(result) | Err(result)) = syscalls::syscall!( + syscalls::Sysno::from(syscall as i32), + param1, + param2, + param3, + param4, + param5, + param6 + ) + .map(|success| success as i64) + .map_err(|fail| { + let raw_errno = fail.into_raw(); + errno::set_errno(errno::Errno(raw_errno)); + + -(raw_errno as i64) + }); + result as i32 }) .into() } + libc::SYS_fstat => fstat_detour(param1 as _, param2 as _) as i64, + libc::SYS_fsync => fsync_detour(param1 as _) as i64, + libc::SYS_fdatasync => fsync_detour(param1 as _) as i64, libc::SYS_openat => openat_detour(param1 as _, param2 as _, param3 as _) as i64, libc::SYS_getdents64 => { getdents64_detour(param1 as _, param2 as _, param3 as _) as i64 } - _ => syscall_6(syscall, param1, param2, param3, param4, param5, param6), + _ => { + let (Ok(result) | Err(result)) = syscalls::syscall!( + syscalls::Sysno::from(syscall as i32), + param1, + param2, + param3, + param4, + param5, + param6 + ) + .map(|success| success as i64) + .map_err(|fail| { + let raw_errno = fail.into_raw(); + errno::set_errno(errno::Errno(raw_errno)); + + -(raw_errno as i64) + }); + result + } } } - _ => syscall_6(syscall, param1, param2, param3, param4, param5, param6), + _ => { + let (Ok(result) | Err(result)) = syscalls::syscall!( + syscalls::Sysno::from(syscall as i32), + param1, + param2, + param3, + param4, + param5, + param6 + ) + .map(|success| success as i64) + .map_err(|fail| { + let raw_errno = fail.into_raw(); + errno::set_errno(errno::Errno(raw_errno)); + + -(raw_errno as i64) + }); + result + } }; - match res { - -1 => -errno().0 as i64, - _ => res, + + if syscall_result.is_negative() { + // Might not be an exact mapping, but it should be good enough. + -errno().0 as i64 + } else { + syscall_result } }