diff --git a/profiling/Cargo.toml b/profiling/Cargo.toml index ef19028f97..beb1503dbc 100644 --- a/profiling/Cargo.toml +++ b/profiling/Cargo.toml @@ -49,12 +49,13 @@ name = "stack_walking" harness = false [features] -default = ["allocation_profiling", "timeline", "exception_profiling"] +default = ["allocation_profiling", "timeline", "exception_profiling", "io_profiling"] allocation_profiling = [] timeline = [] exception_profiling = [] stack_walking_tests = [] test = [] +io_profiling = [] # only for testing: trigger_time_sample = [] diff --git a/profiling/src/config.rs b/profiling/src/config.rs index c21185e838..50e0f9e2ac 100644 --- a/profiling/src/config.rs +++ b/profiling/src/config.rs @@ -28,6 +28,7 @@ pub struct SystemSettings { pub profiling_exception_enabled: bool, pub profiling_exception_message_enabled: bool, pub profiling_wall_time_enabled: bool, + pub profiling_io_enabled: bool, // todo: can't this be Option? I don't think the string can ever be static. pub output_pprof: Option>, @@ -46,6 +47,7 @@ impl SystemSettings { self.profiling_timeline_enabled = false; self.profiling_exception_enabled = false; self.profiling_exception_message_enabled = false; + self.profiling_io_enabled = false; } /// # Safety @@ -68,6 +70,7 @@ impl SystemSettings { profiling_exception_enabled: profiling_exception_enabled(), profiling_exception_message_enabled: profiling_exception_message_enabled(), profiling_wall_time_enabled: profiling_wall_time_enabled(), + profiling_io_enabled: profiling_io_enabled(), output_pprof: profiling_output_pprof(), profiling_exception_sampling_distance: profiling_exception_sampling_distance(), profiling_log_level: profiling_log_level(), @@ -131,6 +134,7 @@ impl SystemSettings { profiling_exception_enabled: false, profiling_exception_message_enabled: false, profiling_wall_time_enabled: false, + profiling_io_enabled: false, output_pprof: None, profiling_exception_sampling_distance: 0, profiling_log_level: LevelFilter::Off, @@ -148,6 +152,7 @@ impl SystemSettings { system_settings.profiling_timeline_enabled = false; system_settings.profiling_exception_enabled = false; system_settings.profiling_exception_message_enabled = false; + system_settings.profiling_io_enabled = false; } } @@ -344,6 +349,7 @@ pub(crate) enum ConfigId { ProfilingExceptionEnabled, ProfilingExceptionMessageEnabled, ProfilingExceptionSamplingDistance, + ProfilingExperimentalIOEnabled, ProfilingLogLevel, ProfilingOutputPprof, ProfilingWallTimeEnabled, @@ -372,6 +378,7 @@ impl ConfigId { ProfilingExceptionEnabled => b"DD_PROFILING_EXCEPTION_ENABLED\0", ProfilingExceptionMessageEnabled => b"DD_PROFILING_EXCEPTION_MESSAGE_ENABLED\0", ProfilingExceptionSamplingDistance => b"DD_PROFILING_EXCEPTION_SAMPLING_DISTANCE\0", + ProfilingExperimentalIOEnabled => b"DD_PROFILING_EXPERIMENTAL_IO_ENABLED\0", ProfilingLogLevel => b"DD_PROFILING_LOG_LEVEL\0", // Note: this group is meant only for debugging and testing. Please @@ -415,6 +422,7 @@ lazy_static::lazy_static! { profiling_exception_enabled: false, profiling_exception_message_enabled: false, profiling_wall_time_enabled: false, + profiling_io_enabled: false, output_pprof: None, profiling_exception_sampling_distance: u32::MAX, profiling_log_level: LevelFilter::Off, @@ -432,6 +440,7 @@ lazy_static::lazy_static! { profiling_exception_enabled: true, profiling_exception_message_enabled: false, profiling_wall_time_enabled: true, + profiling_io_enabled: false, output_pprof: None, profiling_exception_sampling_distance: 100, profiling_log_level: LevelFilter::Off, @@ -533,6 +542,18 @@ unsafe fn profiling_exception_sampling_distance() -> u32 { ) } +/// # Safety +/// This function must only be called after config has been initialized in +/// rinit, and before it is uninitialized in mshutdown. +unsafe fn profiling_io_enabled() -> bool { + profiling_enabled() + && (profiling_experimental_features_enabled() + || get_system_bool( + ProfilingExperimentalIOEnabled, + DEFAULT_SYSTEM_SETTINGS.profiling_io_enabled, + )) +} + /// # Safety /// This function must only be called after config has been initialized in /// first rinit, and before it is uninitialized in mshutdown. @@ -874,6 +895,17 @@ pub(crate) fn minit(module_number: libc::c_int) { parser: Some(parse_exception_sampling_distance_filter), env_config_fallback: None, }, + zai_config_entry { + id: transmute::(ProfilingExperimentalIOEnabled), + name: ProfilingExperimentalIOEnabled.env_var_name(), + type_: ZAI_CONFIG_TYPE_BOOL, + default_encoded_value: ZaiStr::literal(b"0\0"), + aliases: ptr::null_mut(), + aliases_count: 0, + ini_change: Some(zai_config_system_ini_change), + parser: None, + env_config_fallback: None, + }, zai_config_entry { id: transmute::(ProfilingLogLevel), name: ProfilingLogLevel.env_var_name(), @@ -1064,6 +1096,8 @@ mod tests { b"DD_PROFILING_TIMELINE_ENABLED\0", "datadog.profiling.timeline_enabled", ), + #[cfg(feature = "io_profiling")] + (b"DD_PROFILING_IO_ENABLED\0", "datadog.profiling.io_enabled"), (b"DD_PROFILING_LOG_LEVEL\0", "datadog.profiling.log_level"), ( b"DD_PROFILING_OUTPUT_PPROF\0", @@ -1142,4 +1176,4 @@ mod tests { let expected = AgentEndpoint::default(); assert_eq!(endpoint, expected); } -} +} \ No newline at end of file diff --git a/profiling/src/io.rs b/profiling/src/io.rs new file mode 100644 index 0000000000..baf50b3e1a --- /dev/null +++ b/profiling/src/io.rs @@ -0,0 +1,683 @@ +use crate::bindings::{ + Elf64_Dyn, Elf64_Rela, Elf64_Sym, Elf64_Xword, DT_JMPREL, DT_NULL, DT_PLTRELSZ, DT_STRTAB, + DT_SYMTAB, PT_DYNAMIC, R_AARCH64_JUMP_SLOT, R_X86_64_JUMP_SLOT, +}; +use crate::profiling::Profiler; +use crate::zend; +use crate::REQUEST_LOCALS; +use libc::{c_char, c_int, c_void, dl_phdr_info}; +use log::{error, trace}; +use rand::rngs::ThreadRng; +use std::cell::RefCell; +use std::ffi::CStr; +use std::ptr; +use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering; +use std::time::Instant; + +use rand_distr::{Distribution, Poisson}; + +fn elf64_r_type(info: Elf64_Xword) -> u32 { + (info & 0xffffffff) as u32 +} + +fn elf64_r_sym(info: Elf64_Xword) -> u32 { + (info >> 32) as u32 +} + +/// Override the GOT entry for symbols specified in `overwrites`. +/// +/// See: https://cs4401.walls.ninja/notes/lecture/basics_global_offset_table.html +/// See: https://bottomupcs.com/ch09s03.html +/// See: https://www.codeproject.com/articles/1032231/what-is-the-symbol-table-and-what-is-the-global-of +/// +/// Safety: Why is anything happening in in here safe? Well generally we can say all of the pointer +/// arithmetics are safe because the dynamic library the `info` is pointing to was loaded by the +/// dynamic linker prior to us messing with the global offset table. If the dynamic library would +/// not be a valid ELF64, the dynamic linker would have not loaded it. +unsafe fn override_got_entry(info: *mut dl_phdr_info, overwrites: *mut Vec) { + let phdr = (*info).dlpi_phdr; + + // Locate the dynamic programm header (`PT_DYNAMIC`) + let mut dyn_ptr: *const Elf64_Dyn = ptr::null(); + for i in 0..(*info).dlpi_phnum { + let phdr_i = phdr.offset(i as isize); + if (*phdr_i).p_type == PT_DYNAMIC { + dyn_ptr = ((*info).dlpi_addr as usize + (*phdr_i).p_vaddr as usize) as *const Elf64_Dyn; + break; + } + } + if dyn_ptr.is_null() { + trace!("Failed to locate dynamic section"); + return; + } + + let mut rel_plt: *mut Elf64_Rela = ptr::null_mut(); + let mut rel_plt_size: usize = 0; + let mut symtab: *mut Elf64_Sym = ptr::null_mut(); + let mut strtab: *const c_char = ptr::null(); + + // The dynamic programm header (`PT_DYNAMIC`) has different sections. We are interessted in the + // procedure linkage table (PLT in `DT_JMPREL`), the size of the PLT (`DT_PLTRELSZ`), the + // symbol table (`DT_SYMTAB`) and the the string table for the symbol names (`DT_STRTAB`). + // + // Addresses in here are sometimes relative, sometimes absolute + // - on musl, addresses are relative + // - on glibc, addresses are absolutes + // https://elixir.bootlin.com/glibc/glibc-2.36/source/elf/get-dynamic-info.h#L84 + let mut dyn_iter = dyn_ptr; + loop { + let d_tag = (*dyn_iter).d_tag as u32; + if d_tag == DT_NULL { + break; + } + match d_tag { + DT_JMPREL => { + // Relocation entries for the PLT (Procedure Linkage Table) + if ((*dyn_iter).d_un.d_ptr as usize) < ((*info).dlpi_addr as usize) { + rel_plt = ((*info).dlpi_addr as usize + (*dyn_iter).d_un.d_ptr as usize) + as *mut Elf64_Rela; + } else { + rel_plt = (*dyn_iter).d_un.d_ptr as *mut Elf64_Rela; + } + } + DT_PLTRELSZ => { + // Size of the PLT relocation entries + rel_plt_size = (*dyn_iter).d_un.d_val as usize; + } + DT_SYMTAB => { + // The symbol table + if ((*dyn_iter).d_un.d_ptr as usize) < ((*info).dlpi_addr as usize) { + symtab = ((*info).dlpi_addr as usize + (*dyn_iter).d_un.d_ptr as usize) + as *mut Elf64_Sym; + } else { + symtab = (*dyn_iter).d_un.d_ptr as *mut Elf64_Sym; + } + } + DT_STRTAB => { + // The string table for the symbol names + if ((*dyn_iter).d_un.d_ptr as usize) < ((*info).dlpi_addr as usize) { + strtab = ((*info).dlpi_addr as usize + (*dyn_iter).d_un.d_ptr as usize) + as *const c_char; + } else { + strtab = (*dyn_iter).d_un.d_ptr as *const c_char; + } + } + _ => {} + } + dyn_iter = dyn_iter.offset(1); + } + + if rel_plt.is_null() || rel_plt_size == 0 || symtab.is_null() || strtab.is_null() { + trace!("Failed to locate required ELF sections (`DT_JMPREL`, `DT_PLTRELSZ`, `DT_SYMTAB` and `DT_STRTAB`)"); + return; + } + + let num_relocs = rel_plt_size / std::mem::size_of::(); + + // For each symbol we want to overwrite (from `overwrites`), we scan the relocation entries. + // Once the matching symbol name is found, patch its GOT entry to point to our new function. + for overwrite in &mut *overwrites { + for i in 0..num_relocs { + let rel = rel_plt.add(i); + let r_type = elf64_r_type((*rel).r_info); + + // Only handle JUMP_SLOT relocations + if r_type != R_AARCH64_JUMP_SLOT && r_type != R_X86_64_JUMP_SLOT { + continue; + } + + // Get the symbol index for this relocation, then the symbol struct + let sym_index = elf64_r_sym((*rel).r_info) as usize; + let sym = symtab.add(sym_index); + + // Access the symbol name via the string table + let name_offset = (*sym).st_name as isize; + let name_ptr = strtab.offset(name_offset); + let name = CStr::from_ptr(name_ptr).to_str().unwrap_or(""); + + if name == overwrite.symbol_name { + // Calculate the GOT entry address + let got_entry = + ((*info).dlpi_addr as usize + (*rel).r_offset as usize) as *mut *mut (); + + // Change memory protection so we can write to the GOT entry + let page_size = libc::sysconf(libc::_SC_PAGESIZE) as usize; + let aligned_addr = (got_entry as usize) & !(page_size - 1); + if libc::mprotect( + aligned_addr as *mut c_void, + page_size, + libc::PROT_READ | libc::PROT_WRITE, + ) != 0 + { + let err = *libc::__errno_location(); + trace!("mprotect failed: {}", err); + return; + } + + trace!( + "Overriding GOT entry for {} at offset {:?} (abs: {:p}) pointing to {:p} (orig function at {:p})", + overwrite.symbol_name, + (*rel).r_offset, + got_entry, + *got_entry, + *overwrite.orig_func + ); + + // This works for musl based linux distros, but not for libc once + *overwrite.orig_func = libc::dlsym(libc::RTLD_NEXT, name_ptr) as *mut (); + if (*overwrite.orig_func).is_null() { + // libc linux fallback + *overwrite.orig_func = *got_entry; + } + *got_entry = overwrite.new_func; + continue; + } + } + } +} + +/// Callback function that should be passed to `libc::dl_iterate_phdr()` and gets called for every +/// shared object. +unsafe extern "C" fn callback(info: *mut dl_phdr_info, _size: usize, data: *mut c_void) -> c_int { + let overwrites = &mut *(data as *mut Vec); + + // detect myself ... + let mut my_info: libc::Dl_info = std::mem::zeroed(); + if libc::dladdr(callback as *const c_void, &mut my_info) == 0 { + error!("Did not find my own `dladdr` and therefore can't hook into the GOT."); + return 0; + } + let my_base_addr = my_info.dli_fbase as usize; + let module_base_addr = (*info).dlpi_addr as usize; + if module_base_addr == my_base_addr { + // "this" lib is actually me: skipping GOT hooking for myself + return 0; + } + + let name = if (*info).dlpi_name.is_null() || *(*info).dlpi_name == 0 { + "[Executable]" + } else { + CStr::from_ptr((*info).dlpi_name) + .to_str() + .unwrap_or("[Unknown]") + }; + + //libc::raise(libc::SIGTRAP); + + // I guess if we try to hook into GOT from `linux-vdso` or `ld-linux` our best outcome will be + // that nothing happens, but most likely we'll crash and we should avoid that. + if name.contains("linux-vdso") || name.contains("ld-linux") { + return 0; + } + + // `curl_exec()` will spawn a new thread for name resolution. GOT hooking follows threads and + // as such we might sample from another thread even in a NTS build of PHP, which we should not. + // So instead of crashing (or risking a crash) we currently refrain from collection I/O from + // name resolution. + if name.contains("resolve") { + return 0; + } + + override_got_entry(info, overwrites); + + 0 +} + +struct GotSymbolOverwrite { + symbol_name: &'static str, + new_func: *mut (), + orig_func: *mut *mut (), +} + +static mut ORIG_POLL: unsafe extern "C" fn(*mut libc::pollfd, u64, c_int) -> i32 = libc::poll; +/// The `poll()` libc call has only every been observed when reading/writing to/from a socket, +/// never when reading/writing to a file. There is two known cases in PHP: +/// - the PHP stream layer (e.g. `file_get_contents("proto://url")`) +/// - the curl extension in `curl_exec()`/`curl_multi_exec()` +/// The `nfds` argument is usually 1, in case of a `curl_multi_exec()` call it is >= 1 and exactly +/// the number of concurrent requests. In rare cases the `nfds` argument is 0 and fds a +/// NULL-pointer. This is basically and "old trick" to ms precision sleep() and currently ignored. +unsafe extern "C" fn observed_poll(fds: *mut libc::pollfd, nfds: u64, timeout: c_int) -> i32 { + let start = Instant::now(); + let ret = ORIG_POLL(fds, nfds, timeout); + let duration = start.elapsed(); + + if !fds.is_null() { + if (*fds).revents & 1 == 1 { + // requested events contains reading + SOCKET_READ_TIME_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(duration.as_nanos() as u64) + }); + } else if (*fds).revents & 4 == 4 { + // requested events contains writing + SOCKET_READ_TIME_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(duration.as_nanos() as u64) + }); + } else if (*fds).events & 1 == 1 { + // socket became readable + SOCKET_READ_TIME_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(duration.as_nanos() as u64) + }); + } else if (*fds).events & 4 == 4 { + // socket became writeable + SOCKET_READ_TIME_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(duration.as_nanos() as u64) + }); + } + } + + ret +} + +static mut ORIG_RECV: unsafe extern "C" fn(c_int, *mut c_void, usize, c_int) -> isize = libc::recv; + +unsafe extern "C" fn observed_recv( + socket: c_int, + buf: *mut c_void, + length: usize, + flags: c_int, +) -> isize { + let start = Instant::now(); + let len = ORIG_RECV(socket, buf, length, flags); + let duration = start.elapsed(); + + SOCKET_READ_TIME_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(duration.as_nanos() as u64) + }); + SOCKET_READ_SIZE_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(len as u64) + }); + + len +} + +static mut ORIG_SEND: unsafe extern "C" fn(c_int, *const c_void, usize, c_int) -> isize = + libc::send; +unsafe extern "C" fn observed_send( + socket: c_int, + buf: *const c_void, + length: usize, + flags: c_int, +) -> isize { + let start = Instant::now(); + let len = ORIG_SEND(socket, buf, length, flags); + let duration = start.elapsed(); + + SOCKET_WRITE_TIME_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(duration.as_nanos() as u64) + }); + SOCKET_WRITE_SIZE_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(len as u64) + }); + + len +} + +static mut ORIG_FWRITE: unsafe extern "C" fn( + *const c_void, + usize, + usize, + *mut libc::FILE, +) -> usize = libc::fwrite; +unsafe extern "C" fn observed_fwrite( + buf: *const c_void, + size: usize, + n: usize, + stream: *mut libc::FILE, +) -> usize { + let start = Instant::now(); + let len = ORIG_FWRITE(buf, size, n, stream); + let duration = start.elapsed(); + + FILE_WRITE_TIME_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(duration.as_nanos() as u64) + }); + FILE_WRITE_SIZE_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(len as u64) + }); + + len +} + +static mut ORIG_WRITE: unsafe extern "C" fn(c_int, *const c_void, usize) -> isize = libc::write; +unsafe extern "C" fn observed_write(fd: c_int, buf: *const c_void, count: usize) -> isize { + let start = Instant::now(); + let len = ORIG_WRITE(fd, buf, count); + let duration = start.elapsed(); + + FILE_WRITE_TIME_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(duration.as_nanos() as u64) + }); + FILE_WRITE_SIZE_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(len as u64) + }); + + len +} + +static mut ORIG_FREAD: unsafe extern "C" fn(*mut c_void, usize, usize, *mut libc::FILE) -> usize = + libc::fread; +// So far there seems to be only one situation where a file is read using `fread()` instead of +// `read()` in PHP and that is when compiling a PHP file, triggered by it being the start file or a +// userland call to `include()`/`require()` functions. +unsafe extern "C" fn observed_fread( + buf: *mut c_void, + size: usize, + n: usize, + fp: *mut libc::FILE, +) -> usize { + let start = Instant::now(); + let len = ORIG_FREAD(buf, size, n, fp); + let duration = start.elapsed(); + + FILE_READ_TIME_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(duration.as_nanos() as u64) + }); + FILE_READ_SIZE_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(len as u64) + }); + + len +} + +static mut ORIG_READ: unsafe extern "C" fn(c_int, *mut c_void, usize) -> isize = libc::read; +unsafe extern "C" fn observed_read(fd: c_int, buf: *mut c_void, count: usize) -> isize { + let start = Instant::now(); + let len = ORIG_READ(fd, buf, count); + let duration = start.elapsed(); + + FILE_READ_TIME_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(duration.as_nanos() as u64) + }); + FILE_READ_SIZE_PROFILING_STATS.with(|cell| { + let mut io = cell.borrow_mut(); + io.track(len as u64) + }); + + len +} + +/// Take a sample every 1 second of read I/O +/// Will be initialized on first RINIT and is controlled by a INI_SYSTEM, so we do not need a +/// thread local for the profiling interval. +pub static SOCKET_READ_TIME_PROFILING_INTERVAL: AtomicU64 = + AtomicU64::new(std::time::Duration::from_millis(10).as_nanos() as u64); +pub static SOCKET_WRITE_TIME_PROFILING_INTERVAL: AtomicU64 = + AtomicU64::new(std::time::Duration::from_millis(10).as_nanos() as u64); +pub static FILE_READ_TIME_PROFILING_INTERVAL: AtomicU64 = + AtomicU64::new(std::time::Duration::from_millis(10).as_nanos() as u64); +pub static FILE_WRITE_TIME_PROFILING_INTERVAL: AtomicU64 = + AtomicU64::new(std::time::Duration::from_millis(10).as_nanos() as u64); +pub static SOCKET_READ_SIZE_PROFILING_INTERVAL: AtomicU64 = AtomicU64::new(1024 * 1024); +pub static SOCKET_WRITE_SIZE_PROFILING_INTERVAL: AtomicU64 = AtomicU64::new(1024 * 1024); +pub static FILE_READ_SIZE_PROFILING_INTERVAL: AtomicU64 = AtomicU64::new(1024 * 1024); +pub static FILE_WRITE_SIZE_PROFILING_INTERVAL: AtomicU64 = AtomicU64::new(1024 * 1024); + +pub trait IOCollector { + fn collect(&self, profiler: &Profiler, value: u64); +} + +pub struct SocketReadTimeCollector; +impl IOCollector for SocketReadTimeCollector { + fn collect(&self, profiler: &Profiler, value: u64) { + // Safety: execute_data was provided by the engine, and the profiler doesn't mutate it. + unsafe { + profiler.collect_socket_read_time( + zend::ddog_php_prof_get_current_execute_data(), + value as i64, + ) + }; + } +} + +pub struct SocketWriteTimeCollector; +impl IOCollector for SocketWriteTimeCollector { + fn collect(&self, profiler: &Profiler, value: u64) { + // Safety: execute_data was provided by the engine, and the profiler doesn't mutate it. + unsafe { + profiler.collect_socket_write_time( + zend::ddog_php_prof_get_current_execute_data(), + value as i64, + ) + }; + } +} + +pub struct FileReadTimeCollector; +impl IOCollector for FileReadTimeCollector { + fn collect(&self, profiler: &Profiler, value: u64) { + // Safety: execute_data was provided by the engine, and the profiler doesn't mutate it. + unsafe { + profiler.collect_file_read_time( + zend::ddog_php_prof_get_current_execute_data(), + value as i64, + ) + }; + } +} + +pub struct FileWriteTimeCollector; +impl IOCollector for FileWriteTimeCollector { + fn collect(&self, profiler: &Profiler, value: u64) { + // Safety: execute_data was provided by the engine, and the profiler doesn't mutate it. + unsafe { + profiler.collect_file_write_time( + zend::ddog_php_prof_get_current_execute_data(), + value as i64, + ) + }; + } +} + +pub struct SocketReadSizeCollector; +impl IOCollector for SocketReadSizeCollector { + fn collect(&self, profiler: &Profiler, value: u64) { + // Safety: execute_data was provided by the engine, and the profiler doesn't mutate it. + unsafe { + profiler.collect_socket_read_size( + zend::ddog_php_prof_get_current_execute_data(), + value as i64, + ) + }; + } +} + +pub struct SocketWriteSizeCollector; +impl IOCollector for SocketWriteSizeCollector { + fn collect(&self, profiler: &Profiler, value: u64) { + // Safety: execute_data was provided by the engine, and the profiler doesn't mutate it. + unsafe { + profiler.collect_socket_write_size( + zend::ddog_php_prof_get_current_execute_data(), + value as i64, + ) + }; + } +} + +pub struct FileReadSizeCollector; +impl IOCollector for FileReadSizeCollector { + fn collect(&self, profiler: &Profiler, value: u64) { + // Safety: execute_data was provided by the engine, and the profiler doesn't mutate it. + unsafe { + profiler.collect_file_read_size( + zend::ddog_php_prof_get_current_execute_data(), + value as i64, + ) + }; + } +} + +pub struct FileWriteSizeCollector; +impl IOCollector for FileWriteSizeCollector { + fn collect(&self, profiler: &Profiler, value: u64) { + // Safety: execute_data was provided by the engine, and the profiler doesn't mutate it. + unsafe { + profiler.collect_file_write_size( + zend::ddog_php_prof_get_current_execute_data(), + value as i64, + ) + }; + } +} + +pub struct IOProfilingStats { + next_sample: u64, + poisson: Poisson, + rng: ThreadRng, + collector: C, +} + +impl IOProfilingStats { + fn new(lambda: f64, collector: C) -> Self { + // Safety: this will only error if lambda <= 0 + let poisson = Poisson::new(lambda).unwrap(); + let mut stats = IOProfilingStats { + next_sample: 0, + poisson, + rng: rand::thread_rng(), + collector, + }; + stats.next_sampling_interval(); + stats + } + + fn next_sampling_interval(&mut self) { + self.next_sample = self.poisson.sample(&mut self.rng) as u64; + } + + fn track(&mut self, value: u64) { + if let Some(next_sample) = self.next_sample.checked_sub(value) { + self.next_sample = next_sample; + return; + } + self.next_sampling_interval(); + if let Some(profiler) = Profiler::get() { + self.collector.collect(profiler, value); + } + } +} + +thread_local! { + static SOCKET_READ_TIME_PROFILING_STATS: RefCell> = RefCell::new( + IOProfilingStats::new( + SOCKET_READ_TIME_PROFILING_INTERVAL.load(Ordering::SeqCst) as f64, + SocketReadTimeCollector + ) + ); + static SOCKET_WRITE_TIME_PROFILING_STATS: RefCell> = RefCell::new( + IOProfilingStats::new( + SOCKET_WRITE_TIME_PROFILING_INTERVAL.load(Ordering::SeqCst) as f64, + SocketWriteTimeCollector + ) + ); + static FILE_READ_TIME_PROFILING_STATS: RefCell> = RefCell::new( + IOProfilingStats::new( + FILE_READ_TIME_PROFILING_INTERVAL.load(Ordering::SeqCst) as f64, + FileReadTimeCollector + ) + ); + static FILE_WRITE_TIME_PROFILING_STATS: RefCell> = RefCell::new( + IOProfilingStats::new( + FILE_WRITE_TIME_PROFILING_INTERVAL.load(Ordering::SeqCst) as f64, + FileWriteTimeCollector + ) + ); + static SOCKET_READ_SIZE_PROFILING_STATS: RefCell> = RefCell::new( + IOProfilingStats::new( + SOCKET_READ_SIZE_PROFILING_INTERVAL.load(Ordering::SeqCst) as f64, + SocketReadSizeCollector + ) + ); + static SOCKET_WRITE_SIZE_PROFILING_STATS: RefCell> = RefCell::new( + IOProfilingStats::new( + SOCKET_WRITE_SIZE_PROFILING_INTERVAL.load(Ordering::SeqCst) as f64, + SocketWriteSizeCollector + ) + ); + static FILE_READ_SIZE_PROFILING_STATS: RefCell> = RefCell::new( + IOProfilingStats::new( + FILE_READ_SIZE_PROFILING_INTERVAL.load(Ordering::SeqCst) as f64, + FileReadSizeCollector + ) + ); + static FILE_WRITE_SIZE_PROFILING_STATS: RefCell> = RefCell::new( + IOProfilingStats::new( + FILE_WRITE_SIZE_PROFILING_INTERVAL.load(Ordering::SeqCst) as f64, + FileWriteSizeCollector + ) + ); +} + +pub fn io_prof_first_rinit() { + let io_profiling = REQUEST_LOCALS.with(|cell| { + cell.try_borrow() + .map(|locals| locals.system_settings().profiling_io_enabled) + .unwrap_or(false) + }); + + trace!("I/O profiling: {io_profiling:?}"); + + if io_profiling { + unsafe { + let mut overwrites = vec![ + GotSymbolOverwrite { + symbol_name: "recv", + new_func: observed_recv as *mut (), + orig_func: ptr::addr_of_mut!(ORIG_RECV) as *mut _ as *mut *mut (), + }, + GotSymbolOverwrite { + symbol_name: "send", + new_func: observed_send as *mut (), + orig_func: ptr::addr_of_mut!(ORIG_SEND) as *mut _ as *mut *mut (), + }, + GotSymbolOverwrite { + symbol_name: "write", + new_func: observed_write as *mut (), + orig_func: ptr::addr_of_mut!(ORIG_WRITE) as *mut _ as *mut *mut (), + }, + GotSymbolOverwrite { + symbol_name: "read", + new_func: observed_read as *mut (), + orig_func: ptr::addr_of_mut!(ORIG_READ) as *mut _ as *mut *mut (), + }, + GotSymbolOverwrite { + symbol_name: "fwrite", + new_func: observed_fwrite as *mut (), + orig_func: ptr::addr_of_mut!(ORIG_FWRITE) as *mut _ as *mut *mut (), + }, + GotSymbolOverwrite { + symbol_name: "fread", + new_func: observed_fread as *mut (), + orig_func: ptr::addr_of_mut!(ORIG_FREAD) as *mut _ as *mut *mut (), + }, + GotSymbolOverwrite { + symbol_name: "poll", + new_func: observed_poll as *mut (), + orig_func: ptr::addr_of_mut!(ORIG_POLL) as *mut _ as *mut *mut (), + }, + ]; + trace!("I/O profiling: {io_profiling:?}"); + libc::dl_iterate_phdr( + Some(callback), + &mut overwrites as *mut _ as *mut libc::c_void, + ); + }; + } +} \ No newline at end of file diff --git a/profiling/src/lib.rs b/profiling/src/lib.rs index f9f6434644..8bf9e1a6f3 100644 --- a/profiling/src/lib.rs +++ b/profiling/src/lib.rs @@ -15,6 +15,9 @@ mod string_set; #[cfg(feature = "allocation_profiling")] mod allocation; +#[cfg(all(feature = "io_profiling", target_os = "linux"))] +mod io; + #[cfg(feature = "exception_profiling")] mod exception; @@ -544,6 +547,9 @@ extern "C" fn rinit(_type: c_int, _module_number: c_int) -> ZendResult { #[cfg(feature = "exception_profiling")] exception::exception_profiling_first_rinit(); + + #[cfg(all(feature = "io_profiling", target_os = "linux"))] + io::io_prof_first_rinit(); }); Profiler::init(system_settings); @@ -780,6 +786,28 @@ unsafe extern "C" fn minfo(module_ptr: *mut zend::ModuleEntry) { } } + cfg_if::cfg_if! { + if #[cfg(feature = "io_profiling")] { + zend::php_info_print_table_row( + 2, + b"I/O Profiling Enabled\0".as_ptr(), + if system_settings.profiling_io_enabled { + yes + } else if system_settings.profiling_enabled { + no + } else { + no_all + }, + ); + } else { + zend::php_info_print_table_row( + 2, + b"I/O Profiling Enabled\0".as_ptr(), + b"Not available. The profiler was built without I/O profiling support.\0" + ); + } + } + zend::php_info_print_table_row( 2, b"Endpoint Collection Enabled\0".as_ptr(), @@ -946,4 +974,4 @@ fn notify_trace_finished(local_root_span_id: u64, span_type: Cow, resource: } } }); -} \ No newline at end of file +} diff --git a/profiling/src/php_ffi.h b/profiling/src/php_ffi.h index 15de64ec0a..d6fdab69e2 100644 --- a/profiling/src/php_ffi.h +++ b/profiling/src/php_ffi.h @@ -16,6 +16,10 @@ #include +#ifdef __linux__ +#include +#endif + // Needed for `zend_observer_error_register` starting from PHP 8 #if CFG_ZEND_ERROR_OBSERVER // defined by build.rs #include diff --git a/profiling/src/profiling/mod.rs b/profiling/src/profiling/mod.rs index 37e6e8bee7..639ced2375 100644 --- a/profiling/src/profiling/mod.rs +++ b/profiling/src/profiling/mod.rs @@ -76,6 +76,14 @@ pub struct SampleValues { alloc_size: i64, timeline: i64, exception: i64, + socket_read_time: i64, + socket_write_time: i64, + file_read_time: i64, + file_write_time: i64, + socket_read_size: i64, + socket_write_size: i64, + file_read_size: i64, + file_write_size: i64, } const WALL_TIME_PERIOD: Duration = Duration::from_millis(10); @@ -1197,6 +1205,98 @@ impl Profiler { } } + #[cfg(feature = "io_profiling")] + pub fn collect_socket_read_time(&self, ed: *mut zend_execute_data, socket_io_read_time: i64) { + self.collect_io(ed, |vals| { + vals.socket_read_time = socket_io_read_time; + }) + } + + #[cfg(feature = "io_profiling")] + pub fn collect_socket_write_time(&self, ed: *mut zend_execute_data, socket_io_write_time: i64) { + self.collect_io(ed, |vals| { + vals.socket_write_time = socket_io_write_time; + }) + } + + #[cfg(feature = "io_profiling")] + pub fn collect_file_read_time(&self, ed: *mut zend_execute_data, file_io_read_time: i64) { + self.collect_io(ed, |vals| { + vals.file_read_time = file_io_read_time; + }) + } + + #[cfg(feature = "io_profiling")] + pub fn collect_file_write_time(&self, ed: *mut zend_execute_data, file_io_write_time: i64) { + self.collect_io(ed, |vals| { + vals.file_write_time = file_io_write_time; + }) + } + + #[cfg(feature = "io_profiling")] + pub fn collect_socket_read_size(&self, ed: *mut zend_execute_data, socket_io_read_size: i64) { + self.collect_io(ed, |vals| { + vals.socket_read_size = socket_io_read_size; + }) + } + + #[cfg(feature = "io_profiling")] + pub fn collect_socket_write_size(&self, ed: *mut zend_execute_data, socket_io_write_size: i64) { + self.collect_io(ed, |vals| { + vals.socket_write_size = socket_io_write_size; + }) + } + + #[cfg(feature = "io_profiling")] + pub fn collect_file_read_size(&self, ed: *mut zend_execute_data, file_io_read_size: i64) { + self.collect_io(ed, |vals| { + vals.file_read_size = file_io_read_size; + }) + } + + #[cfg(feature = "io_profiling")] + pub fn collect_file_write_size(&self, ed: *mut zend_execute_data, file_io_write_size: i64) { + self.collect_io(ed, |vals| { + vals.file_write_size = file_io_write_size; + }) + } + + #[cfg(feature = "io_profiling")] + pub fn collect_io(&self, execute_data: *mut zend_execute_data, set_value: F) + where + F: FnOnce(&mut SampleValues), + { + let result = collect_stack_sample(execute_data); + match result { + Ok(frames) => { + let depth = frames.len(); + let labels = Profiler::common_labels(0); + + let n_labels = labels.len(); + + let mut values = SampleValues::default(); + set_value(&mut values); + + match self.prepare_and_send_message( + frames, + values, + labels, + NO_TIMESTAMP, + ) { + Ok(_) => trace!( + "Sent stack sample of {depth} frames, {n_labels} labels with to profiler." + ), + Err(err) => warn!( + "Failed to send stack sample of {depth} frames, {n_labels} labels to profiler: {err}" + ), + } + } + Err(err) => { + warn!("Failed to collect stack sample: {err}") + } + } + } + /// Creates the common message labels for all samples. /// /// * `n_extra_labels` - Reserve room for extra labels, such as when the @@ -1324,6 +1424,7 @@ mod tests { profiling_exception_enabled: false, profiling_exception_message_enabled: false, profiling_wall_time_enabled: true, + profiling_io_enabled: false, output_pprof: None, profiling_exception_sampling_distance: 100, profiling_log_level: LevelFilter::Off, @@ -1340,6 +1441,14 @@ mod tests { alloc_size: 50, timeline: 60, exception: 70, + socket_read_time: 80, + socket_write_time: 90, + file_read_time: 100, + file_write_time: 110, + socket_read_size: 120, + socket_write_size: 130, + file_read_size: 140, + file_write_size: 150, } } diff --git a/profiling/src/profiling/sample_type_filter.rs b/profiling/src/profiling/sample_type_filter.rs index ff4c244821..2ad6b0764c 100644 --- a/profiling/src/profiling/sample_type_filter.rs +++ b/profiling/src/profiling/sample_type_filter.rs @@ -1,7 +1,7 @@ use crate::config::SystemSettings; use crate::profiling::{SampleValues, ValueType}; -const MAX_SAMPLE_TYPES: usize = 7; +const MAX_SAMPLE_TYPES: usize = 15; pub struct SampleTypeFilter { sample_types: Vec, @@ -19,6 +19,14 @@ impl SampleTypeFilter { ValueType::new("alloc-size", "bytes"), ValueType::new("timeline", "nanoseconds"), ValueType::new("exception-samples", "count"), + ValueType::new("socket-read-time", "nanoseconds"), + ValueType::new("socket-write-time", "nanoseconds"), + ValueType::new("file-io-read-time", "nanoseconds"), + ValueType::new("file-io-write-time", "nanoseconds"), + ValueType::new("socket-read-size", "bytes"), + ValueType::new("socket-write-size", "bytes"), + ValueType::new("file-io-read-size", "bytes"), + ValueType::new("file-io-write-size", "bytes"), ]; let mut sample_types = Vec::with_capacity(SAMPLE_TYPES.len()); @@ -50,6 +58,26 @@ impl SampleTypeFilter { sample_types.push(SAMPLE_TYPES[6]); sample_types_mask[6] = true; } + + #[cfg(feature = "io_profiling")] + if system_settings.profiling_io_enabled { + sample_types.push(SAMPLE_TYPES[7]); + sample_types_mask[7] = true; + sample_types.push(SAMPLE_TYPES[8]); + sample_types_mask[8] = true; + sample_types.push(SAMPLE_TYPES[9]); + sample_types_mask[9] = true; + sample_types.push(SAMPLE_TYPES[10]); + sample_types_mask[10] = true; + sample_types.push(SAMPLE_TYPES[11]); + sample_types_mask[11] = true; + sample_types.push(SAMPLE_TYPES[12]); + sample_types_mask[12] = true; + sample_types.push(SAMPLE_TYPES[13]); + sample_types_mask[13] = true; + sample_types.push(SAMPLE_TYPES[14]); + sample_types_mask[14] = true; + } } Self { @@ -76,6 +104,14 @@ impl SampleTypeFilter { sample_values.alloc_size, sample_values.timeline, sample_values.exception, + sample_values.socket_read_time, + sample_values.socket_write_time, + sample_values.file_read_time, + sample_values.file_write_time, + sample_values.socket_read_size, + sample_values.socket_write_size, + sample_values.file_read_size, + sample_values.file_write_size, ]; for (value, enabled) in values.into_iter().zip(self.sample_types_mask.iter()) { @@ -223,4 +259,4 @@ mod tests { ); assert_eq!(values, vec![10, 20, 30, 70]); } -} +} \ No newline at end of file diff --git a/profiling/tests/phpt/phpinfo_01.phpt b/profiling/tests/phpt/phpinfo_01.phpt index 376244c927..be2ece010f 100644 --- a/profiling/tests/phpt/phpinfo_01.phpt +++ b/profiling/tests/phpt/phpinfo_01.phpt @@ -50,6 +50,7 @@ $sections = [ ["Experimental CPU Time Profiling Enabled", "false (profiling disabled)"], ["Allocation Profiling Enabled", "false (profiling disabled)"], ["Exception Profiling Enabled", "false (profiling disabled)"], + ["I/O Profiling Enabled", "false (profiling disabled)"], ["Endpoint Collection Enabled", "false (profiling disabled)"], ["Profiling Log Level", "off (profiling disabled)"], ["Profiling Agent Endpoint", "http://datadog:8126/"], @@ -69,4 +70,4 @@ echo "Done."; ?> --EXPECT-- -Done. +Done. \ No newline at end of file diff --git a/profiling/tests/phpt/phpinfo_03.phpt b/profiling/tests/phpt/phpinfo_03.phpt index dc19cb2ddb..b0a854c7bc 100644 --- a/profiling/tests/phpt/phpinfo_03.phpt +++ b/profiling/tests/phpt/phpinfo_03.phpt @@ -50,6 +50,7 @@ $sections = [ ["Experimental CPU Time Profiling Enabled", "false"], ["Allocation Profiling Enabled", "false"], ["Exception Profiling Enabled", "false"], + ["I/O Profiling Enabled", "false"], ["Endpoint Collection Enabled", "true"], ["Profiling Log Level", "off"], ["Profiling Agent Endpoint", "http://datadog:8126/"], @@ -69,4 +70,4 @@ echo "Done.", PHP_EOL; ?> --EXPECT-- -Done. +Done. \ No newline at end of file diff --git a/profiling/tests/phpt/phpinfo_06.phpt b/profiling/tests/phpt/phpinfo_06.phpt index 3408cd63a5..79a0d14500 100644 --- a/profiling/tests/phpt/phpinfo_06.phpt +++ b/profiling/tests/phpt/phpinfo_06.phpt @@ -48,6 +48,7 @@ $sections = [ ["Allocation Profiling Enabled", "false (profiling disabled)"], ["Exception Profiling Enabled", "false (profiling disabled)"], ["Timeline Enabled", "false (profiling disabled)"], + ["I/O Profiling Enabled", "false (profiling disabled)"], ]; foreach ($sections as [$key, $expected]) { @@ -61,4 +62,4 @@ echo "Done."; ?> --EXPECT-- -Done. +Done. \ No newline at end of file diff --git a/profiling/tests/phpt/phpinfo_07.phpt b/profiling/tests/phpt/phpinfo_07.phpt index 34ddc2f7fb..526777e724 100644 --- a/profiling/tests/phpt/phpinfo_07.phpt +++ b/profiling/tests/phpt/phpinfo_07.phpt @@ -48,6 +48,7 @@ $sections = [ ["Allocation Profiling Enabled", "false"], ["Exception Profiling Enabled", "false"], ["Timeline Enabled", "false"], + ["I/O Profiling Enabled", "true"], ]; foreach ($sections as [$key, $expected]) { @@ -61,4 +62,4 @@ echo "Done."; ?> --EXPECT-- -Done. +Done. \ No newline at end of file diff --git a/profiling/tests/phpt/phpinfo_ini_01.phpt b/profiling/tests/phpt/phpinfo_ini_01.phpt index 387da64383..6a80ee0233 100644 --- a/profiling/tests/phpt/phpinfo_ini_01.phpt +++ b/profiling/tests/phpt/phpinfo_ini_01.phpt @@ -49,6 +49,7 @@ $sections = [ ["Experimental CPU Time Profiling Enabled", "false (profiling disabled)"], ["Allocation Profiling Enabled", "false (profiling disabled)"], ["Exception Profiling Enabled", "false (profiling disabled)"], + ["I/O Profiling Enabled", "false (profiling disabled)"], ["Endpoint Collection Enabled", "false (profiling disabled)"], ["Profiling Log Level", "off (profiling disabled)"], ["Profiling Agent Endpoint", "http://datadog:8126/"], @@ -68,4 +69,4 @@ echo "Done."; ?> --EXPECT-- -Done. +Done. \ No newline at end of file