From 539ba7769624a4c1a7056c7f1aac1faeb310cfee Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Tue, 17 Sep 2024 17:58:06 +0300 Subject: [PATCH 1/9] dummy drop for cleanup handlers --- commons/zenoh-shm/src/cleanup.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index ac29dfe08f..e77fe8782f 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -12,7 +12,7 @@ // ZettaScale Zenoh Team, // -use signal_hook::consts::signal::*; +use signal_hook::{consts::signal::*, SigId}; use static_init::dynamic; /// A global cleanup, that is guaranteed to be dropped at normal program exit and that will @@ -23,12 +23,14 @@ pub(crate) static mut CLEANUP: Cleanup = Cleanup::new(); /// An RAII object that calls all registered routines upon destruction pub(crate) struct Cleanup { cleanups: lockfree::queue::Queue>>, + handlers: Vec, } impl Cleanup { fn new() -> Self { // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signal handlers // that execute std::terminate instead of exit + let mut handlers: Vec = Default::default(); for signal in [ #[cfg(not(target_os = "windows"))] SIGHUP, @@ -37,15 +39,18 @@ impl Cleanup { #[cfg(not(target_os = "windows"))] SIGQUIT, ] { - unsafe { - let _ = signal_hook::low_level::register(signal, || { + if let Ok(sigid) = unsafe { + signal_hook::low_level::register(signal, || { std::process::exit(0); - }); + }) + } { + handlers.push(sigid); } } Self { cleanups: Default::default(), + handlers, } } @@ -64,6 +69,9 @@ impl Cleanup { impl Drop for Cleanup { fn drop(&mut self) { + for handler in &self.handlers { + signal_hook::low_level::unregister(*handler); + } self.cleanup(); } } From ff534884064bed3f4f2b6f1985d7c58c1313ab77 Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Wed, 18 Sep 2024 13:50:25 +0300 Subject: [PATCH 2/9] migrate to ctrlc crate --- Cargo.lock | 76 +++++++++++++++++++++----------- Cargo.toml | 2 +- commons/zenoh-shm/Cargo.toml | 2 +- commons/zenoh-shm/src/cleanup.rs | 40 +++++++---------- 4 files changed, 69 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3ebe1688e2..2bd1f064d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1078,6 +1078,16 @@ dependencies = [ "cipher 0.2.5", ] +[[package]] +name = "ctrlc" +version = "3.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90eeab0aa92f3f9b4e87f258c72b139c207d251f9cbc1080a0086b86a8870dd3" +dependencies = [ + "nix 0.29.0", + "windows-sys 0.59.0", +] + [[package]] name = "data-encoding" version = "2.4.0" @@ -5125,7 +5135,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -5145,17 +5164,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.0", - "windows_aarch64_msvc 0.52.0", - "windows_i686_gnu 0.52.0", - "windows_i686_msvc 0.52.0", - "windows_x86_64_gnu 0.52.0", - "windows_x86_64_gnullvm 0.52.0", - "windows_x86_64_msvc 0.52.0", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -5166,9 +5186,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -5184,9 +5204,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -5202,9 +5222,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.0" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -5220,9 +5246,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -5238,9 +5264,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -5250,9 +5276,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -5268,9 +5294,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" @@ -5852,13 +5878,13 @@ version = "1.0.0-dev" dependencies = [ "async-trait", "crc", + "ctrlc", "libc", "lockfree", "num-traits", "num_cpus", "rand 0.8.5", "shared_memory", - "signal-hook", "stabby", "static_init", "thread-priority", diff --git a/Cargo.toml b/Cargo.toml index 54a25e4c81..34848bb788 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -162,7 +162,7 @@ stabby = "36.1.1" sha3 = "0.10.8" shared_memory = "0.12.4" shellexpand = "3.1.0" -signal-hook = { version = "0.3.17", default-features = false } +ctrlc = { version = "3.4.5" } socket2 = { version = "0.5.7", features = ["all"] } stop-token = "0.7.0" syn = "2.0" diff --git a/commons/zenoh-shm/Cargo.toml b/commons/zenoh-shm/Cargo.toml index c5b2a0a628..679202f500 100644 --- a/commons/zenoh-shm/Cargo.toml +++ b/commons/zenoh-shm/Cargo.toml @@ -48,7 +48,7 @@ num_cpus = { workspace = true, optional = true } thread-priority = { workspace = true } lockfree = { workspace = true } stabby = { workspace = true } -signal-hook = { workspace = true } +ctrlc = { workspace = true, features = ["termination"] } [dev-dependencies] libc = { workspace = true } diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index e77fe8782f..304f92e3da 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -12,8 +12,8 @@ // ZettaScale Zenoh Team, // -use signal_hook::{consts::signal::*, SigId}; use static_init::dynamic; +use zenoh_core::zerror; /// A global cleanup, that is guaranteed to be dropped at normal program exit and that will /// execute all registered cleanup routines at this moment @@ -23,34 +23,29 @@ pub(crate) static mut CLEANUP: Cleanup = Cleanup::new(); /// An RAII object that calls all registered routines upon destruction pub(crate) struct Cleanup { cleanups: lockfree::queue::Queue>>, - handlers: Vec, } impl Cleanup { fn new() -> Self { - // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signal handlers - // that execute std::terminate instead of exit - let mut handlers: Vec = Default::default(); - for signal in [ - #[cfg(not(target_os = "windows"))] - SIGHUP, - SIGTERM, - SIGINT, - #[cfg(not(target_os = "windows"))] - SIGQUIT, - ] { - if let Ok(sigid) = unsafe { - signal_hook::low_level::register(signal, || { - std::process::exit(0); - }) - } { - handlers.push(sigid); + // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals + if let Err(e) = ctrlc::set_handler(|| std::process::exit(0)) { + match e { + ctrlc::Error::NoSuchSignal(signal_type) => { + zerror!( + "Error registering cleanup handler for signal {:?}: no such signal!", + signal_type + ); + } + ctrlc::Error::MultipleHandlers => { + zerror!("Error registering cleanup handler: already registered!"); + } + ctrlc::Error::System(error) => { + zerror!("Error registering cleanup handler: system error: {error}"); + } } } - Self { cleanups: Default::default(), - handlers, } } @@ -69,9 +64,6 @@ impl Cleanup { impl Drop for Cleanup { fn drop(&mut self) { - for handler in &self.handlers { - signal_hook::low_level::unregister(*handler); - } self.cleanup(); } } From 50b6e6cb072b25119ab63f189a86c5656e91ec92 Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Wed, 18 Sep 2024 14:32:52 +0300 Subject: [PATCH 3/9] add force_cleanup_before_exit routine --- commons/zenoh-shm/src/api/cleanup.rs | 35 ++++++++++++++++++++++++++++ commons/zenoh-shm/src/api/mod.rs | 1 + commons/zenoh-shm/src/cleanup.rs | 10 +++++--- zenoh/src/lib.rs | 1 + 4 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 commons/zenoh-shm/src/api/cleanup.rs diff --git a/commons/zenoh-shm/src/api/cleanup.rs b/commons/zenoh-shm/src/api/cleanup.rs new file mode 100644 index 0000000000..dda3637a07 --- /dev/null +++ b/commons/zenoh-shm/src/api/cleanup.rs @@ -0,0 +1,35 @@ +// +// Copyright (c) 2024 ZettaScale Technology +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 +// which is available at https://www.apache.org/licenses/LICENSE-2.0. +// +// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 +// +// Contributors: +// ZettaScale Zenoh Team, +// + +use crate::cleanup::CLEANUP; + +/// Make forced cleanup +/// NOTE: this is a part of a temporary on-exit-cleanup workaround and it will be very likely removed in the future. +/// WARN: The improper usage can break the application logic, impacting SHM-utilizing Sessions in other processes. +/// Cleanup unlinks SHM segments _created_ by current process from filesystem with the following consequences: +/// - Sessions that are not linked to this segment will fail to link it if they try. Such kind of errors are properly handled. +/// - Already linked processes will still have this shared memory mapped and safely accessible +/// - The actual memory will be reclaimed by the OS only after last process using it will close it or exit +/// +/// In order to properly cleanup some SHM internals upon process exit, Zenoh installs exit handlers (see atexit() API). +/// The atexit handler is executed only on process exit(), the inconvenience is that terminating signal handlers +/// (like SIGINT) bypass it and terminate the process without cleanup. To eliminate this effect, Zenoh overrides +/// SIGHUP, SIGTERM, SIGINT and SIGQUIT handlers and calls exit() inside to make graceful shutdown. If user is going to +/// override these Zenoh's handlers, the workaround will break, and there are two ways to keep this workaround working: +/// - execute overridden Zenoh handlers in overriding handler code +/// - call force_cleanup_before_exit() anywhere at any time before terminating the process +#[zenoh_macros::unstable_doc] +pub fn force_cleanup_before_exit() { + CLEANUP.read().cleanup(); +} diff --git a/commons/zenoh-shm/src/api/mod.rs b/commons/zenoh-shm/src/api/mod.rs index a87188da29..8802b1eb58 100644 --- a/commons/zenoh-shm/src/api/mod.rs +++ b/commons/zenoh-shm/src/api/mod.rs @@ -13,6 +13,7 @@ // pub mod buffer; +pub mod cleanup; pub mod client; pub mod client_storage; pub mod common; diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index b0097db520..d87bbf534f 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -52,10 +52,8 @@ impl Cleanup { pub(crate) fn register_cleanup(&self, cleanup_fn: Box) { self.cleanups.push(Some(cleanup_fn)); } -} -impl Drop for Cleanup { - fn drop(&mut self) { + pub(crate) fn cleanup(&self) { while let Some(cleanup) = self.cleanups.pop() { if let Some(f) = cleanup { f(); @@ -63,3 +61,9 @@ impl Drop for Cleanup { } } } + +impl Drop for Cleanup { + fn drop(&mut self) { + self.cleanup(); + } +} diff --git a/zenoh/src/lib.rs b/zenoh/src/lib.rs index 0275cbe8be..4b53ceabd1 100644 --- a/zenoh/src/lib.rs +++ b/zenoh/src/lib.rs @@ -427,6 +427,7 @@ pub mod shm { zshm::{zshm, ZShm}, zshmmut::{zshmmut, ZShmMut}, }, + cleanup::force_cleanup_before_exit, client::{shm_client::ShmClient, shm_segment::ShmSegment}, client_storage::{ShmClientStorage, GLOBAL_CLIENT_STORAGE}, common::types::{ChunkID, ProtocolID, SegmentID}, From a717dd8e9e51fab67307b40993e442acbb52f82c Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Wed, 18 Sep 2024 19:41:41 +0300 Subject: [PATCH 4/9] use ctrlc2 with other Cleanup finalization seq --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- commons/zenoh-shm/Cargo.toml | 2 +- commons/zenoh-shm/src/cleanup.rs | 12 ++++++++---- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fa18c47cbc..263f129720 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1071,10 +1071,10 @@ dependencies = [ ] [[package]] -name = "ctrlc" -version = "3.4.5" +name = "ctrlc2" +version = "3.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90eeab0aa92f3f9b4e87f258c72b139c207d251f9cbc1080a0086b86a8870dd3" +checksum = "383ab70e5dc6ffdfe5545b6e6dea714938594be13ecaec73ebacc372ed4e292d" dependencies = [ "nix 0.29.0", "windows-sys 0.59.0", @@ -5932,7 +5932,7 @@ version = "1.0.0-dev" dependencies = [ "async-trait", "crc", - "ctrlc", + "ctrlc2", "libc", "lockfree", "num-traits", diff --git a/Cargo.toml b/Cargo.toml index 0f1b72c559..e996c0a343 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -163,7 +163,7 @@ stabby = "36.1.1" sha3 = "0.10.8" shared_memory = "0.12.4" shellexpand = "3.1.0" -ctrlc = { version = "3.4.5" } +ctrlc2 = { version = "3.5.8" } socket2 = { version = "0.5.7", features = ["all"] } stop-token = "0.7.0" syn = "2.0" diff --git a/commons/zenoh-shm/Cargo.toml b/commons/zenoh-shm/Cargo.toml index 679202f500..683a4d42c0 100644 --- a/commons/zenoh-shm/Cargo.toml +++ b/commons/zenoh-shm/Cargo.toml @@ -48,7 +48,7 @@ num_cpus = { workspace = true, optional = true } thread-priority = { workspace = true } lockfree = { workspace = true } stabby = { workspace = true } -ctrlc = { workspace = true, features = ["termination"] } +ctrlc2 = { workspace = true, features = ["termination"] } [dev-dependencies] libc = { workspace = true } diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index d87bbf534f..af1b4c2b8d 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -28,22 +28,26 @@ pub(crate) struct Cleanup { impl Cleanup { fn new() -> Self { // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals - if let Err(e) = ctrlc::set_handler(|| std::process::exit(0)) { + if let Err(e) = ctrlc2::set_handler(|| { + tokio::task::spawn_blocking(|| std::process::exit(0)); + false + }) { match e { - ctrlc::Error::NoSuchSignal(signal_type) => { + ctrlc2::Error::NoSuchSignal(signal_type) => { zerror!( "Error registering cleanup handler for signal {:?}: no such signal!", signal_type ); } - ctrlc::Error::MultipleHandlers => { + ctrlc2::Error::MultipleHandlers => { zerror!("Error registering cleanup handler: already registered!"); } - ctrlc::Error::System(error) => { + ctrlc2::Error::System(error) => { zerror!("Error registering cleanup handler: system error: {error}"); } } } + Self { cleanups: Default::default(), } From 9b4877671cac0b20c98c2d5d1bb9813bb3fde5c2 Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Wed, 18 Sep 2024 21:25:57 +0300 Subject: [PATCH 5/9] detach cleanup thread before joining --- commons/zenoh-shm/src/cleanup.rs | 39 ++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index af1b4c2b8d..8cf1da3b98 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -12,6 +12,8 @@ // ZettaScale Zenoh Team, // +use std::thread::JoinHandle; + use static_init::dynamic; use zenoh_core::zerror; @@ -23,33 +25,39 @@ pub(crate) static mut CLEANUP: Cleanup = Cleanup::new(); /// An RAII object that calls all registered routines upon destruction pub(crate) struct Cleanup { cleanups: lockfree::queue::Queue>>, + handle: Option>, } impl Cleanup { fn new() -> Self { // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals - if let Err(e) = ctrlc2::set_handler(|| { + let handle = match ctrlc2::set_handler(|| { tokio::task::spawn_blocking(|| std::process::exit(0)); - false + true }) { - match e { - ctrlc2::Error::NoSuchSignal(signal_type) => { - zerror!( - "Error registering cleanup handler for signal {:?}: no such signal!", - signal_type - ); - } - ctrlc2::Error::MultipleHandlers => { - zerror!("Error registering cleanup handler: already registered!"); - } - ctrlc2::Error::System(error) => { - zerror!("Error registering cleanup handler: system error: {error}"); + Ok(h) => Some(h), + Err(e) => { + match e { + ctrlc2::Error::NoSuchSignal(signal_type) => { + zerror!( + "Error registering cleanup handler for signal {:?}: no such signal!", + signal_type + ); + } + ctrlc2::Error::MultipleHandlers => { + zerror!("Error registering cleanup handler: already registered!"); + } + ctrlc2::Error::System(error) => { + zerror!("Error registering cleanup handler: system error: {error}"); + } } + None } - } + }; Self { cleanups: Default::default(), + handle, } } @@ -68,6 +76,7 @@ impl Cleanup { impl Drop for Cleanup { fn drop(&mut self) { + self.handle.take(); self.cleanup(); } } From d3872cb54977f0d801b6e2fd05469bb4c9dd870a Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Wed, 18 Sep 2024 22:53:24 +0300 Subject: [PATCH 6/9] change cleanup task handle lifetime --- commons/zenoh-shm/src/cleanup.rs | 65 +++++++++++++++++++------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index 8cf1da3b98..9c7cb17ae1 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -25,39 +25,15 @@ pub(crate) static mut CLEANUP: Cleanup = Cleanup::new(); /// An RAII object that calls all registered routines upon destruction pub(crate) struct Cleanup { cleanups: lockfree::queue::Queue>>, - handle: Option>, } impl Cleanup { fn new() -> Self { // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals - let handle = match ctrlc2::set_handler(|| { - tokio::task::spawn_blocking(|| std::process::exit(0)); - true - }) { - Ok(h) => Some(h), - Err(e) => { - match e { - ctrlc2::Error::NoSuchSignal(signal_type) => { - zerror!( - "Error registering cleanup handler for signal {:?}: no such signal!", - signal_type - ); - } - ctrlc2::Error::MultipleHandlers => { - zerror!("Error registering cleanup handler: already registered!"); - } - ctrlc2::Error::System(error) => { - zerror!("Error registering cleanup handler: system error: {error}"); - } - } - None - } - }; + let _ = SIGNAL_CLEANUP.read(); Self { cleanups: Default::default(), - handle, } } @@ -76,7 +52,44 @@ impl Cleanup { impl Drop for Cleanup { fn drop(&mut self) { - self.handle.take(); self.cleanup(); } } + +#[dynamic(lazy)] +static mut SIGNAL_CLEANUP: SignalCleanup = SignalCleanup::new(); + +struct SignalCleanup { + _handle: Option>, +} + +impl SignalCleanup { + fn new() -> Self { + // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals + let _handle = match ctrlc2::set_handler(|| { + tokio::task::spawn_blocking(|| std::process::exit(0)); + false + }) { + Ok(h) => Some(h), + Err(e) => { + match e { + ctrlc2::Error::NoSuchSignal(signal_type) => { + zerror!( + "Error registering cleanup handler for signal {:?}: no such signal!", + signal_type + ); + } + ctrlc2::Error::MultipleHandlers => { + zerror!("Error registering cleanup handler: already registered!"); + } + ctrlc2::Error::System(error) => { + zerror!("Error registering cleanup handler: system error: {error}"); + } + } + None + } + }; + + Self { _handle } + } +} From 0246d62c4592d39f934cfd312763a00596748454 Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Fri, 20 Sep 2024 18:08:54 +0300 Subject: [PATCH 7/9] - completely different SHM clenup mechanism --- Cargo.lock | 11 -- Cargo.toml | 1 - commons/zenoh-shm/Cargo.toml | 1 - commons/zenoh-shm/src/api/cleanup.rs | 30 ++--- commons/zenoh-shm/src/cleanup.rs | 52 ++------ commons/zenoh-shm/src/posix_shm/cleanup.rs | 134 +++++++++++++++++++++ commons/zenoh-shm/src/posix_shm/mod.rs | 1 + commons/zenoh-shm/src/posix_shm/segment.rs | 2 +- zenoh/src/lib.rs | 2 +- 9 files changed, 157 insertions(+), 77 deletions(-) create mode 100644 commons/zenoh-shm/src/posix_shm/cleanup.rs diff --git a/Cargo.lock b/Cargo.lock index 6f8d208cb4..33b5367eb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1070,16 +1070,6 @@ dependencies = [ "cipher 0.2.5", ] -[[package]] -name = "ctrlc2" -version = "3.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "383ab70e5dc6ffdfe5545b6e6dea714938594be13ecaec73ebacc372ed4e292d" -dependencies = [ - "nix 0.29.0", - "windows-sys 0.59.0", -] - [[package]] name = "data-encoding" version = "2.6.0" @@ -5919,7 +5909,6 @@ version = "1.0.0-dev" dependencies = [ "async-trait", "crc", - "ctrlc2", "libc", "lockfree", "num-traits", diff --git a/Cargo.toml b/Cargo.toml index 760ebc033c..2fcaaa9cd5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -163,7 +163,6 @@ stabby = "36.1.1" sha3 = "0.10.8" shared_memory = "0.12.4" shellexpand = "3.1.0" -ctrlc2 = { version = "3.5.8" } socket2 = { version = "0.5.7", features = ["all"] } stop-token = "0.7.0" syn = "2.0" diff --git a/commons/zenoh-shm/Cargo.toml b/commons/zenoh-shm/Cargo.toml index 683a4d42c0..0b79fd2549 100644 --- a/commons/zenoh-shm/Cargo.toml +++ b/commons/zenoh-shm/Cargo.toml @@ -48,7 +48,6 @@ num_cpus = { workspace = true, optional = true } thread-priority = { workspace = true } lockfree = { workspace = true } stabby = { workspace = true } -ctrlc2 = { workspace = true, features = ["termination"] } [dev-dependencies] libc = { workspace = true } diff --git a/commons/zenoh-shm/src/api/cleanup.rs b/commons/zenoh-shm/src/api/cleanup.rs index dda3637a07..45d78db9ef 100644 --- a/commons/zenoh-shm/src/api/cleanup.rs +++ b/commons/zenoh-shm/src/api/cleanup.rs @@ -12,24 +12,20 @@ // ZettaScale Zenoh Team, // -use crate::cleanup::CLEANUP; +use crate::posix_shm::cleanup::cleanup_orphaned_segments; -/// Make forced cleanup -/// NOTE: this is a part of a temporary on-exit-cleanup workaround and it will be very likely removed in the future. -/// WARN: The improper usage can break the application logic, impacting SHM-utilizing Sessions in other processes. -/// Cleanup unlinks SHM segments _created_ by current process from filesystem with the following consequences: -/// - Sessions that are not linked to this segment will fail to link it if they try. Such kind of errors are properly handled. -/// - Already linked processes will still have this shared memory mapped and safely accessible -/// - The actual memory will be reclaimed by the OS only after last process using it will close it or exit +/// UNIX: Trigger cleanup for orphaned SHM segments +/// If process that created named SHM segment crashes or exits by a signal, the segment persists in the system +/// disregarding if it is used by other Zenoh processes or not. This is the detail of POSIX specification for +/// shared memory that is hard to bypass. To deal with this we developed a cleanup routine that enumerates all +/// segments and tries to find processes that are using it. If no such process found, segment will be removed. +/// There is no ideal signal to trigger this cleanup, so by default, zenoh triggers it in the following moments: +/// - first POSIX SHM segment creation +/// - process exit via exit() call or return from maint function +/// It is OK to additionally trigger this function at any time, but be aware that this can be costly. /// -/// In order to properly cleanup some SHM internals upon process exit, Zenoh installs exit handlers (see atexit() API). -/// The atexit handler is executed only on process exit(), the inconvenience is that terminating signal handlers -/// (like SIGINT) bypass it and terminate the process without cleanup. To eliminate this effect, Zenoh overrides -/// SIGHUP, SIGTERM, SIGINT and SIGQUIT handlers and calls exit() inside to make graceful shutdown. If user is going to -/// override these Zenoh's handlers, the workaround will break, and there are two ways to keep this workaround working: -/// - execute overridden Zenoh handlers in overriding handler code -/// - call force_cleanup_before_exit() anywhere at any time before terminating the process +/// For non-unix platforms this function currently does nothing #[zenoh_macros::unstable_doc] -pub fn force_cleanup_before_exit() { - CLEANUP.read().cleanup(); +pub fn cleanup_orphaned_shm_segments() { + cleanup_orphaned_segments(); } diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index 9c7cb17ae1..a5c3aacc4f 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -12,10 +12,9 @@ // ZettaScale Zenoh Team, // -use std::thread::JoinHandle; - use static_init::dynamic; -use zenoh_core::zerror; + +use crate::posix_shm::cleanup::cleanup_orphaned_segments; /// A global cleanup, that is guaranteed to be dropped at normal program exit and that will /// execute all registered cleanup routines at this moment @@ -29,9 +28,8 @@ pub(crate) struct Cleanup { impl Cleanup { fn new() -> Self { - // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals - let _ = SIGNAL_CLEANUP.read(); - + // on first cleanup subsystem touch we perform zenoh segment cleanup + cleanup_orphaned_segments(); Self { cleanups: Default::default(), } @@ -41,7 +39,7 @@ impl Cleanup { self.cleanups.push(Some(cleanup_fn)); } - pub(crate) fn cleanup(&self) { + fn cleanup(&self) { while let Some(cleanup) = self.cleanups.pop() { if let Some(f) = cleanup { f(); @@ -52,44 +50,8 @@ impl Cleanup { impl Drop for Cleanup { fn drop(&mut self) { + // on finalization stage we perform zenoh segment cleanup + cleanup_orphaned_segments(); self.cleanup(); } } - -#[dynamic(lazy)] -static mut SIGNAL_CLEANUP: SignalCleanup = SignalCleanup::new(); - -struct SignalCleanup { - _handle: Option>, -} - -impl SignalCleanup { - fn new() -> Self { - // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals - let _handle = match ctrlc2::set_handler(|| { - tokio::task::spawn_blocking(|| std::process::exit(0)); - false - }) { - Ok(h) => Some(h), - Err(e) => { - match e { - ctrlc2::Error::NoSuchSignal(signal_type) => { - zerror!( - "Error registering cleanup handler for signal {:?}: no such signal!", - signal_type - ); - } - ctrlc2::Error::MultipleHandlers => { - zerror!("Error registering cleanup handler: already registered!"); - } - ctrlc2::Error::System(error) => { - zerror!("Error registering cleanup handler: system error: {error}"); - } - } - None - } - }; - - Self { _handle } - } -} diff --git a/commons/zenoh-shm/src/posix_shm/cleanup.rs b/commons/zenoh-shm/src/posix_shm/cleanup.rs new file mode 100644 index 0000000000..821fb790f2 --- /dev/null +++ b/commons/zenoh-shm/src/posix_shm/cleanup.rs @@ -0,0 +1,134 @@ +// +// Copyright (c) 2023 ZettaScale Technology +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 +// which is available at https://www.apache.org/licenses/LICENSE-2.0. +// +// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 +// +// Contributors: +// ZettaScale Zenoh Team, +// + +pub(crate) use platform::cleanup_orphaned_segments; + +#[cfg(not(unix))] +mod platform { + pub(crate) fn cleanup_orphaned_segments() {} +} + +#[cfg(unix)] +mod platform { + use std::{borrow::Borrow, collections::HashSet, fs, path::PathBuf}; + + use zenoh_result::ZResult; + + #[derive(PartialEq, Eq, Hash)] + struct ProcFdDir(PathBuf); + + impl ProcFdDir { + fn enumerate_fds(&self) -> ZResult> { + let fds = self.0.read_dir()?; + let fd_map: HashSet = fds + .filter_map(Result::ok) + .map(|f| std::convert::Into::::into(f.path())) + .collect(); + Ok(fd_map) + } + } + + impl From for ProcFdDir { + fn from(value: PathBuf) -> Self { + Self(value) + } + } + + #[derive(PartialEq, Eq, Hash)] + struct FdFile(PathBuf); + + impl From for FdFile { + fn from(value: PathBuf) -> Self { + Self(value) + } + } + + #[derive(PartialEq, Eq, Hash)] + struct ShmFile(PathBuf); + + impl ShmFile { + fn cleanup_file(self) { + let _ = std::fs::remove_file(self.0); + } + } + + impl Borrow for ShmFile { + fn borrow(&self) -> &PathBuf { + &self.0 + } + } + + impl From for ShmFile { + fn from(value: PathBuf) -> Self { + Self(value) + } + } + + pub(crate) fn cleanup_orphaned_segments() { + if let Err(e) = cleanup_orphaned_segments_inner() { + tracing::error!("Error performing orphaned SHM segments cleanup: {e}") + } + } + + fn enumerate_shm_files() -> ZResult> { + let shm_files = fs::read_dir("/dev/shm")?; + Ok(shm_files + .filter_map(Result::ok) + .filter_map(|f| { + if let Some(ext) = f.path().extension() { + if ext == "zenoh" { + return Some(std::convert::Into::::into(f.path())); + } + } + None + }) + .collect()) + } + + fn enumerate_proc_dirs() -> ZResult> { + let proc_dirs = fs::read_dir("/proc")?; + Ok(proc_dirs + .filter_map(Result::ok) + .map(|f| std::convert::Into::::into(f.path().join("fd"))) + .collect()) + } + + fn enumerate_proc_fds() -> ZResult> { + let mut fds = HashSet::default(); + let dirs = enumerate_proc_dirs()?; + for dir in dirs { + if let Ok(dir_fds) = dir.enumerate_fds() { + fds.extend(dir_fds); + } + } + Ok(fds) + } + + fn cleanup_orphaned_segments_inner() -> ZResult<()> { + let fd_map = enumerate_proc_fds()?; + let mut shm_map = enumerate_shm_files()?; + + for fd_file in fd_map { + if let Ok(resolved_link) = fd_file.0.read_link() { + shm_map.remove(&resolved_link); + } + } + + for shm_file_to_cleanup in shm_map { + shm_file_to_cleanup.cleanup_file(); + } + + Ok(()) + } +} diff --git a/commons/zenoh-shm/src/posix_shm/mod.rs b/commons/zenoh-shm/src/posix_shm/mod.rs index a63b1c9e6d..495077f9f0 100644 --- a/commons/zenoh-shm/src/posix_shm/mod.rs +++ b/commons/zenoh-shm/src/posix_shm/mod.rs @@ -14,3 +14,4 @@ pub mod array; tested_crate_module!(segment); +pub(crate) mod cleanup; diff --git a/commons/zenoh-shm/src/posix_shm/segment.rs b/commons/zenoh-shm/src/posix_shm/segment.rs index 6a34506029..0f82be5beb 100644 --- a/commons/zenoh-shm/src/posix_shm/segment.rs +++ b/commons/zenoh-shm/src/posix_shm/segment.rs @@ -106,7 +106,7 @@ where fn os_id(id: ID, id_prefix: &str) -> String { let os_id_str = format!("{id_prefix}_{id}"); let crc_os_id_str = ECMA.checksum(os_id_str.as_bytes()); - format!("{:x}", crc_os_id_str) + format!("{:x}.zenoh", crc_os_id_str) } pub fn as_ptr(&self) -> *mut u8 { diff --git a/zenoh/src/lib.rs b/zenoh/src/lib.rs index 092607e484..2eed181543 100644 --- a/zenoh/src/lib.rs +++ b/zenoh/src/lib.rs @@ -427,7 +427,7 @@ pub mod shm { zshm::{zshm, ZShm}, zshmmut::{zshmmut, ZShmMut}, }, - cleanup::force_cleanup_before_exit, + cleanup::cleanup_orphaned_shm_segments, client::{shm_client::ShmClient, shm_segment::ShmSegment}, client_storage::{ShmClientStorage, GLOBAL_CLIENT_STORAGE}, common::types::{ChunkID, ProtocolID, SegmentID}, From a0c8b31db61dd62718c97d722ee4093bdd111d5f Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Thu, 26 Sep 2024 14:18:35 +0300 Subject: [PATCH 8/9] Support Linux only for a while --- commons/zenoh-shm/src/api/cleanup.rs | 4 ++-- commons/zenoh-shm/src/posix_shm/cleanup.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/commons/zenoh-shm/src/api/cleanup.rs b/commons/zenoh-shm/src/api/cleanup.rs index 45d78db9ef..7178af29c8 100644 --- a/commons/zenoh-shm/src/api/cleanup.rs +++ b/commons/zenoh-shm/src/api/cleanup.rs @@ -14,7 +14,7 @@ use crate::posix_shm::cleanup::cleanup_orphaned_segments; -/// UNIX: Trigger cleanup for orphaned SHM segments +/// Linux: Trigger cleanup for orphaned SHM segments /// If process that created named SHM segment crashes or exits by a signal, the segment persists in the system /// disregarding if it is used by other Zenoh processes or not. This is the detail of POSIX specification for /// shared memory that is hard to bypass. To deal with this we developed a cleanup routine that enumerates all @@ -24,7 +24,7 @@ use crate::posix_shm::cleanup::cleanup_orphaned_segments; /// - process exit via exit() call or return from maint function /// It is OK to additionally trigger this function at any time, but be aware that this can be costly. /// -/// For non-unix platforms this function currently does nothing +/// For non-linux platforms this function currently does nothing #[zenoh_macros::unstable_doc] pub fn cleanup_orphaned_shm_segments() { cleanup_orphaned_segments(); diff --git a/commons/zenoh-shm/src/posix_shm/cleanup.rs b/commons/zenoh-shm/src/posix_shm/cleanup.rs index 821fb790f2..26efed6686 100644 --- a/commons/zenoh-shm/src/posix_shm/cleanup.rs +++ b/commons/zenoh-shm/src/posix_shm/cleanup.rs @@ -14,12 +14,12 @@ pub(crate) use platform::cleanup_orphaned_segments; -#[cfg(not(unix))] +#[cfg(not(linux))] mod platform { pub(crate) fn cleanup_orphaned_segments() {} } -#[cfg(unix)] +#[cfg(linux)] mod platform { use std::{borrow::Borrow, collections::HashSet, fs, path::PathBuf}; From 99fac5fe54ec2c62100d17ea78f92596d2dbc375 Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Thu, 26 Sep 2024 14:47:53 +0300 Subject: [PATCH 9/9] unix and not macos --- commons/zenoh-shm/src/posix_shm/cleanup.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commons/zenoh-shm/src/posix_shm/cleanup.rs b/commons/zenoh-shm/src/posix_shm/cleanup.rs index 26efed6686..9b74f24a22 100644 --- a/commons/zenoh-shm/src/posix_shm/cleanup.rs +++ b/commons/zenoh-shm/src/posix_shm/cleanup.rs @@ -14,12 +14,12 @@ pub(crate) use platform::cleanup_orphaned_segments; -#[cfg(not(linux))] +#[cfg(not(all(unix, not(target_os = "macos"))))] mod platform { pub(crate) fn cleanup_orphaned_segments() {} } -#[cfg(linux)] +#[cfg(all(unix, not(target_os = "macos")))] mod platform { use std::{borrow::Borrow, collections::HashSet, fs, path::PathBuf};