diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2b4b64e..bdf703b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,9 +20,9 @@ jobs: - uses: actions/checkout@v2 - run: cargo fmt -- --check - run: cargo clippy -- -D clippy::all - - run: cargo test --all -- --nocapture + - run: cargo clippy --all-features -- -D clippy::all + - run: cargo test --all --all-features -- --ignored --nocapture - run: cargo test --all --all-features -- --nocapture - - run: cargo bench --all -- --test - run: cargo bench --all --all-features -- --test Linux-Nightly: @@ -31,9 +31,8 @@ jobs: steps: - uses: actions/checkout@v2 - run: rustup default nightly - - run: cargo test --all -- --nocapture + - run: cargo test --all --all-features -- --ignored --nocapture - run: cargo test --all --all-features -- --nocapture - - run: cargo bench --all -- --test - run: cargo bench --all --all-features -- --test Mac-Stable: @@ -41,9 +40,8 @@ jobs: runs-on: macos-latest steps: - uses: actions/checkout@v2 - - run: cargo test --all -- --nocapture + - run: cargo test --all --all-features -- --ignored --nocapture - run: cargo test --all --all-features -- --nocapture - - run: cargo bench --all -- --test - run: cargo bench --all --all-features -- --test Win-Stable: @@ -51,7 +49,6 @@ jobs: runs-on: windows-latest steps: - uses: actions/checkout@v2 - - run: cargo test --all -- --nocapture + - run: cargo test --all --all-features -- --ignored --nocapture - run: cargo test --all --all-features -- --nocapture - - run: cargo bench --all -- --test - run: cargo bench --all --all-features -- --test \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 90ff159..d8cf185 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,7 +27,7 @@ //! As an example, here's a simple program that uses a fail point to simulate an //! I/O panic: //! -//! ```rust +//! ```ignore //! use fail::{fail_point, FailScenario}; //! //! fn do_fallible_work() { @@ -88,7 +88,7 @@ //! //! Here's a basic pattern for writing unit tests tests with fail points: //! -//! ```rust +//! ```ignore //! use fail::fail_point; //! //! fn do_fallible_work() { @@ -116,7 +116,7 @@ //! //! Here's a example to show the process: //! -//! ```rust +//! ```ignore //! use fail::FailScenario; //! //! let _scenario = FailScenario::setup(); @@ -167,7 +167,7 @@ //! `fail_point!` macro. To illustrate this, let's modify the `do_fallible_work` //! function we used earlier to return a `Result`: //! -//! ```rust +//! ```ignore //! use fail::{fail_point, FailScenario}; //! use std::io; //! @@ -211,7 +211,7 @@ //! //! Here's a variation that does so: //! -//! ```rust +//! ```ignore //! # use std::io; //! fn do_fallible_work() -> io::Result<()> { //! fail::fail_point!("read-dir", |_| { @@ -262,719 +262,741 @@ #![deny(missing_docs, missing_debug_implementations)] -use std::collections::HashMap; -use std::env::VarError; -use std::fmt::Debug; -use std::str::FromStr; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::MutexGuard; -use std::sync::{Arc, Condvar, Mutex, RwLock, TryLockError}; -use std::time::{Duration, Instant}; -use std::{env, thread}; - -#[derive(Clone)] -struct SyncCallback(Arc); - -impl Debug for SyncCallback { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("SyncCallback()") - } -} - -impl PartialEq for SyncCallback { - fn eq(&self, other: &Self) -> bool { - Arc::ptr_eq(&self.0, &other.0) - } -} +#[cfg(not(feature = "failpoints"))] +pub use disable_failpoint::*; +#[cfg(feature = "failpoints")] +pub use enable_failpoint::*; -impl SyncCallback { - fn new(f: impl Fn() + Send + Sync + 'static) -> SyncCallback { - SyncCallback(Arc::new(f)) +#[cfg(feature = "failpoints")] +#[macro_use] +mod enable_failpoint { + use std::collections::HashMap; + use std::env::VarError; + use std::fmt::Debug; + use std::str::FromStr; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::MutexGuard; + use std::sync::{Arc, Condvar, Mutex, RwLock, TryLockError}; + use std::time::{Duration, Instant}; + use std::{env, thread}; + + /// Registry with failpoints configuration. + type Registry = HashMap>; + + lazy_static::lazy_static! { + static ref REGISTRY_GROUP: RwLock>>> = Default::default(); + static ref REGISTRY_GLOBAL: FailPointRegistry = FailPointRegistry::default(); + static ref SCENARIO: Mutex<&'static FailPointRegistry> = Mutex::new(®ISTRY_GLOBAL); } - fn run(&self) { - let callback = &self.0; - callback(); + /// Fail point registry. Threads bound to the same registry share the same + /// failpoints configuration. + #[derive(Debug, Default, Clone)] + pub struct FailPointRegistry { + // TODO: remove rwlock or store *mut FailPoint + registry: Arc>, } -} -/// Supported tasks. -#[derive(Clone, Debug, PartialEq)] -enum Task { - /// Do nothing. - Off, - /// Return the value. - Return(Option), - /// Sleep for some milliseconds. - Sleep(u64), - /// Panic with the message. - Panic(Option), - /// Print the message. - Print(Option), - /// Sleep until other action is set. - Pause, - /// Yield the CPU. - Yield, - /// Busy waiting for some milliseconds. - Delay(u64), - /// Call callback function. - Callback(SyncCallback), -} + impl FailPointRegistry { + /// Generate a new failpoint registry. The new registry will inherit the + /// global failpoints configuration. + /// + /// Each thread should be bound to exact one registry. Threads bound to the + /// same registry share the same failpoints configuration. + pub fn new() -> Self { + FailPointRegistry { + registry: Arc::new(RwLock::new(Registry::new())), + } + } -#[derive(Debug)] -struct Action { - task: Task, - freq: f32, - count: Option, -} + /// Returns the failpoint registry to which the current thread is bound. If + /// `failpoints` feature is not enabled, the internal registry is none. + pub fn current_registry() -> Self { + let registry = { + let group = REGISTRY_GROUP.read().unwrap(); + let id = thread::current().id(); + group + .get(&id) + .unwrap_or_else(|| ®ISTRY_GLOBAL.registry) + .clone() + }; + FailPointRegistry { registry } + } -impl PartialEq for Action { - fn eq(&self, hs: &Action) -> bool { - if self.task != hs.task || self.freq != hs.freq { - return false; + /// Register the current thread to this failpoints registry. + pub fn register_current(&self) { + let id = thread::current().id(); + REGISTRY_GROUP + .write() + .unwrap() + .insert(id, self.registry.clone()); } - if let Some(ref lhs) = self.count { - if let Some(ref rhs) = hs.count { - return lhs.load(Ordering::Relaxed) == rhs.load(Ordering::Relaxed); - } - } else if hs.count.is_none() { - return true; + + /// Deregister the current thread to this failpoints registry. + pub fn deregister_current() { + let id = thread::current().id(); + REGISTRY_GROUP.write().unwrap().remove(&id); } - false - } -} -impl Action { - fn new(task: Task, freq: f32, max_cnt: Option) -> Action { - Action { - task, - freq, - count: max_cnt.map(AtomicUsize::new), + /// Clean up registered fail points in this registry. + pub fn teardown(&self) { + let mut registry = self.registry.write().unwrap(); + cleanup(&mut registry); } } - fn from_callback(f: impl Fn() + Send + Sync + 'static) -> Action { - let task = Task::Callback(SyncCallback::new(f)); - Action { - task, - freq: 1.0, - count: None, - } + /// Test scenario with configured fail points. + #[derive(Debug)] + pub struct FailScenario<'a> { + scenario_guard: MutexGuard<'a, &'static FailPointRegistry>, } - fn get_task(&self) -> Option { - use rand::Rng; + impl<'a> FailScenario<'a> { + /// Set up the system for a fail points scenario. + /// + /// Configures global fail points specified in the `FAILPOINTS` environment variable. + /// It does not otherwise change any existing fail point configuration. + /// + /// The format of `FAILPOINTS` is `failpoint=actions;...`, where + /// `failpoint` is the name of the fail point. For more information + /// about fail point actions see the [`cfg`](fn.cfg.html) function and + /// the [`fail_point`](macro.fail_point.html) macro. + /// + /// `FAILPOINTS` may configure fail points that are not actually defined. In + /// this case the configuration has no effect. + /// + /// This function should generally be called prior to running a test with fail + /// points, and afterward paired with [`teardown`](#method.teardown). + /// + /// # Panics + /// + /// Panics if an action is not formatted correctly. + pub fn setup() -> Self { + // Cleanup first, in case of previous failed/panic'ed test scenarios. + let scenario_guard = SCENARIO.lock().unwrap_or_else(|e| e.into_inner()); + let mut registry = scenario_guard.registry.write().unwrap(); + cleanup(&mut registry); - if let Some(ref cnt) = self.count { - let c = cnt.load(Ordering::Acquire); - if c == 0 { - return None; - } - } - if self.freq < 1f32 && !rand::thread_rng().gen_bool(f64::from(self.freq)) { - return None; - } - if let Some(ref cnt) = self.count { - loop { - let c = cnt.load(Ordering::Acquire); - if c == 0 { - return None; + let failpoints = match env::var("FAILPOINTS") { + Ok(s) => s, + Err(VarError::NotPresent) => return Self { scenario_guard }, + Err(e) => panic!("invalid failpoints: {:?}", e), + }; + for mut cfg in failpoints.trim().split(';') { + cfg = cfg.trim(); + if cfg.is_empty() { + continue; } - if c == cnt.compare_and_swap(c, c - 1, Ordering::AcqRel) { - break; + let (name, order) = partition(cfg, '='); + match order { + None => panic!("invalid failpoint: {:?}", cfg), + Some(order) => { + if let Err(e) = set(&mut registry, name.to_owned(), order) { + panic!("unable to configure failpoint \"{}\": {}", name, e); + } + } } } + Self { scenario_guard } } - Some(self.task.clone()) - } -} - -fn partition(s: &str, pattern: char) -> (&str, Option<&str>) { - let mut splits = s.splitn(2, pattern); - (splits.next().unwrap(), splits.next()) -} -impl FromStr for Action { - type Err = String; - - /// Parse an action. - /// - /// `s` should be in the format `[p%][cnt*]task[(args)]`, `p%` is the frequency, - /// `cnt` is the max times the action can be triggered. - fn from_str(s: &str) -> Result { - let mut remain = s.trim(); - let mut args = None; - // in case there is '%' in args, we need to parse it first. - let (first, second) = partition(remain, '('); - if let Some(second) = second { - remain = first; - if !second.ends_with(')') { - return Err("parentheses do not match".to_owned()); - } - args = Some(&second[..second.len() - 1]); + /// Tear down the global fail points registry. + /// + /// Clears the configuration of global fail points. Any paused fail + /// points will be notified before they are deactivated. + /// + /// This function should generally be called after running a test with fail points. + /// Calling `teardown` without previously calling `setup` results in a no-op. + pub fn teardown(self) { + drop(self); } + } - let mut frequency = 1f32; - let (first, second) = partition(remain, '%'); - if let Some(second) = second { - remain = second; - match first.parse::() { - Err(e) => return Err(format!("failed to parse frequency: {}", e)), - Ok(freq) => frequency = freq / 100.0, - } + impl<'a> Drop for FailScenario<'a> { + fn drop(&mut self) { + let mut registry = self.scenario_guard.registry.write().unwrap(); + cleanup(&mut registry); } + } - let mut max_cnt = None; - let (first, second) = partition(remain, '*'); - if let Some(second) = second { - remain = second; - match first.parse() { - Err(e) => return Err(format!("failed to parse count: {}", e)), - Ok(cnt) => max_cnt = Some(cnt), - } + /// Clean all registered fail points. + fn cleanup(registry: &mut std::sync::RwLockWriteGuard) { + for p in registry.values() { + // wake up all pause failpoint. + p.set_actions("", vec![]); } + registry.clear(); + } - let parse_timeout = || match args { - None => Err("sleep require timeout".to_owned()), - Some(timeout_str) => match timeout_str.parse() { - Err(e) => Err(format!("failed to parse timeout: {}", e)), - Ok(timeout) => Ok(timeout), - }, + /// Get all registered fail points in current registry. + /// + /// If current thread is not bound to any local registry. It will try to + /// get from the global registry. + /// + /// Return a vector of `(name, actions)` pairs. + pub fn list() -> Vec<(String, String)> { + let registry = { + let group = REGISTRY_GROUP.read().unwrap(); + let id = thread::current().id(); + group + .get(&id) + .unwrap_or_else(|| ®ISTRY_GLOBAL.registry) + .clone() }; - let task = match remain { - "off" => Task::Off, - "return" => Task::Return(args.map(str::to_owned)), - "sleep" => Task::Sleep(parse_timeout()?), - "panic" => Task::Panic(args.map(str::to_owned)), - "print" => Task::Print(args.map(str::to_owned)), - "pause" => Task::Pause, - "yield" => Task::Yield, - "delay" => Task::Delay(parse_timeout()?), - _ => return Err(format!("unrecognized command {:?}", remain)), - }; + let registry = registry.read().unwrap(); - Ok(Action::new(task, frequency, max_cnt)) + registry + .iter() + .map(|(name, fp)| (name.to_string(), fp.actions_str.read().unwrap().clone())) + .collect() } -} -#[cfg_attr(feature = "cargo-clippy", allow(clippy::mutex_atomic))] -#[derive(Debug, Default)] -struct FailPoint { - pause: Mutex, - pause_notifier: Condvar, - actions: RwLock>, - actions_str: RwLock, -} - -#[cfg_attr(feature = "cargo-clippy", allow(clippy::mutex_atomic))] -impl FailPoint { - fn new() -> FailPoint { - FailPoint::default() - } + #[doc(hidden)] + pub fn eval) -> R>(name: &str, f: F) -> Option { + let p = { + let registry = { + let group = REGISTRY_GROUP.read().unwrap(); + let id = thread::current().id(); + group + .get(&id) + .unwrap_or_else(|| ®ISTRY_GLOBAL.registry) + .clone() + }; - fn set_actions(&self, actions_str: &str, actions: Vec) { - loop { - // TODO: maybe busy waiting here. - match self.actions.try_write() { - Err(TryLockError::WouldBlock) => {} - Ok(mut guard) => { - *guard = actions; - *self.actions_str.write().unwrap() = actions_str.to_string(); - return; - } - Err(e) => panic!("unexpected poison: {:?}", e), - } - let mut guard = self.pause.lock().unwrap(); - *guard = false; - self.pause_notifier.notify_all(); - } - } + let registry = registry.read().unwrap(); - #[cfg_attr(feature = "cargo-clippy", allow(clippy::option_option))] - fn eval(&self, name: &str) -> Option> { - let task = { - let actions = self.actions.read().unwrap(); - match actions.iter().filter_map(Action::get_task).next() { - Some(Task::Pause) => { - let mut guard = self.pause.lock().unwrap(); - *guard = true; - loop { - guard = self.pause_notifier.wait(guard).unwrap(); - if !*guard { - break; - } - } - return None; - } - Some(t) => t, + match registry.get(name) { None => return None, + Some(p) => p.clone(), } }; - - match task { - Task::Off => {} - Task::Return(s) => return Some(s), - Task::Sleep(t) => thread::sleep(Duration::from_millis(t)), - Task::Panic(msg) => match msg { - Some(ref msg) => panic!("{}", msg), - None => panic!("failpoint {} panic", name), - }, - Task::Print(msg) => match msg { - Some(ref msg) => log::info!("{}", msg), - None => log::info!("failpoint {} executed.", name), - }, - Task::Pause => unreachable!(), - Task::Yield => thread::yield_now(), - Task::Delay(t) => { - let timer = Instant::now(); - let timeout = Duration::from_millis(t); - while timer.elapsed() < timeout {} - } - Task::Callback(f) => { - f.run(); - } - } - None + p.eval(name).map(f) } -} -/// Registry with failpoints configuration. -type Registry = HashMap>; - -/// Fail point registry. Threads bound to the same registry share the same -/// failpoints configuration. -#[derive(Debug, Default, Clone)] -pub struct FailPointRegistry { - // TODO: remove rwlock or store *mut FailPoint - registry: Option>>, -} - -impl FailPointRegistry { - /// Generate a new failpoint registry. The new registry will inherit the - /// global failpoints configuration. + /// Configure the actions for a fail point in current registry at runtime. /// - /// Each thread should be bound to exact one registry. Threads bound to the - /// same registry share the same failpoints configuration. - pub fn new() -> Self { - FailPointRegistry { - registry: Some(Arc::new(RwLock::new(Registry::new()))), - } - } - - /// Returns the failpoint registry to which the current thread is bound. If - /// `failpoints` feature is not enabled, the internal registry is none. - pub fn current_registry() -> Self { + /// Each fail point can be configured with a series of actions, specified by the + /// `actions` argument. The format of `actions` is `action[->action...]`. When + /// multiple actions are specified, an action will be checked only when its + /// former action is not triggered. + /// + /// The format of a single action is `[p%][cnt*]task[(arg)]`. `p%` is the + /// expected probability that the action is triggered, and `cnt*` is the max + /// times the action can be triggered. The supported values of `task` are: + /// + /// - `off`, the fail point will do nothing. + /// - `return(arg)`, return early when the fail point is triggered. `arg` is passed to `$e` ( + /// defined via the `fail_point!` macro) as a string. + /// - `sleep(milliseconds)`, sleep for the specified time. + /// - `panic(msg)`, panic with the message. + /// - `print(msg)`, log the message, using the `log` crate, at the `info` level. + /// - `pause`, sleep until other action is set to the fail point. + /// - `yield`, yield the CPU. + /// - `delay(milliseconds)`, busy waiting for the specified time. + /// + /// For example, `20%3*print(still alive!)->panic` means the fail point has 20% chance to print a + /// message "still alive!" and 80% chance to panic. And the message will be printed at most 3 + /// times. + /// + /// The `FAILPOINTS` environment variable accepts this same syntax for its fail + /// point actions. + /// + /// A call to `cfg` with a particular fail point name overwrites any existing actions for + /// that fail point, including those set via the `FAILPOINTS` environment variable. + pub fn cfg>(name: S, actions: &str) -> Result<(), String> { if cfg!(feature = "failpoints") { let registry = { let group = REGISTRY_GROUP.read().unwrap(); let id = thread::current().id(); group .get(&id) - .unwrap_or_else(|| REGISTRY_GLOBAL.registry.as_ref().unwrap()) + .unwrap_or_else(|| ®ISTRY_GLOBAL.registry) .clone() }; - FailPointRegistry { - registry: Some(registry), - } + + let mut registry = registry.write().unwrap(); + + set(&mut registry, name.into(), actions) } else { - FailPointRegistry { registry: None } + Err("failpoints is not enabled".to_owned()) } } - /// Register the current thread to this failpoints registry. - pub fn register_current(&self) { - if cfg!(feature = "failpoints") { + /// Configure the actions for a fail point in current registry at runtime. + /// + /// Each fail point can be configured by a callback. Process will call this callback function + /// when it meet this fail-point. + pub fn cfg_callback(name: S, f: F) -> Result<(), String> + where + S: Into, + F: Fn() + Send + Sync + 'static, + { + let registry = { + let group = REGISTRY_GROUP.read().unwrap(); let id = thread::current().id(); - REGISTRY_GROUP - .write() - .unwrap() - .insert(id, self.registry.clone().unwrap()); - } + group + .get(&id) + .unwrap_or_else(|| ®ISTRY_GLOBAL.registry) + .clone() + }; + + let mut registry = registry.write().unwrap(); + + let p = registry + .entry(name.into()) + .or_insert_with(|| Arc::new(FailPoint::new())); + let action = Action::from_callback(f); + let actions = vec![action]; + p.set_actions("callback", actions); + Ok(()) } - /// Deregister the current thread to this failpoints registry. - pub fn deregister_current() { - if cfg!(feature = "failpoints") { + /// Remove a fail point. + /// + /// If the local registry doesn't exist, it will try to delete the corresponding + /// action in the global registry. + pub fn remove>(name: S) { + let registry = { + let group = REGISTRY_GROUP.read().unwrap(); let id = thread::current().id(); - REGISTRY_GROUP.write().unwrap().remove(&id); - } - } + group + .get(&id) + .unwrap_or_else(|| ®ISTRY_GLOBAL.registry) + .clone() + }; - /// Clean up registered fail points in this registry. - pub fn teardown(&self) { - if cfg!(feature = "failpoints") { - let mut registry = self.registry.as_ref().unwrap().write().unwrap(); - cleanup(&mut registry); + let mut registry = registry.write().unwrap(); + + if let Some(p) = registry.remove(name.as_ref()) { + // wake up all pause failpoint. + p.set_actions("", vec![]); } } -} - -lazy_static::lazy_static! { - static ref REGISTRY_GROUP: RwLock>>> = Default::default(); - static ref REGISTRY_GLOBAL: FailPointRegistry = FailPointRegistry { registry: Some(Default::default()) }; - static ref SCENARIO: Mutex<&'static FailPointRegistry> = Mutex::new(®ISTRY_GLOBAL); -} -/// Test scenario with configured fail points. -#[derive(Debug)] -pub struct FailScenario<'a> { - scenario_guard: MutexGuard<'a, &'static FailPointRegistry>, -} + fn set( + registry: &mut HashMap>, + name: String, + actions: &str, + ) -> Result<(), String> { + let actions_str = actions; + // `actions` are in the format of `failpoint[->failpoint...]`. + let actions = actions + .split("->") + .map(Action::from_str) + .collect::>()?; + // Please note that we can't figure out whether there is a failpoint named `name`, + // so we may insert a failpoint that doesn't exist at all. + let p = registry + .entry(name) + .or_insert_with(|| Arc::new(FailPoint::new())); + p.set_actions(actions_str, actions); + Ok(()) + } -impl<'a> FailScenario<'a> { - /// Set up the system for a fail points scenario. + /// Define a fail point (requires `failpoints` feature). /// - /// Configures global fail points specified in the `FAILPOINTS` environment variable. - /// It does not otherwise change any existing fail point configuration. + /// The `fail_point!` macro has three forms, and they all take a name as the + /// first argument. The simplest form takes only a name and is suitable for + /// executing most fail point behavior, including panicking, but not for early + /// return or conditional execution based on a local flag. /// - /// The format of `FAILPOINTS` is `failpoint=actions;...`, where - /// `failpoint` is the name of the fail point. For more information - /// about fail point actions see the [`cfg`](fn.cfg.html) function and - /// the [`fail_point`](macro.fail_point.html) macro. + /// The three forms of fail points look as follows. /// - /// `FAILPOINTS` may configure fail points that are not actually defined. In - /// this case the configuration has no effect. + /// 1. A basic fail point: /// - /// This function should generally be called prior to running a test with fail - /// points, and afterward paired with [`teardown`](#method.teardown). + /// ```rust + /// # #[macro_use] extern crate fail; + /// fn function_return_unit() { + /// fail_point!("fail-point-1"); + /// } + /// ``` /// - /// # Panics + /// This form of fail point can be configured to panic, print, sleep, pause, etc., but + /// not to return from the function early. /// - /// Panics if an action is not formatted correctly. - pub fn setup() -> Self { - // Cleanup first, in case of previous failed/panic'ed test scenarios. - let scenario_guard = SCENARIO.lock().unwrap_or_else(|e| e.into_inner()); - let mut registry = scenario_guard.registry.as_ref().unwrap().write().unwrap(); - cleanup(&mut registry); - - let failpoints = match env::var("FAILPOINTS") { - Ok(s) => s, - Err(VarError::NotPresent) => return Self { scenario_guard }, - Err(e) => panic!("invalid failpoints: {:?}", e), - }; - for mut cfg in failpoints.trim().split(';') { - cfg = cfg.trim(); - if cfg.is_empty() { - continue; + /// 2. A fail point that may return early: + /// + /// ```rust + /// # #[macro_use] extern crate fail; + /// fn function_return_value() -> u64 { + /// fail_point!("fail-point-2", |r| r.map_or(2, |e| e.parse().unwrap())); + /// 0 + /// } + /// ``` + /// + /// This form of fail point can additionally be configured to return early from + /// the enclosing function. It accepts a closure, which itself accepts an + /// `Option`, and is expected to transform that argument into the early + /// return value. The argument string is sourced from the fail point + /// configuration string. For example configuring this "fail-point-2" as + /// "return(100)" will execute the fail point closure, passing it a `Some` value + /// containing a `String` equal to "100"; the closure then parses it into the + /// return value. + /// + /// 3. A fail point with conditional execution: + /// + /// ```rust + /// # #[macro_use] extern crate fail; + /// fn function_conditional(enable: bool) { + /// fail_point!("fail-point-3", enable, |_| {}); + /// } + /// ``` + /// + /// In this final form, the second argument is a local boolean expression that + /// must evaluate to `true` before the fail point is evaluated. The third + /// argument is again an early-return closure. + /// + /// The three macro arguments (or "designators") are called `$name`, `$cond`, + /// and `$e`. `$name` must be `&str`, `$cond` must be a boolean expression, + /// and`$e` must be a function or closure that accepts an `Option` and + /// returns the same type as the enclosing function. + /// + /// For more examples see the [crate documentation](index.html). For more + /// information about controlling fail points see the [`cfg`](fn.cfg.html) + /// function. + #[macro_export] + macro_rules! fail_point { + ($name:expr) => {{ + $crate::eval($name, |_| { + panic!("Return is not supported for the fail point \"{}\"", $name); + }); + }}; + ($name:expr, $e:expr) => {{ + if let Some(res) = $crate::eval($name, $e) { + return res; } - let (name, order) = partition(cfg, '='); - match order { - None => panic!("invalid failpoint: {:?}", cfg), - Some(order) => { - if let Err(e) = set(&mut registry, name.to_owned(), order) { - panic!("unable to configure failpoint \"{}\": {}", name, e); - } - } + }}; + ($name:expr, $cond:expr, $e:expr) => {{ + if $cond { + fail_point!($name, $e); } - } - Self { scenario_guard } + }}; } - /// Tear down the global fail points registry. - /// - /// Clears the configuration of global fail points. Any paused fail - /// points will be notified before they are deactivated. - /// - /// This function should generally be called after running a test with fail points. - /// Calling `teardown` without previously calling `setup` results in a no-op. - pub fn teardown(self) { - drop(self); + #[derive(Clone)] + pub(crate) struct SyncCallback(Arc); + + impl Debug for SyncCallback { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("SyncCallback()") + } } -} -impl<'a> Drop for FailScenario<'a> { - fn drop(&mut self) { - let mut registry = self - .scenario_guard - .registry - .as_ref() - .unwrap() - .write() - .unwrap(); - cleanup(&mut registry); + impl PartialEq for SyncCallback { + fn eq(&self, _: &Self) -> bool { + unimplemented!() + } } -} -/// Clean all registered fail points. -fn cleanup(registry: &mut std::sync::RwLockWriteGuard) { - for p in registry.values() { - // wake up all pause failpoint. - p.set_actions("", vec![]); + impl SyncCallback { + fn new(f: impl Fn() + Send + Sync + 'static) -> SyncCallback { + SyncCallback(Arc::new(f)) + } + + fn run(&self) { + let callback = &self.0; + callback(); + } } - registry.clear(); -} -/// Returns whether code generation for failpoints is enabled. -/// -/// This function allows consumers to check (at runtime) whether the library -/// was compiled with the (buildtime) `failpoints` feature, which enables -/// code generation for failpoints. -pub const fn has_failpoints() -> bool { - cfg!(feature = "failpoints") -} + /// Supported tasks. + #[derive(Clone, Debug, PartialEq)] + pub(crate) enum Task { + /// Do nothing. + Off, + /// Return the value. + Return(Option), + /// Sleep for some milliseconds. + Sleep(u64), + /// Panic with the message. + Panic(Option), + /// Print the message. + Print(Option), + /// Sleep until other action is set. + Pause, + /// Yield the CPU. + Yield, + /// Busy waiting for some milliseconds. + Delay(u64), + /// Call callback function. + Callback(SyncCallback), + } -/// Get all registered fail points in current registry. -/// -/// If current thread is not bound to any local registry. It will try to -/// get from the global registry. -/// -/// Return a vector of `(name, actions)` pairs. -pub fn list() -> Vec<(String, String)> { - let registry = { - let group = REGISTRY_GROUP.read().unwrap(); - let id = thread::current().id(); - group - .get(&id) - .unwrap_or_else(|| REGISTRY_GLOBAL.registry.as_ref().unwrap()) - .clone() - }; - - let registry = registry.read().unwrap(); - - registry - .iter() - .map(|(name, fp)| (name.to_string(), fp.actions_str.read().unwrap().clone())) - .collect() -} + #[derive(Debug)] + pub(crate) struct Action { + task: Task, + freq: f32, + count: Option, + } -#[doc(hidden)] -pub fn eval) -> R>(name: &str, f: F) -> Option { - let p = { - let registry = { - let group = REGISTRY_GROUP.read().unwrap(); - let id = thread::current().id(); - group - .get(&id) - .unwrap_or_else(|| REGISTRY_GLOBAL.registry.as_ref().unwrap()) - .clone() - }; + impl PartialEq for Action { + fn eq(&self, hs: &Action) -> bool { + if self.task != hs.task || self.freq != hs.freq { + return false; + } + if let Some(ref lhs) = self.count { + if let Some(ref rhs) = hs.count { + return lhs.load(Ordering::Relaxed) == rhs.load(Ordering::Relaxed); + } + } else if hs.count.is_none() { + return true; + } + false + } + } - let registry = registry.read().unwrap(); + impl Action { + pub(crate) fn new(task: Task, freq: f32, max_cnt: Option) -> Action { + Action { + task, + freq, + count: max_cnt.map(AtomicUsize::new), + } + } - match registry.get(name) { - None => return None, - Some(p) => p.clone(), + pub(crate) fn from_callback(f: impl Fn() + Send + Sync + 'static) -> Action { + let task = Task::Callback(SyncCallback::new(f)); + Action { + task, + freq: 1.0, + count: None, + } } - }; - p.eval(name).map(f) -} -/// Configure the actions for a fail point in current registry at runtime. -/// -/// Each fail point can be configured with a series of actions, specified by the -/// `actions` argument. The format of `actions` is `action[->action...]`. When -/// multiple actions are specified, an action will be checked only when its -/// former action is not triggered. -/// -/// The format of a single action is `[p%][cnt*]task[(arg)]`. `p%` is the -/// expected probability that the action is triggered, and `cnt*` is the max -/// times the action can be triggered. The supported values of `task` are: -/// -/// - `off`, the fail point will do nothing. -/// - `return(arg)`, return early when the fail point is triggered. `arg` is passed to `$e` ( -/// defined via the `fail_point!` macro) as a string. -/// - `sleep(milliseconds)`, sleep for the specified time. -/// - `panic(msg)`, panic with the message. -/// - `print(msg)`, log the message, using the `log` crate, at the `info` level. -/// - `pause`, sleep until other action is set to the fail point. -/// - `yield`, yield the CPU. -/// - `delay(milliseconds)`, busy waiting for the specified time. -/// -/// For example, `20%3*print(still alive!)->panic` means the fail point has 20% chance to print a -/// message "still alive!" and 80% chance to panic. And the message will be printed at most 3 -/// times. -/// -/// The `FAILPOINTS` environment variable accepts this same syntax for its fail -/// point actions. -/// -/// A call to `cfg` with a particular fail point name overwrites any existing actions for -/// that fail point, including those set via the `FAILPOINTS` environment variable. -pub fn cfg>(name: S, actions: &str) -> Result<(), String> { - if cfg!(feature = "failpoints") { - let registry = { - let group = REGISTRY_GROUP.read().unwrap(); - let id = thread::current().id(); - group - .get(&id) - .unwrap_or_else(|| REGISTRY_GLOBAL.registry.as_ref().unwrap()) - .clone() - }; + pub(crate) fn get_task(&self) -> Option { + use rand::Rng; - let mut registry = registry.write().unwrap(); + if let Some(ref cnt) = self.count { + let c = cnt.load(Ordering::Acquire); + if c == 0 { + return None; + } + } + if self.freq < 1f32 && !rand::thread_rng().gen_bool(f64::from(self.freq)) { + return None; + } + if let Some(ref cnt) = self.count { + loop { + let c = cnt.load(Ordering::Acquire); + if c == 0 { + return None; + } + if c == cnt.compare_and_swap(c, c - 1, Ordering::AcqRel) { + break; + } + } + } + Some(self.task.clone()) + } + } - set(&mut registry, name.into(), actions) - } else { - Err("failpoints is not enabled".to_owned()) + fn partition(s: &str, pattern: char) -> (&str, Option<&str>) { + let mut splits = s.splitn(2, pattern); + (splits.next().unwrap(), splits.next()) } -} -/// Configure the actions for a fail point in current registry at runtime. -/// -/// Each fail point can be configured by a callback. Process will call this callback function -/// when it meet this fail-point. -pub fn cfg_callback(name: S, f: F) -> Result<(), String> -where - S: Into, - F: Fn() + Send + Sync + 'static, -{ - let registry = { - let group = REGISTRY_GROUP.read().unwrap(); - let id = thread::current().id(); - group - .get(&id) - .unwrap_or_else(|| REGISTRY_GLOBAL.registry.as_ref().unwrap()) - .clone() - }; - - let mut registry = registry.write().unwrap(); - - let p = registry - .entry(name.into()) - .or_insert_with(|| Arc::new(FailPoint::new())); - let action = Action::from_callback(f); - let actions = vec![action]; - p.set_actions("callback", actions); - Ok(()) -} + impl FromStr for Action { + type Err = String; + + /// Parse an action. + /// + /// `s` should be in the format `[p%][cnt*]task[(args)]`, `p%` is the frequency, + /// `cnt` is the max times the action can be triggered. + fn from_str(s: &str) -> Result { + let mut remain = s.trim(); + let mut args = None; + // in case there is '%' in args, we need to parse it first. + let (first, second) = partition(remain, '('); + if let Some(second) = second { + remain = first; + if !second.ends_with(')') { + return Err("parentheses do not match".to_owned()); + } + args = Some(&second[..second.len() - 1]); + } -/// Remove a fail point. -/// -/// If the local registry doesn't exist, it will try to delete the corresponding -/// action in the global registry. -pub fn remove>(name: S) { - let registry = { - let group = REGISTRY_GROUP.read().unwrap(); - let id = thread::current().id(); - group - .get(&id) - .unwrap_or_else(|| REGISTRY_GLOBAL.registry.as_ref().unwrap()) - .clone() - }; - - let mut registry = registry.write().unwrap(); - - if let Some(p) = registry.remove(name.as_ref()) { - // wake up all pause failpoint. - p.set_actions("", vec![]); + let mut frequency = 1f32; + let (first, second) = partition(remain, '%'); + if let Some(second) = second { + remain = second; + match first.parse::() { + Err(e) => return Err(format!("failed to parse frequency: {}", e)), + Ok(freq) => frequency = freq / 100.0, + } + } + + let mut max_cnt = None; + let (first, second) = partition(remain, '*'); + if let Some(second) = second { + remain = second; + match first.parse() { + Err(e) => return Err(format!("failed to parse count: {}", e)), + Ok(cnt) => max_cnt = Some(cnt), + } + } + + let parse_timeout = || match args { + None => Err("sleep require timeout".to_owned()), + Some(timeout_str) => match timeout_str.parse() { + Err(e) => Err(format!("failed to parse timeout: {}", e)), + Ok(timeout) => Ok(timeout), + }, + }; + + let task = match remain { + "off" => Task::Off, + "return" => Task::Return(args.map(str::to_owned)), + "sleep" => Task::Sleep(parse_timeout()?), + "panic" => Task::Panic(args.map(str::to_owned)), + "print" => Task::Print(args.map(str::to_owned)), + "pause" => Task::Pause, + "yield" => Task::Yield, + "delay" => Task::Delay(parse_timeout()?), + _ => return Err(format!("unrecognized command {:?}", remain)), + }; + + Ok(Action::new(task, frequency, max_cnt)) + } } -} -fn set( - registry: &mut HashMap>, - name: String, - actions: &str, -) -> Result<(), String> { - let actions_str = actions; - // `actions` are in the format of `failpoint[->failpoint...]`. - let actions = actions - .split("->") - .map(Action::from_str) - .collect::>()?; - // Please note that we can't figure out whether there is a failpoint named `name`, - // so we may insert a failpoint that doesn't exist at all. - let p = registry - .entry(name) - .or_insert_with(|| Arc::new(FailPoint::new())); - p.set_actions(actions_str, actions); - Ok(()) -} + #[cfg_attr(feature = "cargo-clippy", allow(clippy::mutex_atomic))] + #[derive(Debug, Default)] + pub(crate) struct FailPoint { + pause: Mutex, + pause_notifier: Condvar, + actions: RwLock>, + actions_str: RwLock, + } -/// Define a fail point (requires `failpoints` feature). -/// -/// The `fail_point!` macro has three forms, and they all take a name as the -/// first argument. The simplest form takes only a name and is suitable for -/// executing most fail point behavior, including panicking, but not for early -/// return or conditional execution based on a local flag. -/// -/// The three forms of fail points look as follows. -/// -/// 1. A basic fail point: -/// -/// ```rust -/// # #[macro_use] extern crate fail; -/// fn function_return_unit() { -/// fail_point!("fail-point-1"); -/// } -/// ``` -/// -/// This form of fail point can be configured to panic, print, sleep, pause, etc., but -/// not to return from the function early. -/// -/// 2. A fail point that may return early: -/// -/// ```rust -/// # #[macro_use] extern crate fail; -/// fn function_return_value() -> u64 { -/// fail_point!("fail-point-2", |r| r.map_or(2, |e| e.parse().unwrap())); -/// 0 -/// } -/// ``` -/// -/// This form of fail point can additionally be configured to return early from -/// the enclosing function. It accepts a closure, which itself accepts an -/// `Option`, and is expected to transform that argument into the early -/// return value. The argument string is sourced from the fail point -/// configuration string. For example configuring this "fail-point-2" as -/// "return(100)" will execute the fail point closure, passing it a `Some` value -/// containing a `String` equal to "100"; the closure then parses it into the -/// return value. -/// -/// 3. A fail point with conditional execution: -/// -/// ```rust -/// # #[macro_use] extern crate fail; -/// fn function_conditional(enable: bool) { -/// fail_point!("fail-point-3", enable, |_| {}); -/// } -/// ``` -/// -/// In this final form, the second argument is a local boolean expression that -/// must evaluate to `true` before the fail point is evaluated. The third -/// argument is again an early-return closure. -/// -/// The three macro arguments (or "designators") are called `$name`, `$cond`, -/// and `$e`. `$name` must be `&str`, `$cond` must be a boolean expression, -/// and`$e` must be a function or closure that accepts an `Option` and -/// returns the same type as the enclosing function. -/// -/// For more examples see the [crate documentation](index.html). For more -/// information about controlling fail points see the [`cfg`](fn.cfg.html) -/// function. -#[macro_export] -#[cfg(feature = "failpoints")] -macro_rules! fail_point { - ($name:expr) => {{ - $crate::eval($name, |_| { - panic!("Return is not supported for the fail point \"{}\"", $name); - }); - }}; - ($name:expr, $e:expr) => {{ - if let Some(res) = $crate::eval($name, $e) { - return res; + #[cfg_attr(feature = "cargo-clippy", allow(clippy::mutex_atomic))] + impl FailPoint { + pub(crate) fn new() -> FailPoint { + FailPoint::default() } - }}; - ($name:expr, $cond:expr, $e:expr) => {{ - if $cond { - fail_point!($name, $e); + + pub(crate) fn set_actions(&self, actions_str: &str, actions: Vec) { + loop { + // TODO: maybe busy waiting here. + match self.actions.try_write() { + Err(TryLockError::WouldBlock) => {} + Ok(mut guard) => { + *guard = actions; + *self.actions_str.write().unwrap() = actions_str.to_string(); + return; + } + Err(e) => panic!("unexpected poison: {:?}", e), + } + let mut guard = self.pause.lock().unwrap(); + *guard = false; + self.pause_notifier.notify_all(); + } + } + + #[cfg_attr(feature = "cargo-clippy", allow(clippy::option_option))] + pub(crate) fn eval(&self, name: &str) -> Option> { + let task = { + let actions = self.actions.read().unwrap(); + match actions.iter().filter_map(Action::get_task).next() { + Some(Task::Pause) => { + let mut guard = self.pause.lock().unwrap(); + *guard = true; + loop { + guard = self.pause_notifier.wait(guard).unwrap(); + if !*guard { + break; + } + } + return None; + } + Some(t) => t, + None => return None, + } + }; + + match task { + Task::Off => {} + Task::Return(s) => return Some(s), + Task::Sleep(t) => thread::sleep(Duration::from_millis(t)), + Task::Panic(msg) => match msg { + Some(ref msg) => panic!("{}", msg), + None => panic!("failpoint {} panic", name), + }, + Task::Print(msg) => match msg { + Some(ref msg) => log::info!("{}", msg), + None => log::info!("failpoint {} executed.", name), + }, + Task::Pause => unreachable!(), + Task::Yield => thread::yield_now(), + Task::Delay(t) => { + let timer = Instant::now(); + let timeout = Duration::from_millis(t); + while timer.elapsed() < timeout {} + } + Task::Callback(f) => { + f.run(); + } + } + None } - }}; + } } -/// Define a fail point (disabled, see `failpoints` feature). -#[macro_export] #[cfg(not(feature = "failpoints"))] -macro_rules! fail_point { - ($name:expr, $e:expr) => {{}}; - ($name:expr) => {{}}; - ($name:expr, $cond:expr, $e:expr) => {{}}; +mod disable_failpoint { + #[derive(Debug, Default, Clone)] + /// Define a fail point registry (disabled, see `failpoints` feature). + pub struct FailPointRegistry; + + impl FailPointRegistry { + /// Generate a new failpoint registry (disabled, see `failpoints` feature). + pub fn new() -> Self { + FailPointRegistry::default() + } + + /// Returns the failpoint registry to which the current thread is bound + /// (disabled, see `failpoints` feature). + pub fn current_registry() -> Self { + FailPointRegistry::default() + } + + /// Register the current thread (disabled, see `failpoints` feature). + pub fn register_current(&self) {} + + /// Deregister the current thread (disabled, see `failpoints` feature). + pub fn deregister_current() {} + + /// Clean up fail points in this registry (disabled, see `failpoints` feature). + pub fn teardown(&self) {} + } + + /// Define a fail point (disabled, see `failpoints` feature). + #[macro_export] + macro_rules! fail_point { + ($name:expr, $e:expr) => {{}}; + ($name:expr) => {{}}; + ($name:expr, $cond:expr, $e:expr) => {{}}; + } +} + +/// Returns whether code generation for failpoints is enabled. +/// +/// This function allows consumers to check (at runtime) whether the library +/// was compiled with the (buildtime) `failpoints` feature, which enables +/// code generation for failpoints. +pub const fn has_failpoints() -> bool { + cfg!(feature = "failpoints") } #[cfg(test)] +#[cfg(feature = "failpoints")] mod tests { + use super::enable_failpoint::*; use super::*; - + use std::env; use std::sync::*; + use std::thread; + use std::time::{Duration, Instant}; #[test] fn test_has_failpoints() { @@ -1153,7 +1175,6 @@ mod tests { // This case should be tested as integration case, but when calling `teardown` other cases // like `test_pause` maybe also affected, so it's better keep it here. #[test] - #[cfg_attr(not(feature = "failpoints"), ignore)] fn test_setup_and_teardown() { let f1 = || { fail_point!("setup_and_teardown1", |_| 1); diff --git a/tests/tests.rs b/tests/tests.rs index 98d682b..a8c636d 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -2,13 +2,13 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::*; +use std::thread; use std::time::*; -use std::*; use fail::fail_point; #[test] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_pause() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current(); @@ -42,7 +42,7 @@ fn test_pause() { } #[test] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_off() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current(); @@ -58,7 +58,7 @@ fn test_off() { } #[test] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_return() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current(); @@ -78,7 +78,7 @@ fn test_return() { } #[test] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_sleep() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current(); @@ -98,7 +98,7 @@ fn test_sleep() { #[test] #[should_panic] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_panic() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current(); @@ -111,7 +111,7 @@ fn test_panic() { } #[test] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_print() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current(); @@ -148,7 +148,7 @@ fn test_print() { } #[test] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_yield() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current(); @@ -161,7 +161,7 @@ fn test_yield() { } #[test] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_callback() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current(); @@ -185,7 +185,7 @@ fn test_callback() { } #[test] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_delay() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current(); @@ -198,7 +198,7 @@ fn test_delay() { } #[test] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_freq_and_count() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current(); @@ -222,7 +222,7 @@ fn test_freq_and_count() { } #[test] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_condition() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current(); @@ -240,7 +240,7 @@ fn test_condition() { } #[test] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_list() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current(); @@ -253,7 +253,7 @@ fn test_list() { } #[test] -#[cfg_attr(not(feature = "failpoints"), ignore)] +#[cfg(feature = "failpoints")] fn test_multiple_threads_cleanup() { let local_registry = fail::FailPointRegistry::new(); local_registry.register_current();