diff --git a/casr/src/bin/casr-cluster.rs b/casr/src/bin/casr-cluster.rs index 7363132c..a391a153 100644 --- a/casr/src/bin/casr-cluster.rs +++ b/casr/src/bin/casr-cluster.rs @@ -374,11 +374,12 @@ fn update_clusters( // Drop crashlines if they're unused let crashlines = if dedup { crashlines } else { Vec::new() }; // Fill cluster info structures - clusters.insert(i, Cluster::new(i, stacktraces, crashlines)); + // NOTE: We don't care about paths of casreps from existing clusters + clusters.insert(i, Cluster::new(i, Vec::new(), stacktraces, crashlines)); } // Init list of casreps, which aren't suitable for any cluster - let mut deviants = Vec::<&PathBuf>::new(); + let mut deviants: Vec<(&PathBuf, (Stacktrace, String))> = Vec::new(); // Init added casreps counter let mut added = 0usize; // Init duplicates counter @@ -404,14 +405,11 @@ fn update_clusters( Relation::Inner(measure) => { inners.push((cluster.number, measure)); } - Relation::Outer(measure) => match tolerance_level { - ToleranceLevel::Loyal => { + Relation::Outer(measure) => { + if let ToleranceLevel::Loyal = tolerance_level { outers.push((cluster.number, measure)); } - _ => { - deviants.push(casrep); - } - }, + } Relation::Oot => { continue; } @@ -426,12 +424,13 @@ fn update_clusters( outers.iter().min_by(|a, b| a.1.total_cmp(&b.1)).unwrap().0 } else { // Out of threshold - deviants.push(casrep); + deviants.push((casrep, (stacktrace.to_vec(), crashline.to_string()))); continue; }; // Update cluster (and dedup crashline) - if !clusters.get_mut(&(number)).unwrap().insert( + if !clusters.get_mut(&number).unwrap().insert( + casrep.to_path_buf(), stacktrace.to_vec(), crashline.to_string(), dedup, @@ -454,24 +453,11 @@ fn update_clusters( // Handle deviant casreps let (result, before, after) = if !deviants.is_empty() { - // Copy casrep to tmp dir - let deviant_dir = format!("{}/deviant", &oldpath.display()); - fs::create_dir_all(&deviant_dir)?; - for casrep in deviants { - fs::copy( - casrep, - format!( - 
-                    "{}/{}",
-                    &deviant_dir,
-                    &casrep.file_name().unwrap().to_str().unwrap()
-                ),
-            )?;
-        }
-        // Cluster deviant casreps
-        let (result, before, after) =
-            make_clusters(Path::new(&deviant_dir), Some(oldpath), jobs, dedup, max)?;
-        let _ = fs::remove_dir_all(&deviant_dir);
-        (result, before, after)
+        // Get clusters from deviants
+        let (deviant_clusters, before, after) = gen_clusters(&deviants, max, dedup)?;
+        // Save deviant clusters
+        util::save_clusters(&deviant_clusters, oldpath)?;
+        (deviant_clusters.len(), before, after)
     } else {
         (0, 0, 0)
     };
diff --git a/casr/src/util.rs b/casr/src/util.rs
index d7646896..e65d3ed3 100644
--- a/casr/src/util.rs
+++ b/casr/src/util.rs
@@ -3,7 +3,7 @@ extern crate libcasr;
 
 use libcasr::report::CrashReport;
 use libcasr::stacktrace::{
-    Stacktrace, STACK_FRAME_FILEPATH_IGNORE_REGEXES, STACK_FRAME_FUNCTION_IGNORE_REGEXES,
+    Cluster, Stacktrace, STACK_FRAME_FILEPATH_IGNORE_REGEXES, STACK_FRAME_FUNCTION_IGNORE_REGEXES,
 };
 
 use anyhow::{bail, Context, Result};
@@ -14,7 +14,7 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
 use simplelog::*;
 use wait_timeout::ChildExt;
 
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use std::fs::{self, OpenOptions};
 use std::io::Write;
 use std::io::{BufRead, BufReader};
@@ -493,3 +493,33 @@ pub fn reports_from_paths(
 
     (casreps, stacktraces, crashlines, badreports)
 }
+
+/// Save clusters to directory
+///
+/// Every cluster is written to its own `cl<number>` subdirectory of `dir`,
+/// and each report of the cluster is copied there under its original file name.
+///
+/// # Arguments
+///
+/// * `clusters` - given `Cluster` structures for saving
+///
+/// * `dir` - out directory
+///
+/// # Errors
+///
+/// Fails if a cluster directory cannot be created, a report path has no file
+/// name, or a report cannot be copied.
+pub fn save_clusters(clusters: &HashMap<usize, Cluster>, dir: &Path) -> Result<()> {
+    for cluster in clusters.values() {
+        // Build the cluster directory once; `Path::join` keeps non-UTF-8 names intact
+        let cluster_dir = dir.join(format!("cl{}", cluster.number));
+        fs::create_dir_all(&cluster_dir)?;
+        for casrep in cluster.paths() {
+            let file_name = casrep
+                .file_name()
+                .with_context(|| format!("Report path has no file name: {}", casrep.display()))?;
+            fs::copy(casrep, cluster_dir.join(file_name))?;
+        }
+    }
+    Ok(())
+}
diff --git a/libcasr/src/stacktrace.rs b/libcasr/src/stacktrace.rs
index 
7d603855..65446363 100644 --- a/libcasr/src/stacktrace.rs +++ b/libcasr/src/stacktrace.rs @@ -16,6 +16,7 @@ use kodama::{linkage, Method}; use regex::Regex; use std::collections::{HashMap, HashSet}; use std::fmt::{self, Write}; +use std::path::PathBuf; use std::sync::RwLock; // Re-export types from gdb_command for convenient use from Casr library @@ -77,6 +78,8 @@ pub enum ToleranceLevel { pub struct Cluster { /// Cluster number pub number: usize, + /// Cluster report paths + paths: Vec, /// Cluster report stacktraces stacktraces: Vec, /// Cluster diameter @@ -87,16 +90,26 @@ pub struct Cluster { impl Cluster { /// Create new `Cluster` - pub fn new(number: usize, stacktraces: Vec, crashlines: Vec) -> Self { + pub fn new( + number: usize, + paths: Vec, + stacktraces: Vec, + crashlines: Vec, + ) -> Self { let mut unique_crashlines: HashSet = HashSet::new(); unique_crashlines.extend(crashlines); Cluster { number, + paths, stacktraces, diam: None, crashlines: unique_crashlines, } } + /// Get CASR report paths + pub fn paths(&self) -> &Vec { + &self.paths + } /// Get CASR report stactraces pub fn stacktraces(&self) -> &Vec { &self.stacktraces @@ -115,10 +128,17 @@ impl Cluster { /// /// `true` if new CASR report may be added, /// `false` if report is duplicate of someone else - pub fn insert(&mut self, stacktrace: Stacktrace, crashline: String, dedup: bool) -> bool { + pub fn insert( + &mut self, + path: PathBuf, + stacktrace: Stacktrace, + crashline: String, + dedup: bool, + ) -> bool { if dedup && !crashline.is_empty() && !self.crashlines.insert(crashline.to_string()) { return false; } + self.paths.push(path); self.stacktraces.push(stacktrace); self.diam = None; true @@ -186,6 +206,63 @@ impl Cluster { } } +// TODO: Write a better description... 
+// NOTE: This is a thin layer between `Cluster` and the `cluster_stacktraces` fn
+/// Generate clusters from CASR report info
+///
+/// # Arguments
+///
+/// * `reports` - slice of report info: path, stacktrace, crashline
+///
+/// * `offset` - cluster enumerate offset
+///
+/// * `dedup` - deduplicate crashline, if true
+///
+/// # Return value
+///
+/// * `HashMap` of `Cluster`, keyed by cluster number
+/// * Number of valid casreps before crashline deduplication
+/// * Number of valid casreps after crashline deduplication
+pub fn gen_clusters(
+    reports: &[(&PathBuf, (Stacktrace, String))],
+    offset: usize,
+    dedup: bool,
+) -> Result<(HashMap<usize, Cluster>, usize, usize)> {
+    // Unzip casrep info
+    let (casreps, (stacktraces, crashlines)): (Vec<_>, (Vec<_>, Vec<_>)) =
+        reports.iter().cloned().unzip();
+    let len = casreps.len();
+    // Get stacktraces cluster numbers
+    let mut numbers = cluster_stacktraces(&stacktraces)?;
+    // Deduplicate by crashline
+    let after = if dedup {
+        dedup_crashlines(&crashlines, &mut numbers)
+    } else {
+        len
+    };
+    // Create clusters
+    let mut clusters: HashMap<usize, Cluster> = HashMap::new();
+    for i in 0..len {
+        if numbers[i] == 0 {
+            // Skip casreps with duplicate crashlines
+            continue;
+        }
+        let number = numbers[i] + offset;
+        // Fetch the cluster, creating it on first use, then add the casrep.
+        // The entry API avoids a second lookup and the `unwrap()`.
+        clusters
+            .entry(number)
+            .or_insert_with(|| Cluster::new(number, Vec::new(), Vec::new(), Vec::new()))
+            .insert(
+                casreps[i].to_path_buf(),
+                stacktraces[i].to_vec(),
+                crashlines[i].to_string(),
+                dedup,
+            );
+    }
+    Ok((clusters, len, after))
+}
+
 /// This macro updates variables used to remove trusted functions from stack trace
 #[macro_export]
 macro_rules! init_ignored_frames {