Skip to content

Commit

Permalink
Add interlayer for clustering
Browse files Browse the repository at this point in the history
  • Loading branch information
hkctkuy committed Dec 22, 2023
1 parent 3be8561 commit ed79279
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 32 deletions.
42 changes: 14 additions & 28 deletions casr/src/bin/casr-cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,11 +374,12 @@ fn update_clusters(
// Drop crashlines if they're unused
let crashlines = if dedup { crashlines } else { Vec::new() };
// Fill cluster info structures
clusters.insert(i, Cluster::new(i, stacktraces, crashlines));
// NOTE: We don't care about paths of casreps from existing clusters
clusters.insert(i, Cluster::new(i, Vec::new(), stacktraces, crashlines));
}

// Init list of casreps, which aren't suitable for any cluster
let mut deviants = Vec::<&PathBuf>::new();
let mut deviants: Vec<(&PathBuf, (Stacktrace, String))> = Vec::new();
// Init added casreps counter
let mut added = 0usize;
// Init duplicates counter
Expand All @@ -404,14 +405,11 @@ fn update_clusters(
Relation::Inner(measure) => {
inners.push((cluster.number, measure));
}

Check warning on line 407 in casr/src/bin/casr-cluster.rs

View check run for this annotation

Codecov / codecov/patch

casr/src/bin/casr-cluster.rs#L405-L407

Added lines #L405 - L407 were not covered by tests
Relation::Outer(measure) => match tolerance_level {
ToleranceLevel::Loyal => {
Relation::Outer(measure) => {
if let ToleranceLevel::Loyal = tolerance_level {
outers.push((cluster.number, measure));
}
_ => {
deviants.push(casrep);
}
},
}
Relation::Oot => {
continue;
}
Expand All @@ -426,12 +424,13 @@ fn update_clusters(
outers.iter().min_by(|a, b| a.1.total_cmp(&b.1)).unwrap().0
} else {
// Out of threshold
deviants.push(casrep);
deviants.push((casrep, (stacktrace.to_vec(), crashline.to_string())));
continue;
};

// Update cluster (and dedup crashline)
if !clusters.get_mut(&(number)).unwrap().insert(
if !clusters.get_mut(&number).unwrap().insert(
casrep.to_path_buf(),
stacktrace.to_vec(),
crashline.to_string(),
dedup,
Expand All @@ -454,24 +453,11 @@ fn update_clusters(

// Handle deviant casreps
let (result, before, after) = if !deviants.is_empty() {
// Copy casrep to tmp dir
let deviant_dir = format!("{}/deviant", &oldpath.display());
fs::create_dir_all(&deviant_dir)?;
for casrep in deviants {
fs::copy(
casrep,
format!(
"{}/{}",
&deviant_dir,
&casrep.file_name().unwrap().to_str().unwrap()
),
)?;
}
// Cluster deviant casreps
let (result, before, after) =
make_clusters(Path::new(&deviant_dir), Some(oldpath), jobs, dedup, max)?;
let _ = fs::remove_dir_all(&deviant_dir);
(result, before, after)
// Get clusters from deviants
let (deviant_clusters, before, after) = gen_clusters(&deviants, max, dedup)?;
// Save deviant clusters
util::save_clusters(&deviant_clusters, oldpath)?;
(deviant_clusters.len(), before, after)
} else {
(0, 0, 0)

Check warning on line 462 in casr/src/bin/casr-cluster.rs

View check run for this annotation

Codecov / codecov/patch

casr/src/bin/casr-cluster.rs#L462

Added line #L462 was not covered by tests
};
Expand Down
29 changes: 27 additions & 2 deletions casr/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ extern crate libcasr;

use libcasr::report::CrashReport;
use libcasr::stacktrace::{
Stacktrace, STACK_FRAME_FILEPATH_IGNORE_REGEXES, STACK_FRAME_FUNCTION_IGNORE_REGEXES,
Cluster, Stacktrace, STACK_FRAME_FILEPATH_IGNORE_REGEXES, STACK_FRAME_FUNCTION_IGNORE_REGEXES,
};

use anyhow::{bail, Context, Result};
Expand All @@ -14,7 +14,7 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
use simplelog::*;
use wait_timeout::ChildExt;

use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use std::fs::{self, OpenOptions};
use std::io::Write;
use std::io::{BufRead, BufReader};
Expand Down Expand Up @@ -493,3 +493,28 @@ pub fn reports_from_paths(

(casreps, stacktraces, crashlines, badreports)
}

/// Save clusters to directory
///
/// For every cluster a `cl<number>` subdirectory is created inside `dir`
/// (together with any missing parents), and each report path stored in the
/// cluster is copied into it under its original file name.
///
/// # Arguments
///
/// * `clusters` - given `Cluster` structures for saving
///
/// * `dir` - out directory
///
/// # Errors
///
/// Returns an error if a cluster directory cannot be created, if a report
/// path has no final file name component, or if copying a report fails.
pub fn save_clusters(clusters: &HashMap<usize, Cluster>, dir: &Path) -> Result<()> {
    for cluster in clusters.values() {
        // Join path components instead of formatting them into a string:
        // `display()` is lossy for non-UTF-8 paths
        let cluster_dir = dir.join(format!("cl{}", cluster.number));
        fs::create_dir_all(&cluster_dir)?;
        for casrep in cluster.paths() {
            // Keep the file name as an `OsStr`, so non-UTF-8 names are copied
            // as is, and report a path without a file name instead of panicking
            let file_name = casrep.file_name().ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidInput,
                    format!("Report path has no file name: {}", casrep.display()),
                )
            })?;
            fs::copy(casrep, cluster_dir.join(file_name))?;
        }
    }
    Ok(())
}
81 changes: 79 additions & 2 deletions libcasr/src/stacktrace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use kodama::{linkage, Method};
use regex::Regex;
use std::collections::{HashMap, HashSet};
use std::fmt::{self, Write};
use std::path::PathBuf;
use std::sync::RwLock;

// Re-export types from gdb_command for convenient use from Casr library
Expand Down Expand Up @@ -77,6 +78,8 @@ pub enum ToleranceLevel {
pub struct Cluster {
/// Cluster number
pub number: usize,
/// Cluster report paths
paths: Vec<PathBuf>,
/// Cluster report stacktraces
stacktraces: Vec<Stacktrace>,
/// Cluster diameter
Expand All @@ -87,16 +90,26 @@ pub struct Cluster {

impl Cluster {
/// Create new `Cluster`
pub fn new(number: usize, stacktraces: Vec<Stacktrace>, crashlines: Vec<String>) -> Self {
pub fn new(
number: usize,
paths: Vec<PathBuf>,
stacktraces: Vec<Stacktrace>,
crashlines: Vec<String>,
) -> Self {
let mut unique_crashlines: HashSet<String> = HashSet::new();
unique_crashlines.extend(crashlines);
Cluster {
number,
paths,
stacktraces,
diam: None,
crashlines: unique_crashlines,
}
}
/// Get CASR report paths
///
/// The paths are returned in the order they were stored: the ones passed to
/// `new` first, followed by one path for every report accepted by `insert`.
pub fn paths(&self) -> &Vec<PathBuf> {
&self.paths
}
/// Get CASR report stacktraces
pub fn stacktraces(&self) -> &Vec<Stacktrace> {
&self.stacktraces
Expand All @@ -115,10 +128,17 @@ impl Cluster {
///
/// `true` if new CASR report may be added,
/// `false` if report is a duplicate of another report in the cluster
pub fn insert(&mut self, stacktrace: Stacktrace, crashline: String, dedup: bool) -> bool {
pub fn insert(
&mut self,
path: PathBuf,
stacktrace: Stacktrace,
crashline: String,
dedup: bool,
) -> bool {
if dedup && !crashline.is_empty() && !self.crashlines.insert(crashline.to_string()) {
return false;

Check warning on line 139 in libcasr/src/stacktrace.rs

View check run for this annotation

Codecov / codecov/patch

libcasr/src/stacktrace.rs#L139

Added line #L139 was not covered by tests
}
self.paths.push(path);
self.stacktraces.push(stacktrace);
self.diam = None;
true
Expand Down Expand Up @@ -186,6 +206,63 @@ impl Cluster {
}
}

// TODO: Write a better description...
// NOTE: It's just interlayer between `Cluster` and `cluster_stacktrace` fn
/// Generate clusters from CASR report info
///
/// Stacktraces of the given reports are clustered with `cluster_stacktraces`,
/// optionally deduplicated by crashline with `dedup_crashlines`, and the
/// remaining reports are grouped into `Cluster` structures keyed by cluster
/// number.
///
/// # Arguments
///
/// * `reports` - slice of report info: path, stacktrace, crashline
///
/// * `offset` - cluster enumerate offset, added to every cluster number
///
/// * `dedup` - deduplicate crashline, if true
///
/// # Return value
///
/// * `HashMap` of `Cluster`
/// * Number of valid casreps before crashline deduplication
/// * Number of valid casreps after crashline deduplication
pub fn gen_clusters(
    reports: &[(&PathBuf, (Stacktrace, String))],
    offset: usize,
    dedup: bool,
) -> Result<(HashMap<usize, Cluster>, usize, usize)> {
    // Unzip casrep info: clustering and deduplication need contiguous slices
    // of stacktraces and crashlines
    let (casreps, (stacktraces, crashlines)): (Vec<_>, (Vec<_>, Vec<_>)) =
        reports.iter().cloned().unzip();
    let len = casreps.len();
    // Get stacktraces cluster numbers
    let mut numbers = cluster_stacktraces(&stacktraces)?;
    // Deduplicate by crashline
    let after = if dedup {
        dedup_crashlines(&crashlines, &mut numbers)
    } else {
        len
    };
    // Every report is expected to get exactly one cluster number
    debug_assert_eq!(numbers.len(), len);
    // Create clusters, moving the already owned report info into them instead
    // of cloning it a second time
    let mut clusters: HashMap<usize, Cluster> = HashMap::new();
    let report_info = casreps.into_iter().zip(stacktraces).zip(crashlines);
    for (&number, ((casrep, stacktrace), crashline)) in numbers.iter().zip(report_info) {
        if number == 0 {
            // Skip casreps with duplicate crashlines
            continue;
        }
        let number = number + offset;
        // Add new cluster if not exists and update it with a single lookup
        clusters
            .entry(number)
            .or_insert_with(|| Cluster::new(number, Vec::new(), Vec::new(), Vec::new()))
            .insert(casrep.to_path_buf(), stacktrace, crashline, dedup);
    }
    Ok((clusters, len, after))
}

/// This macro updates variables used to remove trusted functions from stack trace
#[macro_export]
macro_rules! init_ignored_frames {
Expand Down

0 comments on commit ed79279

Please sign in to comment.