Skip to content

Commit

Permalink
feat: hidden devloper option to strip basemods in the first or last x bp
Browse files Browse the repository at this point in the history
  • Loading branch information
Mitchell R. Vollger committed Nov 18, 2024
1 parent ce5f66f commit 48eaec4
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/fiber.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ impl FiberseqData {
};

// get fiberseq basemods
let base_mods = BaseMods::new(&record, filters.min_ml_score);
let mut base_mods = BaseMods::new(&record, filters.min_ml_score);
base_mods.filter_at_read_ends(filters.strip_starting_basemods);

//let (m6a, cpg) = FiberMods::new(&base_mods);
let m6a = base_mods.m6a();
let cpg = base_mods.cpg();
Expand Down
38 changes: 38 additions & 0 deletions src/utils/bamranges.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,44 @@ impl Ranges {
self.reference_lengths = to_keep.iter().map(|&i| self.reference_lengths[i]).collect();
}

/// filter out ranges that are within the first or last X bp of the read
pub fn filter_starts_at_read_ends(&mut self, strip: i64) {
if strip == 0 {
return;
}
let to_keep = self
.starts
.iter()
.enumerate()
.filter_map(|(i, &s)| {
if let Some(s) = s {
if s < strip || s > self.seq_len - strip {
None
} else {
Some(i)
}
} else {
None
}
})
.collect::<Vec<_>>();

if to_keep.len() != self.starts.len() {
log::trace!(
"basemods stripped, {} basemods removed",
self.starts.len() - to_keep.len()
);
}

self.starts = to_keep.iter().map(|&i| self.starts[i]).collect();
self.ends = to_keep.iter().map(|&i| self.ends[i]).collect();
self.lengths = to_keep.iter().map(|&i| self.lengths[i]).collect();
self.qual = to_keep.iter().map(|&i| self.qual[i]).collect();
self.reference_starts = to_keep.iter().map(|&i| self.reference_starts[i]).collect();
self.reference_ends = to_keep.iter().map(|&i| self.reference_ends[i]).collect();
self.reference_lengths = to_keep.iter().map(|&i| self.reference_lengths[i]).collect();
}

pub fn to_strings(&self, reference: bool, skip_none: bool) -> Vec<String> {
let (s, e, l, q) = if reference {
(
Expand Down
17 changes: 17 additions & 0 deletions src/utils/basemods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ impl BaseMod {
pub fn is_cpg(&self) -> bool {
self.modification_type == 'm'
}

pub fn filter_at_read_ends(&mut self, n_strip: i64) {
if n_strip <= 0 {
return;
}
self.ranges.filter_starts_at_read_ends(n_strip);
}
}

#[derive(Eq, PartialEq, Debug, Clone)]
Expand Down Expand Up @@ -257,6 +264,16 @@ impl BaseMods {
.for_each(|bm| bm.ranges.filter_by_qual(min_ml_score));
}

/// filter the basemods at the read ends
pub fn filter_at_read_ends(&mut self, n_strip: i64) {
if n_strip <= 0 {
return;
}
self.base_mods
.iter_mut()
.for_each(|bm| bm.filter_at_read_ends(n_strip));
}

/// combine the forward and reverse m6a data
pub fn m6a(&self) -> Ranges {
let ranges = self
Expand Down
10 changes: 10 additions & 0 deletions src/utils/input_bam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@ pub struct FiberFilters {
/// Minium score in the ML tag to use or include in the output
#[clap(long="ml", alias="min-ml-score", default_value = MIN_ML_SCORE, help_heading = "BAM-Options", env="FT_MIN_ML_SCORE")]
pub min_ml_score: u8,
/// strip basemods in the first or last X bp of the read
#[clap(
global = true,
long,
default_value = "0",
help_heading = "BAM-Options",
hide = true
)]
pub strip_starting_basemods: i64,
}

impl std::default::Default for FiberFilters {
Expand All @@ -48,6 +57,7 @@ impl std::default::Default for FiberFilters {
bit_flag: 0,
min_ml_score: MIN_ML_SCORE.parse().unwrap(),
filter_expression: None,
strip_starting_basemods: 0,
}
}
}
Expand Down

0 comments on commit 48eaec4

Please sign in to comment.