Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ID, motifs, and motif counts to VCF #2

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/consensus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,8 @@ mod tests {
chrom: "chr1".to_string(),
start: 1,
end: 100,
id: "".to_string(),
motifs: "CA".to_string(),
},
);
println!("Consensus: {}", cons.seq.unwrap());
Expand Down
12 changes: 12 additions & 0 deletions src/genotype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,8 @@ mod tests {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
id: String::from("test-repeat"),
motifs: "CA".to_string(),
};
let flanking = 2000;
let minlen = 5;
Expand Down Expand Up @@ -369,6 +371,8 @@ mod tests {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
};
let args = Cli {
bam: String::from("test_data/small-test-phased.bam"),
Expand Down Expand Up @@ -397,6 +401,8 @@ mod tests {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
};
let args = Cli {
bam: String::from("test_data/small-test-phased.bam"),
Expand Down Expand Up @@ -441,6 +447,8 @@ mod tests {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
};
let mut bam = parse_bam::create_bam_reader(&args.bam, &args.fasta);
let genotype = genotype_repeat(&repeat, &args, &mut bam);
Expand Down Expand Up @@ -469,6 +477,8 @@ mod tests {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
};
let mut bam = parse_bam::create_bam_reader(&args.bam, &args.fasta);
let genotype = genotype_repeat(&repeat, &args, &mut bam);
Expand Down Expand Up @@ -498,6 +508,8 @@ mod tests {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
};
let mut bam = parse_bam::create_bam_reader(&args.bam, &args.fasta);
let genotype = genotype_repeat(&repeat, &args, &mut bam);
Expand Down
32 changes: 32 additions & 0 deletions src/motif.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,20 @@ fn create_motif(seq: &str) -> String {
unimplemented!()
}

pub fn create_mc(motifs_string: &str, repeats: &str) -> String {
let motifs: Vec<String> = motifs_string.split(',').map(|s| s.to_string()).collect();

let mut motif_counts = vec![0; motifs.len()];
for motif in motifs.iter() {
let mut start = 0;
while let Some(index) = repeats[start..].find(motif) {
motif_counts[motifs.iter().position(|x| x == motif).unwrap()] += 1;
start += index + motif.len();
}
}
motif_counts.iter().map(|count| count.to_string()).collect::<Vec<String>>().join("_")
}

Comment on lines +8 to +21
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will not work when the motif contains N though...

#[cfg(test)]
mod tests {
use super::*;
Expand All @@ -17,4 +31,22 @@ mod tests {
"(CAG)4(CGG)3(CAG)3"
);
}
#[test]
fn test_create_mc() {
let motifs = "ATG,CGT,GCA";
let repeat = "ATGCGTATGCGTAGCGT";
println!("{:?}", create_mc(&motifs, &repeat));
assert_eq!(
create_mc(&motifs, repeat), "2_3_0"
);
}
#[test]
fn test_create_mc_with_n() {
let motifs = "NCG";
let repeat = "ACGACGACGACG";

assert_eq!(
create_mc(&motifs, repeat), "4"
);
}
}
10 changes: 10 additions & 0 deletions src/parse_bam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ fn test_get_overlapping_reads() {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
};
let unphased = false;
let mut bam = create_bam_reader(&bam, &fasta);
Expand All @@ -157,6 +159,8 @@ fn test_get_overlapping_reads_url1() {
chrom: String::from("chr20"),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
};
let unphased = false;
let mut bam = create_bam_reader(&bam, &fasta);
Expand All @@ -171,6 +175,8 @@ fn test_get_overlapping_reads_url2() {
chrom: String::from("chr20"),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
};
let unphased = false;
let mut bam = create_bam_reader(&bam, &fasta);
Expand All @@ -185,6 +191,8 @@ fn test_get_overlapping_reads_url3() {
chrom: String::from("chr20"),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
};
let unphased = false;
let mut bam = create_bam_reader(&bam, &fasta);
Expand All @@ -199,6 +207,8 @@ fn test_get_overlapping_reads_url4() {
chrom: String::from("chr20"),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
};
let unphased = false;
let mut bam = create_bam_reader(&bam, &fasta);
Expand Down
12 changes: 12 additions & 0 deletions src/phase_insertions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,8 @@ mod tests {
chrom: "chr7".to_string(),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
},
false,
);
Expand Down Expand Up @@ -372,6 +374,8 @@ mod tests {
chrom: "chr7".to_string(),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
},
false,
);
Expand Down Expand Up @@ -411,6 +415,8 @@ mod tests {
chrom: "chr7".to_string(),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
},
false,
);
Expand Down Expand Up @@ -453,6 +459,8 @@ mod tests {
chrom: "chr7".to_string(),
start: 154654404,
end: 154654432,
id: "TEST".to_string(),
motifs: "CA".to_string(),
},
false,
);
Expand Down Expand Up @@ -493,6 +501,8 @@ mod tests {
chrom: "chr7".to_string(),
start: 154654404,
end: 154654432,
id: "TEST_RFC1".to_string(),
motifs: "CA".to_string(),
},
false,
);
Expand Down Expand Up @@ -564,6 +574,8 @@ mod tests {
chrom: "chr7".to_string(),
start: 154654404,
end: 154654432,
id: "TEST_RFC1".to_string(),
motifs: "CA".to_string(),
},
false,
);
Expand Down
33 changes: 28 additions & 5 deletions src/repeats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ impl RepeatIntervalIterator {
let end: u32 = interval.split('-').collect::<Vec<&str>>()[1]
.parse()
.unwrap();
let repeat = RepeatInterval::new_interval(chrom, start, end, fasta)
let id = String::new();
let motifs = String::new();
let repeat = RepeatInterval::new_interval(chrom, start, end, fasta, id, motifs)
.expect("Failed to create repeat interval");
RepeatIntervalIterator {
current_index: 0,
Expand Down Expand Up @@ -74,6 +76,8 @@ impl Clone for RepeatInterval {
chrom: self.chrom.clone(),
start: self.start,
end: self.end,
id: self.id.clone(),
motifs: self.motifs.clone(),
}
}
}
Expand All @@ -99,6 +103,8 @@ pub struct RepeatInterval {
pub chrom: String,
pub start: u32,
pub end: u32,
pub id: String,
pub motifs: String,
}

impl fmt::Display for RepeatInterval {
Expand All @@ -117,10 +123,25 @@ impl RepeatInterval {
let chrom = rec.chrom().to_string();
let start = rec.start().try_into().unwrap();
let end = rec.end().try_into().unwrap();
RepeatInterval::new_interval(chrom, start, end, fasta)
let mut id = String::new();
let mut motifs = String::new();
if let Some(info) = rec.name() {
let key_value_pairs: Vec<&str> = info.split(';').collect();
// Find and extract the value associated with the "ID" key
for pair in key_value_pairs {
if pair.starts_with("ID=") {
id = pair.trim_start_matches("ID=").to_string();
}
else if pair.starts_with("MOTIFS=") {
motifs = pair.trim_start_matches("MOTIFS=").to_string();
break;
}
}
}
RepeatInterval::new_interval(chrom, start, end, fasta, id, motifs)
}

fn new_interval(chrom: String, start: u32, end: u32, fasta: &str) -> Option<Self> {
fn new_interval(chrom: String, start: u32, end: u32, fasta: &str, id: String, motifs: String) -> Option<Self> {
if end < start {
panic!("End coordinate is smaller than start coordinate for {chrom}:{start}-{end}")
}
Expand All @@ -138,19 +159,21 @@ impl RepeatInterval {
.expect("Failed parsing chromosome length from fai file")
> end
{
return Some(Self { chrom, start, end });
return Some(Self { chrom, start, end, id, motifs});
}
}
// if the chromosome is not in the fai file or the end does not fit the interval, return None
panic!(
"Chromosome {chrom} is not in the fasta file or the end coordinate is out of bounds"
);
}
pub fn new(chrom: &str, start: u32, end: u32) -> Self {
pub fn new(chrom: &str, start: u32, end: u32, id: &str, motifs: &str) -> Self {
Self {
chrom: chrom.to_string(),
start,
end,
id: id.to_string(),
motifs: motifs.to_string(),
}
}

Expand Down
Loading
Loading