From 3190be0023710e38e58799ab57899fa0facd8304 Mon Sep 17 00:00:00 2001 From: Ryan Wick Date: Tue, 20 Jul 2021 11:19:31 +1000 Subject: [PATCH] Expanded CIGAR --- Cargo.lock | 2 ++ Cargo.toml | 2 ++ src/alignment.rs | 56 +++++++++++++++++++++++++++++++++++++++++------- src/main.rs | 2 +- 4 files changed, 53 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 22eb9bb..695be87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -250,7 +250,9 @@ dependencies = [ "clap", "colored", "flate2", + "lazy_static", "num-format", + "regex", "term_size", "textwrap 0.14.2", ] diff --git a/Cargo.toml b/Cargo.toml index 641d594..f82cfec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,5 @@ term_size = "0.3" num-format = "0.4" flate2 = "1.0" clap = { version = "3.0.0-beta.2", features = ["wrap_help"] } +regex = "1" +lazy_static = "1.4.0" diff --git a/src/alignment.rs b/src/alignment.rs index ef9f92b..84d80bd 100644 --- a/src/alignment.rs +++ b/src/alignment.rs @@ -11,6 +11,7 @@ use crate::pileup::Pileup; use crate::misc::quit_with_error; + use std::path::PathBuf; use std::collections::HashMap; use std::fs::File; @@ -18,6 +19,14 @@ use std::io; use std::io::{prelude::*, BufReader}; use std::result::Result; +use regex::Regex; +use lazy_static::lazy_static; + + +lazy_static! { + static ref RE: Regex = Regex::new(r"\d+[MIDNSHP=X]").unwrap(); +} + #[derive(Debug)] pub struct Alignment { @@ -26,11 +35,11 @@ pub struct Alignment { sam_flags: i32, ref_start: i64, cigar: String, + expanded_cigar: String, read_seq: String, mismatches: i32, } - impl Alignment { fn new(sam_line: &str) -> Result { let parts = sam_line.split('\t').collect::>(); @@ -62,15 +71,33 @@ impl Alignment { sam_flags: sam_flags, ref_start: ref_start, cigar: cigar.to_string(), + expanded_cigar: get_expanded_cigar(&cigar), read_seq: read_seq.to_string(), mismatches: mismatches, }) } + + fn is_aligned(&self) -> bool { + (self.sam_flags & 4) == 0 + } + + fn is_on_forward_strand(&self) -> bool { + (self.sam_flags & 16) == 0 + } + + fn is_secondary(&self) -> bool { + (self.sam_flags & 256) == 256 + } + + fn starts_and_ends_with_match(&self) -> bool { + self.expanded_cigar.chars().next().unwrap() == 'M' && + self.expanded_cigar.chars().last().unwrap() == 'M' + } } -pub fn process_sam(filename: &PathBuf, pileups: &mut HashMap) { - let result = add_to_pileup(filename, pileups); +pub fn process_sam(filename: &PathBuf, pileups: &mut HashMap, max_errors: i32) { + let result = add_to_pileup(filename, pileups, max_errors); match result { Ok((_, _)) => ( ), Err(_) => quit_with_error(&format!("unable to load alignments from {:?}", filename)), @@ -80,8 +107,8 @@ pub fn process_sam(filename: &PathBuf, pileups: &mut HashMap) { } -pub fn add_to_pileup(filename: &PathBuf, - pileups: &mut HashMap) -> io::Result<(usize, usize)> { +pub fn add_to_pileup(filename: &PathBuf, pileups: &mut HashMap, + max_errors: i32) -> io::Result<(usize, usize)> { let file = File::open(&filename)?; let reader = BufReader::new(file); @@ -112,13 +139,13 @@ pub fn add_to_pileup(filename: &PathBuf, if current_read_name.is_empty() || current_read_name == alignment.read_name { current_read_alignments.push(alignment); } else { - used_count += process_one_read(current_read_alignments, pileups); + used_count += process_one_read(current_read_alignments, pileups, max_errors); current_read_alignments = Vec::new(); current_read_alignments.push(alignment); } current_read_name = read_name; } - used_count += process_one_read(current_read_alignments, pileups); + used_count += process_one_read(current_read_alignments, pileups, max_errors); if alignment_count == 0 { quit_with_error(&format!("no alignments in {:?}", filename)) @@ -127,7 +154,8 @@ pub fn add_to_pileup(filename: &PathBuf, } -fn process_one_read(alignments: Vec, pileups: &mut HashMap) -> usize { +fn process_one_read(alignments: Vec, pileups: &mut HashMap, + max_errors: i32) -> usize { // TODO // TODO // TODO @@ -144,5 +172,17 @@ fn process_one_read(alignments: Vec, pileups: &mut HashMap String { + let mut expanded_cigar = String::new(); + for m in RE.find_iter(cigar) { + let num: i32 = cigar[m.start()..m.end()-1].parse().unwrap(); + let letter = &cigar[m.end()-1..m.end()]; + for _ in 0..num { + expanded_cigar.push_str(letter); + } + } + expanded_cigar +} + // TODO: define reverse complement function diff --git a/src/main.rs b/src/main.rs index 205b1f9..9627631 100644 --- a/src/main.rs +++ b/src/main.rs @@ -57,7 +57,7 @@ fn main() { starting_message(&opts); let (seq_names, mut pileups) = load_assembly(&opts.assembly); for s in &opts.sam { - alignment::process_sam(&s, &mut pileups); + alignment::process_sam(&s, &mut pileups, opts.max_errors); } // TEMP