Skip to content

Commit

Permalink
Expanded CIGAR
Browse files Browse the repository at this point in the history
  • Loading branch information
rrwick committed Jul 20, 2021
1 parent 65b6871 commit 3190be0
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 9 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ term_size = "0.3"
num-format = "0.4"
flate2 = "1.0"
clap = { version = "3.0.0-beta.2", features = ["wrap_help"] }
regex = "1"
lazy_static = "1.4.0"
56 changes: 48 additions & 8 deletions src/alignment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,22 @@

use crate::pileup::Pileup;
use crate::misc::quit_with_error;

use std::path::PathBuf;
use std::collections::HashMap;
use std::fs::File;
use std::io;
use std::io::{prelude::*, BufReader};
use std::result::Result;

use regex::Regex;
use lazy_static::lazy_static;


lazy_static! {
static ref RE: Regex = Regex::new(r"\d+[MIDNSHP=X]").unwrap();
}


#[derive(Debug)]
pub struct Alignment {
Expand All @@ -26,11 +35,11 @@ pub struct Alignment {
sam_flags: i32,
ref_start: i64,
cigar: String,
expanded_cigar: String,
read_seq: String,
mismatches: i32,
}


impl Alignment {
fn new(sam_line: &str) -> Result<Alignment, &str> {
let parts = sam_line.split('\t').collect::<Vec<&str>>();
Expand Down Expand Up @@ -62,15 +71,33 @@ impl Alignment {
sam_flags: sam_flags,
ref_start: ref_start,
cigar: cigar.to_string(),
expanded_cigar: get_expanded_cigar(&cigar),
read_seq: read_seq.to_string(),
mismatches: mismatches,
})
}

fn is_aligned(&self) -> bool {
(self.sam_flags & 4) == 0
}

fn is_on_forward_strand(&self) -> bool {
(self.sam_flags & 16) == 0
}

fn is_secondary(&self) -> bool {
(self.sam_flags & 256) == 256
}

fn starts_and_ends_with_match(&self) -> bool {
self.expanded_cigar.chars().next().unwrap() == 'M' &&
self.expanded_cigar.chars().last().unwrap() == 'M'
}
}


pub fn process_sam(filename: &PathBuf, pileups: &mut HashMap<String, Pileup>) {
let result = add_to_pileup(filename, pileups);
pub fn process_sam(filename: &PathBuf, pileups: &mut HashMap<String, Pileup>, max_errors: i32) {
let result = add_to_pileup(filename, pileups, max_errors);
match result {
Ok((_, _)) => ( ),
Err(_) => quit_with_error(&format!("unable to load alignments from {:?}", filename)),
Expand All @@ -80,8 +107,8 @@ pub fn process_sam(filename: &PathBuf, pileups: &mut HashMap<String, Pileup>) {
}


pub fn add_to_pileup(filename: &PathBuf,
pileups: &mut HashMap<String, Pileup>) -> io::Result<(usize, usize)> {
pub fn add_to_pileup(filename: &PathBuf, pileups: &mut HashMap<String, Pileup>,
max_errors: i32) -> io::Result<(usize, usize)> {
let file = File::open(&filename)?;
let reader = BufReader::new(file);

Expand Down Expand Up @@ -112,13 +139,13 @@ pub fn add_to_pileup(filename: &PathBuf,
if current_read_name.is_empty() || current_read_name == alignment.read_name {
current_read_alignments.push(alignment);
} else {
used_count += process_one_read(current_read_alignments, pileups);
used_count += process_one_read(current_read_alignments, pileups, max_errors);
current_read_alignments = Vec::new();
current_read_alignments.push(alignment);
}
current_read_name = read_name;
}
used_count += process_one_read(current_read_alignments, pileups);
used_count += process_one_read(current_read_alignments, pileups, max_errors);

if alignment_count == 0 {
quit_with_error(&format!("no alignments in {:?}", filename))
Expand All @@ -127,7 +154,8 @@ pub fn add_to_pileup(filename: &PathBuf,
}


fn process_one_read(alignments: Vec<Alignment>, pileups: &mut HashMap<String, Pileup>) -> usize {
fn process_one_read(alignments: Vec<Alignment>, pileups: &mut HashMap<String, Pileup>,
max_errors: i32) -> usize {
// TODO
// TODO
// TODO
Expand All @@ -144,5 +172,17 @@ fn process_one_read(alignments: Vec<Alignment>, pileups: &mut HashMap<String, Pi
}


fn get_expanded_cigar(cigar: &str) -> String {
let mut expanded_cigar = String::new();
for m in RE.find_iter(cigar) {
let num: i32 = cigar[m.start()..m.end()-1].parse().unwrap();
let letter = &cigar[m.end()-1..m.end()];
for _ in 0..num {
expanded_cigar.push_str(letter);
}
}
expanded_cigar
}


// TODO: define reverse complement function
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ fn main() {
starting_message(&opts);
let (seq_names, mut pileups) = load_assembly(&opts.assembly);
for s in &opts.sam {
alignment::process_sam(&s, &mut pileups);
alignment::process_sam(&s, &mut pileups, opts.max_errors);
}

// TEMP
Expand Down

0 comments on commit 3190be0

Please sign in to comment.