From f7b6df72388cb53da8d19b679cdcd420c7c7e765 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=B6lder?= Date: Fri, 28 Feb 2020 13:57:23 +0100 Subject: [PATCH 1/6] Changes for rust_htslib --- Cargo.toml | 2 +- src/bam/call_consensus_reads/mod.rs | 4 ++-- src/bam/call_consensus_reads/pipeline.rs | 6 +++--- src/bcf/baf.rs | 6 +++--- src/bcf/fix_iupac_alleles.rs | 4 ++-- src/bcf/match_variants.rs | 26 +++++++++++------------- src/bcf/to_txt.rs | 13 +++++------- 7 files changed, 28 insertions(+), 33 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3927d3c5..8df29853 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ indicatif = "0.11" itertools = "0.6" log = "0.4.6" fern = "0.5.7" -rust-htslib = "0.24" +rust-htslib = "0.26" csv = "1.0.2" rustc-serialize = "0.3" quick-error = "1.2" diff --git a/src/bam/call_consensus_reads/mod.rs b/src/bam/call_consensus_reads/mod.rs index 187b5784..0eead2e8 100644 --- a/src/bam/call_consensus_reads/mod.rs +++ b/src/bam/call_consensus_reads/mod.rs @@ -4,7 +4,7 @@ mod pipeline; use log::info; use pipeline::CallConsensusRead; use rust_htslib::bam; -use rust_htslib::bam::{Header, Read}; +use rust_htslib::bam::{Header, Read, Format}; use std::error::Error; pub fn call_consensus_reads_from_paths( @@ -16,7 +16,7 @@ pub fn call_consensus_reads_from_paths( info!("Reading input files:\n {}", bam_in); info!("Writing output to:\n {}", bam_out); let bam_reader = bam::Reader::from_path(bam_in)?; - let bam_writer = bam::Writer::from_path(bam_out, &Header::from_template(bam_reader.header()))?; + let bam_writer = bam::Writer::from_path(bam_out, &Header::from_template(bam_reader.header()), Format::BAM)?; CallConsensusRead::new(bam_reader, bam_writer, seq_dist, verbose_read_names) .call_consensus_reads() } diff --git a/src/bam/call_consensus_reads/pipeline.rs b/src/bam/call_consensus_reads/pipeline.rs index c4a1fadc..fc92864a 100644 --- a/src/bam/call_consensus_reads/pipeline.rs +++ b/src/bam/call_consensus_reads/pipeline.rs @@ -65,7 +65,7 @@ impl CallConsensusRead { Some(record_pair) => { //For right record save end position and duplicate group ID group_end_idx - .entry(record.cigar_cached().unwrap().end_pos()? - 1) + .entry(record.cigar_cached().unwrap().end_pos() - 1) .or_insert_with(HashSet::new) .insert(duplicate_id.integer()); match record_pair { @@ -86,7 +86,7 @@ impl CallConsensusRead { if !record.is_paired() || record.is_mate_unmapped() { //If right or single record save end position and duplicate group ID group_end_idx - .entry(record.cigar_cached().unwrap().end_pos()? - 1) + .entry(record.cigar_cached().unwrap().end_pos() - 1) .or_insert_with(HashSet::new) .insert(duplicate_id.integer()); record_storage.insert( @@ -314,7 +314,7 @@ pub fn calc_consensus_complete_groups( } fn calc_overlap(l_rec: &bam::Record, r_rec: &bam::Record) -> Result> { - let l_end_pos = l_rec.cigar_cached().unwrap().end_pos()?; + let l_end_pos = l_rec.cigar_cached().unwrap().end_pos(); let r_start_pos = r_rec.pos(); let l_softclips = count_softclips(l_rec.cigar_cached().unwrap().into_iter().rev())?; let r_softclips = count_softclips(r_rec.cigar_cached().unwrap().into_iter())?; diff --git a/src/bcf/baf.rs b/src/bcf/baf.rs index 0bedce2e..0207e56c 100644 --- a/src/bcf/baf.rs +++ b/src/bcf/baf.rs @@ -8,8 +8,8 @@ use itertools::repeat_n; use itertools::Itertools; use rust_htslib::bcf; -use rust_htslib::bcf::Read; -use rust_htslib::prelude::*; +use rust_htslib::bcf::record::Numeric; +use rust_htslib::bcf::{Read, Format}; use std::error::Error; use std::f32; @@ -19,7 +19,7 @@ pub fn calculate_baf() -> Result<(), Box> { let mut header = bcf::Header::from_template(reader.header()); header.push_record(b"##FORMAT="); - let mut writer = bcf::Writer::from_stdout(&header, false, false)?; + let mut writer = bcf::Writer::from_stdout(&header, false, Format::BCF)?; for record in reader.records() { let mut record = record?; diff --git a/src/bcf/fix_iupac_alleles.rs b/src/bcf/fix_iupac_alleles.rs index f968b640..e96b077e 100644 --- a/src/bcf/fix_iupac_alleles.rs +++ b/src/bcf/fix_iupac_alleles.rs @@ -2,11 +2,11 @@ use std::error::Error; use itertools::Itertools; use bio::alphabets::dna::n_alphabet; -use rust_htslib::bcf::{self, Read}; +use rust_htslib::bcf::{self, Read, Format}; pub fn fix_iupac_alleles() -> Result<(), Box> { let mut inbcf = bcf::Reader::from_stdin()?; - let mut outbcf = bcf::Writer::from_stdout(&bcf::Header::from_template(inbcf.header()), false, false)?; + let mut outbcf = bcf::Writer::from_stdout(&bcf::Header::from_template(inbcf.header()), false, Format::BCF)?; let valid_alphabet = n_alphabet(); for res in inbcf.records() { diff --git a/src/bcf/match_variants.rs b/src/bcf/match_variants.rs index d3308420..e138a02f 100644 --- a/src/bcf/match_variants.rs +++ b/src/bcf/match_variants.rs @@ -13,7 +13,7 @@ use itertools::Itertools; use log::{info, warn}; use quick_error::quick_error; use rust_htslib::bcf; -use rust_htslib::bcf::Read; +use rust_htslib::bcf::{Read, Format}; use std::collections::{btree_map, BTreeMap, HashMap}; use std::error::Error; use std::str; @@ -29,12 +29,11 @@ impl VarIndex { let mut i = 0; let mut rec = reader.empty_record(); loop { - if let Err(e) = reader.read(&mut rec) { - if e.is_eof() { - break; - } - return Err(Box::new(e)); - } + match reader.read(&mut rec) { + Ok(true) => (), + Ok(false) => break, + Err(e) => return Err(Box::new(e)) + }; if let Some(rid) = rec.rid() { let chrom = reader.header().rid2name(rid)?; let recs = inner.entry(chrom.to_owned()).or_insert(BTreeMap::new()); @@ -74,18 +73,17 @@ pub fn match_variants( alternative allele separately). For indels, matching is fuzzy: distance of centres <= {}, difference of \ lengths <= {}\">", max_dist, max_len_diff).as_bytes() ); - let mut outbcf = bcf::Writer::from_path(&"-", &header, false, false)?; + let mut outbcf = bcf::Writer::from_path(&"-", &header, false, Format::BCF)?; let index = VarIndex::new(bcf::Reader::from_path(matchbcf)?, max_dist)?; let mut rec = inbcf.empty_record(); let mut i = 0; loop { - if let Err(e) = inbcf.read(&mut rec) { - if e.is_eof() { - break; - } - return Err(Box::new(e)); - } + match inbcf.read(&mut rec) { + Ok(true) => (), + Ok(false) => break, + Err(e) => return Err(Box::new(e)) + }; outbcf.translate(&mut rec); if let Some(rid) = rec.rid() { diff --git a/src/bcf/to_txt.rs b/src/bcf/to_txt.rs index 57a33ee8..15c1f298 100644 --- a/src/bcf/to_txt.rs +++ b/src/bcf/to_txt.rs @@ -108,14 +108,11 @@ pub fn to_txt( writer.newline()?; let mut rec = reader.empty_record(); loop { - if let Err(e) = reader.read(&mut rec) { - if e.is_eof() { - break; - } else { - return Err(Box::new(e)); - } - } - + match reader.read(&mut rec) { + Ok(true) => (), + Ok(false) => break, + Err(e) => return Err(Box::new(e)) + }; let alleles = rec .alleles() .into_iter() From 7744ba8f10f41e80dfd4091f86f4f160991a4322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=B6lder?= Date: Fri, 28 Feb 2020 13:58:36 +0100 Subject: [PATCH 2/6] Changes for rust_htslib --- src/bcf/annotate_dgidb.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bcf/annotate_dgidb.rs b/src/bcf/annotate_dgidb.rs index 496e9273..fbbbc13a 100644 --- a/src/bcf/annotate_dgidb.rs +++ b/src/bcf/annotate_dgidb.rs @@ -2,7 +2,7 @@ use itertools::Itertools; use regex::Regex; use reqwest; use rust_htslib::bcf; -use rust_htslib::bcf::Read; +use rust_htslib::bcf::{Read, Format}; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; use std::error::Error; @@ -122,7 +122,7 @@ fn modify_vcf_entries( let mut reader = bcf::Reader::from_path(vcf_path)?; let mut header = bcf::header::Header::from_template(reader.header()); header.push_record(format!("##INFO=", field_name).as_bytes()); - let mut writer = bcf::Writer::from_stdout(&header, true, true)?; + let mut writer = bcf::Writer::from_stdout(&header, true, Format::VCF)?; match gene_drug_interactions_opt { None => { for result in reader.records() { From 053bff11d7adf5b381836660ab474f10b49c5433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=B6lder?= Date: Fri, 28 Feb 2020 14:51:05 +0100 Subject: [PATCH 3/6] Fixed redundancy in dgidb annotation --- src/bcf/annotate_dgidb.rs | 4 ++-- tests/expected/annotate_dgidb_test.vcf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bcf/annotate_dgidb.rs b/src/bcf/annotate_dgidb.rs index fbbbc13a..50570495 100644 --- a/src/bcf/annotate_dgidb.rs +++ b/src/bcf/annotate_dgidb.rs @@ -97,13 +97,12 @@ fn collect_genes(vcf_path: &str) -> Result, Box> { Ok(total_genes) } -//TODO Remove split by ',' when updating to latest htslib release fn extract_genes<'a>( rec: &'a mut bcf::Record, ) -> Result + 'a>, Box> { let annotation = rec.info("ANN".as_bytes()).string()?; match annotation { - Some(transcripts) => Ok(Some(transcripts[0].split(|c| *c == b',').map( + Some(transcripts) => Ok(Some(transcripts.into_iter().map( |transcript| { str::from_utf8(transcript.split(|c| *c == b'|').nth(3).unwrap()) .unwrap() @@ -137,6 +136,7 @@ fn modify_vcf_entries( writer.translate(&mut rec); let genes = extract_genes(&mut rec)?.map(|genes| genes.collect_vec()); if let Some(mut genes) = genes { + genes.sort(); genes.dedup(); let field_entries = build_dgidb_field(&gene_drug_interactions, genes)?; let field_entries: Vec<&[u8]> = diff --git a/tests/expected/annotate_dgidb_test.vcf b/tests/expected/annotate_dgidb_test.vcf index 1e2769e3..b1dd5c15 100644 --- a/tests/expected/annotate_dgidb_test.vcf +++ b/tests/expected/annotate_dgidb_test.vcf @@ -60,4 +60,4 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT normal tumor -8 27461737 . C G . . SVLEN=.;PROB_SOMATIC_NORMAL=18.3288;PROB_ABSENT=987.672;PROB_GERMLINE_HET=349.144;PROB_ARTIFACT=272.885;PROB_SOMATIC_TUMOR_MEDIUM=0.168458;PROB_SOMATIC_TUMOR_HIGH=16.3168;PROB_SOMATIC_TUMOR_LOW=59.5167;PROB_GERMLINE_HOM=5489.1;ANN=G|missense_variant|MODERATE|CHRNA2|ENSG00000120903|transcript|ENST00000240132.7|protein_coding|7/7|c.1437G>C|p.Lys479Asn|1601/1735|1437/1545|479/514||,G|missense_variant|MODERATE|CHRNA2|ENSG00000120903|transcript|ENST00000407991.2|protein_coding|7/7|c.1482G>C|p.Lys494Asn|2091/4067|1482/1590|494/529||,G|missense_variant|MODERATE|CHRNA2|ENSG00000120903|transcript|ENST00000520933.7|protein_coding|7/7|c.1416G>C|p.Lys472Asn|1858/2497|1416/1524|472/507||,G|3_prime_UTR_variant|MODIFIER|CHRNA2|ENSG00000120903|transcript|ENST00000523695.5|nonsense_mediated_decay|7/7|c.*884G>C|||||2108|,G|downstream_gene_variant|MODIFIER|PTK2B|ENSG00000120899|transcript|ENST00000346049.9|protein_coding||c.*3228C>G|||||2351|,G|downstream_gene_variant|MODIFIER|PTK2B|ENSG00000120899|transcript|ENST00000397501.5|protein_coding||c.*3228C>G|||||2347|,G|downstream_gene_variant|MODIFIER|PTK2B|ENSG00000120899|transcript|ENST00000517339.5|protein_coding||c.*3228C>G|||||2351|,G|downstream_gene_variant|MODIFIER|PTK2B|ENSG00000120899|transcript|ENST00000522245.1|retained_intron||n.*3263C>G|||||3263|,G|downstream_gene_variant|MODIFIER|CHRNA2|ENSG00000120903|transcript|ENST00000522008.1|retained_intron||n.*1891G>C|||||1891|,G|downstream_gene_variant|MODIFIER|PTK2B|ENSG00000120899|transcript|ENST00000420218.3|protein_coding||c.*3228C>G|||||2346|,G|downstream_gene_variant|MODIFIER|CHRNA2|ENSG00000120903|transcript|ENST00000637241.1|nonsense_mediated_decay||c.*7916G>C|||||1654|,G|non_coding_transcript_exon_variant|MODIFIER|CHRNA2|ENSG00000120903|transcript|ENST00000520600.1|retained_intron|2/2|n.307G>C||||||;dbNSFP_DEOGEN2_score=0.805964,.,.;dbNSFP_Ensembl_proteinid=ENSP00000385026,ENSP00000429616,ENSP00000240132;dbNSFP_Ensembl_transcriptid=ENST00000407991,ENST00000520933,ENST00000240132;dbNSFP_FATHMM_score=-0.7,.,-0.7;dbNSFP_LRT_score=0;dbNSFP_M_CAP_score=0.225545;dbNSFP_MPC_score=0.77049,.,.;dbNSFP_MVP_score=0.925048,0.925048,0.925048;dbNSFP_MetaLR_score=0.5717;dbNSFP_MetaSVM_score=0.1915;dbNSFP_MutPred_score=0.803;dbNSFP_MutationAssessor_score=3.385,.,.;dbNSFP_MutationTaster_score=0.999992,0.999992,0.999992;dbNSFP_PROVEAN_score=-4.7,.,-4.7;dbNSFP_Polyphen2_HDIV_score=0.972,.,.;dbNSFP_Polyphen2_HVAR_score=0.968,.,.;dbNSFP_PrimateAI_score=0.582399;dbNSFP_REVEL_score=0.629;dbNSFP_SIFT4G_score=0.011,0.01,0.01;dbNSFP_SIFT_score=0.001,.,0;dbNSFP_Uniprot_acc=Q15822,A0A0X1KG79,Q15822-2;dbNSFP_Uniprot_entry=ACHA2_HUMAN,A0A0X1KG79_HUMAN,ACHA2_HUMAN;dbNSFP_VEST4_score=0.431,0.426,0.641;dgiDB_drugs=CHRNA2|METOCURINE IODIDE|antagonist,CHRNA2|GALLAMINE TRIETHIODIDE|antagonist,CHRNA2|MECAMYLAMINE HYDROCHLORIDE|inhibitor,CHRNA2|PROCAINE|antagonist,CHRNA2|ATRACURIUM BESYLATE|inhibitor,CHRNA2|DOXACURIUM CHLORIDE|antagonist,CHRNA2|TUBOCURARINE|antagonist,CHRNA2|MIVACURIUM|antagonist,CHRNA2|DECAMETHONIUM|antagonist,CHRNA2|DECAMETHONIUM|partial agonist,CHRNA2|METOCURINE|antagonist,CHRNA2|PIPECURONIUM|antagonist,CHRNA2|EPIBATIDINE|agonist,CHRNA2|CP-601927|agonist,CHRNA2|HEXAMETHONIUM|channel blocker,CHRNA2|ATRACURIUM|.,CHRNA2|CISATRACURIUM BESYLATE|.,CHRNA2|DOXACURIUM|.,CHRNA2|CARBACHOL|antagonist,CHRNA2|ROCURONIUM|antagonist,CHRNA2|PANCURONIUM|.,CHRNA2|VECURONIUM|antagonist,CHRNA2|GANTACURIUM|.,CHRNA2|NICOTINE|.,CHRNA2|BIPERIDEN|.,CHRNA2|LEVALLORPHAN|antagonist,PTK2B|LEFLUNOMIDE|antagonist,PTK2B|GENISTEIN|.,PTK2B|CHEMBL509485|.,PTK2B|ALOISINE|inhibitor,PTK2B|CHEMBL458997|inhibitor,PTK2B|PF-562271|inhibitor,PTK2B|RUXOLITINIB PHOSPHATE|inhibitor,PTK2B|PIBOSEROD|inhibitor,PTK2B|DEFACTINIB|inhibitor,CHRNA2|METOCURINE IODIDE|antagonist,CHRNA2|GALLAMINE TRIETHIODIDE|antagonist,CHRNA2|MECAMYLAMINE HYDROCHLORIDE|inhibitor,CHRNA2|PROCAINE|antagonist,CHRNA2|ATRACURIUM BESYLATE|inhibitor,CHRNA2|DOXACURIUM CHLORIDE|antagonist,CHRNA2|TUBOCURARINE|antagonist,CHRNA2|MIVACURIUM|antagonist,CHRNA2|DECAMETHONIUM|antagonist,CHRNA2|DECAMETHONIUM|partial agonist,CHRNA2|METOCURINE|antagonist,CHRNA2|PIPECURONIUM|antagonist,CHRNA2|EPIBATIDINE|agonist,CHRNA2|CP-601927|agonist,CHRNA2|HEXAMETHONIUM|channel blocker,CHRNA2|ATRACURIUM|.,CHRNA2|CISATRACURIUM BESYLATE|.,CHRNA2|DOXACURIUM|.,CHRNA2|CARBACHOL|antagonist,CHRNA2|ROCURONIUM|antagonist,CHRNA2|PANCURONIUM|.,CHRNA2|VECURONIUM|antagonist,CHRNA2|GANTACURIUM|.,CHRNA2|NICOTINE|.,CHRNA2|BIPERIDEN|.,CHRNA2|LEVALLORPHAN|antagonist,PTK2B|LEFLUNOMIDE|antagonist,PTK2B|GENISTEIN|.,PTK2B|CHEMBL509485|.,PTK2B|ALOISINE|inhibitor,PTK2B|CHEMBL458997|inhibitor,PTK2B|PF-562271|inhibitor,PTK2B|RUXOLITINIB PHOSPHATE|inhibitor,PTK2B|PIBOSEROD|inhibitor,PTK2B|DEFACTINIB|inhibitor,CHRNA2|METOCURINE IODIDE|antagonist,CHRNA2|GALLAMINE TRIETHIODIDE|antagonist,CHRNA2|MECAMYLAMINE HYDROCHLORIDE|inhibitor,CHRNA2|PROCAINE|antagonist,CHRNA2|ATRACURIUM BESYLATE|inhibitor,CHRNA2|DOXACURIUM CHLORIDE|antagonist,CHRNA2|TUBOCURARINE|antagonist,CHRNA2|MIVACURIUM|antagonist,CHRNA2|DECAMETHONIUM|antagonist,CHRNA2|DECAMETHONIUM|partial agonist,CHRNA2|METOCURINE|antagonist,CHRNA2|PIPECURONIUM|antagonist,CHRNA2|EPIBATIDINE|agonist,CHRNA2|CP-601927|agonist,CHRNA2|HEXAMETHONIUM|channel blocker,CHRNA2|ATRACURIUM|.,CHRNA2|CISATRACURIUM BESYLATE|.,CHRNA2|DOXACURIUM|.,CHRNA2|CARBACHOL|antagonist,CHRNA2|ROCURONIUM|antagonist,CHRNA2|PANCURONIUM|.,CHRNA2|VECURONIUM|antagonist,CHRNA2|GANTACURIUM|.,CHRNA2|NICOTINE|.,CHRNA2|BIPERIDEN|.,CHRNA2|LEVALLORPHAN|antagonist DP:AF:OBS:SB 116:0:83N-33N+:. 145:0.227034:91N-23V-21N+10V+:. +8 27461737 . C G . . SVLEN=.;PROB_SOMATIC_NORMAL=18.3288;PROB_ABSENT=987.672;PROB_GERMLINE_HET=349.144;PROB_ARTIFACT=272.885;PROB_SOMATIC_TUMOR_MEDIUM=0.168458;PROB_SOMATIC_TUMOR_HIGH=16.3168;PROB_SOMATIC_TUMOR_LOW=59.5167;PROB_GERMLINE_HOM=5489.1;ANN=G|missense_variant|MODERATE|CHRNA2|ENSG00000120903|transcript|ENST00000240132.7|protein_coding|7/7|c.1437G>C|p.Lys479Asn|1601/1735|1437/1545|479/514||,G|missense_variant|MODERATE|CHRNA2|ENSG00000120903|transcript|ENST00000407991.2|protein_coding|7/7|c.1482G>C|p.Lys494Asn|2091/4067|1482/1590|494/529||,G|missense_variant|MODERATE|CHRNA2|ENSG00000120903|transcript|ENST00000520933.7|protein_coding|7/7|c.1416G>C|p.Lys472Asn|1858/2497|1416/1524|472/507||,G|3_prime_UTR_variant|MODIFIER|CHRNA2|ENSG00000120903|transcript|ENST00000523695.5|nonsense_mediated_decay|7/7|c.*884G>C|||||2108|,G|downstream_gene_variant|MODIFIER|PTK2B|ENSG00000120899|transcript|ENST00000346049.9|protein_coding||c.*3228C>G|||||2351|,G|downstream_gene_variant|MODIFIER|PTK2B|ENSG00000120899|transcript|ENST00000397501.5|protein_coding||c.*3228C>G|||||2347|,G|downstream_gene_variant|MODIFIER|PTK2B|ENSG00000120899|transcript|ENST00000517339.5|protein_coding||c.*3228C>G|||||2351|,G|downstream_gene_variant|MODIFIER|PTK2B|ENSG00000120899|transcript|ENST00000522245.1|retained_intron||n.*3263C>G|||||3263|,G|downstream_gene_variant|MODIFIER|CHRNA2|ENSG00000120903|transcript|ENST00000522008.1|retained_intron||n.*1891G>C|||||1891|,G|downstream_gene_variant|MODIFIER|PTK2B|ENSG00000120899|transcript|ENST00000420218.3|protein_coding||c.*3228C>G|||||2346|,G|downstream_gene_variant|MODIFIER|CHRNA2|ENSG00000120903|transcript|ENST00000637241.1|nonsense_mediated_decay||c.*7916G>C|||||1654|,G|non_coding_transcript_exon_variant|MODIFIER|CHRNA2|ENSG00000120903|transcript|ENST00000520600.1|retained_intron|2/2|n.307G>C||||||;dbNSFP_DEOGEN2_score=0.805964,.,.;dbNSFP_Ensembl_proteinid=ENSP00000385026,ENSP00000429616,ENSP00000240132;dbNSFP_Ensembl_transcriptid=ENST00000407991,ENST00000520933,ENST00000240132;dbNSFP_FATHMM_score=-0.7,.,-0.7;dbNSFP_LRT_score=0;dbNSFP_M_CAP_score=0.225545;dbNSFP_MPC_score=0.77049,.,.;dbNSFP_MVP_score=0.925048,0.925048,0.925048;dbNSFP_MetaLR_score=0.5717;dbNSFP_MetaSVM_score=0.1915;dbNSFP_MutPred_score=0.803;dbNSFP_MutationAssessor_score=3.385,.,.;dbNSFP_MutationTaster_score=0.999992,0.999992,0.999992;dbNSFP_PROVEAN_score=-4.7,.,-4.7;dbNSFP_Polyphen2_HDIV_score=0.972,.,.;dbNSFP_Polyphen2_HVAR_score=0.968,.,.;dbNSFP_PrimateAI_score=0.582399;dbNSFP_REVEL_score=0.629;dbNSFP_SIFT4G_score=0.011,0.01,0.01;dbNSFP_SIFT_score=0.001,.,0;dbNSFP_Uniprot_acc=Q15822,A0A0X1KG79,Q15822-2;dbNSFP_Uniprot_entry=ACHA2_HUMAN,A0A0X1KG79_HUMAN,ACHA2_HUMAN;dbNSFP_VEST4_score=0.431,0.426,0.641;dgiDB_drugs=CHRNA2|METOCURINE IODIDE|antagonist,CHRNA2|GALLAMINE TRIETHIODIDE|antagonist,CHRNA2|MECAMYLAMINE HYDROCHLORIDE|inhibitor,CHRNA2|PROCAINE|antagonist,CHRNA2|ATRACURIUM BESYLATE|inhibitor,CHRNA2|DOXACURIUM CHLORIDE|antagonist,CHRNA2|TUBOCURARINE|antagonist,CHRNA2|MIVACURIUM|antagonist,CHRNA2|DECAMETHONIUM|antagonist,CHRNA2|DECAMETHONIUM|partial agonist,CHRNA2|METOCURINE|antagonist,CHRNA2|PIPECURONIUM|antagonist,CHRNA2|EPIBATIDINE|agonist,CHRNA2|CP-601927|agonist,CHRNA2|HEXAMETHONIUM|channel blocker,CHRNA2|ATRACURIUM|.,CHRNA2|CISATRACURIUM BESYLATE|.,CHRNA2|DOXACURIUM|.,CHRNA2|CARBACHOL|antagonist,CHRNA2|ROCURONIUM|antagonist,CHRNA2|PANCURONIUM|.,CHRNA2|VECURONIUM|antagonist,CHRNA2|GANTACURIUM|.,CHRNA2|NICOTINE|.,CHRNA2|BIPERIDEN|.,CHRNA2|LEVALLORPHAN|antagonist,PTK2B|LEFLUNOMIDE|antagonist,PTK2B|GENISTEIN|.,PTK2B|CHEMBL509485|.,PTK2B|ALOISINE|inhibitor,PTK2B|CHEMBL458997|inhibitor,PTK2B|PF-562271|inhibitor,PTK2B|RUXOLITINIB PHOSPHATE|inhibitor,PTK2B|PIBOSEROD|inhibitor,PTK2B|DEFACTINIB|inhibitor DP:AF:OBS:SB 116:0:83N-33N+:. 145:0.227034:91N-23V-21N+10V+:. From 66a1eec90cc7311c29f00ccb9073a3298043a94d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=B6lder?= Date: Fri, 28 Feb 2020 15:19:32 +0100 Subject: [PATCH 4/6] Updated rust-bio --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8df29853..ebfcd984 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ readme = "README.md" [dependencies] -bio = "0.27" +bio = "0.29" clap = { version = "2.31", features = ["yaml", "color", "suggestions"]} indicatif = "0.11" itertools = "0.6" From fc4a5d96436dc7f694c9af0e1b5ecc8c8ce56575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=B6lder?= Date: Fri, 28 Feb 2020 15:35:07 +0100 Subject: [PATCH 5/6] Updated rust-bio --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ebfcd984..5c03cdcf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ readme = "README.md" [dependencies] -bio = "0.29" +bio = "0.30" clap = { version = "2.31", features = ["yaml", "color", "suggestions"]} indicatif = "0.11" itertools = "0.6" From ab57edb1236af3ff46d0c4f5e079c688da2eac68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=B6lder?= Date: Fri, 28 Feb 2020 15:49:10 +0100 Subject: [PATCH 6/6] Cargo fmt --- src/bam/call_consensus_reads/mod.rs | 8 ++++-- src/bcf/annotate_dgidb.rs | 14 +++++----- src/bcf/baf.rs | 2 +- src/bcf/fix_iupac_alleles.rs | 40 ++++++++++++++++++----------- src/bcf/match_variants.rs | 6 ++--- src/bcf/mod.rs | 2 +- src/bcf/to_txt.rs | 2 +- tests/lib.rs | 12 ++++++--- 8 files changed, 52 insertions(+), 34 deletions(-) diff --git a/src/bam/call_consensus_reads/mod.rs b/src/bam/call_consensus_reads/mod.rs index 0eead2e8..f6ef13a8 100644 --- a/src/bam/call_consensus_reads/mod.rs +++ b/src/bam/call_consensus_reads/mod.rs @@ -4,7 +4,7 @@ mod pipeline; use log::info; use pipeline::CallConsensusRead; use rust_htslib::bam; -use rust_htslib::bam::{Header, Read, Format}; +use rust_htslib::bam::{Format, Header, Read}; use std::error::Error; pub fn call_consensus_reads_from_paths( @@ -16,7 +16,11 @@ pub fn call_consensus_reads_from_paths( info!("Reading input files:\n {}", bam_in); info!("Writing output to:\n {}", bam_out); let bam_reader = bam::Reader::from_path(bam_in)?; - let bam_writer = bam::Writer::from_path(bam_out, &Header::from_template(bam_reader.header()), Format::BAM)?; + let bam_writer = bam::Writer::from_path( + bam_out, + &Header::from_template(bam_reader.header()), + Format::BAM, + )?; CallConsensusRead::new(bam_reader, bam_writer, seq_dist, verbose_read_names) .call_consensus_reads() } diff --git a/src/bcf/annotate_dgidb.rs b/src/bcf/annotate_dgidb.rs index 50570495..dfea7b43 100644 --- a/src/bcf/annotate_dgidb.rs +++ b/src/bcf/annotate_dgidb.rs @@ -2,7 +2,7 @@ use itertools::Itertools; use regex::Regex; use reqwest; use rust_htslib::bcf; -use rust_htslib::bcf::{Read, Format}; +use rust_htslib::bcf::{Format, Read}; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; use std::error::Error; @@ -102,13 +102,11 @@ fn extract_genes<'a>( ) -> Result + 'a>, Box> { let annotation = rec.info("ANN".as_bytes()).string()?; match annotation { - Some(transcripts) => Ok(Some(transcripts.into_iter().map( - |transcript| { - str::from_utf8(transcript.split(|c| *c == b'|').nth(3).unwrap()) - .unwrap() - .to_owned() - }, - ))), + Some(transcripts) => Ok(Some(transcripts.into_iter().map(|transcript| { + str::from_utf8(transcript.split(|c| *c == b'|').nth(3).unwrap()) + .unwrap() + .to_owned() + }))), None => Ok(None), } } diff --git a/src/bcf/baf.rs b/src/bcf/baf.rs index 0207e56c..48eaa175 100644 --- a/src/bcf/baf.rs +++ b/src/bcf/baf.rs @@ -9,7 +9,7 @@ use itertools::repeat_n; use itertools::Itertools; use rust_htslib::bcf; use rust_htslib::bcf::record::Numeric; -use rust_htslib::bcf::{Read, Format}; +use rust_htslib::bcf::{Format, Read}; use std::error::Error; use std::f32; diff --git a/src/bcf/fix_iupac_alleles.rs b/src/bcf/fix_iupac_alleles.rs index e96b077e..6e6f9a8c 100644 --- a/src/bcf/fix_iupac_alleles.rs +++ b/src/bcf/fix_iupac_alleles.rs @@ -1,12 +1,16 @@ use std::error::Error; -use itertools::Itertools; use bio::alphabets::dna::n_alphabet; -use rust_htslib::bcf::{self, Read, Format}; +use itertools::Itertools; +use rust_htslib::bcf::{self, Format, Read}; pub fn fix_iupac_alleles() -> Result<(), Box> { let mut inbcf = bcf::Reader::from_stdin()?; - let mut outbcf = bcf::Writer::from_stdout(&bcf::Header::from_template(inbcf.header()), false, Format::BCF)?; + let mut outbcf = bcf::Writer::from_stdout( + &bcf::Header::from_template(inbcf.header()), + false, + Format::BCF, + )?; let valid_alphabet = n_alphabet(); for res in inbcf.records() { @@ -14,22 +18,28 @@ pub fn fix_iupac_alleles() -> Result<(), Box> { let alleles = rec.alleles(); if !alleles.iter().all(|allele| valid_alphabet.is_word(*allele)) { - let fixed = alleles.into_iter().map(|allele| { - let fixed = allele.into_iter().map(|base| { - if valid_alphabet.is_word(&[*base]) { - *base - } else { - b'N' - } - }).collect_vec(); - fixed - }).collect_vec(); + let fixed = alleles + .into_iter() + .map(|allele| { + let fixed = allele + .into_iter() + .map(|base| { + if valid_alphabet.is_word(&[*base]) { + *base + } else { + b'N' + } + }) + .collect_vec(); + fixed + }) + .collect_vec(); rec.set_alleles(&fixed.iter().map(|allele| allele.as_slice()).collect_vec())?; } - + outbcf.write(&rec)?; } Ok(()) -} \ No newline at end of file +} diff --git a/src/bcf/match_variants.rs b/src/bcf/match_variants.rs index e138a02f..8e0662ca 100644 --- a/src/bcf/match_variants.rs +++ b/src/bcf/match_variants.rs @@ -13,7 +13,7 @@ use itertools::Itertools; use log::{info, warn}; use quick_error::quick_error; use rust_htslib::bcf; -use rust_htslib::bcf::{Read, Format}; +use rust_htslib::bcf::{Format, Read}; use std::collections::{btree_map, BTreeMap, HashMap}; use std::error::Error; use std::str; @@ -32,7 +32,7 @@ impl VarIndex { match reader.read(&mut rec) { Ok(true) => (), Ok(false) => break, - Err(e) => return Err(Box::new(e)) + Err(e) => return Err(Box::new(e)), }; if let Some(rid) = rec.rid() { let chrom = reader.header().rid2name(rid)?; @@ -82,7 +82,7 @@ pub fn match_variants( match inbcf.read(&mut rec) { Ok(true) => (), Ok(false) => break, - Err(e) => return Err(Box::new(e)) + Err(e) => return Err(Box::new(e)), }; outbcf.translate(&mut rec); diff --git a/src/bcf/mod.rs b/src/bcf/mod.rs index 56c7a133..9a0f7dbf 100644 --- a/src/bcf/mod.rs +++ b/src/bcf/mod.rs @@ -1,6 +1,6 @@ //! Tools that work on VCF and BCF files. pub mod annotate_dgidb; pub mod baf; +pub mod fix_iupac_alleles; pub mod match_variants; pub mod to_txt; -pub mod fix_iupac_alleles; \ No newline at end of file diff --git a/src/bcf/to_txt.rs b/src/bcf/to_txt.rs index 15c1f298..08584816 100644 --- a/src/bcf/to_txt.rs +++ b/src/bcf/to_txt.rs @@ -111,7 +111,7 @@ pub fn to_txt( match reader.read(&mut rec) { Ok(true) => (), Ok(false) => break, - Err(e) => return Err(Box::new(e)) + Err(e) => return Err(Box::new(e)), }; let alleles = rec .alleles() diff --git a/tests/lib.rs b/tests/lib.rs index 5ee17f6b..a2f990b1 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -128,9 +128,15 @@ fn vcf_match_same() { #[test] fn vcf_fix_iupac_alleles() { assert!(Command::new("bash") - .arg("-c") - .arg("target/debug/rbt vcf-fix-iupac-alleles < tests/test-iupac.vcf > tests/iupac-fixed.bcf") - .spawn().unwrap().wait().unwrap().success()); + .arg("-c") + .arg( + "target/debug/rbt vcf-fix-iupac-alleles < tests/test-iupac.vcf > tests/iupac-fixed.bcf" + ) + .spawn() + .unwrap() + .wait() + .unwrap() + .success()); test_output("tests/iupac-fixed.bcf", "tests/expected/iupac-fixed.bcf"); }