diff --git a/CHANGELOG.md b/CHANGELOG.md index 1457e12..18b1e5d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# v1.4.3 +## Fixed +* Replaced a panic caused by a chromosome appearing in a VCF but not in the BAM file with a more descriptive error message +* Fixed an error caused by a multi-sample VCF with a mixture of haploid and diploid genotypes + # v1.4.2 ## Changes * Removes a 1 basepair shift from tandem repeat region calculation to support anchor base changes in TRGT v1.0.0; internal results are nearly identical before and after this change diff --git a/Cargo.lock b/Cargo.lock index 43df4e7..28deead 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -362,9 +362,12 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.7" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7684a49fb1af197853ef7b2ee694bc1f5b4179556f1e5710e1760c5db6f5e929" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] [[package]] name = "derive-new" @@ -523,7 +526,7 @@ dependencies = [ [[package]] name = "hiphase" -version = "1.4.2" +version = "1.4.3" dependencies = [ "bio", "bit-vec", @@ -810,6 +813,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-integer" version = "0.1.45" @@ -909,6 +918,12 @@ version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.16" @@ -1113,22 +1128,22 @@ checksum = 
"d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" [[package]] name = "serde" -version = "1.0.147" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" +checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.147" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" +checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" dependencies = [ "proc-macro2", "quote", - "syn 1.0.102", + "syn 2.0.48", ] [[package]] @@ -1261,14 +1276,16 @@ dependencies = [ [[package]] name = "time" -version = "0.3.25" +version = "0.3.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fdd63d58b18d663fbdf70e049f00a22c8e42be082203be7f26589213cd75ea" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" dependencies = [ "deranged", "itoa 1.0.9", "libc", + "num-conv", "num_threads", + "powerfmt", "serde", "time-core", "time-macros", @@ -1276,16 +1293,17 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.11" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb71511c991639bb078fd5bf97757e03914361c48100d52878b8e52b46fb92cd" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" dependencies = [ + "num-conv", "time-core", ] @@ -1377,7 +1395,7 @@ checksum = 
"bbc5ad0d9d26b2c49a5ab7da76c3e79d3ee37e7821799f8223fcb8f2f391a2e7" dependencies = [ "anyhow", "rustversion", - "time 0.3.25", + "time 0.3.36", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 1aa0011..6b5ab01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "hiphase" -version = "1.4.2" +version = "1.4.3" authors = ["J. Matthew Holt "] description = "A tool for jointly phasing small, structural, and tandem repeat variants for PacBio sequencing data" edition = "2021" diff --git a/LICENSE-THIRDPARTY.json b/LICENSE-THIRDPARTY.json index 28c289e..37f11a4 100644 --- a/LICENSE-THIRDPARTY.json +++ b/LICENSE-THIRDPARTY.json @@ -325,7 +325,7 @@ }, { "name": "deranged", - "version": "0.3.7", + "version": "0.3.11", "authors": "Jacob Pratt ", "repository": "https://github.com/jhpratt/deranged", "license": "Apache-2.0 OR MIT", @@ -496,7 +496,7 @@ }, { "name": "hiphase", - "version": "1.4.2", + "version": "1.4.3", "authors": "J. Matthew Holt ", "repository": null, "license": null, @@ -764,6 +764,15 @@ "license_file": null, "description": "Complex numbers implementation for Rust" }, + { + "name": "num-conv", + "version": "0.1.0", + "authors": "Jacob Pratt ", + "repository": "https://github.com/jhpratt/num-conv", + "license": "Apache-2.0 OR MIT", + "license_file": null, + "description": "`num_conv` is a crate to convert between integer types without using `as` casts. This provides better certainty when refactoring, makes the exact behavior of code more explicit, and allows using turbofish syntax." + }, { "name": "num-integer", "version": "0.1.45", @@ -872,6 +881,15 @@ "license_file": null, "description": "A library to run the pkg-config system tool at build time in order to be used in Cargo build scripts." 
}, + { + "name": "powerfmt", + "version": "0.2.0", + "authors": "Jacob Pratt ", + "repository": "https://github.com/jhpratt/powerfmt", + "license": "Apache-2.0 OR MIT", + "license_file": null, + "description": "`powerfmt` is a library that provides utilities for formatting values. This crate makes it significantly easier to support filling to a minimum width with alignment, avoid heap allocation, and avoid repetitive calculations." + }, { "name": "ppv-lite86", "version": "0.2.16", @@ -1081,7 +1099,7 @@ }, { "name": "serde", - "version": "1.0.147", + "version": "1.0.205", "authors": "Erick Tryzelaar |David Tolnay ", "repository": "https://github.com/serde-rs/serde", "license": "Apache-2.0 OR MIT", @@ -1090,7 +1108,7 @@ }, { "name": "serde_derive", - "version": "1.0.147", + "version": "1.0.205", "authors": "Erick Tryzelaar |David Tolnay ", "repository": "https://github.com/serde-rs/serde", "license": "Apache-2.0 OR MIT", @@ -1216,7 +1234,7 @@ }, { "name": "time", - "version": "0.3.25", + "version": "0.3.36", "authors": "Jacob Pratt |Time contributors", "repository": "https://github.com/time-rs/time", "license": "Apache-2.0 OR MIT", @@ -1225,7 +1243,7 @@ }, { "name": "time-core", - "version": "0.1.1", + "version": "0.1.2", "authors": "Jacob Pratt |Time contributors", "repository": "https://github.com/time-rs/time", "license": "Apache-2.0 OR MIT", @@ -1234,7 +1252,7 @@ }, { "name": "time-macros", - "version": "0.2.11", + "version": "0.2.18", "authors": "Jacob Pratt |Time contributors", "repository": "https://github.com/time-rs/time", "license": "Apache-2.0 OR MIT", diff --git a/src/block_gen.rs b/src/block_gen.rs index 8130249..29d9b3e 100644 --- a/src/block_gen.rs +++ b/src/block_gen.rs @@ -627,13 +627,18 @@ impl PhaseBlockIterator { /// # Arguments /// * `chrom` - the chromosome of the locus /// * `pos` - the position of the locus - fn get_longest_multispan(&self, chrom: &str, pos: u64) -> u64 { + fn get_longest_multispan(&self, chrom: &str, pos: u64) -> Result> { 
use bio::bio_types::genome::AbstractInterval; use rust_htslib::bam::Read; let mut span_list: Vec = vec![]; - for bam_ref in self.bam_readers.iter() { + for (bam_index, bam_ref) in self.bam_readers.iter().enumerate() { let mut bam = bam_ref.borrow_mut(); - bam.fetch((chrom, pos, pos+1)).unwrap(); + match bam.fetch((chrom, pos, pos+1)) { + Ok(()) => {}, + Err(e) => { + bail!("Error while fetching \"{}:{}\" in aligned file #{}: {}", chrom, pos, bam_index, e); + } + }; // calling .records() is what is triggering the URL warning for read_entry in bam.records() { @@ -656,10 +661,10 @@ impl PhaseBlockIterator { if span_list.len() < self.min_spanning_reads { // this is a sentinel indicating that the range is effectively empty - pos + Ok(pos) } else { span_list.sort(); - span_list[span_list.len() - self.min_spanning_reads] + Ok(span_list[span_list.len() - self.min_spanning_reads]) } } @@ -891,7 +896,10 @@ impl Iterator for PhaseBlockIterator { phase_block.add_locus_variant(&chrom_name, variant_pos, pop_index); // go ahead and run the max span calculation - max_span = self.get_longest_multispan(&chrom_name, variant_pos); + max_span = match self.get_longest_multispan(&chrom_name, variant_pos) { + Ok(ms) => ms, + Err(e) => return Some(Err(e)) + }; if max_span == variant_pos { // there are not enough reads overlapping this position, it will be unphased phase_block.set_unphased_block(); @@ -916,7 +924,10 @@ impl Iterator for PhaseBlockIterator { } } else { //we check the reads from the most recent locus - max_span = self.get_longest_multispan(&chrom_name, previous_pos); + max_span = match self.get_longest_multispan(&chrom_name, previous_pos) { + Ok(ms) => ms, + Err(e) => return Some(Err(e)) + }; assert!(max_span != previous_pos); if max_span > variant_pos { //new max span connects diff --git a/src/writers/ordered_vcf_writer.rs b/src/writers/ordered_vcf_writer.rs index afb761a..750c75b 100644 --- a/src/writers/ordered_vcf_writer.rs +++ b/src/writers/ordered_vcf_writer.rs @@ 
-308,7 +308,7 @@ impl OrderedVcfWriter { // we have already written though, so don't write it again continue; } - + // we now have to iterate over each sample and modify the entries as necessary let vcf_sample_indices = &self.sample_indices[vcf_index]; let mut changes_made: bool = false; @@ -323,16 +323,17 @@ impl OrderedVcfWriter { 1 => { // TRGT can make single-allele GT calls, just copy it over as normal // it will not be modified below because it is a homozygous allele - alleles.push(genotype[0]); + alleles.push(i32::from(genotype[0])); + alleles.push(i32::MIN+1); // sentinel value for end }, 2 => { // this is 99.99999999% path - alleles.push(genotype[0]); - alleles.push(genotype[1]); + alleles.push(i32::from(genotype[0])); + alleles.push(i32::from(genotype[1])); }, gt_len => { // we do not have 3+ GT entries implemented - bail!("Encountered GT of length {} at position {}", gt_len, record.pos()) + bail!("Encountered GT of length {} at record {}", gt_len, record.desc()) } } } @@ -373,8 +374,8 @@ impl OrderedVcfWriter { } else { // we need to alter the genotypes for this sample to phased let sample_gt_offset: usize = 2 * sample_index; - alleles[sample_gt_offset] = GenotypeAllele::Unphased(h1 as i32); - alleles[sample_gt_offset + 1] = GenotypeAllele::Phased(h2 as i32); + alleles[sample_gt_offset] = i32::from(GenotypeAllele::Unphased(h1 as i32)); + alleles[sample_gt_offset + 1] = i32::from(GenotypeAllele::Phased(h2 as i32)); // the push_format_string expects &[u8] bytes so we have to: // 1. convert the output to a String @@ -391,7 +392,7 @@ impl OrderedVcfWriter { if changes_made { // if we altered something, then alter the record and add PS - record.push_genotypes(&alleles)?; + record.push_format_integer(b"GT", &alleles)?; record.push_format_string("PS".as_bytes(), &ps_blocks).unwrap(); } if flagged_variants {