Skip to content

Commit

Permalink
Merge pull request #35 from PacificBiosciences/minor_patch_warn
Browse files Browse the repository at this point in the history
sync changes for v1.4.1
  • Loading branch information
holtjma authored May 1, 2024
2 parents 99b92ac + e035468 commit ad1539f
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 18 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# v1.4.1
## Changes
* Reclassifies warnings during VCF writing to debug
* Adds a section to the quickstart guide on resource requirements

# v1.4.0
## Changes
* **Major changes to dual-mode allele assignment:** Prior to this version, global realignment would revert to local realignment if the CPU cost (in seconds) exceeded a user-provided threshold. While this was useful for fast-tracking noisy phase blocks, it could lead to non-deterministic output as CPU costs can vary. The thresholding has been reworked such that global realignment will revert to local realignment *for an individual mapping* if the edit distance exceeds a user-provided threshold (default: 500). Additionally, global realignment will revert to local realignment *for the remainder of a putative phase block* if too many reads have reverted to local realignment (default: 50%, minimum number of failures: 50 mappings). This has the following downstream impact on results:
Expand Down
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "hiphase"
version = "1.4.0"
version = "1.4.1"
authors = ["J. Matthew Holt <[email protected]>"]
description = "A tool for jointly phasing small, structural, and tandem repeat variants for PacBio sequencing data"
edition = "2021"
Expand Down
4 changes: 2 additions & 2 deletions LICENSE-THIRDPARTY.json
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@
},
{
"name": "env_logger",
"version": "0.9.1",
"version": "0.9.3",
"authors": null,
"repository": "https://github.com/env-logger-rs/env_logger/",
"license": "Apache-2.0 OR MIT",
Expand Down Expand Up @@ -496,7 +496,7 @@
},
{
"name": "hiphase",
"version": "1.4.0",
"version": "1.4.1",
"authors": "J. Matthew Holt <[email protected]>",
"repository": null,
"license": null,
Expand Down
5 changes: 5 additions & 0 deletions docs/user_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ hiphase \
[2023-02-02T20:40:11.347Z INFO hiphase] All phase blocks finished successfully after 1605.008968506 seconds.
```

## Recommended resources
HiPhase has built-in parallel processing via the `--threads` parameter.
We recommend reserving 4 GB of memory per thread allocated to HiPhase.
For example, most of our internal tests use 16 threads and reserve 64 GB of memory.

# Common use cases
## Joint phasing small variants, structural variants, and tandem repeats
To *jointly* phase small variants, structural variants, and tandem repeats, pass all VCF files to HiPhase and specify one output VCF file for each in the same order as input.
Expand Down
20 changes: 11 additions & 9 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use clap::Parser;
use chrono::Datelike;
use flate2::bufread::MultiGzDecoder;
use lazy_static::lazy_static;
use log::{error, info, warn};
use log::{error, info, trace, warn};
use std::fs::File;
use std::io::{BufReader, Read};
use std::path::{Path, PathBuf};
Expand All @@ -26,30 +26,30 @@ lazy_static! {
This program comes with ABSOLUTELY NO WARRANTY; it is intended for
Research Use Only and not for use in diagnostic procedures.", chrono::Utc::now().year()))]
pub struct Settings {
/// Input alignment file in BAM format.
/// Input alignment file in BAM format
#[clap(required = true)]
#[clap(short = 'b')]
#[clap(long = "bam")]
#[clap(value_name = "BAM")]
#[clap(help_heading = Some("Input/Output"))]
pub bam_filenames: Vec<PathBuf>,

/// Output haplotagged alignment file in BAM format.
/// Output haplotagged alignment file in BAM format
#[clap(short = 'p')]
#[clap(long = "output-bam")]
#[clap(value_name = "BAM")]
#[clap(help_heading = Some("Input/Output"))]
pub output_bam_filenames: Vec<PathBuf>,

/// Input variant file in VCF format.
/// Input variant file in VCF format
#[clap(required = true)]
#[clap(short = 'c')]
#[clap(long = "vcf")]
#[clap(value_name = "VCF")]
#[clap(help_heading = Some("Input/Output"))]
pub vcf_filenames: Vec<PathBuf>,

/// Output phased variant file in VCF format.
/// Output phased variant file in VCF format
#[clap(required = true)]
#[clap(short = 'o')]
#[clap(long = "output-vcf")]
Expand Down Expand Up @@ -112,14 +112,14 @@ pub struct Settings {
#[clap(help_heading = Some("Input/Output"))]
pub csi_index: bool,

/// Number of threads to use for phasing.
/// Number of threads to use for phasing
#[clap(short = 't')]
#[clap(long = "threads")]
#[clap(value_name = "THREADS")]
#[clap(default_value = "1")]
pub threads: usize,

/// Enable verbose output.
/// Enable verbose output
#[clap(short = 'v')]
#[clap(long = "verbose")]
#[clap(action = clap::ArgAction::Count)]
Expand Down Expand Up @@ -267,7 +267,9 @@ fn check_required_vcf(filename: &Path, label: &str) {
let mut gz_decoder = MultiGzDecoder::new(file_reader);
let mut small_buffer: [u8; 10] = [0; 10];
match gz_decoder.read(&mut small_buffer) {
Ok(_) => {},
Ok(bytes_read) => {
trace!("Successfully read {bytes_read} bytes from VCF.")
},
Err(e) => {
if e.to_string() == "invalid gzip header" {
error!("Error while checking {filename:?}: {e}; is the VCF bgzipped?");
Expand Down Expand Up @@ -410,7 +412,7 @@ pub fn check_settings(mut settings: Settings) -> Settings {
info!("Processing threads: {}", settings.threads);
info!("I/O threads: {}", settings.io_threads.unwrap());
if settings.csi_index {
info!("CSI indexing: enabled");
info!("CSI indexing: ENABLED");
}

//send the settings back
Expand Down
6 changes: 3 additions & 3 deletions src/writers/ordered_vcf_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use crate::block_gen::is_phasable_variant;
use crate::phaser::PhaseResult;

use log::{warn,debug,trace};
use log::{debug, trace};
use rust_htslib::bcf;
use rust_htslib::bcf::Read;
use rust_htslib::bcf::record::GenotypeAllele;
Expand Down Expand Up @@ -404,9 +404,9 @@ impl OrderedVcfWriter {
},
Err(e) => {
if final_position == 0 {
warn!("Empty problem block received, no heterozygous variants on chromosome {}", self.current_chrom);
debug!("Empty problem block received, no heterozygous variants on chromosome {}", self.current_chrom);
} else {
warn!("Received \'{}\', while seeking to {}:{}-{} in vcf #{}, likely no variants present", e, self.current_chrom, start_pos, final_position, vcf_index);
debug!("Received \'{}\', while seeking to {}:{}-{} in vcf #{}, likely no variants present", e, self.current_chrom, start_pos, final_position, vcf_index);
}
}
}
Expand Down

0 comments on commit ad1539f

Please sign in to comment.