From 7c575ef549908745f34d9371986551f3d70ed444 Mon Sep 17 00:00:00 2001
From: Daniel <104648079+PB-DB@users.noreply.github.com>
Date: Wed, 27 Mar 2024 07:38:05 -0700
Subject: [PATCH] feat: Add fasta::build function + FaidxBuildError (#418)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add fasta::build function + FaidxBuildError rust_htslib::errors::Error enum entry.

* Add Reader::seq_names() function to rust_htslib::faidx::Reader.

---------

Co-authored-by: Johannes Köster
---
 src/errors.rs    |  2 ++
 src/faidx/mod.rs | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/src/errors.rs b/src/errors.rs
index 4c94d2af5..2e89854f0 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -33,6 +33,8 @@ pub enum Error {
     FaidxPositionTooLarge,
     #[error("bad conversion of sequence name")]
     FaidxBadSeqName,
+    #[error("failed to build index for fasta file {path:?}")]
+    FaidxBuildFailed { path: std::path::PathBuf },
 
     // Errors for Tbx
     #[error("previous iterator generation failed")]
diff --git a/src/faidx/mod.rs b/src/faidx/mod.rs
index ba44e28d7..50dd33016 100644
--- a/src/faidx/mod.rs
+++ b/src/faidx/mod.rs
@@ -22,6 +22,31 @@ pub struct Reader {
     inner: *mut htslib::faidx_t,
 }
 
+/// Build a faidx index for the FASTA file at `path`.
+///
+/// # Errors
+/// If `path` is not valid UTF-8 or contains a NUL byte, or if indexing fails
+/// (`Error::FaidxBuildFailed`), e.g. because the file is malformed or not accessible.
+///
+/// ```
+/// use rust_htslib::faidx::build;
+/// let path = std::path::PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"),"/test/test_cram.fa"));
+/// build(&path).expect("Failed to build fasta index");
+/// ```
+pub fn build(
+    path: impl Into<std::path::PathBuf>,
+) -> Result<(), std::boxed::Box<dyn std::error::Error>> {
+    let path = path.into();
+    // Reject non-UTF-8 paths up front; a lossy conversion would hand htslib a different path.
+    let os_path = std::ffi::CString::new(path.to_str().ok_or("path is not valid UTF-8")?)?;
+    // SAFETY: `os_path` is a valid NUL-terminated C string that outlives this call.
+    let rc = unsafe { htslib::fai_build(os_path.as_ptr()) };
+    if rc < 0 {
+        return Err(Error::FaidxBuildFailed { path }.into());
+    }
+    Ok(())
+}
+
 impl Reader {
     /// Create a new Reader from a path.
/// @@ -137,6 +162,29 @@ impl Reader { let seq_len = unsafe { htslib::faidx_seq_len(self.inner, cname.as_ptr()) }; seq_len as u64 } + + /// Returns a Result> for all seq names. + /// # Errors + /// + /// * `errors::Error::FaidxBadSeqName` - missing sequence name for sequence id. + /// + /// If thrown, the index is malformed, and the number of sequences in the index does not match the number of sequence names available. + ///``` + /// use rust_htslib::faidx::build; + /// let path = std::path::PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"),"/test/test_cram.fa")); + /// build(&path).expect("Failed to build fasta index"); + /// let reader = rust_htslib::faidx::Reader::from_path(path).expect("Failed to open faidx"); + /// assert_eq!(reader.seq_names(), Ok(vec!["chr1".to_string(), "chr2".to_string(), "chr3".to_string()])); + ///``` + /// + pub fn seq_names(&self) -> Result> { + let num_seq = self.n_seqs(); + let mut ret = Vec::with_capacity(num_seq as usize); + for seq_id in 0..num_seq { + ret.push(self.seq_name(seq_id as i32)?); + } + Ok(ret) + } } impl Drop for Reader {