Skip to content

Commit

Permalink
MRG: refactor sketching utilities (#112)
Browse files Browse the repository at this point in the history
- Refactor for reusability: move all generalized sketch-building utilities
  into `utils/buildutils.rs`.
- Minor refactoring: use `sourmash::encodings::HashFunctions` for
  `moltype`, and add an `Abund` enum.
- Requires sourmash-bio/sourmash#3344 in order to use
  `Hash` with `sourmash::encodings::HashFunctions`.

Related:
- #113
  • Loading branch information
bluegenes authored Oct 8, 2024
1 parent 1edfc9c commit e3b28ff
Show file tree
Hide file tree
Showing 6 changed files with 574 additions and 560 deletions.
3 changes: 1 addition & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ crate-type = ["cdylib"]
pyo3 = { version = "0.22.3", features = ["extension-module", "anyhow"] }
rayon = "1.10.0"
serde = { version = "1.0.204", features = ["derive"] }
sourmash = { version = "0.15.2"}
# sourmash = { version = "0.15.2"}
sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "latest"}
serde_json = "1.0.120"
niffler = "2.4.0"
needletail = "0.5.1"
Expand Down
29 changes: 17 additions & 12 deletions src/directsketch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@ use tokio_util::compat::Compat;
use pyo3::prelude::*;

use crate::utils::{
load_accession_info, load_gbassembly_info, parse_params_str, AccessionData, BuildCollection,
BuildManifest, GBAssemblyData, GenBankFileType, InputMolType, MultiBuildCollection,
MultiCollection,
load_accession_info, load_gbassembly_info, AccessionData, GBAssemblyData, GenBankFileType,
InputMolType, MultiCollection,
};

use crate::utils::buildutils::{
BuildCollection, BuildManifest, BuildParamsSet, MultiBuildCollection,
};
use reqwest::Url;

Expand Down Expand Up @@ -918,16 +921,17 @@ pub async fn gbsketch(
}

// parse param string into params_vec, print error if fail
let param_result = parse_params_str(param_str);
let params_vec = match param_result {
let param_result = BuildParamsSet::from_params_str(param_str);
let params_set = match param_result {
Ok(params) => params,
Err(e) => {
bail!("Failed to parse params string: {}", e);
}
};
let dna_template_collection = BuildCollection::from_buildparams(&params_vec, "DNA");
// prot will build protein, dayhoff, hp
let prot_template_collection = BuildCollection::from_buildparams(&params_vec, "protein");
// Use the BuildParamsSet to create template collections for DNA and protein
let dna_template_collection = BuildCollection::from_buildparams_set(&params_set, "DNA");
// prot will build protein, dayhoff, hp
let prot_template_collection = BuildCollection::from_buildparams_set(&params_set, "protein");

let mut genomes_only = genomes_only;
let mut proteomes_only = proteomes_only;
Expand Down Expand Up @@ -1157,15 +1161,16 @@ pub async fn urlsketch(
}

// parse param string into params_vec, print error if fail
let param_result = parse_params_str(param_str);
let params_vec = match param_result {
let param_result = BuildParamsSet::from_params_str(param_str);
let params_set = match param_result {
Ok(params) => params,
Err(e) => {
bail!("Failed to parse params string: {}", e);
}
};
let dna_template_collection = BuildCollection::from_buildparams(&params_vec, "DNA");
let prot_template_collection = BuildCollection::from_buildparams(&params_vec, "protein");
// Use the BuildParamsSet to create template collections for DNA and protein
let dna_template_collection = BuildCollection::from_buildparams_set(&params_set, "DNA");
let prot_template_collection = BuildCollection::from_buildparams_set(&params_set, "protein");

let mut genomes_only = false;
let mut proteomes_only = false;
Expand Down
Loading

0 comments on commit e3b28ff

Please sign in to comment.