From 082e70c9fd87a3afe1a572b0532b6c999ef6bc49 Mon Sep 17 00:00:00 2001 From: y1zhou Date: Wed, 21 Aug 2024 19:28:13 +0800 Subject: [PATCH 01/12] feat(contacts): add detection of repulsion between like charges --- CHANGELOG.md | 15 +++++-------- src/interactions/complex.rs | 15 ++++++++++--- src/interactions/ionic.rs | 44 +++++++++++++++++++++++++++++++++++-- src/interactions/mod.rs | 2 +- src/interactions/structs.rs | 2 ++ src/utils.rs | 3 +-- 6 files changed, 64 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 762d725..0e89028 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,23 +5,19 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.3.2] ### Added -- PLACEHOLDER +- Detection of repulsion between like charges ### Fixed -- PLACEHOLDER +- Better error messages when rings have missing atoms for finding the center and normal vector ### Changed -- PLACEHOLDER - -### Removed - -- PLACEHOLDER +- Distance cutoff for T-shaped Pi-stacking lowered from 6Å to 5Å ## [0.3.1] @@ -75,7 +71,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Initial release - Detection of common protein-protein interactions in a PDB or mmCIF file -[unreleased]: https://github.com/y1zhou/arpeggia/compare/v0.3.1...HEAD +[unreleased]: https://github.com/y1zhou/arpeggia/compare/v0.3.2...HEAD +[0.3.1]: https://github.com/y1zhou/arpeggia/releases/tag/v0.3.2 [0.3.1]: https://github.com/y1zhou/arpeggia/releases/tag/v0.3.1 [0.3.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.3.0 [0.2.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.2.0 diff --git a/src/interactions/complex.rs b/src/interactions/complex.rs index cd51f43..4000255 100644 --- a/src/interactions/complex.rs +++ b/src/interactions/complex.rs @@ -1,7 +1,7 @@ use super::{ - find_cation_pi, find_hydrogen_bond, find_hydrophobic_contact, find_ionic_bond, find_pi_pi, - find_vdw_contact, find_weak_hydrogen_bond, point_ring_dist, InteractingEntity, Interaction, - ResultEntry, + find_cation_pi, find_hydrogen_bond, find_hydrophobic_contact, find_ionic_bond, + find_ionic_repulsion, find_pi_pi, find_vdw_contact, find_weak_hydrogen_bond, point_ring_dist, + InteractingEntity, Interaction, ResultEntry, }; use crate::{ residues::{ResidueExt, ResidueId, Ring}, @@ -176,6 +176,15 @@ impl<'a> Interactions for InteractionComplex<'a> { }); atomic_contacts.extend(ionic_bonds); + // Charge-charge repulsions + let charge_repulsions = find_ionic_repulsion(e1, e2).map(|intxn| ResultEntry { + interaction: intxn, + ligand: InteractingEntity::from_hier(e1), + receptor: InteractingEntity::from_hier(e2), + distance: e1.atom().distance(e2.atom()), + }); + atomic_contacts.extend(charge_repulsions); + // Hydrophobic contacts let hydrophobic_contacts = find_hydrophobic_contact(e1, e2).map(|intxn| ResultEntry { diff --git a/src/interactions/ionic.rs b/src/interactions/ionic.rs index be0b3c3..a733eff 100644 --- a/src/interactions/ionic.rs +++ b/src/interactions/ionic.rs @@ -12,7 +12,7 @@ pub fn find_ionic_bond( entity1: &AtomConformerResidueChainModel, entity2: &AtomConformerResidueChainModel, ) -> Option { - if let Some((pos, neg)) = is_ionic_pair(entity1, entity2) { + if let Some((pos, neg)) = is_ionic_bond_pair(entity1, entity2) { if pos.atom().distance(neg.atom()) <= IONIC_BOND_DIST { // Polar interactions are more relaxed as only distance is checked Some(Interaction::IonicBond) @@ -24,8 +24,24 @@ pub fn find_ionic_bond( } } +/// Search for like charges that repel each other. +pub fn find_ionic_repulsion( + entity1: &AtomConformerResidueChainModel, + entity2: &AtomConformerResidueChainModel, +) -> Option { + if let Some((hier1, hier2)) = is_same_charge_pair(entity1, entity2) { + if hier1.atom().distance(hier2.atom()) <= IONIC_BOND_DIST { + Some(Interaction::IonicRepulsion) + } else { + None + } + } else { + None + } +} + /// Check if the two entities are a pair of ionizables. -fn is_ionic_pair<'a>( +fn is_ionic_bond_pair<'a>( entity1: &'a AtomConformerResidueChainModel<'a>, entity2: &'a AtomConformerResidueChainModel<'a>, ) -> Option<( @@ -46,6 +62,30 @@ fn is_ionic_pair<'a>( } } +fn is_same_charge_pair<'a>( + entity1: &'a AtomConformerResidueChainModel<'a>, + entity2: &'a AtomConformerResidueChainModel<'a>, +) -> Option<( + &'a AtomConformerResidueChainModel<'a>, + &'a AtomConformerResidueChainModel<'a>, +)> { + let e1_conformer = entity1.conformer().name(); + let e2_conformer = entity2.conformer().name(); + let e1_atom = entity1.atom().name(); + let e2_atom = entity2.atom().name(); + + let both_pos_charged = + is_pos_ionizable(e1_conformer, e1_atom) & is_pos_ionizable(e2_conformer, e2_atom); + let both_neg_charged = + is_neg_ionizable(e1_conformer, e1_atom) & is_neg_ionizable(e2_conformer, e2_atom); + + if both_pos_charged | both_neg_charged { + Some((entity1, entity2)) + } else { + None + } +} + /// Check if the entity contains ionizable groups that are positively charged at pH 7.0. pub fn is_pos_ionizable(res_name: &str, atom_name: &str) -> bool { matches!( diff --git a/src/interactions/mod.rs b/src/interactions/mod.rs index 0a7ab39..0eabb94 100644 --- a/src/interactions/mod.rs +++ b/src/interactions/mod.rs @@ -11,6 +11,6 @@ pub use aromatic::{find_cation_pi, find_pi_pi, point_ring_dist}; pub use complex::*; pub use hbond::{find_hydrogen_bond, find_weak_hydrogen_bond}; pub use hydrophobic::find_hydrophobic_contact; -pub use ionic::find_ionic_bond; +pub use ionic::{find_ionic_bond, find_ionic_repulsion}; pub use structs::*; pub use vdw::find_vdw_contact; diff --git a/src/interactions/structs.rs b/src/interactions/structs.rs index 93e22de..38986e1 100644 --- a/src/interactions/structs.rs +++ b/src/interactions/structs.rs @@ -26,6 +26,8 @@ pub enum Interaction { PolarContact, /// C-H...O bonding without angle terms WeakPolarContact, + /// Like charges repelling each other + IonicRepulsion, // Aromatic /// Pi-pi staggered stacking, parallel displaced (`of`) diff --git a/src/utils.rs b/src/utils.rs index 3e8b3d8..399f939 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,8 +1,7 @@ -use std::{collections::HashSet, path::PathBuf}; - use crate::residues::ResidueExt; use pdbtbx::*; use polars::prelude::*; +use std::{collections::HashSet, path::PathBuf}; /// Open an atomic data file with [`pdbtbx::open`] and remove non-protein residues. pub fn load_model(input_file: &String) -> (PDB, Vec) { From 69e2407082c58200ae6206dba061e14ddffb8ec6 Mon Sep 17 00:00:00 2001 From: y1zhou Date: Thu, 22 Aug 2024 12:10:45 +0800 Subject: [PATCH 02/12] fix(contacts): add check for hydrogen bond length upper limit --- src/interactions/hbond.rs | 103 +++++++++++++++++++------------------- 1 file changed, 52 insertions(+), 51 deletions(-) diff --git a/src/interactions/hbond.rs b/src/interactions/hbond.rs index 1fa199d..5615e24 100644 --- a/src/interactions/hbond.rs +++ b/src/interactions/hbond.rs @@ -4,7 +4,8 @@ use pdbtbx::*; use rayon::prelude::*; use std::collections::HashSet; -const HYDROGEN_BOND_POLAR_DIST: f64 = 3.5; +const HYDROGEN_BOND_DIST: f64 = 4.0; +const POLAR_DIST: f64 = 3.5; /// Search for hydrogen bonds and polar contacts. /// @@ -32,35 +33,35 @@ pub fn find_hydrogen_bond( vdw_comp_factor: f64, ) -> Option { if let Some((donor, acceptor)) = is_donor_acceptor_pair(entity1, entity2) { - let donor_h: Vec<&Atom> = donor - .residue() - .par_atoms() - .filter(|atom| atom.element().unwrap() == &Element::H) - .collect(); + let da_dist = donor.atom().distance(acceptor.atom()); + if da_dist <= HYDROGEN_BOND_DIST { + let donor_h: Vec<&Atom> = donor + .residue() + .par_atoms() + .filter(|atom| atom.element().unwrap() == &Element::H) + .collect(); - // Hydrogen bonds are stricter as they have angle restrictions - let acceptor_vdw: f64 = acceptor - .atom() - .element() - .unwrap() - .atomic_radius() - .van_der_waals - .unwrap(); - let h_vdw: f64 = Element::H.atomic_radius().van_der_waals.unwrap(); - if donor_h.par_iter().any(|h| { - (h.distance(acceptor.atom()) <= h_vdw + acceptor_vdw + vdw_comp_factor) - & (donor.atom().angle(h, acceptor.atom()) >= 90.0) - }) { - Some(Interaction::HydrogenBond) - } else if donor.atom().distance(acceptor.atom()) <= HYDROGEN_BOND_POLAR_DIST { + // Hydrogen bonds are stricter as they have angle restrictions + let acceptor_vdw: f64 = acceptor + .atom() + .element() + .unwrap() + .atomic_radius() + .van_der_waals + .unwrap(); + let h_vdw: f64 = Element::H.atomic_radius().van_der_waals.unwrap(); + if donor_h.par_iter().any(|h| { + (h.distance(acceptor.atom()) <= h_vdw + acceptor_vdw + vdw_comp_factor) + & (donor.atom().angle(h, acceptor.atom()) >= 90.0) + }) { + return Some(Interaction::HydrogenBond); + } + } else if da_dist <= POLAR_DIST { // Polar interactions are more relaxed as only distance is checked - Some(Interaction::PolarContact) - } else { - None + return Some(Interaction::PolarContact); } - } else { - None } + None } /// Search for weak hydrogen bonds and weak polar contacts. @@ -75,35 +76,35 @@ pub fn find_weak_hydrogen_bond( vdw_comp_factor: f64, ) -> Option { if let Some((donor, acceptor)) = is_weak_donor_acceptor_pair(entity1, entity2) { - let donor_h: Vec<&Atom> = donor - .residue() - .par_atoms() - .filter(|atom| atom.element().unwrap() == &Element::H) - .collect(); + let da_dist = donor.atom().distance(acceptor.atom()); + if da_dist <= HYDROGEN_BOND_DIST { + let donor_h: Vec<&Atom> = donor + .residue() + .par_atoms() + .filter(|atom| atom.element().unwrap() == &Element::H) + .collect(); - // Hydrogen bonds are stricter as they have angle restrictions - let acceptor_vdw: f64 = acceptor - .atom() - .element() - .unwrap() - .atomic_radius() - .van_der_waals - .unwrap(); - let h_vdw: f64 = Element::H.atomic_radius().van_der_waals.unwrap(); - if donor_h.par_iter().any(|h| { - (h.distance(acceptor.atom()) <= h_vdw + acceptor_vdw + vdw_comp_factor) - & (donor.atom().angle(h, acceptor.atom()) >= 130.0) - }) { - Some(Interaction::WeakHydrogenBond) - } else if donor.atom().distance(acceptor.atom()) <= HYDROGEN_BOND_POLAR_DIST { + // Hydrogen bonds are stricter as they have angle restrictions + let acceptor_vdw: f64 = acceptor + .atom() + .element() + .unwrap() + .atomic_radius() + .van_der_waals + .unwrap(); + let h_vdw: f64 = Element::H.atomic_radius().van_der_waals.unwrap(); + if donor_h.par_iter().any(|h| { + (h.distance(acceptor.atom()) <= h_vdw + acceptor_vdw + vdw_comp_factor) + & (donor.atom().angle(h, acceptor.atom()) >= 130.0) + }) { + return Some(Interaction::WeakHydrogenBond); + } + } else if da_dist <= POLAR_DIST { // Polar interactions are more relaxed as only distance is checked - Some(Interaction::WeakPolarContact) - } else { - None + return Some(Interaction::WeakPolarContact); } - } else { - None } + None } /// Determine if the two entities are a valid hydrogen bond donor-acceptor pair From 3d976bbd7cc6b7008f872513375719c1f854a7b1 Mon Sep 17 00:00:00 2001 From: y1zhou Date: Thu, 22 Aug 2024 12:13:05 +0800 Subject: [PATCH 03/12] refactor(contacts): reduce the amount of else None returns --- src/interactions/aromatic.rs | 7 ++----- src/interactions/ionic.rs | 14 ++++---------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/src/interactions/aromatic.rs b/src/interactions/aromatic.rs index f544d17..3c765b4 100644 --- a/src/interactions/aromatic.rs +++ b/src/interactions/aromatic.rs @@ -19,13 +19,10 @@ pub fn find_cation_pi(ring: &Ring, entity: &AtomConformerResidueChainModel) -> O let theta = point_ring_angle(ring, &atom_point); if (theta <= CATION_PI_ANGLE_THRESHOLD) & (dist <= CATION_PI_DIST_THRESHOLD) { - Some(Interaction::CationPi) - } else { - None + return Some(Interaction::CationPi); } - } else { - None } + None } /// Identify pi-pi interactions using the classification by [Chakrabarti and Bhattacharyya (2007)](https://doi.org/10.1016/j.pbiomolbio.2007.03.016), Fig. 11. diff --git a/src/interactions/ionic.rs b/src/interactions/ionic.rs index a733eff..f8fe39f 100644 --- a/src/interactions/ionic.rs +++ b/src/interactions/ionic.rs @@ -15,13 +15,10 @@ pub fn find_ionic_bond( if let Some((pos, neg)) = is_ionic_bond_pair(entity1, entity2) { if pos.atom().distance(neg.atom()) <= IONIC_BOND_DIST { // Polar interactions are more relaxed as only distance is checked - Some(Interaction::IonicBond) - } else { - None + return Some(Interaction::IonicBond); } - } else { - None } + None } /// Search for like charges that repel each other. @@ -31,13 +28,10 @@ pub fn find_ionic_repulsion( ) -> Option { if let Some((hier1, hier2)) = is_same_charge_pair(entity1, entity2) { if hier1.atom().distance(hier2.atom()) <= IONIC_BOND_DIST { - Some(Interaction::IonicRepulsion) - } else { - None + return Some(Interaction::IonicRepulsion); } - } else { - None } + None } /// Check if the two entities are a pair of ionizables. From 772811dc911fa6b250747a2093abc8b7626977ff Mon Sep 17 00:00:00 2001 From: y1zhou Date: Mon, 2 Sep 2024 15:42:30 +0800 Subject: [PATCH 04/12] fix(hbond): do not skip polar contact id when hbonds are not satisfied --- src/interactions/hbond.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/interactions/hbond.rs b/src/interactions/hbond.rs index 5615e24..73f3f8f 100644 --- a/src/interactions/hbond.rs +++ b/src/interactions/hbond.rs @@ -19,7 +19,7 @@ const POLAR_DIST: f64 = 3.5; /// /// where `vdw_comp_factor` is the compensation factor for VdW radii dependent interactions. /// If the conditions are met, it returns [`Interaction::HydrogenBond`]. -/// If not, it checks for [`Interaction::PolarContact`] by: dist(donor, acceptor) <= [`HYDROGEN_BOND_POLAR_DIST`]. +/// If not, it checks for [`Interaction::PolarContact`] by: dist(donor, acceptor) <= [`POLAR_DIST`]. /// /// For further details, see: /// @@ -56,7 +56,8 @@ pub fn find_hydrogen_bond( }) { return Some(Interaction::HydrogenBond); } - } else if da_dist <= POLAR_DIST { + } + if da_dist <= POLAR_DIST { // Polar interactions are more relaxed as only distance is checked return Some(Interaction::PolarContact); } @@ -99,7 +100,8 @@ pub fn find_weak_hydrogen_bond( }) { return Some(Interaction::WeakHydrogenBond); } - } else if da_dist <= POLAR_DIST { + } + if da_dist <= POLAR_DIST { // Polar interactions are more relaxed as only distance is checked return Some(Interaction::WeakPolarContact); } From bcc5563b9bd9e29bd32441afc3b80a984d2ecce5 Mon Sep 17 00:00:00 2001 From: y1zhou Date: Mon, 23 Sep 2024 16:36:11 +0800 Subject: [PATCH 05/12] feat(io): support parquet, json, and ndjson output formats --- Cargo.toml | 9 ++++++--- src/cli/contacts.rs | 12 ++++++++--- src/cli/sasa.rs | 11 +++++++--- src/utils.rs | 49 ++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 69 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fcf080d..2d4f4b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "arpeggia" -version = "0.3.1" +version = "0.3.2" description = "A port of the Arpeggio library for Rust" edition = "2021" authors = ["Yi Zhou "] @@ -10,8 +10,11 @@ authors = ["Yi Zhou "] [dependencies] clap = { version = "4.5.4", features = ["derive"] } nalgebra = "0.32.5" -pdbtbx = { version = "0.11.0", features = ["rayon", "rstar"], git = "https://github.com/douweschulte/pdbtbx", rev = "1298c60e8573efbc3f7c77a9a2d6f0656fa8c638" } -polars = { version = "0.39.2", features = ["lazy"] } +pdbtbx = { version = "0.11.0", features = [ + "rayon", + "rstar", +], git = "https://github.com/douweschulte/pdbtbx", rev = "1298c60e8573efbc3f7c77a9a2d6f0656fa8c638" } +polars = { version = "0.43.1", features = ["lazy", "parquet", "json"] } rayon = "1.10.0" rstar = "0.12.0" rust-sasa = "0.2.2" diff --git a/src/cli/contacts.rs b/src/cli/contacts.rs index 4940b8a..f2e6e22 100644 --- a/src/cli/contacts.rs +++ b/src/cli/contacts.rs @@ -1,5 +1,5 @@ use crate::interactions::{InteractionComplex, Interactions, ResultEntry}; -use crate::utils::{load_model, write_df_to_csv}; +use crate::utils::{load_model, write_df_to_file, DataFrameFileType}; use clap::Parser; use pdbtbx::*; use polars::prelude::*; @@ -23,6 +23,10 @@ pub(crate) struct Args { #[arg(short, long)] groups: String, + /// Output file type + #[arg(short = 't', long, default_value_t = DataFrameFileType::Csv)] + output_format: DataFrameFileType, + /// Compensation factor for VdW radii dependent interaction types #[arg(short = 'c', long = "vdw-comp", default_value_t = 0.1)] vdw_comp: f64, @@ -83,7 +87,9 @@ pub(crate) fn run(args: &Args) { // Prepare output directory let output_path = Path::new(&args.output).canonicalize().unwrap(); let _ = std::fs::create_dir_all(output_path.clone()); - let output_file = output_path.join("contacts.csv"); + let output_file = output_path + .join("contacts.csv") + .with_extension(args.output_format.to_string()); let output_file_str = output_file.to_str().unwrap(); debug!("Results will be saved to {output_file_str}"); @@ -112,7 +118,7 @@ pub(crate) fn run(args: &Args) { .unwrap(); // Save res to CSV files - write_df_to_csv(&mut df_contacts, output_file); + write_df_to_file(&mut df_contacts, output_file, args.output_format); } pub fn get_contacts<'a>( diff --git a/src/cli/sasa.rs b/src/cli/sasa.rs index e77fd04..c7dcff0 100644 --- a/src/cli/sasa.rs +++ b/src/cli/sasa.rs @@ -1,6 +1,6 @@ use crate::interactions::InteractingEntity; use crate::residues::ResidueExt; -use crate::utils::{load_model, write_df_to_csv}; +use crate::utils::{load_model, write_df_to_file, DataFrameFileType}; use clap::Parser; use pdbtbx::*; use polars::prelude::*; @@ -20,6 +20,10 @@ pub(crate) struct Args { #[arg(short, long)] output: PathBuf, + /// Output file type + #[arg(short = 't', long, default_value_t = DataFrameFileType::Csv)] + output_format: DataFrameFileType, + /// Probe radius r (smaller r detects more surface details and reports a larger surface) #[arg(short = 'r', long = "probe-radius", default_value_t = 1.4)] probe_radius: f32, @@ -65,7 +69,8 @@ pub(crate) fn run(args: &Args) { let output_file = match output_path.is_dir() { true => output_path.join("sasa.csv"), false => output_path, - }; + } + .with_extension(args.output_format.to_string()); let output_file_str = output_file.to_str().unwrap(); debug!("Results will be saved to {output_file_str}"); @@ -80,7 +85,7 @@ pub(crate) fn run(args: &Args) { ); // Save res to CSV files - write_df_to_csv(&mut df_sasa, output_file); + write_df_to_file(&mut df_sasa, output_file, args.output_format); } pub fn get_atom_sasa(pdb: &PDB, probe_radius: f32, n_points: usize) -> DataFrame { diff --git a/src/utils.rs b/src/utils.rs index 399f939..ea4fa2f 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -66,9 +66,52 @@ pub fn parse_groups( } /// Write a DataFrame to a CSV file -pub(crate) fn write_df_to_csv(df: &mut DataFrame, file_path: PathBuf) { - let mut file = std::fs::File::create(file_path).unwrap(); - CsvWriter::new(&mut file).finish(df).unwrap(); +pub(crate) fn write_df_to_file( + df: &mut DataFrame, + file_path: PathBuf, + file_type: DataFrameFileType, +) { + let file_suffix = file_type.to_string(); + let mut file = std::fs::File::create(file_path.with_extension(file_suffix)).unwrap(); + match file_type { + DataFrameFileType::Csv => { + CsvWriter::new(&mut file).finish(df).unwrap(); + } + DataFrameFileType::Parquet => { + ParquetWriter::new(&mut file).finish(df).unwrap(); + } + DataFrameFileType::Json => { + JsonWriter::new(&mut file) + .with_json_format(JsonFormat::Json) + .finish(df) + .unwrap(); + } + DataFrameFileType::NDJson => { + JsonWriter::new(&mut file) + .with_json_format(JsonFormat::JsonLines) + .finish(df) + .unwrap(); + } + } +} + +#[derive(clap::ValueEnum, Clone, Debug, Copy)] +pub enum DataFrameFileType { + Csv, + Parquet, + Json, + NDJson, +} + +impl std::fmt::Display for DataFrameFileType { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + DataFrameFileType::Csv => write!(f, "csv"), + DataFrameFileType::Parquet => write!(f, "parquet"), + DataFrameFileType::Json => write!(f, "json"), + DataFrameFileType::NDJson => write!(f, "ndjson"), + } + } } #[cfg(test)] From 38f3c09e4d3fe95b158ba3fa9473e178eaa7e47e Mon Sep 17 00:00:00 2001 From: y1zhou Date: Mon, 23 Sep 2024 16:52:13 +0800 Subject: [PATCH 06/12] perf(io): no need to use f64 and i64 for saving results --- src/cli/contacts.rs | 10 +++++----- src/cli/sasa.rs | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cli/contacts.rs b/src/cli/contacts.rs index f2e6e22..122da1b 100644 --- a/src/cli/contacts.rs +++ b/src/cli/contacts.rs @@ -157,19 +157,19 @@ pub fn get_contacts<'a>( fn results_to_df(res: &[ResultEntry]) -> DataFrame { df!( "interaction" => res.iter().map(|x| x.interaction.to_string()).collect::>(), - "distance" => res.iter().map(|x| x.distance).collect::>(), + "distance" => res.iter().map(|x| x.distance as f32).collect::>(), "from_chain" => res.iter().map(|x| x.ligand.chain.to_owned()).collect::>(), "from_resn" => res.iter().map(|x| x.ligand.resn.to_owned()).collect::>(), - "from_resi" => res.iter().map(|x| x.ligand.resi as i64).collect::>(), + "from_resi" => res.iter().map(|x| x.ligand.resi as i32).collect::>(), "from_altloc" => res.iter().map(|x| x.ligand.altloc.to_owned()).collect::>(), "from_atomn" => res.iter().map(|x| x.ligand.atomn.to_owned()).collect::>(), - "from_atomi" => res.iter().map(|x| x.ligand.atomi as i64).collect::>(), + "from_atomi" => res.iter().map(|x| x.ligand.atomi as i32).collect::>(), "to_chain" => res.iter().map(|x| x.receptor.chain.to_owned()).collect::>(), "to_resn" => res.iter().map(|x| x.receptor.resn.to_owned()).collect::>(), - "to_resi" => res.iter().map(|x| x.receptor.resi as i64).collect::>(), + "to_resi" => res.iter().map(|x| x.receptor.resi as i32).collect::>(), "to_altloc" => res.iter().map(|x| x.receptor.altloc.to_owned()).collect::>(), "to_atomn" => res.iter().map(|x| x.receptor.atomn.to_owned()).collect::>(), - "to_atomi" => res.iter().map(|x| x.receptor.atomi as i64).collect::>(), + "to_atomi" => res.iter().map(|x| x.receptor.atomi as i32).collect::>(), ) .unwrap() } diff --git a/src/cli/sasa.rs b/src/cli/sasa.rs index c7dcff0..67e29ac 100644 --- a/src/cli/sasa.rs +++ b/src/cli/sasa.rs @@ -123,15 +123,15 @@ pub fn get_atom_sasa(pdb: &PDB, probe_radius: f32, n_points: usize) -> DataFrame let atom_annot_df = df!( "chain" => atom_annotations.iter().map(|x| x.chain.to_owned()).collect::>(), "resn" => atom_annotations.iter().map(|x| x.resn.to_owned()).collect::>(), - "resi" => atom_annotations.iter().map(|x| x.resi as i64).collect::>(), + "resi" => atom_annotations.iter().map(|x| x.resi as i32).collect::>(), "altloc" => atom_annotations.iter().map(|x| x.altloc.to_owned()).collect::>(), "atomn" => atom_annotations.iter().map(|x| x.atomn.to_owned()).collect::>(), - "atomi" => atom_annotations.iter().map(|x| x.atomi as i64).collect::>(), + "atomi" => atom_annotations.iter().map(|x| x.atomi as i32).collect::>(), ) .unwrap(); df!( - "atomi" => atoms.iter().map(|x| x.id as i64).collect::>(), + "atomi" => atoms.iter().map(|x| x.id as i32).collect::>(), "sasa" => atom_sasa ) .unwrap() From 4a4e816be79cbd58dcb09bb1dc9c8dfdc1484227 Mon Sep 17 00:00:00 2001 From: y1zhou Date: Wed, 16 Oct 2024 14:18:30 +0800 Subject: [PATCH 07/12] feat(cli): allow renaming output file of contacts and sasa --- src/cli/contacts.rs | 6 +++++- src/cli/sasa.rs | 8 ++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/cli/contacts.rs b/src/cli/contacts.rs index 122da1b..7d746d4 100644 --- a/src/cli/contacts.rs +++ b/src/cli/contacts.rs @@ -23,6 +23,10 @@ pub(crate) struct Args { #[arg(short, long)] groups: String, + /// Name of the output file + #[arg(short, long, default_value_t = String::from("contacts"))] + name: String, + /// Output file type #[arg(short = 't', long, default_value_t = DataFrameFileType::Csv)] output_format: DataFrameFileType, @@ -88,7 +92,7 @@ pub(crate) fn run(args: &Args) { let output_path = Path::new(&args.output).canonicalize().unwrap(); let _ = std::fs::create_dir_all(output_path.clone()); let output_file = output_path - .join("contacts.csv") + .join(args.name.clone()) .with_extension(args.output_format.to_string()); let output_file_str = output_file.to_str().unwrap(); diff --git a/src/cli/sasa.rs b/src/cli/sasa.rs index 67e29ac..1b74eb3 100644 --- a/src/cli/sasa.rs +++ b/src/cli/sasa.rs @@ -16,10 +16,14 @@ pub(crate) struct Args { #[arg(short, long)] input: PathBuf, - /// Output CSV file path + /// Output directory #[arg(short, long)] output: PathBuf, + /// Name of the output file + #[arg(short, long, default_value_t = String::from("sasa"))] + name: String, + /// Output file type #[arg(short = 't', long, default_value_t = DataFrameFileType::Csv)] output_format: DataFrameFileType, @@ -67,7 +71,7 @@ pub(crate) fn run(args: &Args) { let output_path = Path::new(&args.output).canonicalize().unwrap(); let _ = std::fs::create_dir_all(output_path.clone()); let output_file = match output_path.is_dir() { - true => output_path.join("sasa.csv"), + true => output_path.join(args.name.clone()), false => output_path, } .with_extension(args.output_format.to_string()); From 7121b575698f4a9411f49a7783cb1921df910ec3 Mon Sep 17 00:00:00 2001 From: y1zhou Date: Thu, 14 Nov 2024 17:45:32 +0800 Subject: [PATCH 08/12] feat(cli): modify logging levels to be less verbose by default --- src/cli/contacts.rs | 11 +++++------ src/cli/sasa.rs | 9 ++++----- src/utils.rs | 8 ++------ 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/cli/contacts.rs b/src/cli/contacts.rs index 7d746d4..cacf883 100644 --- a/src/cli/contacts.rs +++ b/src/cli/contacts.rs @@ -95,11 +95,8 @@ pub(crate) fn run(args: &Args) { .join(args.name.clone()) .with_extension(args.output_format.to_string()); - let output_file_str = output_file.to_str().unwrap(); - debug!("Results will be saved to {output_file_str}"); - // Save results and log the identified interactions - info!( + debug!( "Found {} atom-atom contacts\n{}", df_atomic.shape().0, df_atomic @@ -113,7 +110,7 @@ pub(crate) fn run(args: &Args) { if df_clash.height() > 0 { warn!("Found {} steric clashes\n{}", df_clash.shape().0, df_clash); } - info!("Found {} ring contacts\n{}", df_ring.shape().0, df_ring); + debug!("Found {} ring contacts\n{}", df_ring.shape().0, df_ring); // Concate dataframes for saving to CSV let mut df_contacts = concat([df_atomic.lazy(), df_ring.lazy()], UnionArgs::default()) @@ -122,7 +119,9 @@ pub(crate) fn run(args: &Args) { .unwrap(); // Save res to CSV files - write_df_to_file(&mut df_contacts, output_file, args.output_format); + write_df_to_file(&mut df_contacts, &output_file, args.output_format); + let output_file_str = output_file.to_str().unwrap(); + info!("Results saved to {output_file_str}"); } pub fn get_contacts<'a>( diff --git a/src/cli/sasa.rs b/src/cli/sasa.rs index 1b74eb3..39509d9 100644 --- a/src/cli/sasa.rs +++ b/src/cli/sasa.rs @@ -76,20 +76,19 @@ pub(crate) fn run(args: &Args) { } .with_extension(args.output_format.to_string()); - let output_file_str = output_file.to_str().unwrap(); - debug!("Results will be saved to {output_file_str}"); - // Save results and log the identified SASA let non_zero_sasa_mask = df_sasa.column("sasa").unwrap().not_equal(0.0).unwrap(); let df_sasa_nonzero = df_sasa.filter(&non_zero_sasa_mask).unwrap(); - info!( + debug!( "Found {} atoms with non-zero SASA\n{}", df_sasa_nonzero.shape().0, df_sasa_nonzero ); // Save res to CSV files - write_df_to_file(&mut df_sasa, output_file, args.output_format); + write_df_to_file(&mut df_sasa, &output_file, args.output_format); + let output_file_str = output_file.to_str().unwrap(); + info!("Results saved to {output_file_str}"); } pub fn get_atom_sasa(pdb: &PDB, probe_radius: f32, n_points: usize) -> DataFrame { diff --git a/src/utils.rs b/src/utils.rs index ea4fa2f..ed6b277 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,7 +1,7 @@ use crate::residues::ResidueExt; use pdbtbx::*; use polars::prelude::*; -use std::{collections::HashSet, path::PathBuf}; +use std::{collections::HashSet, path::Path}; /// Open an atomic data file with [`pdbtbx::open`] and remove non-protein residues. pub fn load_model(input_file: &String) -> (PDB, Vec) { @@ -66,11 +66,7 @@ pub fn parse_groups( } /// Write a DataFrame to a CSV file -pub(crate) fn write_df_to_file( - df: &mut DataFrame, - file_path: PathBuf, - file_type: DataFrameFileType, -) { +pub(crate) fn write_df_to_file(df: &mut DataFrame, file_path: &Path, file_type: DataFrameFileType) { let file_suffix = file_type.to_string(); let mut file = std::fs::File::create(file_path.with_extension(file_suffix)).unwrap(); match file_type { From 58a2f7194ba5f75a2d523252b59b91a32e84b476 Mon Sep 17 00:00:00 2001 From: y1zhou Date: Thu, 14 Nov 2024 17:47:25 +0800 Subject: [PATCH 09/12] fix(ring): skip ring calculation if atoms are missing --- src/cli/contacts.rs | 7 +++- src/interactions/complex.rs | 70 ++++++++++++++++++++++--------------- 2 files changed, 47 insertions(+), 30 deletions(-) diff --git a/src/cli/contacts.rs b/src/cli/contacts.rs index cacf883..b712b98 100644 --- a/src/cli/contacts.rs +++ b/src/cli/contacts.rs @@ -130,7 +130,12 @@ pub fn get_contacts<'a>( vdw_comp: f64, dist_cutoff: f64, ) -> (DataFrame, DataFrame, InteractionComplex<'a>) { - let i_complex = InteractionComplex::new(pdb, groups, vdw_comp, dist_cutoff); + let (i_complex, build_ring_err) = + InteractionComplex::new(pdb, groups, vdw_comp, dist_cutoff).unwrap(); + + if !build_ring_err.is_empty() { + build_ring_err.iter().for_each(|e| warn!("{e}")); + } // Find interactions let atomic_contacts = i_complex.get_atomic_contacts(); diff --git a/src/interactions/complex.rs b/src/interactions/complex.rs index 4000255..c1c3d89 100644 --- a/src/interactions/complex.rs +++ b/src/interactions/complex.rs @@ -11,6 +11,8 @@ use pdbtbx::*; use rayon::prelude::*; use std::collections::{HashMap, HashSet}; +type RingPositionResult<'a> = Result<(HashMap, Ring>, Vec), Vec>; + /// The workhorse struct for identifying interactions in the model pub struct InteractionComplex<'a> { /// All information present in the atomic model @@ -36,7 +38,7 @@ impl<'a> InteractionComplex<'a> { groups: &'a str, vdw_comp_factor: f64, interacting_threshold: f64, - ) -> Self { + ) -> Result<(Self, Vec), Vec> { // Parse all chains and input chain groups let all_chains: HashSet = model.par_chains().map(|c| c.id().to_string()).collect(); let (ligand, receptor) = parse_groups(&all_chains, groups); @@ -45,17 +47,20 @@ impl<'a> InteractionComplex<'a> { let res2idx = build_residue_index(model); // Build a mapping of ring residue names to ring centers and normals - let rings = build_ring_positions(model); - - Self { - model, - ligand, - receptor, - vdw_comp_factor, - interacting_threshold, - res2idx, - rings, - } + let (rings, ring_err) = build_ring_positions(model).expect("Error building ring positions"); + + Ok(( + Self { + model, + ligand, + receptor, + vdw_comp_factor, + interacting_threshold, + res2idx, + rings, + }, + ring_err, + )) } fn is_neighboring_hierarchy( @@ -315,22 +320,29 @@ fn build_residue_index(model: &PDB) -> HashMap { .collect::>() } -fn build_ring_positions(model: &PDB) -> HashMap { +fn build_ring_positions(model: &PDB) -> RingPositionResult { let ring_res = HashSet::from(["HIS", "PHE", "TYR", "TRP"]); - model - .chains() - .flat_map(|c| { - c.residues() - .filter(|r| ring_res.contains(r.name().unwrap())) - .map(|r| { - ( - ResidueId::from_residue(r, c.id()), - match r.ring_center_and_normal() { - Some(ring) => ring, - None => panic!("Failed to calculate ring position for residue {:?}", r), - }, - ) - }) - }) - .collect() + let mut ring_positions = HashMap::new(); + let mut errors = Vec::new(); + + for c in model.chains() { + for r in c + .residues() + .filter(|r| ring_res.contains(r.name().unwrap())) + { + let res_id = ResidueId::from_residue(r, c.id()); + match r.ring_center_and_normal() { + Some(ring) => { + ring_positions.insert(res_id, ring); + } + None => { + errors.push(format!("Failed to calculate ring position for {:?}", r)); + } + } + } + } + if ring_positions.is_empty() { + return Err(errors); + } + Ok((ring_positions, errors)) } From d8b32392182ffe62ee98fb7d0aecde867bf63bea Mon Sep 17 00:00:00 2001 From: y1zhou Date: Fri, 14 Feb 2025 17:08:10 +0800 Subject: [PATCH 10/12] fix(parser): correctly parse residue insertion codes and conformer altloc positions --- src/cli/contacts.rs | 4 +++ src/interactions/complex.rs | 65 +++++++++++++++++++++++++++++-------- src/interactions/structs.rs | 8 ++++- src/residues.rs | 45 ++++++++++++++++++------- 4 files changed, 95 insertions(+), 27 deletions(-) diff --git a/src/cli/contacts.rs b/src/cli/contacts.rs index b712b98..3ec0aa1 100644 --- a/src/cli/contacts.rs +++ b/src/cli/contacts.rs @@ -150,9 +150,11 @@ pub fn get_contacts<'a>( [ "from_chain", "from_resi", + "from_insertion", "from_altloc", "to_chain", "to_resi", + "to_insertion", "to_altloc", ], Default::default(), @@ -169,12 +171,14 @@ fn results_to_df(res: &[ResultEntry]) -> DataFrame { "from_chain" => res.iter().map(|x| x.ligand.chain.to_owned()).collect::>(), "from_resn" => res.iter().map(|x| x.ligand.resn.to_owned()).collect::>(), "from_resi" => res.iter().map(|x| x.ligand.resi as i32).collect::>(), + "from_insertion" => res.iter().map(|x| x.ligand.insertion.to_owned()).collect::>(), "from_altloc" => res.iter().map(|x| x.ligand.altloc.to_owned()).collect::>(), "from_atomn" => res.iter().map(|x| x.ligand.atomn.to_owned()).collect::>(), "from_atomi" => res.iter().map(|x| x.ligand.atomi as i32).collect::>(), "to_chain" => res.iter().map(|x| x.receptor.chain.to_owned()).collect::>(), "to_resn" => res.iter().map(|x| x.receptor.resn.to_owned()).collect::>(), "to_resi" => res.iter().map(|x| x.receptor.resi as i32).collect::>(), + "to_insertion" => res.iter().map(|x| x.receptor.insertion.to_owned()).collect::>(), "to_altloc" => res.iter().map(|x| x.receptor.altloc.to_owned()).collect::>(), "to_atomn" => res.iter().map(|x| x.receptor.atomn.to_owned()).collect::>(), "to_atomi" => res.iter().map(|x| x.receptor.atomi as i32).collect::>(), diff --git a/src/interactions/complex.rs b/src/interactions/complex.rs index c1c3d89..cbdadb1 100644 --- a/src/interactions/complex.rs +++ b/src/interactions/complex.rs @@ -248,7 +248,15 @@ impl<'a> Interactions for InteractionComplex<'a> { let dist = point_ring_dist(ring, &y.atom().pos()); let cation_pi_contacts = find_cation_pi(ring, y).map(|intxn| ResultEntry { interaction: intxn, - ligand: InteractingEntity::new(k.chain, k.resi, k.altloc, k.resn, "Ring", 0), + ligand: InteractingEntity::new( + k.chain, + k.resi, + k.insertion, + k.altloc, + k.resn, + "Ring", + 0, + ), receptor: InteractingEntity::from_hier(y), distance: dist, }); @@ -289,10 +297,22 @@ impl<'a> Interactions for InteractionComplex<'a> { let pi_pi_contacts = find_pi_pi(ring1, ring2).map(|intxn| ResultEntry { interaction: intxn, ligand: InteractingEntity::new( - k1.chain, k1.resi, k1.altloc, k1.resn, "Ring", 0, + k1.chain, + k1.resi, + k1.insertion, + k1.altloc, + k1.resn, + "Ring", + 0, ), receptor: InteractingEntity::new( - k2.chain, k2.resi, k2.altloc, k2.resn, "Ring", 0, + k2.chain, + k2.resi, + k2.insertion, + k2.altloc, + k2.resn, + "Ring", + 0, ), distance: dist, }); @@ -311,10 +331,21 @@ fn build_residue_index(model: &PDB) -> HashMap { model .chains() .flat_map(|c| { - c.residues().enumerate().map(move |(i, residue)| { - let res_id = ResidueId::from_residue(residue, c.id()); - - (res_id, i) + c.residues().enumerate().flat_map(move |(i, residue)| { + // All conformers in the same residue should share the same index + let (resi, insertion) = residue.id(); + let insertion = insertion.unwrap_or(""); + let resn = residue.name().unwrap_or(""); + + residue + .conformers() + .map(move |conformer| { + let res_id = + ResidueId::from_conformer(conformer, c.id(), resi, insertion, resn); + + (res_id, i) + }) + .collect::>() }) }) .collect::>() @@ -330,13 +361,19 @@ fn build_ring_positions(model: &PDB) -> RingPositionResult { .residues() .filter(|r| ring_res.contains(r.name().unwrap())) { - let res_id = ResidueId::from_residue(r, c.id()); - match r.ring_center_and_normal() { - Some(ring) => { - ring_positions.insert(res_id, ring); - } - None => { - errors.push(format!("Failed to calculate ring position for {:?}", r)); + let (resi, insertion_code) = r.id(); + let insertion_code = insertion_code.unwrap_or(""); + let resn = r.name().unwrap_or(""); + for conformer in r.conformers() { + let res_id = + ResidueId::from_conformer(conformer, c.id(), resi, insertion_code, resn); + match r.ring_center_and_normal(None) { + Some(ring) => { + ring_positions.insert(res_id, ring); + } + None => { + errors.push(format!("Failed to calculate ring position for {:?}", r)); + } } } } diff --git a/src/interactions/structs.rs b/src/interactions/structs.rs index 38986e1..45da7bb 100644 --- a/src/interactions/structs.rs +++ b/src/interactions/structs.rs @@ -58,6 +58,8 @@ pub struct InteractingEntity { pub resn: String, /// Residue index pub resi: isize, + /// residue insertion code + pub insertion: String, /// Alternate location identifier pub altloc: String, /// Atom name @@ -83,6 +85,7 @@ impl InteractingEntity { pub fn new( chain: &str, resi: isize, + insertion: &str, altloc: &str, resn: &str, atomn: &str, @@ -91,6 +94,7 @@ impl InteractingEntity { Self { chain: chain.to_string(), resi, + insertion: insertion.to_string(), altloc: altloc.to_string(), resn: resn.to_string(), atomn: atomn.to_string(), @@ -102,6 +106,7 @@ impl InteractingEntity { Self::new( hierarchy.chain().id(), hierarchy.residue().serial_number(), + hierarchy.residue().insertion_code().unwrap_or(""), hierarchy.conformer().alternative_location().unwrap_or(""), hierarchy.residue().name().unwrap(), hierarchy.atom().name(), @@ -114,10 +119,11 @@ impl fmt::Display for InteractingEntity { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "Chain {chain}, Residue {resn} {resi}{altloc}, Atom {atom_name} {atom_idx}", + "Chain {chain}, Residue {resn} {resi}{insertion} {altloc}, Atom {atom_name} {atom_idx}", chain = self.chain, resn = self.resn, resi = self.resi, + insertion = self.insertion, altloc = self.altloc, atom_name = self.atomn, atom_idx = self.atomi diff --git a/src/residues.rs b/src/residues.rs index 59f3e21..4118c7e 100644 --- a/src/residues.rs +++ b/src/residues.rs @@ -11,6 +11,8 @@ pub struct ResidueId<'a> { pub resi: isize, /// Alternate location identifier pub altloc: &'a str, + /// Residue insertion code + pub insertion: &'a str, /// Residue name pub resn: &'a str, } @@ -22,11 +24,18 @@ pub struct Ring { } impl<'a> ResidueId<'a> { - pub fn new(chain: &'a str, resi: isize, altloc: &'a str, resn: &'a str) -> Self { + pub fn new( + chain: &'a str, + resi: isize, + altloc: &'a str, + insertion: &'a str, + resn: &'a str, + ) -> Self { Self { chain, resi, altloc, + insertion, resn, } } @@ -34,20 +43,26 @@ impl<'a> ResidueId<'a> { /// Helper function to convert an [`pdbtbx::AtomConformerResidueChainModel`] to a residue identifier pub fn from_hier(hier: &'a AtomConformerResidueChainModel) -> Self { let (resi, insertion) = hier.residue().id(); - let altloc = insertion.unwrap_or(""); + let altloc = hier.conformer().alternative_location(); Self::new( hier.chain().id(), resi, - altloc, + altloc.unwrap_or(""), + insertion.unwrap_or(""), hier.residue().name().unwrap_or(""), ) } - /// Helper function to convert an [`pdbtbx::Residue`] to a residue identifier - pub fn from_residue(residue: &'a Residue, chain_id: &'a str) -> Self { - let (resi, insertion) = residue.id(); - let altloc = insertion.unwrap_or(""); - Self::new(chain_id, resi, altloc, residue.name().unwrap_or("")) + /// Helper function to convert an [`pdbtbx::Conformer`] to a residue identifier + pub fn from_conformer( + conformer: &'a Conformer, + chain_id: &'a str, + resi: isize, + insertion: &'a str, + resn: &'a str, + ) -> Self { + let altloc = conformer.alternative_location(); + Self::new(chain_id, resi, altloc.unwrap_or(""), insertion, resn) } } @@ -56,8 +71,13 @@ pub trait ResidueExt { fn resn(&self) -> Option<&str>; /// Return the atoms in the aromatic ring of the residue. fn ring_atoms(&self) -> Vec<&Atom>; - /// Return the center and normal of the aromatic ring of the residue. - fn ring_center_and_normal(&self) -> Option; + + // TODO: Implement this + /// Return the atoms that form a plane in the side chain. + // fn sc_plane_atoms(&self) -> Vec<&Atom>; + + /// Return the center and normal of the given atoms. + fn ring_center_and_normal(&self, ring_atoms: Option>) -> Option; } impl ResidueExt for Residue { @@ -118,8 +138,9 @@ impl ResidueExt for Residue { } } - fn ring_center_and_normal(&self) -> Option { - let ring_atoms = self.ring_atoms(); + fn ring_center_and_normal(&self, ring_atoms: Option>) -> Option { + let ring_atoms = ring_atoms.unwrap_or(self.ring_atoms()); + if ring_atoms.is_empty() { return None; } From 4f7c13f72875506ce3e528d82f930e54be848607 Mon Sep 17 00:00:00 2001 From: y1zhou Date: Mon, 17 Feb 2025 10:54:39 +0800 Subject: [PATCH 11/12] build(cargo): update dependencies --- Cargo.toml | 25 +++++++++++++------------ src/cli/sasa.rs | 8 +++++++- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2d4f4b6..5d8e3a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "arpeggia" -version = "0.3.2" +version = "0.4.0" description = "A port of the Arpeggio library for Rust" edition = "2021" authors = ["Yi Zhou "] @@ -8,15 +8,16 @@ authors = ["Yi Zhou "] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -clap = { version = "4.5.4", features = ["derive"] } -nalgebra = "0.32.5" -pdbtbx = { version = "0.11.0", features = [ - "rayon", - "rstar", -], git = "https://github.com/douweschulte/pdbtbx", rev = "1298c60e8573efbc3f7c77a9a2d6f0656fa8c638" } -polars = { version = "0.43.1", features = ["lazy", "parquet", "json"] } +clap = { version = "4.5.29", features = ["derive"] } +nalgebra = "0.33.2" +# pdbtbx = { version = "0.11.0", features = [ +# "rayon", +# "rstar", +# ], git = "https://github.com/douweschulte/pdbtbx", rev = "1298c60e8573efbc3f7c77a9a2d6f0656fa8c638" } +pdbtbx = { version = "0.12.0", features = ["rayon", "rstar"] } +polars = { version = "0.46.0", features = ["lazy", "parquet", "json"] } rayon = "1.10.0" -rstar = "0.12.0" -rust-sasa = "0.2.2" -tracing = "0.1.40" -tracing-subscriber = "0.3.18" +rstar = "0.12.2" +rust-sasa = "0.2.4" +tracing = "0.1.41" +tracing-subscriber = "0.3.19" diff --git a/src/cli/sasa.rs b/src/cli/sasa.rs index 39509d9..3958746 100644 --- a/src/cli/sasa.rs +++ b/src/cli/sasa.rs @@ -77,7 +77,12 @@ pub(crate) fn run(args: &Args) { .with_extension(args.output_format.to_string()); // Save results and log the identified SASA - let non_zero_sasa_mask = df_sasa.column("sasa").unwrap().not_equal(0.0).unwrap(); + let non_zero_sasa_mask = df_sasa + .column("sasa") + .unwrap() + .as_materialized_series() + .not_equal(0.0) + .unwrap(); let df_sasa_nonzero = df_sasa.filter(&non_zero_sasa_mask).unwrap(); debug!( "Found {} atoms with non-zero SASA\n{}", @@ -143,6 +148,7 @@ pub fn get_atom_sasa(pdb: &PDB, probe_radius: f32, n_points: usize) -> DataFrame ["atomi"], ["atomi"], JoinArgs::new(JoinType::Inner), + None, ) .unwrap() .sort(["chain", "resi", "altloc", "atomi"], Default::default()) From d910d0172ad4c77959773e7b6db9ad617398cc99 Mon Sep 17 00:00:00 2001 From: y1zhou Date: Mon, 17 Feb 2025 10:56:37 +0800 Subject: [PATCH 12/12] docs: changelog for v0.4.0 release --- CHANGELOG.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e89028..024ac86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,19 +5,26 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.3.2] +## [0.4.0] - 2025-02-17 ### Added - Detection of repulsion between like charges +- Support of parquet, json, and ndjson output formats +- Added a `-name` flag in the CLI to rename the output file of `contacts` and `sasa` commands ### Fixed +- Checks for polar contacts were skipped when hydrogen bond criteria are not satisfied - Better error messages when rings have missing atoms for finding the center and normal vector +- Nomenclature mix of residue insertion codes and alternative locations; the two are now stored under separate columns (`*_insertion` and `*_altloc`) in the output files ### Changed - Distance cutoff for T-shaped Pi-stacking lowered from 6Å to 5Å +- Added hydrogen bond distance check to better differentiate Hbonds and polar contacts +- Performance/memory footprint improvement by switching from 64-bit numbers to 32-bit +- Logging is now less verbose ## [0.3.1] @@ -71,8 +78,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Initial release - Detection of common protein-protein interactions in a PDB or mmCIF file -[unreleased]: https://github.com/y1zhou/arpeggia/compare/v0.3.2...HEAD -[0.3.1]: https://github.com/y1zhou/arpeggia/releases/tag/v0.3.2 +[unreleased]: https://github.com/y1zhou/arpeggia/compare/v0.4.0...HEAD +[0.4.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.4.0 [0.3.1]: https://github.com/y1zhou/arpeggia/releases/tag/v0.3.1 [0.3.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.3.0 [0.2.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.2.0