Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support ionic repulsion, parquet output, rename out file, and lots of bugfixes #10

Merged
merged 12 commits into from
Feb 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,26 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
## [0.4.0] - 2025-02-17

### Added

- PLACEHOLDER
- Detection of repulsion between like charges
- Support of parquet, json, and ndjson output formats
- Added a `-name` flag in the CLI to rename the output file of `contacts` and `sasa` commands

### Fixed

- PLACEHOLDER
- Checks for polar contacts were skipped when hydrogen bond criteria are not satisfied
- Better error messages when rings have missing atoms for finding the center and normal vector
- Nomenclature mix of residue insertion codes and alternative locations; the two are now stored under separate columns (`*_insertion` and `*_altloc`) in the output files

### Changed

- PLACEHOLDER

### Removed

- PLACEHOLDER
- Distance cutoff for T-shaped Pi-stacking lowered from 6Å to 5Å
- Added hydrogen bond distance check to better differentiate Hbonds and polar contacts
- Performance/memory footprint improvement by switching from 64-bit numbers to 32-bit
- Logging is now less verbose


## [0.3.1]
Expand Down Expand Up @@ -75,7 +78,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Initial release
- Detection of common protein-protein interactions in a PDB or mmCIF file

[unreleased]: https://github.com/y1zhou/arpeggia/compare/v0.3.1...HEAD
[unreleased]: https://github.com/y1zhou/arpeggia/compare/v0.4.0...HEAD
[0.4.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.4.0
[0.3.1]: https://github.com/y1zhou/arpeggia/releases/tag/v0.3.1
[0.3.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.3.0
[0.2.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.2.0
Expand Down
22 changes: 13 additions & 9 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
[package]
name = "arpeggia"
version = "0.3.1"
version = "0.4.0"
description = "A port of the Arpeggio library for Rust"
edition = "2021"
authors = ["Yi Zhou <[email protected]>"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
clap = { version = "4.5.4", features = ["derive"] }
nalgebra = "0.32.5"
pdbtbx = { version = "0.11.0", features = ["rayon", "rstar"], git = "https://github.com/douweschulte/pdbtbx", rev = "1298c60e8573efbc3f7c77a9a2d6f0656fa8c638" }
polars = { version = "0.39.2", features = ["lazy"] }
clap = { version = "4.5.29", features = ["derive"] }
nalgebra = "0.33.2"
# pdbtbx = { version = "0.11.0", features = [
# "rayon",
# "rstar",
# ], git = "https://github.com/douweschulte/pdbtbx", rev = "1298c60e8573efbc3f7c77a9a2d6f0656fa8c638" }
pdbtbx = { version = "0.12.0", features = ["rayon", "rstar"] }
polars = { version = "0.46.0", features = ["lazy", "parquet", "json"] }
rayon = "1.10.0"
rstar = "0.12.0"
rust-sasa = "0.2.2"
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
rstar = "0.12.2"
rust-sasa = "0.2.4"
tracing = "0.1.41"
tracing-subscriber = "0.3.19"
46 changes: 32 additions & 14 deletions src/cli/contacts.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::interactions::{InteractionComplex, Interactions, ResultEntry};
use crate::utils::{load_model, write_df_to_csv};
use crate::utils::{load_model, write_df_to_file, DataFrameFileType};
use clap::Parser;
use pdbtbx::*;
use polars::prelude::*;
Expand All @@ -23,6 +23,14 @@ pub(crate) struct Args {
#[arg(short, long)]
groups: String,

/// Name of the output file
#[arg(short, long, default_value_t = String::from("contacts"))]
name: String,

/// Output file type
#[arg(short = 't', long, default_value_t = DataFrameFileType::Csv)]
output_format: DataFrameFileType,

/// Compensation factor for VdW radii dependent interaction types
#[arg(short = 'c', long = "vdw-comp", default_value_t = 0.1)]
vdw_comp: f64,
Expand Down Expand Up @@ -83,13 +91,12 @@ pub(crate) fn run(args: &Args) {
// Prepare output directory
let output_path = Path::new(&args.output).canonicalize().unwrap();
let _ = std::fs::create_dir_all(output_path.clone());
let output_file = output_path.join("contacts.csv");

let output_file_str = output_file.to_str().unwrap();
debug!("Results will be saved to {output_file_str}");
let output_file = output_path
.join(args.name.clone())
.with_extension(args.output_format.to_string());

// Save results and log the identified interactions
info!(
debug!(
"Found {} atom-atom contacts\n{}",
df_atomic.shape().0,
df_atomic
Expand All @@ -103,7 +110,7 @@ pub(crate) fn run(args: &Args) {
if df_clash.height() > 0 {
warn!("Found {} steric clashes\n{}", df_clash.shape().0, df_clash);
}
info!("Found {} ring contacts\n{}", df_ring.shape().0, df_ring);
debug!("Found {} ring contacts\n{}", df_ring.shape().0, df_ring);

// Concate dataframes for saving to CSV
let mut df_contacts = concat([df_atomic.lazy(), df_ring.lazy()], UnionArgs::default())
Expand All @@ -112,7 +119,9 @@ pub(crate) fn run(args: &Args) {
.unwrap();

// Save res to CSV files
write_df_to_csv(&mut df_contacts, output_file);
write_df_to_file(&mut df_contacts, &output_file, args.output_format);
let output_file_str = output_file.to_str().unwrap();
info!("Results saved to {output_file_str}");
}

pub fn get_contacts<'a>(
Expand All @@ -121,7 +130,12 @@ pub fn get_contacts<'a>(
vdw_comp: f64,
dist_cutoff: f64,
) -> (DataFrame, DataFrame, InteractionComplex<'a>) {
let i_complex = InteractionComplex::new(pdb, groups, vdw_comp, dist_cutoff);
let (i_complex, build_ring_err) =
InteractionComplex::new(pdb, groups, vdw_comp, dist_cutoff).unwrap();

if !build_ring_err.is_empty() {
build_ring_err.iter().for_each(|e| warn!("{e}"));
}

// Find interactions
let atomic_contacts = i_complex.get_atomic_contacts();
Expand All @@ -136,9 +150,11 @@ pub fn get_contacts<'a>(
[
"from_chain",
"from_resi",
"from_insertion",
"from_altloc",
"to_chain",
"to_resi",
"to_insertion",
"to_altloc",
],
Default::default(),
Expand All @@ -151,19 +167,21 @@ pub fn get_contacts<'a>(
fn results_to_df(res: &[ResultEntry]) -> DataFrame {
df!(
"interaction" => res.iter().map(|x| x.interaction.to_string()).collect::<Vec<String>>(),
"distance" => res.iter().map(|x| x.distance).collect::<Vec<f64>>(),
"distance" => res.iter().map(|x| x.distance as f32).collect::<Vec<f32>>(),
"from_chain" => res.iter().map(|x| x.ligand.chain.to_owned()).collect::<Vec<String>>(),
"from_resn" => res.iter().map(|x| x.ligand.resn.to_owned()).collect::<Vec<String>>(),
"from_resi" => res.iter().map(|x| x.ligand.resi as i64).collect::<Vec<i64>>(),
"from_resi" => res.iter().map(|x| x.ligand.resi as i32).collect::<Vec<i32>>(),
"from_insertion" => res.iter().map(|x| x.ligand.insertion.to_owned()).collect::<Vec<String>>(),
"from_altloc" => res.iter().map(|x| x.ligand.altloc.to_owned()).collect::<Vec<String>>(),
"from_atomn" => res.iter().map(|x| x.ligand.atomn.to_owned()).collect::<Vec<String>>(),
"from_atomi" => res.iter().map(|x| x.ligand.atomi as i64).collect::<Vec<i64>>(),
"from_atomi" => res.iter().map(|x| x.ligand.atomi as i32).collect::<Vec<i32>>(),
"to_chain" => res.iter().map(|x| x.receptor.chain.to_owned()).collect::<Vec<String>>(),
"to_resn" => res.iter().map(|x| x.receptor.resn.to_owned()).collect::<Vec<String>>(),
"to_resi" => res.iter().map(|x| x.receptor.resi as i64).collect::<Vec<i64>>(),
"to_resi" => res.iter().map(|x| x.receptor.resi as i32).collect::<Vec<i32>>(),
"to_insertion" => res.iter().map(|x| x.receptor.insertion.to_owned()).collect::<Vec<String>>(),
"to_altloc" => res.iter().map(|x| x.receptor.altloc.to_owned()).collect::<Vec<String>>(),
"to_atomn" => res.iter().map(|x| x.receptor.atomn.to_owned()).collect::<Vec<String>>(),
"to_atomi" => res.iter().map(|x| x.receptor.atomi as i64).collect::<Vec<i64>>(),
"to_atomi" => res.iter().map(|x| x.receptor.atomi as i32).collect::<Vec<i32>>(),
)
.unwrap()
}
40 changes: 27 additions & 13 deletions src/cli/sasa.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::interactions::InteractingEntity;
use crate::residues::ResidueExt;
use crate::utils::{load_model, write_df_to_csv};
use crate::utils::{load_model, write_df_to_file, DataFrameFileType};
use clap::Parser;
use pdbtbx::*;
use polars::prelude::*;
Expand All @@ -16,10 +16,18 @@ pub(crate) struct Args {
#[arg(short, long)]
input: PathBuf,

/// Output CSV file path
/// Output directory
#[arg(short, long)]
output: PathBuf,

/// Name of the output file
#[arg(short, long, default_value_t = String::from("sasa"))]
name: String,

/// Output file type
#[arg(short = 't', long, default_value_t = DataFrameFileType::Csv)]
output_format: DataFrameFileType,

/// Probe radius r (smaller r detects more surface details and reports a larger surface)
#[arg(short = 'r', long = "probe-radius", default_value_t = 1.4)]
probe_radius: f32,
Expand Down Expand Up @@ -63,24 +71,29 @@ pub(crate) fn run(args: &Args) {
let output_path = Path::new(&args.output).canonicalize().unwrap();
let _ = std::fs::create_dir_all(output_path.clone());
let output_file = match output_path.is_dir() {
true => output_path.join("sasa.csv"),
true => output_path.join(args.name.clone()),
false => output_path,
};

let output_file_str = output_file.to_str().unwrap();
debug!("Results will be saved to {output_file_str}");
}
.with_extension(args.output_format.to_string());

// Save results and log the identified SASA
let non_zero_sasa_mask = df_sasa.column("sasa").unwrap().not_equal(0.0).unwrap();
let non_zero_sasa_mask = df_sasa
.column("sasa")
.unwrap()
.as_materialized_series()
.not_equal(0.0)
.unwrap();
let df_sasa_nonzero = df_sasa.filter(&non_zero_sasa_mask).unwrap();
info!(
debug!(
"Found {} atoms with non-zero SASA\n{}",
df_sasa_nonzero.shape().0,
df_sasa_nonzero
);

// Save res to CSV files
write_df_to_csv(&mut df_sasa, output_file);
write_df_to_file(&mut df_sasa, &output_file, args.output_format);
let output_file_str = output_file.to_str().unwrap();
info!("Results saved to {output_file_str}");
}

pub fn get_atom_sasa(pdb: &PDB, probe_radius: f32, n_points: usize) -> DataFrame {
Expand Down Expand Up @@ -118,15 +131,15 @@ pub fn get_atom_sasa(pdb: &PDB, probe_radius: f32, n_points: usize) -> DataFrame
let atom_annot_df = df!(
"chain" => atom_annotations.iter().map(|x| x.chain.to_owned()).collect::<Vec<String>>(),
"resn" => atom_annotations.iter().map(|x| x.resn.to_owned()).collect::<Vec<String>>(),
"resi" => atom_annotations.iter().map(|x| x.resi as i64).collect::<Vec<i64>>(),
"resi" => atom_annotations.iter().map(|x| x.resi as i32).collect::<Vec<i32>>(),
"altloc" => atom_annotations.iter().map(|x| x.altloc.to_owned()).collect::<Vec<String>>(),
"atomn" => atom_annotations.iter().map(|x| x.atomn.to_owned()).collect::<Vec<String>>(),
"atomi" => atom_annotations.iter().map(|x| x.atomi as i64).collect::<Vec<i64>>(),
"atomi" => atom_annotations.iter().map(|x| x.atomi as i32).collect::<Vec<i32>>(),
)
.unwrap();

df!(
"atomi" => atoms.iter().map(|x| x.id as i64).collect::<Vec<i64>>(),
"atomi" => atoms.iter().map(|x| x.id as i32).collect::<Vec<i32>>(),
"sasa" => atom_sasa
)
.unwrap()
Expand All @@ -135,6 +148,7 @@ pub fn get_atom_sasa(pdb: &PDB, probe_radius: f32, n_points: usize) -> DataFrame
["atomi"],
["atomi"],
JoinArgs::new(JoinType::Inner),
None,
)
.unwrap()
.sort(["chain", "resi", "altloc", "atomi"], Default::default())
Expand Down
7 changes: 2 additions & 5 deletions src/interactions/aromatic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,10 @@ pub fn find_cation_pi(ring: &Ring, entity: &AtomConformerResidueChainModel) -> O
let theta = point_ring_angle(ring, &atom_point);

if (theta <= CATION_PI_ANGLE_THRESHOLD) & (dist <= CATION_PI_DIST_THRESHOLD) {
Some(Interaction::CationPi)
} else {
None
return Some(Interaction::CationPi);
}
} else {
None
}
None
}

/// Identify pi-pi interactions using the classification by [Chakrabarti and Bhattacharyya (2007)](https://doi.org/10.1016/j.pbiomolbio.2007.03.016), Fig. 11.
Expand Down
Loading