Skip to content

Commit

Permalink
Hotfix: remove interactions between different models (#11)
Browse files Browse the repository at this point in the history
* fix(io): handle input/output paths and error messages more robustly
* fix(itxn): only consider interactions within the same model of the input file
* feat(cli): better help msgs and more sane defaults
* fix(io): correctly sort entries in the output of `contacts`
* docs: add changelog for hotfix version
  • Loading branch information
y1zhou authored Feb 19, 2025
1 parent c192255 commit 286262c
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 76 deletions.
15 changes: 14 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.4.1] - 2025-02-19

### Added

- Better error messages and flag documentations

### Fixed

- Create parent directories if the output directory does not exist
- Only interactions within the same model of the input file is considered
- Rows in the output of `contacts` are now sorted more naturally

## [0.4.0] - 2025-02-17

### Added
Expand Down Expand Up @@ -78,7 +90,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Initial release
- Detection of common protein-protein interactions in a PDB or mmCIF file

[unreleased]: https://github.com/y1zhou/arpeggia/compare/v0.4.0...HEAD
[unreleased]: https://github.com/y1zhou/arpeggia/compare/v0.4.1...HEAD
[0.4.1]: https://github.com/y1zhou/arpeggia/releases/tag/v0.4.1
[0.4.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.4.0
[0.3.1]: https://github.com/y1zhou/arpeggia/releases/tag/v0.3.1
[0.3.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.3.0
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "arpeggia"
version = "0.4.0"
version = "0.4.1"
description = "A port of the Arpeggio library for Rust"
edition = "2021"
authors = ["Yi Zhou <[email protected]>"]
Expand Down
59 changes: 36 additions & 23 deletions src/cli/contacts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ pub(crate) struct Args {

/// Group chains for interactions:
/// e.g. A,B/C,D
/// where chains A and B are the "ligand" and C and D are the "receptor"
#[arg(short, long)]
/// where chains A and B are the "ligand" and C and D are the "receptor".
/// Chains can exist on both sides, in which case intra-chain interactions will be calculated.
/// If only one group is provided, all remaining chains will be considered as the other group.
/// If no groups are provided ('/'), all inter- and intra-chain interactions will be calculated.
#[arg(short, long, default_value_t = String::from("contacts"))]
groups: String,

/// Name of the output file
Expand All @@ -36,11 +39,11 @@ pub(crate) struct Args {
vdw_comp: f64,

/// Distance cutoff when searching for neighboring atoms
#[arg(short, long, default_value_t = 4.5)]
#[arg(short, long, default_value_t = 6.5)]
dist_cutoff: f64,

/// Number of threads to use for parallel processing
#[arg(short = 'j', long = "num-threads", default_value_t = 0)]
/// Number of threads to use for parallel processing. One thread should be sufficient unless the system is very large
#[arg(short = 'j', long = "num-threads", default_value_t = 1)]
num_threads: usize,
}

Expand All @@ -55,7 +58,20 @@ pub(crate) fn run(args: &Args) {
debug!("Using {} thread(s)", rayon::current_num_threads());

// Make sure `input` exists
let input_path = Path::new(&args.input).canonicalize().unwrap();
let input_path = match Path::new(&args.input).canonicalize() {
Ok(path) => path,
Err(e) => {
error!("Failed to retrieve input file: {}", e);
return;
}
};
let output_path = match std::path::absolute(&args.output) {
Ok(path) => path,
Err(e) => {
error!("Failed to resolve the output directory: {}", e);
return;
}
};
let input_file: String = input_path.to_str().unwrap().parse().unwrap();

// Load file as complex structure
Expand Down Expand Up @@ -89,7 +105,6 @@ pub(crate) fn run(args: &Args) {
);

// Prepare output directory
let output_path = Path::new(&args.output).canonicalize().unwrap();
let _ = std::fs::create_dir_all(output_path.clone());
let output_file = output_path
.join(args.name.clone())
Expand All @@ -115,6 +130,18 @@ pub(crate) fn run(args: &Args) {
// Concate dataframes for saving to CSV
let mut df_contacts = concat([df_atomic.lazy(), df_ring.lazy()], UnionArgs::default())
.unwrap()
.sort(
[
"model",
"from_resi",
"from_altloc",
"from_atomi",
"to_resi",
"to_altloc",
"to_atomi",
],
Default::default(),
)
.collect()
.unwrap();

Expand Down Expand Up @@ -144,28 +171,14 @@ pub fn get_contacts<'a>(
let mut ring_contacts: Vec<ResultEntry> = Vec::new();
ring_contacts.extend(i_complex.get_ring_atom_contacts());
ring_contacts.extend(i_complex.get_ring_ring_contacts());
let df_ring = results_to_df(&ring_contacts)
// .drop_many(&["from_atomn", "from_atomi", "to_atomn", "to_atomi"])
.sort(
[
"from_chain",
"from_resi",
"from_insertion",
"from_altloc",
"to_chain",
"to_resi",
"to_insertion",
"to_altloc",
],
Default::default(),
)
.unwrap();
let df_ring = results_to_df(&ring_contacts);

(df_atomic, df_ring, i_complex)
}

fn results_to_df(res: &[ResultEntry]) -> DataFrame {
df!(
"model" => res.iter().map(|x| x.model as u32).collect::<Vec<u32>>(),
"interaction" => res.iter().map(|x| x.interaction.to_string()).collect::<Vec<String>>(),
"distance" => res.iter().map(|x| x.distance as f32).collect::<Vec<f32>>(),
"from_chain" => res.iter().map(|x| x.ligand.chain.to_owned()).collect::<Vec<String>>(),
Expand Down
97 changes: 63 additions & 34 deletions src/interactions/complex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ impl<'a> InteractionComplex<'a> {
}

fn is_neighboring_res_pair(&self, x: &ResidueId, y: &ResidueId) -> bool {
if x.chain != y.chain {
if (x.model != y.model) | (x.chain != y.chain) {
return false;
}
let x_idx = self.res2idx[x];
Expand All @@ -97,7 +97,7 @@ pub trait Interactions {
fn get_ring_ring_contacts(&self) -> Vec<ResultEntry>;
}

impl<'a> Interactions for InteractionComplex<'a> {
impl Interactions for InteractionComplex<'_> {
fn get_atomic_contacts(&self) -> Vec<ResultEntry> {
let tree = self.model.create_hierarchy_rtree();
let max_radius_squared = self.interacting_threshold * self.interacting_threshold;
Expand All @@ -114,6 +114,7 @@ impl<'a> Interactions for InteractionComplex<'a> {
})
.flat_map(|x| {
tree.locate_within_distance(x.atom().pos(), max_radius_squared)
.filter(|y| x.model().serial_number() == y.model().serial_number())
.filter(|y| {
self.receptor.contains(y.chain().id())
& (y.atom().element().unwrap() != &Element::H)
Expand All @@ -135,9 +136,11 @@ impl<'a> Interactions for InteractionComplex<'a> {
.par_iter()
.filter_map(|(e1, e2)| {
let mut atomic_contacts: Vec<ResultEntry> = vec![];
let model_id = e1.model().serial_number();

// Clashes and VdW contacts
let vdw = find_vdw_contact(e1, e2, self.vdw_comp_factor).map(|intxn| ResultEntry {
model: model_id,
interaction: intxn,
ligand: InteractingEntity::from_hier(e1),
receptor: InteractingEntity::from_hier(e2),
Expand All @@ -153,6 +156,7 @@ impl<'a> Interactions for InteractionComplex<'a> {
// Hydrogen bonds and polar contacts
let hbonds =
find_hydrogen_bond(e1, e2, self.vdw_comp_factor).map(|intxn| ResultEntry {
model: model_id,
interaction: intxn,
ligand: InteractingEntity::from_hier(e1),
receptor: InteractingEntity::from_hier(e2),
Expand All @@ -164,6 +168,7 @@ impl<'a> Interactions for InteractionComplex<'a> {
let weak_hbonds =
find_weak_hydrogen_bond(e1, e2, self.vdw_comp_factor).map(|intxn| {
ResultEntry {
model: model_id,
interaction: intxn,
ligand: InteractingEntity::from_hier(e1),
receptor: InteractingEntity::from_hier(e2),
Expand All @@ -174,6 +179,7 @@ impl<'a> Interactions for InteractionComplex<'a> {

// Ionic bonds
let ionic_bonds = find_ionic_bond(e1, e2).map(|intxn| ResultEntry {
model: model_id,
interaction: intxn,
ligand: InteractingEntity::from_hier(e1),
receptor: InteractingEntity::from_hier(e2),
Expand All @@ -183,6 +189,7 @@ impl<'a> Interactions for InteractionComplex<'a> {

// Charge-charge repulsions
let charge_repulsions = find_ionic_repulsion(e1, e2).map(|intxn| ResultEntry {
model: model_id,
interaction: intxn,
ligand: InteractingEntity::from_hier(e1),
receptor: InteractingEntity::from_hier(e2),
Expand All @@ -193,6 +200,7 @@ impl<'a> Interactions for InteractionComplex<'a> {
// Hydrophobic contacts
let hydrophobic_contacts =
find_hydrophobic_contact(e1, e2).map(|intxn| ResultEntry {
model: model_id,
interaction: intxn,
ligand: InteractingEntity::from_hier(e1),
receptor: InteractingEntity::from_hier(e2),
Expand All @@ -219,6 +227,7 @@ impl<'a> Interactions for InteractionComplex<'a> {
(v.center.x, v.center.y, v.center.z),
max_radius_squared,
)
.filter(|y| x.model == y.model().serial_number())
// Ring on ligand or receptor, and atom on the other side
.filter(|y| {
(self.ligand.contains(x.chain) & self.receptor.contains(y.chain().id()))
Expand Down Expand Up @@ -247,6 +256,7 @@ impl<'a> Interactions for InteractionComplex<'a> {
// Cation-pi interactions
let dist = point_ring_dist(ring, &y.atom().pos());
let cation_pi_contacts = find_cation_pi(ring, y).map(|intxn| ResultEntry {
model: k.model,
interaction: intxn,
ligand: InteractingEntity::new(
k.chain,
Expand Down Expand Up @@ -277,6 +287,7 @@ impl<'a> Interactions for InteractionComplex<'a> {
.flat_map(|(k1, ring1)| {
self.rings
.iter()
.filter(|(k2, _)| k1.model == k2.model)
.filter(
|(k2, _)| {
self.receptor.contains(k2.chain)
Expand All @@ -295,6 +306,7 @@ impl<'a> Interactions for InteractionComplex<'a> {
.filter_map(|(k1, ring1, k2, ring2)| {
let dist = (ring1.center - ring2.center).norm();
let pi_pi_contacts = find_pi_pi(ring1, ring2).map(|intxn| ResultEntry {
model: k1.model,
interaction: intxn,
ligand: InteractingEntity::new(
k1.chain,
Expand Down Expand Up @@ -326,26 +338,33 @@ impl<'a> Interactions for InteractionComplex<'a> {

/// Find the absolute index of a residue in each chain.
///
/// Returns two mappings, one from residue to index, and one from index to residue.
/// Returns a mapping from residue to index.
fn build_residue_index(model: &PDB) -> HashMap<ResidueId, usize> {
model
.chains()
.flat_map(|c| {
c.residues().enumerate().flat_map(move |(i, residue)| {
// All conformers in the same residue should share the same index
let (resi, insertion) = residue.id();
let insertion = insertion.unwrap_or("");
let resn = residue.name().unwrap_or("");

residue
.conformers()
.map(move |conformer| {
let res_id =
ResidueId::from_conformer(conformer, c.id(), resi, insertion, resn);
.models()
.flat_map(|m| {
let model_id = m.serial_number();
m.chains().flat_map(move |c| {
let chain_id = c.id();
c.residues().enumerate().flat_map(move |(i, residue)| {
// All conformers in the same residue should share the same index
let (resi, insertion) = residue.id();
let insertion = insertion.unwrap_or("");
let resn = residue.name().unwrap_or("");

residue.conformers().map(move |conformer| {
let res_id = ResidueId::new(
model_id,
chain_id,
resi,
insertion,
conformer.alternative_location().unwrap_or(""),
resn,
);

(res_id, i)
})
.collect::<Vec<_>>()
})
})
})
.collect::<HashMap<ResidueId, usize>>()
Expand All @@ -356,23 +375,33 @@ fn build_ring_positions(model: &PDB) -> RingPositionResult {
let mut ring_positions = HashMap::new();
let mut errors = Vec::new();

for c in model.chains() {
for r in c
.residues()
.filter(|r| ring_res.contains(r.name().unwrap()))
{
let (resi, insertion_code) = r.id();
let insertion_code = insertion_code.unwrap_or("");
let resn = r.name().unwrap_or("");
for conformer in r.conformers() {
let res_id =
ResidueId::from_conformer(conformer, c.id(), resi, insertion_code, resn);
match r.ring_center_and_normal(None) {
Some(ring) => {
ring_positions.insert(res_id, ring);
}
None => {
errors.push(format!("Failed to calculate ring position for {:?}", r));
for m in model.models() {
let model_id = m.serial_number();
for c in model.chains() {
let chain_id = c.id();
for r in c
.residues()
.filter(|r| ring_res.contains(r.name().unwrap()))
{
let (resi, insertion_code) = r.id();
let insertion_code = insertion_code.unwrap_or("");
let resn = r.name().unwrap_or("");
for conformer in r.conformers() {
let res_id = ResidueId::new(
model_id,
chain_id,
resi,
insertion_code,
conformer.alternative_location().unwrap_or(""),
resn,
);
match r.ring_center_and_normal(None) {
Some(ring) => {
ring_positions.insert(res_id, ring);
}
None => {
errors.push(format!("Failed to calculate ring position for {:?}", r));
}
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/interactions/structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ pub struct InteractingEntity {
/// Entry passed to the results.
#[derive(Debug, Clone)]
pub struct ResultEntry {
/// Model identifier
pub model: usize,
/// Interaction type
pub interaction: Interaction,
/// Ligand residue and atom
Expand Down
Loading

0 comments on commit 286262c

Please sign in to comment.