From 13f536b27e6fa006ed0b29843e165bab26622dd0 Mon Sep 17 00:00:00 2001 From: "Chapin E. Cavender" Date: Sat, 21 Mar 2020 00:19:18 -0400 Subject: [PATCH 01/41] Header for RnaSuite class --- src/RnaSuite.hpp | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 src/RnaSuite.hpp diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp new file mode 100644 index 000000000..dcb4d3178 --- /dev/null +++ b/src/RnaSuite.hpp @@ -0,0 +1,75 @@ +/* + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008, Tod D. Romo, Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#if !defined(LOOS_RNASUITE_HPP) +#define LOOS_RNASUITE_HPP + +#include +#include +#include +#include + +namespace loos { + + //! Class for assigning backbone suites to an RNA + /** + * This class acts on an AtomicGroup and assigns backbone suites (as + * defined in Richardson et al. (2008) RNA 14, 465-481) to any RNA residues + * present. It also calculates the "suiteness" score that describes how + * well the residue fits into its assigned suite. + */ + class RnaSuite { + public: + + RnaSuite(const AtomicGroup &group, const double &suiteness_tolerance); + + RnaSuite(const AtomicGroup &group); + + RnaSuite(); + + //! 
Method to extract RNA backbone atoms from an AtomicGroup + /** + * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', + * and O3') and splits them into AtomicGroups by residue id. + */ + void extractRnaBackboneAtoms(); + + //! Method to calculate backbone dihedrals for each RNA residue + /** + * This methods calculates the six RNA backbone dihedrals (i.e. alpha, + * beta, gamma, delta, epsilon, and zeta) for each residue. + */ + void calculateBackboneDihedrals(); + + private: + vector> alpha_atoms; + vector> beta_atoms; + vector> gamma_atoms; + vector> delta_atoms; + vector> epsilon_atoms; + vector> zeta_atoms; + double suiteness_tolerance; + + }; +} + +#endif + From fabe1779f2b85e08b47864cbf12f5c617aed1718 Mon Sep 17 00:00:00 2001 From: "Chapin E. Cavender" Date: Tue, 24 Mar 2020 19:02:53 -0400 Subject: [PATCH 02/41] Method to extract RNA backbone atoms; skeleton of tool --- .gitignore | 1 + Tools/SConscript | 2 +- Tools/rna_suites.cpp | 132 ++++++++++++++++++ src/RnaSuite.cpp | 309 +++++++++++++++++++++++++++++++++++++++++++ src/RnaSuite.hpp | 59 ++++++--- src/SConscript | 2 + src/loos.hpp | 1 + 7 files changed, 490 insertions(+), 16 deletions(-) create mode 100644 Tools/rna_suites.cpp create mode 100644 src/RnaSuite.cpp diff --git a/.gitignore b/.gitignore index 559fdd0c3..37a12c68a 100644 --- a/.gitignore +++ b/.gitignore @@ -150,6 +150,7 @@ Tools/subsetter Tools/xy_rdf Tools/model-select Tools/dihedrals +Tools/rna_suites Packages/Clustering/** !Packages/Clustering/*.cpp !Packages/Clustering/*.hpp diff --git a/Tools/SConscript b/Tools/SConscript index d7e6fd24d..bf4a08325 100644 --- a/Tools/SConscript +++ b/Tools/SConscript @@ -36,7 +36,7 @@ apps = apps + ' traj2pdb merge-traj center-molecule contact-time perturb-structu apps = apps + ' big-svd kurskew periodic_box area_per_lipid residue-contact-map' apps = apps + ' cross-dist fcontacts serialize-selection transition_contacts fixdcd smooth-traj membrane_map packing_score' apps = apps + ' mops 
dibmops xtcinfo model-meta-stats verap lipid_survival multi-rmsds rms-overlap' -apps = apps + ' dihedrals' +apps = apps + ' dihedrals rna_suites' list = [] diff --git a/Tools/rna_suites.cpp b/Tools/rna_suites.cpp new file mode 100644 index 000000000..0053e44bb --- /dev/null +++ b/Tools/rna_suites.cpp @@ -0,0 +1,132 @@ +/* + rna_suites.cpp + + Assigns backbone suites to RNAs based on backbone dihedrals + + Chapin E. Cavender 2020-03 +*/ + +/* + + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008-2020 Tod D. Romo & Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include + +using namespace std; +using namespace loos; + +namespace opts = loos::OptionsFramework; +namespace po = loos::OptionsFramework::po; + +string fullHelpMessage(void) { + + string full_help_message = +"\n" +" SYNOPSIS\n" +"\n" +" Assigns backbone suites to RNAs based on backbone dihedrals\n" +"\n" +" DESCRIPTION\n" +"\n" +" This tool\n" +"\n" +" EXAMPLES\n" +"\n" +" rna_suites\n" + ; + + return full_help_message; + +} + +class ToolOptions : public opts::OptionsPackage { + +public: + + ToolOptions() {} + + void addGeneric(po::options_description& o) { + + o.add_options() + ("suiteness_cutoff,c", + po::value(&suiteness_cutoff)->default_value(0.01), + "Cutoff for the suiteness score of non-outliers") + ; + + } + + string print() const { + + ostringstream oss; + oss << boost::format( + "suiteness_cutoff=%f" + ) % suiteness_cutoff; + return (oss.str()); + + } + + double suiteness_cutoff; + +}; // ToolOptions + +// Tool functions + +int main(int argc, char *argv[]) { + + // Get command-line input + string header = invocationHeader(argc, argv); + + // Set up tool options + opts::BasicOptions *bopts = new opts::BasicOptions(fullHelpMessage()); + opts::BasicSelection *sopts = new opts::BasicSelection("!hydrogen"); + opts::TrajectoryWithFrameIndices *tropts = + new opts::TrajectoryWithFrameIndices; + ToolOptions *topts = new ToolOptions; + + opts::AggregateOptions options; + options.add(bopts).add(sopts).add(tropts).add(topts); + if (!options.parse(argc, argv)) + exit(-1); + + // Assign tool options to variables + const double suiteness_cutoff = topts->suiteness_cutoff; + + // Print command-line input + cout << "# " << header << "\n"; + + // Do some error-checking on tool options + + // Build LOOS system and generate atom selection + AtomicGroup model = tropts->model; + pTraj traj = tropts->trajectory; + vector indices = tropts->frameList(); +// AtomicGroup rna_atoms = selectAtoms(model, topts->selection); + + // Number of frames in trajectory + const uint 
N_frame = indices.size(); + + // Create RNASuite object from RNA atoms + RnaSuite rna_suite = RnaSuite(model, suiteness_cutoff); + + // Print dihedrals + rna_suite.printBackboneAtoms(); + +} diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp new file mode 100644 index 000000000..1906f6c2b --- /dev/null +++ b/src/RnaSuite.cpp @@ -0,0 +1,309 @@ +/* + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008, Tod D. Romo, Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include + +using namespace std; + +namespace loos { + + // Constructors + + RnaSuite::RnaSuite(const AtomicGroup &group, + const double suiteness_cutoff_) { + + extractRnaBackboneAtoms(group); + suiteness_cutoff = suiteness_cutoff_; + + } + + RnaSuite::RnaSuite(const AtomicGroup &group) { + + extractRnaBackboneAtoms(group); + suiteness_cutoff = 0.01; + + } + + RnaSuite::RnaSuite() { + suiteness_cutoff = 0.01; + } + + // Methods + + void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { + + std::vector continuous_alpha_atoms; + std::vector continuous_beta_atoms; + std::vector continuous_gamma_atoms; + std::vector continuous_delta_atoms; + std::vector continuous_epsilon_atoms; + std::vector continuous_zeta_atoms; + AtomicGroup dihedral_atoms; + AtomicGroup residue_p; + AtomicGroup residue_o5p; + AtomicGroup residue_c5p; + AtomicGroup residue_c4p; + AtomicGroup residue_c3p; + AtomicGroup residue_o3p; + AtomicGroup prev_residue_c4p; + AtomicGroup prev_residue_c3p; + AtomicGroup prev_residue_o3p; + int current_resid = -2; + + // True if this is the initial residue in a continuous group + bool first_res = true; + + // Clear vector of vectors of AtomicGroups for each backbone dihedral + alpha_atoms.clear(); + beta_atoms.clear(); + gamma_atoms.clear(); + delta_atoms.clear(); + epsilon_atoms.clear(); + zeta_atoms.clear(); + + // Extract all RNA backbone atoms (P, O5', C5', C4', C3', and O3') into + // one AtomicGroup. 
Use raw string literal R"()" to avoid escaping " + AtomicGroup backbone = selectAtoms(group, + R"(name =~ "^(P|C[345]'|O[35]')$")"); + + // Split by resid and loop over residues + for (AtomicGroup residue : backbone.splitByResidue()) { + + // Select RNA backbone atoms from residue + residue_p = selectAtoms(residue, R"(name == "P")"); + residue_o5p = selectAtoms(residue, R"(name == "O5'")"); + residue_c5p = selectAtoms(residue, R"(name == "C5'")"); + residue_c4p = selectAtoms(residue, R"(name == "C4'")"); + residue_c3p = selectAtoms(residue, R"(name == "C3'")"); + residue_o3p = selectAtoms(residue, R"(name == "O3'")"); + + // If any atom besides P is missing, skip this residue and start a + // new continuous group + if (residue_o5p.size() != 1 || residue_c5p.size() != 1 || + residue_c4p.size() != 1 || residue_c3p.size() != 1 || + residue_o3p.size() != 1) { + + first_res = true; + continue; + + } + + // If the resid is not sequential, this is not a continuous group + if (residue_p.size() != 1 + || residue_p[0]->resid() != current_resid + 1) first_res = true; + + if (first_res) { + + first_res = false; + + // Record any previous continuous group + if (continuous_alpha_atoms.size() != 0) { + + alpha_atoms.push_back(continuous_alpha_atoms); + beta_atoms.push_back(continuous_beta_atoms); + gamma_atoms.push_back(continuous_gamma_atoms); + delta_atoms.push_back(continuous_delta_atoms); + epsilon_atoms.push_back(continuous_epsilon_atoms); + zeta_atoms.push_back(continuous_zeta_atoms); + + } + + // Clear vectors of AtomicGroups for this continuous groups + continuous_alpha_atoms.clear(); + continuous_beta_atoms.clear(); + continuous_gamma_atoms.clear(); + continuous_delta_atoms.clear(); + continuous_epsilon_atoms.clear(); + continuous_zeta_atoms.clear(); + + // Record delta for this initial residue + dihedral_atoms = residue_c5p; + dihedral_atoms.append(residue_c4p); + dihedral_atoms.append(residue_c3p); + dihedral_atoms.append(residue_o3p); + 
continuous_delta_atoms.push_back(dihedral_atoms); + + } else { + + // Record backbone dihedrals for the remainder of the suite, + // i.e. epsilon and zeta of the previous residue and alpha, + // beta, gamma, and delta of the current residue + dihedral_atoms = prev_residue_c4p; + dihedral_atoms.append(prev_residue_c3p); + dihedral_atoms.append(prev_residue_o3p); + dihedral_atoms.append(residue_p); + continuous_epsilon_atoms.push_back(dihedral_atoms); + dihedral_atoms = prev_residue_c3p; + dihedral_atoms.append(prev_residue_o3p); + dihedral_atoms.append(residue_p); + dihedral_atoms.append(residue_o5p); + continuous_zeta_atoms.push_back(dihedral_atoms); + dihedral_atoms = prev_residue_o3p; + dihedral_atoms.append(residue_p); + dihedral_atoms.append(residue_o5p); + dihedral_atoms.append(residue_c5p); + continuous_alpha_atoms.push_back(dihedral_atoms); + dihedral_atoms = residue_p; + dihedral_atoms.append(residue_o5p); + dihedral_atoms.append(residue_c5p); + dihedral_atoms.append(residue_c4p); + continuous_beta_atoms.push_back(dihedral_atoms); + dihedral_atoms = residue_o5p; + dihedral_atoms.append(residue_c5p); + dihedral_atoms.append(residue_c4p); + dihedral_atoms.append(residue_c3p); + continuous_gamma_atoms.push_back(dihedral_atoms); + dihedral_atoms = residue_c5p; + dihedral_atoms.append(residue_c4p); + dihedral_atoms.append(residue_c3p); + dihedral_atoms.append(residue_o3p); + continuous_delta_atoms.push_back(dihedral_atoms); + + } + + // Save C4', C3', and O3' for dihedrals in the next residue + prev_residue_c4p = residue_c4p; + prev_residue_c3p = residue_c3p; + prev_residue_o3p = residue_o3p; + + // Update resid + current_resid = residue_o5p[0]->resid(); + + } // loop over residues + + // Record any previous continuous group + if (continuous_alpha_atoms.size() != 0) { + + alpha_atoms.push_back(continuous_alpha_atoms); + beta_atoms.push_back(continuous_beta_atoms); + gamma_atoms.push_back(continuous_gamma_atoms); + delta_atoms.push_back(continuous_delta_atoms); + 
epsilon_atoms.push_back(continuous_epsilon_atoms); + zeta_atoms.push_back(continuous_zeta_atoms); + + } + + } // extractRnaBackboneAtoms() + + double RnaSuite::getSuitenessCutoff() const { + return suiteness_cutoff; + } // getSuitenessCutoff() + + void RnaSuite::printBackboneAtoms() const { + + uint continuous_counter; + uint residue_counter; + + cout << boost::format("Sizes %d %d %d %d %d %d\n") % alpha_atoms.size() + % beta_atoms.size() % gamma_atoms.size() % delta_atoms.size() + % epsilon_atoms.size() % zeta_atoms.size(); + + continuous_counter = 0; + for (std::vector continuous_atoms : alpha_atoms) { + continuous_counter++; + cout << boost::format("Alpha %d Size %d\n") % continuous_counter + % continuous_atoms.size(); + residue_counter = 0; + for (AtomicGroup residue_atoms : continuous_atoms) { + residue_counter++; + cout << boost::format("Alpha %d %d\n") % continuous_counter + % residue_counter; + cout << residue_atoms << endl; + } + } + + continuous_counter = 0; + for (std::vector continuous_atoms : beta_atoms) { + continuous_counter++; + cout << boost::format("Beta %d Size %d\n") % continuous_counter + % continuous_atoms.size(); + residue_counter = 0; + for (AtomicGroup residue_atoms : continuous_atoms) { + residue_counter++; + cout << boost::format("Beta %d %d\n") % continuous_counter + % residue_counter; + cout << residue_atoms << endl; + } + } + + continuous_counter = 0; + for (std::vector continuous_atoms : gamma_atoms) { + continuous_counter++; + cout << boost::format("Gamma %d Size %d\n") % continuous_counter + % continuous_atoms.size(); + residue_counter = 0; + for (AtomicGroup residue_atoms : continuous_atoms) { + residue_counter++; + cout << boost::format("Gamma %d %d\n") % continuous_counter + % residue_counter; + cout << residue_atoms << endl; + } + } + + continuous_counter = 0; + for (std::vector continuous_atoms : delta_atoms) { + continuous_counter++; + cout << boost::format("Delta %d Size %d\n") % continuous_counter + % 
continuous_atoms.size(); + residue_counter = 0; + for (AtomicGroup residue_atoms : continuous_atoms) { + residue_counter++; + cout << boost::format("Delta %d %d\n") % continuous_counter + % residue_counter; + cout << residue_atoms << endl; + } + } + + continuous_counter = 0; + for (std::vector continuous_atoms : epsilon_atoms) { + continuous_counter++; + cout << boost::format("Epsilon %d Size %d\n") % continuous_counter + % continuous_atoms.size(); + residue_counter = 0; + for (AtomicGroup residue_atoms : continuous_atoms) { + residue_counter++; + cout << boost::format("Epsilon %d %d\n") % continuous_counter + % residue_counter; + cout << residue_atoms << endl; + } + } + + continuous_counter = 0; + for (std::vector continuous_atoms : zeta_atoms) { + continuous_counter++; + cout << boost::format("Zeta %d Size %d\n") % continuous_counter + % continuous_atoms.size(); + residue_counter = 0; + for (AtomicGroup residue_atoms : continuous_atoms) { + residue_counter++; + cout << boost::format("Zeta %d %d\n") % continuous_counter + % residue_counter; + cout << residue_atoms << endl; + } + } + + } // printBackboneAtoms() + + void RnaSuite::setSuitenessCutoff(const double suiteness_cutoff_) { + suiteness_cutoff = suiteness_cutoff_; + } // setSuitenessCutoff() + +} diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index dcb4d3178..f6cb50bf8 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -23,9 +23,9 @@ #define LOOS_RNASUITE_HPP #include -#include #include -#include + +using namespace std; namespace loos { @@ -37,36 +37,65 @@ namespace loos { * well the residue fits into its assigned suite. */ class RnaSuite { + public: - RnaSuite(const AtomicGroup &group, const double &suiteness_tolerance); + RnaSuite(const AtomicGroup &group, const double suiteness_cutoff_); RnaSuite(const AtomicGroup &group); RnaSuite(); - //! Method to extract RNA backbone atoms from an AtomicGroup + //! Method to assign residues to backbone suites from Richardson et al. 
/** - * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', - * and O3') and splits them into AtomicGroups by residue id. + * This method assigns residues to one of the 46 backbone suites + * defined in Richardson et al. (2008) RNA 14, 465-481. The suite of a + * residue is defined from delta of the previous residue to delta of + * the current residue. */ - void extractRnaBackboneAtoms(); + void assignRichardsonSuites(); //! Method to calculate backbone dihedrals for each RNA residue /** - * This methods calculates the six RNA backbone dihedrals (i.e. alpha, + * This method calculates the six RNA backbone dihedrals (i.e. alpha, * beta, gamma, delta, epsilon, and zeta) for each residue. */ void calculateBackboneDihedrals(); + //! Method to extract RNA backbone atoms from an AtomicGroup + /** + * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', + * and O3') and splits them into AtomicGroups by residue id. + */ + void extractRnaBackboneAtoms(const AtomicGroup &group); + + //! Method to return the cutoff for the suiteness score of non-outliers + double getSuitenessCutoff() const; + + //! Method to print groups of backbone atoms for each dihedral + void printBackboneAtoms() const; + + //! 
Method to set the cutoff for the suiteness score of non-outliers + void setSuitenessCutoff(const double suiteness_cutoff_); + private: - vector> alpha_atoms; - vector> beta_atoms; - vector> gamma_atoms; - vector> delta_atoms; - vector> epsilon_atoms; - vector> zeta_atoms; - double suiteness_tolerance; + + std::vector> alpha_atoms; + std::vector> beta_atoms; + std::vector> gamma_atoms; + std::vector> delta_atoms; + std::vector> epsilon_atoms; + std::vector> zeta_atoms; + std::vector> alpha; + std::vector> beta; + std::vector> gamma; + std::vector> delta; + std::vector> epsilon; + std::vector> zeta; + std::vector suite_name_hemi5; + std::vector suite_name_hemi3; + std::vector suiteness; + double suiteness_cutoff; }; } diff --git a/src/SConscript b/src/SConscript index f3274eeab..5d1ae37cf 100644 --- a/src/SConscript +++ b/src/SConscript @@ -41,6 +41,7 @@ apps = apps + ' charmm.cpp AtomicNumberDeducer.cpp OptionsFramework.cpp revision apps = apps + ' utils_random.cpp utils_structural.cpp LineReader.cpp xtcwriter.cpp alignment.cpp MultiTraj.cpp' apps = apps + ' index_range_parser.cpp' apps = apps + ' Weights.cpp' +apps = apps + ' RnaSuite.cpp' if (env['HAS_NETCDF']): apps = apps + ' amber_netcdf.cpp' @@ -73,6 +74,7 @@ hdr = hdr + ' xdr.hpp xtc.hpp gro.hpp trr.hpp exceptions.hpp MatrixOps.hpp sorti hdr = hdr + ' Simplex.hpp charmm.hpp AtomicNumberDeducer.hpp OptionsFramework.hpp' hdr = hdr + ' utils_random.hpp utils_structural.hpp LineReader.hpp xtcwriter.hpp' hdr = hdr + ' trajwriter.hpp MultiTraj.hpp index_range_parser.hpp' +hdr = hdr + ' RnaSuite.hpp' if (env['HAS_NETCDF']): hdr = hdr + ' amber_netcdf.hpp' diff --git a/src/loos.hpp b/src/loos.hpp index df91f206e..8e00197a3 100644 --- a/src/loos.hpp +++ b/src/loos.hpp @@ -114,6 +114,7 @@ #include #include +#include #endif From 20d3a832a8e8c32fc712750f40cca31735cd3164 Mon Sep 17 00:00:00 2001 From: "Chapin E. 
Cavender" Date: Thu, 26 Mar 2020 00:36:25 -0400 Subject: [PATCH 03/41] Method to calculate backbone dihedrals --- Tools/rna_suites.cpp | 15 ++- src/RnaSuite.cpp | 258 +++++++++++++++++++++++++++++-------------- src/RnaSuite.hpp | 26 +++++ 3 files changed, 213 insertions(+), 86 deletions(-) diff --git a/Tools/rna_suites.cpp b/Tools/rna_suites.cpp index 0053e44bb..5c470c3e3 100644 --- a/Tools/rna_suites.cpp +++ b/Tools/rna_suites.cpp @@ -118,15 +118,26 @@ int main(int argc, char *argv[]) { AtomicGroup model = tropts->model; pTraj traj = tropts->trajectory; vector indices = tropts->frameList(); -// AtomicGroup rna_atoms = selectAtoms(model, topts->selection); + AtomicGroup rna_atoms = selectAtoms(model, sopts->selection); // Number of frames in trajectory const uint N_frame = indices.size(); // Create RNASuite object from RNA atoms - RnaSuite rna_suite = RnaSuite(model, suiteness_cutoff); + RnaSuite rna_suite = RnaSuite(rna_atoms, suiteness_cutoff); // Print dihedrals rna_suite.printBackboneAtoms(); + // Loop over trajectory + for (vector::iterator i = indices.begin(); i != indices.end(); i++) { + + traj->readFrame(*i); + traj->updateGroupCoords(model); + + rna_suite.calculateBackboneDihedrals(); + rna_suite.printBackboneDihedrals(); + + } + } diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp index 1906f6c2b..77591e2f9 100644 --- a/src/RnaSuite.cpp +++ b/src/RnaSuite.cpp @@ -25,7 +25,9 @@ using namespace std; namespace loos { - // Constructors + // |------------------------------------------------------------------------ + // | Constructors + // |------------------------------------------------------------------------ RnaSuite::RnaSuite(const AtomicGroup &group, const double suiteness_cutoff_) { @@ -46,7 +48,99 @@ namespace loos { suiteness_cutoff = 0.01; } - // Methods + // |------------------------------------------------------------------------ + // | Methods + // |------------------------------------------------------------------------ + + void 
RnaSuite::calculateBackboneDihedrals() { + + // Clear vector of vectors of doubles for each backbone dihedral + alpha.clear(); + beta.clear(); + gamma.clear(); + delta.clear(); + epsilon.clear(); + zeta.clear(); + + for (size_t i = 0; i < N_continuous_group; i++) { + + std::vector continuous_alpha(N_residue[i]); + std::vector continuous_beta(N_residue[i]); + std::vector continuous_gamma(N_residue[i]); + std::vector continuous_delta(N_residue[i] + 1); + std::vector continuous_epsilon(N_residue[i]); + std::vector continuous_zeta(N_residue[i]); + + for (size_t j = 0; j < N_residue[i]; j++) { + + continuous_alpha[j] = Math::torsion( + alpha_atoms[i][j][0], alpha_atoms[i][j][1], + alpha_atoms[i][j][2], alpha_atoms[i][j][3]); + continuous_beta[j] = Math::torsion( + beta_atoms[i][j][0], beta_atoms[i][j][1], + beta_atoms[i][j][2], beta_atoms[i][j][3]); + continuous_gamma[j] = Math::torsion( + gamma_atoms[i][j][0], gamma_atoms[i][j][1], + gamma_atoms[i][j][2], gamma_atoms[i][j][3]); + continuous_delta[j] = Math::torsion( + delta_atoms[i][j][0], delta_atoms[i][j][1], + delta_atoms[i][j][2], delta_atoms[i][j][3]); + continuous_epsilon[j] = Math::torsion( + epsilon_atoms[i][j][0], epsilon_atoms[i][j][1], + epsilon_atoms[i][j][2], epsilon_atoms[i][j][3]); + continuous_zeta[j] = Math::torsion( + zeta_atoms[i][j][0], zeta_atoms[i][j][1], + zeta_atoms[i][j][2], zeta_atoms[i][j][3]); + + } + + continuous_delta[N_residue[i]] = Math::torsion( + delta_atoms[i][N_residue[i]][0], + delta_atoms[i][N_residue[i]][1], + delta_atoms[i][N_residue[i]][2], + delta_atoms[i][N_residue[i]][3]); + + alpha.push_back(continuous_alpha); + beta.push_back(continuous_beta); + gamma.push_back(continuous_gamma); + delta.push_back(continuous_delta); + epsilon.push_back(continuous_epsilon); + zeta.push_back(continuous_zeta); + + } + + } // calculateBackboneDihedrals() + + void RnaSuite::checkContinuousGroupSize( + const std::vector> &group_vector, + const size_t target_size, const string dihedral_name) const 
{ + + if (group_vector.size() != target_size) { + + cout << boost::format("Error: different number of continuous " + "groups for alpha (%d) and %s (%d)\n") % target_size + % dihedral_name % group_vector.size(); + throw(LOOSError()); + + } + + } // checkContinuousGroupSize() + + void RnaSuite::checkResidueSize( + const std::vector &residue_vector, + const size_t target_size, const string dihedral_name, + const size_t group_index) const { + + if (residue_vector.size() != target_size) { + + cout << boost::format("Error: different number of residues in " + "continuous group %d for alpha (%d) and %s (%d)\n") + % group_index % target_size % dihedral_name + % residue_vector.size(); + + } + + } // checkResidueSize() void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { @@ -201,6 +295,32 @@ namespace loos { } + // Get number of continuous groups and check that all dihedral groups + // have same size + N_continuous_group = alpha_atoms.size(); + checkContinuousGroupSize(beta_atoms, N_continuous_group, "beta"); + checkContinuousGroupSize(gamma_atoms, N_continuous_group, "gamma"); + checkContinuousGroupSize(delta_atoms, N_continuous_group, "delta"); + checkContinuousGroupSize(epsilon_atoms, N_continuous_group, "epsilon"); + checkContinuousGroupSize(zeta_atoms, N_continuous_group, "zeta"); + + // Get number of residues in each continuous group and check that these + // are consistent across backbone dihedrals. Delta should have one + // additional residue per continuous group. 
+ size_t residue_size; + + for (size_t i = 0; i < N_continuous_group; i++) { + + residue_size = alpha_atoms[i].size(); + checkResidueSize(beta_atoms[i], residue_size, "beta", i + 1); + checkResidueSize(gamma_atoms[i], residue_size, "gamma", i + 1); + checkResidueSize(delta_atoms[i], residue_size + 1, "delta", i + 1); + checkResidueSize(epsilon_atoms[i], residue_size, "epsilon", i + 1); + checkResidueSize(zeta_atoms[i], residue_size, "zeta", i + 1); + N_residue.push_back(residue_size); + + } + } // extractRnaBackboneAtoms() double RnaSuite::getSuitenessCutoff() const { @@ -209,98 +329,68 @@ namespace loos { void RnaSuite::printBackboneAtoms() const { - uint continuous_counter; - uint residue_counter; - - cout << boost::format("Sizes %d %d %d %d %d %d\n") % alpha_atoms.size() - % beta_atoms.size() % gamma_atoms.size() % delta_atoms.size() - % epsilon_atoms.size() % zeta_atoms.size(); - - continuous_counter = 0; - for (std::vector continuous_atoms : alpha_atoms) { - continuous_counter++; - cout << boost::format("Alpha %d Size %d\n") % continuous_counter - % continuous_atoms.size(); - residue_counter = 0; - for (AtomicGroup residue_atoms : continuous_atoms) { - residue_counter++; - cout << boost::format("Alpha %d %d\n") % continuous_counter - % residue_counter; - cout << residue_atoms << endl; - } - } + size_t i_plus; + size_t j_plus; - continuous_counter = 0; - for (std::vector continuous_atoms : beta_atoms) { - continuous_counter++; - cout << boost::format("Beta %d Size %d\n") % continuous_counter - % continuous_atoms.size(); - residue_counter = 0; - for (AtomicGroup residue_atoms : continuous_atoms) { - residue_counter++; - cout << boost::format("Beta %d %d\n") % continuous_counter - % residue_counter; - cout << residue_atoms << endl; - } - } + cout << boost::format("Number of continuous groups: %d\n") + % N_continuous_group; - continuous_counter = 0; - for (std::vector continuous_atoms : gamma_atoms) { - continuous_counter++; - cout << boost::format("Gamma %d Size 
%d\n") % continuous_counter - % continuous_atoms.size(); - residue_counter = 0; - for (AtomicGroup residue_atoms : continuous_atoms) { - residue_counter++; - cout << boost::format("Gamma %d %d\n") % continuous_counter - % residue_counter; - cout << residue_atoms << endl; - } - } + if (N_continuous_group == 0) return; - continuous_counter = 0; - for (std::vector continuous_atoms : delta_atoms) { - continuous_counter++; - cout << boost::format("Delta %d Size %d\n") % continuous_counter - % continuous_atoms.size(); - residue_counter = 0; - for (AtomicGroup residue_atoms : continuous_atoms) { - residue_counter++; - cout << boost::format("Delta %d %d\n") % continuous_counter - % residue_counter; - cout << residue_atoms << endl; - } - } + for (size_t i = 0; i < N_continuous_group; i++) { + + i_plus = i + 1; + cout << boost::format("Continuous group %d has %d residues\n") + % i_plus % N_residue[i]; + + for (size_t j = 0; j < N_residue[i]; j++) { + + j_plus = j + 1; + cout << boost::format("Delta %d %d\n") % i_plus % j_plus; + cout << delta_atoms[i][j] << endl; + cout << boost::format("Epsilon %d %d\n") % i_plus % j_plus; + cout << epsilon_atoms[i][j] << endl; + cout << boost::format("Zeta %d %d\n") % i_plus % j_plus; + cout << zeta_atoms[i][j] << endl; + cout << boost::format("Alpha %d %d\n") % i_plus % j_plus; + cout << alpha_atoms[i][j] << endl; + cout << boost::format("Beta %d %d\n") % i_plus % j_plus; + cout << beta_atoms[i][j] << endl; + cout << boost::format("Gamma %d %d\n") % i_plus % j_plus; + cout << gamma_atoms[i][j] << endl; - continuous_counter = 0; - for (std::vector continuous_atoms : epsilon_atoms) { - continuous_counter++; - cout << boost::format("Epsilon %d Size %d\n") % continuous_counter - % continuous_atoms.size(); - residue_counter = 0; - for (AtomicGroup residue_atoms : continuous_atoms) { - residue_counter++; - cout << boost::format("Epsilon %d %d\n") % continuous_counter - % residue_counter; - cout << residue_atoms << endl; } + + cout << 
boost::format("Delta %d %d\n") % i_plus + % (N_residue[i] + 1); + cout << delta_atoms[i][N_residue[i]] << endl; + } - continuous_counter = 0; - for (std::vector continuous_atoms : zeta_atoms) { - continuous_counter++; - cout << boost::format("Zeta %d Size %d\n") % continuous_counter - % continuous_atoms.size(); - residue_counter = 0; - for (AtomicGroup residue_atoms : continuous_atoms) { - residue_counter++; - cout << boost::format("Zeta %d %d\n") % continuous_counter - % residue_counter; - cout << residue_atoms << endl; + } // printBackboneAtoms() + + void RnaSuite::printBackboneDihedrals() const { + + if (alpha.empty()) return; + + for (size_t i = 0; i < N_continuous_group; i++) { + + for (size_t j = 0; j < N_residue[i]; j++) { + + cout << boost::format("%4d %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") + % gamma_atoms[i][j][0]->resid() % delta[i][j] + % epsilon[i][j] % zeta[i][j] % alpha[i][j] % beta[i][j] + % gamma[i][j]; + } + + cout << boost::format("%4d %8.3f\n") + % delta_atoms[i][N_residue[i]][0]->resid() + % delta[i][N_residue[i]]; + } - } // printBackboneAtoms() + } // printBackboneDihedrals() void RnaSuite::setSuitenessCutoff(const double suiteness_cutoff_) { suiteness_cutoff = suiteness_cutoff_; diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index f6cb50bf8..29d0d829e 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -24,6 +24,7 @@ #include #include +#include using namespace std; @@ -62,6 +63,16 @@ namespace loos { */ void calculateBackboneDihedrals(); + //! Method to check the size of a vector of continuous groups + void checkContinuousGroupSize( + const std::vector> &group_vector, + const size_t target_size, const string dihedral_name) const; + + //! Method to check the size of a vector of residues + void checkResidueSize(const std::vector &residue_vector, + const size_t target_size, const string dihedral_name, + const size_t group_index) const; + //! 
Method to extract RNA backbone atoms from an AtomicGroup /** * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', @@ -75,26 +86,41 @@ namespace loos { //! Method to print groups of backbone atoms for each dihedral void printBackboneAtoms() const; + //! Method to print backbone dihedrals for each residue + void printBackboneDihedrals() const; + //! Method to set the cutoff for the suiteness score of non-outliers void setSuitenessCutoff(const double suiteness_cutoff_); private: + // Vector of continuous groups, composed of vectors of AtomicGroups + // for each residue within a continuous group std::vector> alpha_atoms; std::vector> beta_atoms; std::vector> gamma_atoms; std::vector> delta_atoms; std::vector> epsilon_atoms; std::vector> zeta_atoms; + + // Vector of vectors of backbone dihedrals std::vector> alpha; std::vector> beta; std::vector> gamma; std::vector> delta; std::vector> epsilon; std::vector> zeta; + + // Output: suite name (composed of a number-like character for the + // 5' hemi-nucleotide and a letter-like character for the + // 3' hemi-nucleotide) and suiteness score std::vector suite_name_hemi5; std::vector suite_name_hemi3; std::vector suiteness; + + // Other internal variables + size_t N_continuous_group = 0; + vector N_residue; double suiteness_cutoff; }; From 1cf262df8ee2f5b7a4d324085a004cc982374a69 Mon Sep 17 00:00:00 2001 From: "Chapin E. 
Cavender" Date: Fri, 27 Mar 2020 19:06:50 -0400 Subject: [PATCH 04/41] (Untested) Assign ddg_index, reference suites from suitename --- src/RnaSuite.cpp | 397 ++++++++++++++++++++++++++++++++++++++++------- src/RnaSuite.hpp | 77 ++++++--- 2 files changed, 397 insertions(+), 77 deletions(-) diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp index 77591e2f9..2bf64c694 100644 --- a/src/RnaSuite.cpp +++ b/src/RnaSuite.cpp @@ -30,7 +30,7 @@ namespace loos { // |------------------------------------------------------------------------ RnaSuite::RnaSuite(const AtomicGroup &group, - const double suiteness_cutoff_) { + const double suiteness_cutoff_) { extractRnaBackboneAtoms(group); suiteness_cutoff = suiteness_cutoff_; @@ -52,6 +52,86 @@ namespace loos { // | Methods // |------------------------------------------------------------------------ + void RnaSuite::assignRichardsonSuites() { + + bool outlier; + size_t N_delta = delta_min.size(); + size_t N_gamma = gamma_min.size(); + size_t N_dg = N_delta * N_gamma; + uint suite_counter = 0; + vector suite(7); + + // Index into delta, delta(j+1), gamma clusters + uint ddg_index; + + if (alpha.empty()) { + + cout << "Warning: backbone dihedrals are empty" << endl; + return; + + } + + // Initialize vectors of suite names and suiteness scores + suite_names.clear() + suiteness.clear() + suite_names.reserve(N_suite); + suiteness.reserve(N_suite); + + for (size_t i = 0; i < N_continuous_group; ++i) { + + for (size_t j = 0; j < N_residue[i]; ++j) { + + suite = {delta[i][j], epsilon[i][j], zeta[i][j], alpha[i][j], + beta[i][j], gamma[i][j], delta[i][j + 1]}; + + // Assign delta(j-1), delta, gamma index. These 3 dihedrals have + // 12 clusters that are independent of the other 4 dihedrals. + ddg_index = 0; + + // Filter on 5' delta. Values outside of this range are + // indicative of incorrect stereochemistry in the ribose. 
+ if (filterDDG(suite[0], delta_min, delta_max, N_dg, ddg_index) + == N_delta) { + + suite_names[suite_counter] = "!d"; + suiteness[suite_counter] = 0.0; + continue; + + } + + // Filter on 3' delta + if (filterDDG(suite[6], delta_min, delta_max, N_gamma, ddg_index) + == N_delta) { + + suite_names[suite_counter] = "!d"; + suiteness[suite_counter] = 0.0; + continue; + + } + + // Filter on gamma + if (filterDDG(suite[5], gamma_min, gamma_max, 1, ddg_index) + == N_gamma) { + + suite_names[suite_counter] = "!g"; + suiteness[suite_counter] = 0.0; + continue; + + } + + // Filter on epsilon. Values outside of this range are + // indicative of a misfit sugar pucker. + + // Get 4D scaled hyperellipsoid distance + + suite_counter++; + + } // loop over residues + + } // loop over continuous groups + + } // assignRichardsonSuites() + void RnaSuite::calculateBackboneDihedrals() { // Clear vector of vectors of doubles for each backbone dihedral @@ -62,43 +142,28 @@ namespace loos { epsilon.clear(); zeta.clear(); - for (size_t i = 0; i < N_continuous_group; i++) { - - std::vector continuous_alpha(N_residue[i]); - std::vector continuous_beta(N_residue[i]); - std::vector continuous_gamma(N_residue[i]); - std::vector continuous_delta(N_residue[i] + 1); - std::vector continuous_epsilon(N_residue[i]); - std::vector continuous_zeta(N_residue[i]); - - for (size_t j = 0; j < N_residue[i]; j++) { - - continuous_alpha[j] = Math::torsion( - alpha_atoms[i][j][0], alpha_atoms[i][j][1], - alpha_atoms[i][j][2], alpha_atoms[i][j][3]); - continuous_beta[j] = Math::torsion( - beta_atoms[i][j][0], beta_atoms[i][j][1], - beta_atoms[i][j][2], beta_atoms[i][j][3]); - continuous_gamma[j] = Math::torsion( - gamma_atoms[i][j][0], gamma_atoms[i][j][1], - gamma_atoms[i][j][2], gamma_atoms[i][j][3]); - continuous_delta[j] = Math::torsion( - delta_atoms[i][j][0], delta_atoms[i][j][1], - delta_atoms[i][j][2], delta_atoms[i][j][3]); - continuous_epsilon[j] = Math::torsion( - epsilon_atoms[i][j][0], 
epsilon_atoms[i][j][1], - epsilon_atoms[i][j][2], epsilon_atoms[i][j][3]); - continuous_zeta[j] = Math::torsion( - zeta_atoms[i][j][0], zeta_atoms[i][j][1], - zeta_atoms[i][j][2], zeta_atoms[i][j][3]); + for (size_t i = 0; i < N_continuous_group; ++i) { + + vector continuous_alpha(N_residue[i]); + vector continuous_beta(N_residue[i]); + vector continuous_gamma(N_residue[i]); + vector continuous_delta(N_residue[i] + 1); + vector continuous_epsilon(N_residue[i]); + vector continuous_zeta(N_residue[i]); + + for (size_t j = 0; j < N_residue[i]; ++j) { + + continuous_alpha[j] = calculateDihedral(alpha_atoms[i][j]); + continuous_beta[j] = calculateDihedral(beta_atoms[i][j]); + continuous_gamma[j] = calculateDihedral(gamma_atoms[i][j]); + continuous_delta[j] = calculateDihedral(delta_atoms[i][j]); + continuous_epsilon[j] = calculateDihedral(epsilon_atoms[i][j]); + continuous_zeta[j] = calculateDihedral(zeta_atoms[i][j]); } - continuous_delta[N_residue[i]] = Math::torsion( - delta_atoms[i][N_residue[i]][0], - delta_atoms[i][N_residue[i]][1], - delta_atoms[i][N_residue[i]][2], - delta_atoms[i][N_residue[i]][3]); + continuous_delta[N_residue[i]] = + calculateDihedral(delta_atoms[i][N_residue[i]]); alpha.push_back(continuous_alpha); beta.push_back(continuous_beta); @@ -111,8 +176,16 @@ namespace loos { } // calculateBackboneDihedrals() + double RnaSuite::calculateDihedral(const AtomicGroup &group) { + + double dihedral = Math::torsion(group[0], group[1], group[2], group[3]) + if (dihedral < 0.0) dihedral += 360.0; + return dihedral; + + } // calculateDihedral() + void RnaSuite::checkContinuousGroupSize( - const std::vector> &group_vector, + const vector> &group_vector, const size_t target_size, const string dihedral_name) const { if (group_vector.size() != target_size) { @@ -127,7 +200,7 @@ namespace loos { } // checkContinuousGroupSize() void RnaSuite::checkResidueSize( - const std::vector &residue_vector, + const vector &residue_vector, const size_t target_size, const 
string dihedral_name, const size_t group_index) const { @@ -142,14 +215,152 @@ namespace loos { } // checkResidueSize() + void defineSuites(const string suite_definition) { + + reference_suites.clear(); + + if (suite_definition == "suitename" + || suite_definition == "richardson") defineSuitesFromSuitename(); + + else { + + cout << boost::format("%s is not a recognized suite definition\n") + % suite_definition; + cout << "Must be one of: suitename" << endl; + throw(LOOSError()); + + } + + } // defineSuites() + + void defineSuitesFromFile(const string suite_definition_filename) { + + // TODO read suite definitions from file + cout << "Reading suite definitions from a file is not yet supported\n" + "Go yell at Chapin" << endl; + + } // defineSuitesFromFile() + + void defineSuitesFromSuitename() { + + // Means of dihedral angles + reference_suite_dihedrals = { + { // ddg index 0: C3' C3' plus + { 81.495, 212.250, 288.831, 294.967, 173.990, 53.550, 81.035}, + { 83.513, 218.120, 291.593, 292.247, 222.300, 58.067, 86.093}, + { 85.664, 245.014, 268.257, 303.879, 138.164, 61.950, 79.457}, + { 82.112, 190.682, 264.945, 295.967, 181.839, 51.455, 81.512}, + { 83.414, 217.400, 222.006, 302.856, 160.719, 49.097, 82.444}, + { 85.072, 216.324, 173.276, 289.320, 164.132, 45.876, 84.956}, + { 83.179, 210.347, 121.474, 288.568, 157.268, 49.347, 81.047}, + { 80.888, 218.636, 290.735, 167.447, 159.565, 51.326, 85.213}, + { 83.856, 238.750, 256.875, 69.562, 170.200, 52.800, 85.287}, + { 85.295, 244.085, 203.815, 65.880, 181.130, 54.680, 86.035}, + { 79.671, 202.471, 63.064, 68.164, 143.450, 49.664, 82.757}, + { 84.000, 195.000, 146.000, 170.000, 170.000, 52.000, 84.000} + }, { // ddg index 1: C3' C3' trans + { 80.514, 200.545, 280.510, 249.314, 82.662, 167.890, 85.507}, + { 80.223, 196.591, 291.299, 153.060, 194.379, 179.061, 83.648}, + { 81.395, 203.030, 294.445, 172.195, 138.540, 175.565, 84.470}, + { 87.417, 223.558, 80.175, 66.667, 109.150, 176.475, 83.833}, + { 86.055, 246.502, 
100.392, 73.595, 213.752, 183.395, 85.483} + }, { // ddg index 2: C3' C3' minus + }, { // ddg index 3: C3' C2' plus + { 84.215, 215.014, 288.672, 300.420, 177.476, 58.307, 144.841}, + { 82.731, 220.463, 288.665, 296.983, 221.654, 54.213, 143.771}, + { 84.700, 226.400, 168.336, 292.771, 177.629, 48.629, 147.950}, + { 83.358, 206.042, 277.567, 195.700, 161.600, 50.750, 145.258}, + { 82.614, 206.440, 52.524, 163.669, 148.421, 50.176, 147.590}, + { 84.285, 236.600, 220.400, 68.300, 200.122, 53.693, 145.730}, + { 84.457, 213.286, 69.086, 75.500, 156.671, 57.486, 147.686} + }, { // ddg index 4: C3' C2' trans + { 81.200, 199.243, 288.986, 180.286, 194.743, 178.200, 147.386}, + { 82.133, 204.933, 69.483, 63.417, 115.233, 176.283, 145.733} + }, { // ddg index 5: C3' C2' minus + { 83.977, 216.508, 287.192, 297.254, 225.154, 293.738, 150.677}, + { 84.606, 232.856, 248.125, 63.269, 181.975, 295.744, 149.744}, + { 83.000, 196.900, 65.350, 60.150, 138.425, 292.550, 154.275} + }, { // ddg index 6: C2' C3' plus + {145.399, 260.339, 288.756, 288.444, 192.733, 53.097, 84.067}, + {146.275, 259.783, 169.958, 298.450, 169.583, 50.908, 83.967}, + {149.286, 223.159, 139.421, 284.559, 158.107, 47.900, 84.424}, + {148.006, 191.944, 146.231, 289.288, 150.781, 42.419, 84.956}, + {148.028, 256.922, 165.194, 204.961, 165.194, 49.383, 82.983}, + {145.337, 262.869, 79.588, 203.863, 189.688, 58.000, 84.900}, + {148.992, 270.596, 240.892, 62.225, 176.271, 53.600, 87.262}, + {149.822, 249.956, 187.678, 80.433, 198.133, 61.000, 89.378}, + {146.922, 241.222, 88.894, 59.344, 160.683, 52.333, 83.417}, + {141.900, 258.383, 286.517, 178.267, 165.217, 48.350, 84.783} + }, { // ddg index 7: C2' C3' trans + {147.782, 260.712, 290.424, 296.200, 177.282, 175.594, 86.565}, + {143.722, 227.256, 203.789, 73.856, 216.733, 194.444, 80.911}, + {148.717, 274.683, 100.283, 80.600, 248.133, 181.817, 82.600}, + {150.311, 268.383, 84.972, 63.811, 191.483, 176.644, 85.600}, + {141.633, 244.100, 66.056, 71.667, 122.167, 
182.200, 83.622} + }, { // ddg index 8: C2' C3' minus + {149.070, 249.780, 111.520, 278.370, 207.780, 287.820, 86.650} + }, { // ddg index 9: C2' C2' plus + {146.383, 259.402, 291.275, 291.982, 210.048, 54.412, 147.760}, + {145.256, 244.622, 162.822, 294.159, 171.630, 45.900, 145.804}, + {147.593, 248.421, 112.086, 274.943, 164.764, 56.843, 146.264}, + {150.077, 260.246, 213.785, 71.900, 207.638, 56.715, 148.131}, + {146.415, 257.831, 89.597, 67.923, 173.051, 55.513, 147.623}, + {142.900, 236.550, 268.800, 180.783, 185.133, 54.467, 143.350} + }, { // ddg index 10: C2' C2' trans + {149.863, 247.562, 170.488, 277.938, 84.425, 176.413, 148.087}, + {143.940, 258.200, 298.240, 279.640, 183.680, 183.080, 145.120} + }, { // ddg index 11: C2' C2' minus + {147.342, 256.475, 295.508, 287.408, 194.525, 293.725, 150.458} + }; + + // Two-character suite name + reference_suite_names = { + {"1a", "1m", "1L", "&a", "7a", "3a", "9a", "1g", "7d", "3d", "5d", + "3g"}, + {"1e", "1c", "1f", "5j", "5n"}, + { }, + {"1b", "1[", "3b", "1z", "5z", "7p", "5p"}, + {"1t", "5q"}, + {"1o", "7r", "5r"}, + {"2a", "4a", "0a", "#a", "4g", "6g", "8d", "4d", "6d", "2g"}, + {"2h", "4n", "0i", "6n", "6j"}, + {"0k"}, + {"2[", "4b", "0b", "4p", "6p", "2z"}, + {"4s", "2u"}, + {"2o"} + }; + + // Delta(i-1), delta, gamma index. Delta can be C3' endo ("3") or + // C2' endo ("2"). Gamma can be plus ("p"), trans ("t"), or minus ("m"). 
+ reference_suite_ddgs = {"33p", "33t", "33m", "32p", "32t", "32m", "23p", + "23t", "23m", "22p", "22t", "22m"}; + + // Widths used to scale each dihedral dimension + dihedral_width = {28.0, 60.0, 55.0, 50.0, 70.0, 35.0, 28.0}; + + // Satellite widths used to scale overlapping clusters + satellite_width = {50.0, 50.0, 45.0, 60.0}; + + // Boundaries for allowed regions of delta(i-1), delta, and gamma + delta_min = { 60.0, 125.0}; + delta_max = {105.0, 165.0}; + gamma_min = { 20.0, 140.0, 260.0}; + gamma_max = { 95.0, 215.0, 335.0}; + + // Boundaries used to filter suites based on epsilon, zeta, alpha, beta + filter_min = {155.0, 25.0, 25.0, 50.0}; + filter_max = {310.0, 335.0, 335.0, 290.0}; + + } // defineSuitesFromSuitename() + void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { - std::vector continuous_alpha_atoms; - std::vector continuous_beta_atoms; - std::vector continuous_gamma_atoms; - std::vector continuous_delta_atoms; - std::vector continuous_epsilon_atoms; - std::vector continuous_zeta_atoms; + vector continuous_alpha_atoms; + vector continuous_beta_atoms; + vector continuous_gamma_atoms; + vector continuous_delta_atoms; + vector continuous_epsilon_atoms; + vector continuous_zeta_atoms; AtomicGroup dihedral_atoms; AtomicGroup residue_p; AtomicGroup residue_o5p; @@ -308,8 +519,9 @@ namespace loos { // are consistent across backbone dihedrals. Delta should have one // additional residue per continuous group. 
size_t residue_size; + N_suite = 0; - for (size_t i = 0; i < N_continuous_group; i++) { + for (size_t i = 0; i < N_continuous_group; ++i) { residue_size = alpha_atoms[i].size(); checkResidueSize(beta_atoms[i], residue_size, "beta", i + 1); @@ -318,11 +530,31 @@ namespace loos { checkResidueSize(epsilon_atoms[i], residue_size, "epsilon", i + 1); checkResidueSize(zeta_atoms[i], residue_size, "zeta", i + 1); N_residue.push_back(residue_size); + N_suite += residue_size; } } // extractRnaBackboneAtoms() + size_t RnaSuite::filterDDG(dihedral, vector &min, + vector &max, uint increment, uint ddg_index) { + + size_t i = 0; + while (i < min.size()) { + + if (dihedral >= min[i] && dihedral <= max[i]) { + + ddg_index += i * increment; + return i; + + } + + ++i; + + } + + } // filterDDG() + double RnaSuite::getSuitenessCutoff() const { return suiteness_cutoff; } // getSuitenessCutoff() @@ -332,38 +564,45 @@ namespace loos { size_t i_plus; size_t j_plus; + cout << "\n ==== Printing backbone atoms ====\n" << endl; + + if (N_continuous_group == 0) { + + cout << "Warning: backbone atoms are empty" << endl; + return; + + } + cout << boost::format("Number of continuous groups: %d\n") % N_continuous_group; - if (N_continuous_group == 0) return; - - for (size_t i = 0; i < N_continuous_group; i++) { + for (size_t i = 0; i < N_continuous_group; ++i) { i_plus = i + 1; cout << boost::format("Continuous group %d has %d residues\n") % i_plus % N_residue[i]; - for (size_t j = 0; j < N_residue[i]; j++) { + for (size_t j = 0; j < N_residue[i]; ++j) { j_plus = j + 1; cout << boost::format("Delta %d %d\n") % i_plus % j_plus; - cout << delta_atoms[i][j] << endl; + cout << delta_atoms[i][j] << "\n"; cout << boost::format("Epsilon %d %d\n") % i_plus % j_plus; - cout << epsilon_atoms[i][j] << endl; + cout << epsilon_atoms[i][j] << "\n"; cout << boost::format("Zeta %d %d\n") % i_plus % j_plus; - cout << zeta_atoms[i][j] << endl; + cout << zeta_atoms[i][j] << "\n"; cout << boost::format("Alpha %d 
%d\n") % i_plus % j_plus; - cout << alpha_atoms[i][j] << endl; + cout << alpha_atoms[i][j] << "\n"; cout << boost::format("Beta %d %d\n") % i_plus % j_plus; - cout << beta_atoms[i][j] << endl; + cout << beta_atoms[i][j] << "\n"; cout << boost::format("Gamma %d %d\n") % i_plus % j_plus; - cout << gamma_atoms[i][j] << endl; + cout << gamma_atoms[i][j] << "\n"; } cout << boost::format("Delta %d %d\n") % i_plus % (N_residue[i] + 1); - cout << delta_atoms[i][N_residue[i]] << endl; + cout << delta_atoms[i][N_residue[i]] << "\n"; } @@ -371,13 +610,21 @@ namespace loos { void RnaSuite::printBackboneDihedrals() const { - if (alpha.empty()) return; + cout << "\n ==== Printing backbone dihedrals ====\n" << endl; + + if (alpha.empty()) { + + cout << "Warning: backbone dihedrals are empty" << endl; + return; + + } - for (size_t i = 0; i < N_continuous_group; i++) { + for (size_t i = 0; i < N_continuous_group; ++i) { - for (size_t j = 0; j < N_residue[i]; j++) { + for (size_t j = 0; j < N_residue[i]; ++j) { - cout << boost::format("%4d %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") + cout << boost::format( + "%4d %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") % gamma_atoms[i][j][0]->resid() % delta[i][j] % epsilon[i][j] % zeta[i][j] % alpha[i][j] % beta[i][j] % gamma[i][j]; @@ -392,6 +639,38 @@ namespace loos { } // printBackboneDihedrals() + void RnaSuite::printReferenceSuites() const { + + cout << "\n ==== Printing reference suites ====\n" << endl; + + if (reference_suite_dihedrals.empty()) { + + cout << "Warning: reference suites are empty" << endl; + return; + + } + + for (size_t i = 0; i < reference_suite_dihedrals.size(); ++i) { + + for (size_t j = 0; j < reference_suite_dihedrals[i].size(); ++j) { + + cout << boost::format( + "%2s %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") + % reference_suite_names[i][j] % reference_suite_ddg[i][j] + % reference_suite_dihedrals[i][j][0] + % reference_suite_dihedrals[i][j][1] + % reference_suite_dihedrals[i][j][2] + % 
reference_suite_dihedrals[i][j][3] + % reference_suite_dihedrals[i][j][4] + % reference_suite_dihedrals[i][j][5] + % reference_suite_dihedrals[i][j][6]; + + } + + } + + } // printReferenceSuites() + void RnaSuite::setSuitenessCutoff(const double suiteness_cutoff_) { suiteness_cutoff = suiteness_cutoff_; } // setSuitenessCutoff() diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index 29d0d829e..da416feeb 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -63,16 +63,28 @@ namespace loos { */ void calculateBackboneDihedrals(); + //! Calculate a dihedral in deg from 4 atoms in the range [0, 360] + double calculateDihedral(const AtomicGroup &group); + //! Method to check the size of a vector of continuous groups void checkContinuousGroupSize( - const std::vector> &group_vector, + const vector> &group_vector, const size_t target_size, const string dihedral_name) const; //! Method to check the size of a vector of residues - void checkResidueSize(const std::vector &residue_vector, + void checkResidueSize(const vector &residue_vector, const size_t target_size, const string dihedral_name, const size_t group_index) const; + //! Method to define suites used for assignment from an existing scheme + void defineSuites(const string suite_definition); + + //! Method to define suites used for assignment from a file + void defineSuitesFromFile(const string suite_definition_filename); + + //! Method to define suites used for assignment from suitename + void defineSuitesFromSuitename(); + //! Method to extract RNA backbone atoms from an AtomicGroup /** * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', @@ -80,6 +92,10 @@ namespace loos { */ void extractRnaBackboneAtoms(const AtomicGroup &group); + //! Method to assign residues to a delta(i-1), delta, gamma index + size_t RnaSuite::filterDDG(dihedral, vector &min, + vector &max, uint increment, uint ddg_index); + //! 
Method to return the cutoff for the suiteness score of non-outliers double getSuitenessCutoff() const; @@ -89,41 +105,66 @@ namespace loos { //! Method to print backbone dihedrals for each residue void printBackboneDihedrals() const; + //! Method to print reference suite names and mean dihedrals + void printReferenceSuites() const; + //! Method to set the cutoff for the suiteness score of non-outliers void setSuitenessCutoff(const double suiteness_cutoff_); private: + // Reference suites used for assignment + vector>> reference_suite_dihedrals; + vector> reference_suite_names; + vector reference_suite_ddgs; + + // Widths used to scale each dihedral dimension + vector dihedral_width(7); + + // Satellite widths used to scale overlapping clusters + vector satellite_width(4); + + // Boundaries for allowed regions of delta(i-1), delta, and gamma + vector delta_min; + vector delta_max; + vector gamma_min; + vector gamma_max; + + // Boundaries used to filter suites based on epsilon, zeta, alpha, beta + vector filter_min(4); + vector filter_max(4); + // Vector of continuous groups, composed of vectors of AtomicGroups // for each residue within a continuous group - std::vector> alpha_atoms; - std::vector> beta_atoms; - std::vector> gamma_atoms; - std::vector> delta_atoms; - std::vector> epsilon_atoms; - std::vector> zeta_atoms; + vector> alpha_atoms; + vector> beta_atoms; + vector> gamma_atoms; + vector> delta_atoms; + vector> epsilon_atoms; + vector> zeta_atoms; // Vector of vectors of backbone dihedrals - std::vector> alpha; - std::vector> beta; - std::vector> gamma; - std::vector> delta; - std::vector> epsilon; - std::vector> zeta; + vector> alpha; + vector> beta; + vector> gamma; + vector> delta; + vector> epsilon; + vector> zeta; // Output: suite name (composed of a number-like character for the // 5' hemi-nucleotide and a letter-like character for the // 3' hemi-nucleotide) and suiteness score - std::vector suite_name_hemi5; - std::vector suite_name_hemi3; - 
std::vector suiteness; + vector suite_names; + vector suiteness; // Other internal variables size_t N_continuous_group = 0; vector N_residue; + size_t N_suite; double suiteness_cutoff; - }; + }; // RnaSuite class + } #endif From dc5871d41a13923228ffb1316f316bc0c401b90e Mon Sep 17 00:00:00 2001 From: "Chapin E. Cavender" Date: Mon, 30 Mar 2020 01:07:18 -0400 Subject: [PATCH 05/41] Assignment of delta(i-1), delta, gamma index and closest cluster --- Tools/rna_suites.cpp | 6 ++ src/RnaSuite.cpp | 242 ++++++++++++++++++++++++++++++++++--------- src/RnaSuite.hpp | 30 ++++-- 3 files changed, 217 insertions(+), 61 deletions(-) diff --git a/Tools/rna_suites.cpp b/Tools/rna_suites.cpp index 5c470c3e3..a44249621 100644 --- a/Tools/rna_suites.cpp +++ b/Tools/rna_suites.cpp @@ -126,6 +126,10 @@ int main(int argc, char *argv[]) { // Create RNASuite object from RNA atoms RnaSuite rna_suite = RnaSuite(rna_atoms, suiteness_cutoff); + // Define reference suites from suitename + rna_suite.defineSuites("suitename"); + rna_suite.printReferenceSuites(); + // Print dihedrals rna_suite.printBackboneAtoms(); @@ -137,6 +141,8 @@ int main(int argc, char *argv[]) { rna_suite.calculateBackboneDihedrals(); rna_suite.printBackboneDihedrals(); + rna_suite.assignRichardsonSuites(); + rna_suite.printSuites(); } diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp index 2bf64c694..eebb021bc 100644 --- a/src/RnaSuite.cpp +++ b/src/RnaSuite.cpp @@ -52,18 +52,45 @@ namespace loos { // | Methods // |------------------------------------------------------------------------ + size_t RnaSuite::assignDDGIndex(double dihedral, vector &min, + vector &max, uint increment, uint &ddg_index) { + + size_t i = 0; + while (i < min.size()) { + + if (dihedral >= min[i] && dihedral <= max[i]) { + + ddg_index += i * increment; + return i; + + } + + ++i; + + } + + return i; + + } // assignDDGIndex() + void RnaSuite::assignRichardsonSuites() { - bool outlier; size_t N_delta = delta_min.size(); size_t N_gamma = 
gamma_min.size(); size_t N_dg = N_delta * N_gamma; - uint suite_counter = 0; vector suite(7); - // Index into delta, delta(j+1), gamma clusters + // Index into delta(i-1), delta, gamma clusters uint ddg_index; + // Scaled 4D hyperellipsoid distance in epsilon, zeta, alpha, beta + double dist_ezab; + + // Closest scaled 4D hyperellipsoid distance to a cluster and index of + // the associated cluster + double min_dist_ezab; + size_t min_dist_ezab_index; + if (alpha.empty()) { cout << "Warning: backbone dihedrals are empty" << endl; @@ -72,9 +99,11 @@ namespace loos { } // Initialize vectors of suite names and suiteness scores - suite_names.clear() - suiteness.clear() + suite_names.clear(); + suite_ddg.clear(); + suiteness.clear(); suite_names.reserve(N_suite); + suite_ddg.reserve(N_suite); suiteness.reserve(N_suite); for (size_t i = 0; i < N_continuous_group; ++i) { @@ -90,41 +119,113 @@ namespace loos { // Filter on 5' delta. Values outside of this range are // indicative of incorrect stereochemistry in the ribose. 
- if (filterDDG(suite[0], delta_min, delta_max, N_dg, ddg_index) - == N_delta) { + if (assignDDGIndex(suite[0], delta_min, delta_max, N_dg, + ddg_index) == N_delta) { - suite_names[suite_counter] = "!d"; - suiteness[suite_counter] = 0.0; + suite_names.push_back("!d"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); continue; } // Filter on 3' delta - if (filterDDG(suite[6], delta_min, delta_max, N_gamma, ddg_index) - == N_delta) { + if (assignDDGIndex(suite[6], delta_min, delta_max, N_gamma, + ddg_index) == N_delta) { - suite_names[suite_counter] = "!d"; - suiteness[suite_counter] = 0.0; + suite_names.push_back("!d"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); continue; } // Filter on gamma - if (filterDDG(suite[5], gamma_min, gamma_max, 1, ddg_index) - == N_gamma) { + if (assignDDGIndex(suite[5], gamma_min, gamma_max, 1, ddg_index) + == N_gamma) { - suite_names[suite_counter] = "!g"; - suiteness[suite_counter] = 0.0; + suite_names.push_back("!g"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // If there are no clusters associated with this ddg_index, then + // this is an outlier + if (N_reference_suite[ddg_index] == 0) { + + suite_names.push_back("!!"); + suite_ddg.push_back(reference_suite_ddgs[ddg_index]); + suiteness.push_back(0.0); continue; } // Filter on epsilon. Values outside of this range are // indicative of a misfit sugar pucker. 
+ if (suite[1] < filter_min[0] || suite[1] > filter_max[0]) { + + suite_names.push_back("!e"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // Filter on zeta + if (suite[2] < filter_min[1] || suite[2] > filter_max[1]) { + + suite_names.push_back("!z"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // Filter on alpha + if (suite[3] < filter_min[2] || suite[3] > filter_max[2]) { + + suite_names.push_back("!a"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // Filter on beta + if (suite[4] < filter_min[3] || suite[4] > filter_max[3]) { + + suite_names.push_back("!b"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + cout << boost::format("%d %d %d") % i % j % ddg_index << endl; + // Find closest cluster in epsilon, zeta, alpha, beta + // Largest distance in 7D is 688.66 + min_dist_ezab = 999.0; + for (size_t k = 0; k < N_reference_suite[ddg_index]; ++k) { - // Get 4D scaled hyperellipsoid distance + // Get 4D scaled hyperellipsoid distance + dist_ezab = hyperellipsoidDist(suite, + reference_suite_dihedrals[ddg_index][k], 1, 4); - suite_counter++; + if (dist_ezab < min_dist_ezab) { + + min_dist_ezab = dist_ezab; + min_dist_ezab_index = k; + + } + + } + + suite_names.push_back( + reference_suite_names[ddg_index][min_dist_ezab_index]); + suite_ddg.push_back(reference_suite_ddgs[ddg_index]); + suiteness.push_back(1.0); } // loop over residues @@ -178,7 +279,7 @@ namespace loos { double RnaSuite::calculateDihedral(const AtomicGroup &group) { - double dihedral = Math::torsion(group[0], group[1], group[2], group[3]) + double dihedral = Math::torsion(group[0], group[1], group[2], group[3]); if (dihedral < 0.0) dihedral += 360.0; return dihedral; @@ -215,9 +316,11 @@ namespace loos { } // checkResidueSize() - void defineSuites(const string suite_definition) { + void RnaSuite::defineSuites(const string suite_definition) { - 
reference_suites.clear(); + reference_suite_dihedrals.clear(); + reference_suite_names.clear(); + reference_suite_ddgs.clear(); if (suite_definition == "suitename" || suite_definition == "richardson") defineSuitesFromSuitename(); @@ -233,7 +336,7 @@ namespace loos { } // defineSuites() - void defineSuitesFromFile(const string suite_definition_filename) { + void RnaSuite::defineSuitesFromFile(const string filename) { // TODO read suite definitions from file cout << "Reading suite definitions from a file is not yet supported\n" @@ -241,7 +344,7 @@ namespace loos { } // defineSuitesFromFile() - void defineSuitesFromSuitename() { + void RnaSuite::defineSuitesFromSuitename() { // Means of dihedral angles reference_suite_dihedrals = { @@ -311,7 +414,7 @@ namespace loos { {143.940, 258.200, 298.240, 279.640, 183.680, 183.080, 145.120} }, { // ddg index 11: C2' C2' minus {147.342, 256.475, 295.508, 287.408, 194.525, 293.725, 150.458} - }; + } }; // Two-character suite name reference_suite_names = { @@ -351,6 +454,12 @@ namespace loos { filter_min = {155.0, 25.0, 25.0, 50.0}; filter_max = {310.0, 335.0, 335.0, 290.0}; + // Get number of ddg clusters and number of suites in each ddg cluster + N_reference_ddg = reference_suite_dihedrals.size(); + N_reference_suite.clear(); + for (size_t i = 0; i < N_reference_ddg; ++i) + N_reference_suite.push_back(reference_suite_dihedrals[i].size()); + } // defineSuitesFromSuitename() void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { @@ -372,6 +481,7 @@ namespace loos { AtomicGroup prev_residue_c3p; AtomicGroup prev_residue_o3p; int current_resid = -2; + size_t residue_size; // True if this is the initial residue in a continuous group bool first_res = true; @@ -518,7 +628,7 @@ namespace loos { // Get number of residues in each continuous group and check that these // are consistent across backbone dihedrals. Delta should have one // additional residue per continuous group. 
- size_t residue_size; + N_residue.clear(); N_suite = 0; for (size_t i = 0; i < N_continuous_group; ++i) { @@ -536,28 +646,28 @@ namespace loos { } // extractRnaBackboneAtoms() - size_t RnaSuite::filterDDG(dihedral, vector &min, - vector &max, uint increment, uint ddg_index) { - - size_t i = 0; - while (i < min.size()) { + double RnaSuite::getSuitenessCutoff() const { + return suiteness_cutoff; + } // getSuitenessCutoff() - if (dihedral >= min[i] && dihedral <= max[i]) { + double RnaSuite::hyperellipsoidDist(vector &dihedrals, + vector &reference, uint first_index, uint last_index) { - ddg_index += i * increment; - return i; + double unscaled_diff; + double sum_scaled_powers = 0.0; - } + for (uint i = first_index; i <= last_index; ++i) { - ++i; + unscaled_diff = abs(dihedrals[i] - reference[i]); + // suitename program does not wrap unscaled coordinates + // if (unscaled_diff > 180.0) unscaled_diff = 360.0 - unscaled_diff; + sum_scaled_powers += pow(unscaled_diff / dihedral_width[i], 3.0); } - } // filterDDG() + return cbrt(sum_scaled_powers); - double RnaSuite::getSuitenessCutoff() const { - return suiteness_cutoff; - } // getSuitenessCutoff() + } // hyperellipsoidDist4() void RnaSuite::printBackboneAtoms() const { @@ -566,7 +676,7 @@ namespace loos { cout << "\n ==== Printing backbone atoms ====\n" << endl; - if (N_continuous_group == 0) { + if (alpha_atoms.empty()) { cout << "Warning: backbone atoms are empty" << endl; return; @@ -624,17 +734,14 @@ namespace loos { for (size_t j = 0; j < N_residue[i]; ++j) { cout << boost::format( - "%4d %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") - % gamma_atoms[i][j][0]->resid() % delta[i][j] + "%5d %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") + % gamma_atoms[i][j][0]->resid() + % gamma_atoms[i][j][0]->resname() % delta[i][j] % epsilon[i][j] % zeta[i][j] % alpha[i][j] % beta[i][j] - % gamma[i][j]; + % gamma[i][j] % delta[i][j + 1]; } - cout << boost::format("%4d %8.3f\n") - % delta_atoms[i][N_residue[i]][0]->resid() - % 
delta[i][N_residue[i]]; - } } // printBackboneDihedrals() @@ -650,13 +757,13 @@ namespace loos { } - for (size_t i = 0; i < reference_suite_dihedrals.size(); ++i) { + for (size_t i = 0; i < N_reference_ddg; ++i) { - for (size_t j = 0; j < reference_suite_dihedrals[i].size(); ++j) { + for (size_t j = 0; j < N_reference_suite[i]; ++j) { cout << boost::format( "%2s %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") - % reference_suite_names[i][j] % reference_suite_ddg[i][j] + % reference_suite_names[i][j] % reference_suite_ddgs[i] % reference_suite_dihedrals[i][j][0] % reference_suite_dihedrals[i][j][1] % reference_suite_dihedrals[i][j][2] @@ -671,6 +778,39 @@ namespace loos { } // printReferenceSuites() + void RnaSuite::printSuites() const { + + uint suite_counter = 0; + + cout << "\n ==== Printing suites ====\n" << endl; + + if (suite_names.empty()) { + + cout << "Warning: suites are empty" << endl; + return; + + } + + for (size_t i = 0; i < N_continuous_group; ++i) { + + for (size_t j = 0; j < N_residue[i]; ++j) { + + cout << boost::format("%5d %3s %2s %3s %8.6f %7.3f %7.3f %7.3f " + "%7.3f %7.3f %7.3f %7.3f\n") % gamma_atoms[i][j][0]->resid() + % gamma_atoms[i][j][0]->resname() + % suite_names[suite_counter] % suite_ddg[suite_counter] + % suiteness[suite_counter] % delta[i][j] % epsilon[i][j] + % zeta[i][j] % alpha[i][j] % beta[i][j] % gamma[i][j] + % delta[i][j + 1]; + + ++suite_counter; + + } + + } + + } // printReferenceSuites() + void RnaSuite::setSuitenessCutoff(const double suiteness_cutoff_) { suiteness_cutoff = suiteness_cutoff_; } // setSuitenessCutoff() diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index da416feeb..8375cc040 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -47,6 +47,10 @@ namespace loos { RnaSuite(); + //! Method to assign residues to a delta(i-1), delta, gamma index + size_t assignDDGIndex(double dihedral, vector &min, + vector &max, uint increment, uint &ddg_index); + //! 
Method to assign residues to backbone suites from Richardson et al. /** * This method assigns residues to one of the 46 backbone suites @@ -80,7 +84,7 @@ namespace loos { void defineSuites(const string suite_definition); //! Method to define suites used for assignment from a file - void defineSuitesFromFile(const string suite_definition_filename); + void defineSuitesFromFile(const string filename); //! Method to define suites used for assignment from suitename void defineSuitesFromSuitename(); @@ -92,13 +96,13 @@ namespace loos { */ void extractRnaBackboneAtoms(const AtomicGroup &group); - //! Method to assign residues to a delta(i-1), delta, gamma index - size_t RnaSuite::filterDDG(dihedral, vector &min, - vector &max, uint increment, uint ddg_index); - //! Method to return the cutoff for the suiteness score of non-outliers double getSuitenessCutoff() const; + //! Calculate a scaled hyperellipsoid distance between two points + double hyperellipsoidDist(vector &dihedrals, + vector &reference, uint first_index, uint last_index); + //! Method to print groups of backbone atoms for each dihedral void printBackboneAtoms() const; @@ -108,6 +112,9 @@ namespace loos { //! Method to print reference suite names and mean dihedrals void printReferenceSuites() const; + //! Method to print suite names, suiteness scores, and dihedrals + void printSuites() const; + //! 
Method to set the cutoff for the suiteness score of non-outliers void setSuitenessCutoff(const double suiteness_cutoff_); @@ -119,10 +126,10 @@ namespace loos { vector reference_suite_ddgs; // Widths used to scale each dihedral dimension - vector dihedral_width(7); + vector dihedral_width; // Satellite widths used to scale overlapping clusters - vector satellite_width(4); + vector satellite_width; // Boundaries for allowed regions of delta(i-1), delta, and gamma vector delta_min; @@ -131,8 +138,8 @@ namespace loos { vector gamma_max; // Boundaries used to filter suites based on epsilon, zeta, alpha, beta - vector filter_min(4); - vector filter_max(4); + vector filter_min; + vector filter_max; // Vector of continuous groups, composed of vectors of AtomicGroups // for each residue within a continuous group @@ -155,10 +162,13 @@ namespace loos { // 5' hemi-nucleotide and a letter-like character for the // 3' hemi-nucleotide) and suiteness score vector suite_names; + vector suite_ddg; vector suiteness; // Other internal variables - size_t N_continuous_group = 0; + size_t N_reference_ddg; + vector N_reference_suite; + size_t N_continuous_group; vector N_residue; size_t N_suite; double suiteness_cutoff; From 25c924cdb1d9fa34aade0910299ed3ab61439dda Mon Sep 17 00:00:00 2001 From: "Chapin E. 
Cavender" Date: Wed, 8 Apr 2020 19:00:28 -0400 Subject: [PATCH 06/41] Suite assignment and SWIG interface; agrees with suitename for PDB 1S72 --- Tools/rna_suites.cpp | 33 ++- src/RnaSuite.cpp | 651 +++++++++++++++++++++++++++++-------------- src/RnaSuite.hpp | 116 +++++--- src/RnaSuite.i | 27 ++ src/loos.i | 3 +- 5 files changed, 567 insertions(+), 263 deletions(-) create mode 100644 src/RnaSuite.i diff --git a/Tools/rna_suites.cpp b/Tools/rna_suites.cpp index a44249621..46b620ac8 100644 --- a/Tools/rna_suites.cpp +++ b/Tools/rna_suites.cpp @@ -120,30 +120,39 @@ int main(int argc, char *argv[]) { vector indices = tropts->frameList(); AtomicGroup rna_atoms = selectAtoms(model, sopts->selection); - // Number of frames in trajectory - const uint N_frame = indices.size(); - // Create RNASuite object from RNA atoms RnaSuite rna_suite = RnaSuite(rna_atoms, suiteness_cutoff); - - // Define reference suites from suitename - rna_suite.defineSuites("suitename"); - rna_suite.printReferenceSuites(); + vector suite_resids = rna_suite.getSuiteResids(); + vector suite_resnames = rna_suite.getSuiteResnames(); + //rna_suite.printReferenceSuites(); // Print dihedrals - rna_suite.printBackboneAtoms(); + //rna_suite.printBackboneAtoms(); // Loop over trajectory - for (vector::iterator i = indices.begin(); i != indices.end(); i++) { + vector suite_names; + vector suite_ddgs; + vector suiteness; + uint t = 0; + for (vector::iterator i = indices.begin(); i != indices.end(); ++i) { traj->readFrame(*i); traj->updateGroupCoords(model); rna_suite.calculateBackboneDihedrals(); - rna_suite.printBackboneDihedrals(); - rna_suite.assignRichardsonSuites(); - rna_suite.printSuites(); + rna_suite.assignSuitenameSuites(); + suite_names = rna_suite.getSuiteNames(); + suite_ddgs = rna_suite.getSuiteDDGs(); + suiteness = rna_suite.getSuitenessScores(); + + for (uint j = 0; j < suite_resids.size(); ++j) + cout << boost::format("%5d %5d %3s %2s %2s %8.6f") % t + % suite_resids[j] % suite_resnames[j] % 
suite_names[j] + % suite_ddgs[j] % suiteness[j] << endl; + + ++t; } } + diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp index eebb021bc..02429b556 100644 --- a/src/RnaSuite.cpp +++ b/src/RnaSuite.cpp @@ -29,23 +29,46 @@ namespace loos { // | Constructors // |------------------------------------------------------------------------ - RnaSuite::RnaSuite(const AtomicGroup &group, + RnaSuite::RnaSuite(const AtomicGroup &group, const string suite_definition, const double suiteness_cutoff_) { + suiteness_cutoff = suiteness_cutoff_; + defineSuites(suite_definition); extractRnaBackboneAtoms(group); + + } + + RnaSuite::RnaSuite(const AtomicGroup &group, + const string suite_definition) { + + suiteness_cutoff = 0.01; + defineSuites(suite_definition); + extractRnaBackboneAtoms(group); + + } + + RnaSuite::RnaSuite(const AtomicGroup &group, + const double suiteness_cutoff_) { + suiteness_cutoff = suiteness_cutoff_; + defineSuites("suitename"); + extractRnaBackboneAtoms(group); } RnaSuite::RnaSuite(const AtomicGroup &group) { - extractRnaBackboneAtoms(group); suiteness_cutoff = 0.01; + defineSuites("suitename"); + extractRnaBackboneAtoms(group); } RnaSuite::RnaSuite() { + suiteness_cutoff = 0.01; + defineSuites("suitename"); + } // |------------------------------------------------------------------------ @@ -73,7 +96,7 @@ namespace loos { } // assignDDGIndex() - void RnaSuite::assignRichardsonSuites() { + void RnaSuite::assignSuitenameSuites() { size_t N_delta = delta_min.size(); size_t N_gamma = gamma_min.size(); @@ -89,190 +112,292 @@ namespace loos { // Closest scaled 4D hyperellipsoid distance to a cluster and index of // the associated cluster double min_dist_ezab; - size_t min_dist_ezab_index; + size_t min_index; + + // Closest scaled 4D hyperellipsoid distance to a dominant cluster + double dom_min_dist_ezab; + size_t dom_min_index; - if (alpha.empty()) { + // Closest scaled 4D hyperellipsoid distance to a non-dominant cluster + double sat_min_dist_ezab; + size_t 
sat_min_index; - cout << "Warning: backbone dihedrals are empty" << endl; + // Index into vector of widths for pair of dominant-satellite clusters + size_t dom_sat_index; + + // Number of clusters this dinucleotide could belong to + uint candidates; + + // Scaled 7D hyperellipsoid distance + double dist_7; + + // Index of the assigned suite + size_t assigned_suite_index; + + // Goodness-of-fit for assigned suite + double suiteness_score; + + if (suite_dihedrals.empty()) { + + cerr << "Warning: backbone dihedrals are empty" << endl; return; } // Initialize vectors of suite names and suiteness scores suite_names.clear(); - suite_ddg.clear(); + suite_ddgs.clear(); suiteness.clear(); suite_names.reserve(N_suite); - suite_ddg.reserve(N_suite); + suite_ddgs.reserve(N_suite); suiteness.reserve(N_suite); - for (size_t i = 0; i < N_continuous_group; ++i) { + for (size_t i = 0; i < N_suite; ++i) { - for (size_t j = 0; j < N_residue[i]; ++j) { + // Assign delta(j-1), delta, gamma index. These 3 dihedrals have + // 12 clusters that are independent of the other 4 dihedrals. + ddg_index = 0; - suite = {delta[i][j], epsilon[i][j], zeta[i][j], alpha[i][j], - beta[i][j], gamma[i][j], delta[i][j + 1]}; + // Filter on 5' delta. Values outside of this range are + // indicative of incorrect stereochemistry in the ribose. + if (assignDDGIndex(suite_dihedrals[i][0], delta_min, delta_max, + N_dg, ddg_index) == N_delta) { - // Assign delta(j-1), delta, gamma index. These 3 dihedrals have - // 12 clusters that are independent of the other 4 dihedrals. - ddg_index = 0; + suite_names.push_back("!d"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // Filter on 5' delta. Values outside of this range are - // indicative of incorrect stereochemistry in the ribose. 
- if (assignDDGIndex(suite[0], delta_min, delta_max, N_dg, - ddg_index) == N_delta) { + } - suite_names.push_back("!d"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + // Filter on 3' delta + if (assignDDGIndex(suite_dihedrals[i][6], delta_min, delta_max, + N_gamma, ddg_index) == N_delta) { - } + suite_names.push_back("!d"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // Filter on 3' delta - if (assignDDGIndex(suite[6], delta_min, delta_max, N_gamma, - ddg_index) == N_delta) { + } - suite_names.push_back("!d"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + // Filter on gamma + if (assignDDGIndex(suite_dihedrals[i][5], gamma_min, gamma_max, 1, + ddg_index) == N_gamma) { - } + suite_names.push_back("!g"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // Filter on gamma - if (assignDDGIndex(suite[5], gamma_min, gamma_max, 1, ddg_index) - == N_gamma) { + } - suite_names.push_back("!g"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + // Filter on epsilon. Values outside of this range are + // indicative of a misfit sugar pucker. + if (suite_dihedrals[i][1] < filter_min[0] + || suite_dihedrals[i][1] > filter_max[0]) { - } + suite_names.push_back("!e"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // If there are no clusters associated with this ddg_index, then - // this is an outlier - if (N_reference_suite[ddg_index] == 0) { + } - suite_names.push_back("!!"); - suite_ddg.push_back(reference_suite_ddgs[ddg_index]); - suiteness.push_back(0.0); - continue; + // Filter on zeta + if (suite_dihedrals[i][2] < filter_min[1] + || suite_dihedrals[i][2] > filter_max[1]) { - } + suite_names.push_back("!z"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // Filter on epsilon. Values outside of this range are - // indicative of a misfit sugar pucker. 
- if (suite[1] < filter_min[0] || suite[1] > filter_max[0]) { + } - suite_names.push_back("!e"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + // Filter on alpha + if (suite_dihedrals[i][3] < filter_min[2] + || suite_dihedrals[i][3] > filter_max[2]) { - } + suite_names.push_back("!a"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // Filter on zeta - if (suite[2] < filter_min[1] || suite[2] > filter_max[1]) { + } - suite_names.push_back("!z"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + // Filter on beta + if (suite_dihedrals[i][4] < filter_min[3] + || suite_dihedrals[i][4] > filter_max[3]) { - } + suite_names.push_back("!b"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // Filter on alpha - if (suite[3] < filter_min[2] || suite[3] > filter_max[2]) { + } + + // If there are no clusters associated with this ddg_index, then + // this is an outlier + if (N_reference_suite[ddg_index] == 0) { - suite_names.push_back("!a"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + suite_names.push_back("!!"); + suite_ddgs.push_back(reference_ddgs[ddg_index]); + suiteness.push_back(0.0); + continue; + + } + + // Find closest cluster in epsilon, zeta, alpha, beta + // Largest distance in 7D is 688.66^3, so 10^9 should be safe + min_dist_ezab = 999999999.0; + dom_min_dist_ezab = 999999999.0; + sat_min_dist_ezab = 999999999.0; + min_index = N_reference_suite[ddg_index]; + dom_min_index = N_reference_suite[ddg_index]; + sat_min_index = N_reference_suite[ddg_index]; + candidates = 0; + + for (size_t j = 0; j < N_reference_suite[ddg_index]; ++j) { + + // Get 4D scaled hyperellipsoid distance + dist_ezab = hyperellipsoidDist(suite_dihedrals[i], + reference_dihedrals[ddg_index][j], dihedral_width, 1, 4); + + // Get closest cluster + if (dist_ezab < min_dist_ezab) { + + min_dist_ezab = dist_ezab; + min_index = j; } - // Filter on beta - if (suite[4] < 
filter_min[3] || suite[4] > filter_max[3]) { + // Get closest non-dominant cluster + if (dominant_suites[ddg_index][j] != j + && dist_ezab < sat_min_dist_ezab) { - suite_names.push_back("!b"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + sat_min_dist_ezab = dist_ezab; + sat_min_index = j; } - cout << boost::format("%d %d %d") % i % j % ddg_index << endl; - // Find closest cluster in epsilon, zeta, alpha, beta - // Largest distance in 7D is 688.66 - min_dist_ezab = 999.0; - for (size_t k = 0; k < N_reference_suite[ddg_index]; ++k) { + // If 4D distance < 1, this reference suite is a candidate + if (dist_ezab < 1) { - // Get 4D scaled hyperellipsoid distance - dist_ezab = hyperellipsoidDist(suite, - reference_suite_dihedrals[ddg_index][k], 1, 4); + ++candidates; - if (dist_ezab < min_dist_ezab) { + // Is this candidate a dominant cluster? + if (dominant_suites[ddg_index][j] == j) { - min_dist_ezab = dist_ezab; - min_dist_ezab_index = k; + dom_min_dist_ezab = dist_ezab; + dom_min_index = j; } } + } // loop over reference suites + + // Assign membership to a reference suite + + // If there are multiple candidates, and the two canidates are + // a dominant-satellite pair, then reweight distances + if (candidates > 1 && dom_min_index != N_reference_suite[ddg_index] + && sat_min_index != N_reference_suite[ddg_index] + && dominant_suites[ddg_index][sat_min_index] == dom_min_index) { + + // Is the DNMP in between the dominant and satellite suites? 
+ if (isBetweenDomSatPair(suite_dihedrals[i], + reference_dihedrals[ddg_index][dom_min_index], + reference_dihedrals[ddg_index][sat_min_index])) { + + // Rescale distances from point to dominant and satellite + // suites by ratio of distances from suite centers to + // boundary plane and assign to closest of the two + dom_sat_index = dom_sat_pair_index[ddg_index][sat_min_index]; + if (hyperellipsoidDist(suite_dihedrals[i], + reference_dihedrals[ddg_index][sat_min_index], + satellite_width[dom_sat_index], 1, 4) + <= hyperellipsoidDist(suite_dihedrals[i], + reference_dihedrals[ddg_index][dom_min_index], + dominant_width[dom_sat_index], 1, 4)) + + assigned_suite_index = sat_min_index; + + else assigned_suite_index = dom_min_index; + + + } + + else { + + // Assign to closer of dominant or satellite suite + if (sat_min_dist_ezab <= dom_min_dist_ezab) + assigned_suite_index = sat_min_index; + else assigned_suite_index = dom_min_index; + + } + + } + + // If there is zero or one candidate or multiple candidates but no + // dominant-satellite pair, then assign to the closest suite + else assigned_suite_index = min_index; + + // Make a final decision on whether this is an outlier using 7D + // hyperellipsoid distance + dist_7 = hyperellipsoidDist(suite_dihedrals[i], + reference_dihedrals[ddg_index][assigned_suite_index], + dihedral_width, 0, 6); + + if (dist_7 < 1) { + suite_names.push_back( - reference_suite_names[ddg_index][min_dist_ezab_index]); - suite_ddg.push_back(reference_suite_ddgs[ddg_index]); - suiteness.push_back(1.0); + reference_names[ddg_index][assigned_suite_index]); + suite_ddgs.push_back(reference_ddgs[ddg_index]); + suiteness_score = (1 + cos(M_PI * cbrt(dist_7))) / 2.0; + if (suiteness_score < suiteness_cutoff) + suiteness_score = suiteness_cutoff; + suiteness.push_back(suiteness_score); + + } else { - } // loop over residues + suite_names.push_back("!!"); + suite_ddgs.push_back(reference_ddgs[ddg_index]); + suiteness.push_back(0.0); - } // loop over 
continuous groups + } + + } // loop over suites - } // assignRichardsonSuites() + } // assignSuitenameSuites() void RnaSuite::calculateBackboneDihedrals() { - // Clear vector of vectors of doubles for each backbone dihedral - alpha.clear(); - beta.clear(); - gamma.clear(); - delta.clear(); - epsilon.clear(); - zeta.clear(); + double prev_delta; + vector suite(7); + + // Clear vector of doubles for suite backbone dihedrals + suite_dihedrals.clear(); for (size_t i = 0; i < N_continuous_group; ++i) { - vector continuous_alpha(N_residue[i]); - vector continuous_beta(N_residue[i]); - vector continuous_gamma(N_residue[i]); - vector continuous_delta(N_residue[i] + 1); - vector continuous_epsilon(N_residue[i]); - vector continuous_zeta(N_residue[i]); + prev_delta = calculateDihedral(delta_atoms[i][0]); for (size_t j = 0; j < N_residue[i]; ++j) { - continuous_alpha[j] = calculateDihedral(alpha_atoms[i][j]); - continuous_beta[j] = calculateDihedral(beta_atoms[i][j]); - continuous_gamma[j] = calculateDihedral(gamma_atoms[i][j]); - continuous_delta[j] = calculateDihedral(delta_atoms[i][j]); - continuous_epsilon[j] = calculateDihedral(epsilon_atoms[i][j]); - continuous_zeta[j] = calculateDihedral(zeta_atoms[i][j]); + suite[0] = prev_delta; + suite[1] = calculateDihedral(epsilon_atoms[i][j]); + suite[2] = calculateDihedral(zeta_atoms[i][j]); + suite[3] = calculateDihedral(alpha_atoms[i][j]); + suite[4] = calculateDihedral(beta_atoms[i][j]); + suite[5] = calculateDihedral(gamma_atoms[i][j]); + prev_delta = calculateDihedral(delta_atoms[i][j + 1]); + suite[6] = prev_delta; + suite_dihedrals.push_back(suite); } - continuous_delta[N_residue[i]] = - calculateDihedral(delta_atoms[i][N_residue[i]]); - - alpha.push_back(continuous_alpha); - beta.push_back(continuous_beta); - gamma.push_back(continuous_gamma); - delta.push_back(continuous_delta); - epsilon.push_back(continuous_epsilon); - zeta.push_back(continuous_zeta); - } } // calculateBackboneDihedrals() @@ -291,7 +416,7 @@ namespace 
loos { if (group_vector.size() != target_size) { - cout << boost::format("Error: different number of continuous " + cerr << boost::format("Error: different number of continuous " "groups for alpha (%d) and %s (%d)\n") % target_size % dihedral_name % group_vector.size(); throw(LOOSError()); @@ -307,7 +432,7 @@ namespace loos { if (residue_vector.size() != target_size) { - cout << boost::format("Error: different number of residues in " + cerr << boost::format("Error: different number of residues in " "continuous group %d for alpha (%d) and %s (%d)\n") % group_index % target_size % dihedral_name % residue_vector.size(); @@ -318,18 +443,18 @@ namespace loos { void RnaSuite::defineSuites(const string suite_definition) { - reference_suite_dihedrals.clear(); - reference_suite_names.clear(); - reference_suite_ddgs.clear(); + reference_dihedrals.clear(); + reference_names.clear(); + reference_ddgs.clear(); if (suite_definition == "suitename" || suite_definition == "richardson") defineSuitesFromSuitename(); else { - cout << boost::format("%s is not a recognized suite definition\n") + cerr << boost::format("%s is not a recognized suite definition\n") % suite_definition; - cout << "Must be one of: suitename" << endl; + cerr << "Must be one of: suitename" << endl; throw(LOOSError()); } @@ -339,15 +464,17 @@ namespace loos { void RnaSuite::defineSuitesFromFile(const string filename) { // TODO read suite definitions from file - cout << "Reading suite definitions from a file is not yet supported\n" + cerr << "Reading suite definitions from a file is not yet supported\n" "Go yell at Chapin" << endl; } // defineSuitesFromFile() void RnaSuite::defineSuitesFromSuitename() { + size_t suite_size; + // Means of dihedral angles - reference_suite_dihedrals = { + reference_dihedrals = { { // ddg index 0: C3' C3' plus { 81.495, 212.250, 288.831, 294.967, 173.990, 53.550, 81.035}, { 83.513, 218.120, 291.593, 292.247, 222.300, 58.067, 86.093}, @@ -416,8 +543,49 @@ namespace loos { {147.342, 
256.475, 295.508, 287.408, 194.525, 293.725, 150.458} } }; + // Get number of ddg clusters and number of suites in each ddg cluster + // Dominant suites lists indices of the dominant suite associated with + // a satellite suite. A value of reference_suite_dihedrals.size() + // that this suite is neither dominant nor satellite. A dominant suite + // will point to its own index. + N_reference_ddg = reference_dihedrals.size(); + N_reference_suite.clear(); + dominant_suites.clear(); + for (size_t i = 0; i < N_reference_ddg; ++i) { + + suite_size = reference_dihedrals[i].size(); + N_reference_suite.push_back(suite_size); + vector dom_suites(suite_size, suite_size); + dominant_suites.push_back(dom_suites); + + } + + // 1m, 1L, and &a are satellites of 1a + dominant_suites[0][0] = 0; + dominant_suites[0][1] = 0; + dominant_suites[0][2] = 0; + dominant_suites[0][3] = 0; + + // 1f is a satellite of 1c + dominant_suites[1][1] = 1; + dominant_suites[1][2] = 1; + + // 1[ is a satellite of 1b + dominant_suites[3][0] = 0; + dominant_suites[3][1] = 0; + + // 4a and #a are satellites of 0a + dominant_suites[6][2] = 2; + dominant_suites[6][1] = 2; + dominant_suites[6][3] = 2; + + // 0i nd 6j are satellites of 6n + dominant_suites[7][3] = 3; + dominant_suites[7][2] = 3; + dominant_suites[7][4] = 3; + // Two-character suite name - reference_suite_names = { + reference_names = { {"1a", "1m", "1L", "&a", "7a", "3a", "9a", "1g", "7d", "3d", "5d", "3g"}, {"1e", "1c", "1f", "5j", "5n"}, @@ -433,16 +601,54 @@ namespace loos { {"2o"} }; - // Delta(i-1), delta, gamma index. Delta can be C3' endo ("3") or - // C2' endo ("2"). Gamma can be plus ("p"), trans ("t"), or minus ("m"). 
- reference_suite_ddgs = {"33p", "33t", "33m", "32p", "32t", "32m", "23p", - "23t", "23m", "22p", "22t", "22m"}; - // Widths used to scale each dihedral dimension dihedral_width = {28.0, 60.0, 55.0, 50.0, 70.0, 35.0, 28.0}; - // Satellite widths used to scale overlapping clusters - satellite_width = {50.0, 50.0, 45.0, 60.0}; + // Alternative widths used to scale dominant-satellite pairs + dominant_width = { + {28.0, 60.0, 55.0, 50.0, 64.0, 35.0, 28.0}, + {28.0, 70.0, 55.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 60.0, 60.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 65.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 56.0, 35.0, 28.0}, + {28.0, 50.0, 50.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 36.0, 36.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0} + }; + + satellite_width = { + {28.0, 60.0, 55.0, 50.0, 32.0, 35.0, 28.0}, + {28.0, 18.0, 55.0, 50.0, 18.0, 35.0, 28.0}, + {28.0, 20.0, 20.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 47.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 34.0, 35.0, 28.0}, + {28.0, 40.0, 40.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 26.0, 26.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0}, + }; + + // Index into dominant-satellite pair widths + dom_sat_pair_index = { + {9, 0, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9}, + {9, 9, 3, 9, 9}, + { }, + {9, 4, 9, 9, 9, 9, 9}, + {9, 9}, + {9, 9, 9}, + {9, 5, 9, 6, 9, 9, 9, 9, 9, 9}, + {9, 9, 7, 9, 8}, + {9}, + {9, 9, 9, 9, 9, 9}, + {9, 9}, + {9} + }; + + // Delta(i-1), delta, gamma index. Delta can be C3' endo ("3") or + // C2' endo ("2"). Gamma can be plus ("p"), trans ("t"), or minus ("m"). 
+ reference_ddgs = {"33p", "33t", "33m", "32p", "32t", "32m", "23p", + "23t", "23m", "22p", "22t", "22m"}; // Boundaries for allowed regions of delta(i-1), delta, and gamma delta_min = { 60.0, 125.0}; @@ -454,12 +660,6 @@ namespace loos { filter_min = {155.0, 25.0, 25.0, 50.0}; filter_max = {310.0, 335.0, 335.0, 290.0}; - // Get number of ddg clusters and number of suites in each ddg cluster - N_reference_ddg = reference_suite_dihedrals.size(); - N_reference_suite.clear(); - for (size_t i = 0; i < N_reference_ddg; ++i) - N_reference_suite.push_back(reference_suite_dihedrals[i].size()); - } // defineSuitesFromSuitename() void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { @@ -629,7 +829,8 @@ namespace loos { // are consistent across backbone dihedrals. Delta should have one // additional residue per continuous group. N_residue.clear(); - N_suite = 0; + suite_resids.clear(); + suite_resnames.clear(); for (size_t i = 0; i < N_continuous_group; ++i) { @@ -640,18 +841,51 @@ namespace loos { checkResidueSize(epsilon_atoms[i], residue_size, "epsilon", i + 1); checkResidueSize(zeta_atoms[i], residue_size, "zeta", i + 1); N_residue.push_back(residue_size); - N_suite += residue_size; + + for (size_t j = 0; j < residue_size; ++j) { + + suite_resids.push_back(gamma_atoms[i][j][0]->resid()); + suite_resnames.push_back(gamma_atoms[i][j][0]->resname()); + + } } + N_suite = suite_resids.size(); + } // extractRnaBackboneAtoms() + vector RnaSuite::getSuiteDDGs() const { + return suite_ddgs; + } // getSuiteDDGs() + + vector> RnaSuite::getSuiteDihedrals() const { + return suite_dihedrals; + } // getSuiteDihedrals() + + vector RnaSuite::getSuiteNames() const { + return suite_names; + } // getSuiteNames() + + vector RnaSuite::getSuiteResids() const { + return suite_resids; + } // getSuiteResids() + + vector RnaSuite::getSuiteResnames() const { + return suite_resnames; + } // getSuiteResnames() + double RnaSuite::getSuitenessCutoff() const { return suiteness_cutoff; } // 
getSuitenessCutoff() - double RnaSuite::hyperellipsoidDist(vector &dihedrals, - vector &reference, uint first_index, uint last_index) { + vector RnaSuite::getSuitenessScores() const { + return suiteness; + } // getSuitenessScores() + + double RnaSuite::hyperellipsoidDist(const vector &dihedrals, + const vector &reference, const vector &width, + uint first_index, uint last_index) { double unscaled_diff; double sum_scaled_powers = 0.0; @@ -661,14 +895,40 @@ namespace loos { unscaled_diff = abs(dihedrals[i] - reference[i]); // suitename program does not wrap unscaled coordinates // if (unscaled_diff > 180.0) unscaled_diff = 360.0 - unscaled_diff; - sum_scaled_powers += pow(unscaled_diff / dihedral_width[i], 3.0); + sum_scaled_powers += pow(unscaled_diff / width[i], 3.0); } - return cbrt(sum_scaled_powers); + return sum_scaled_powers; } // hyperellipsoidDist4() + bool RnaSuite::isBetweenDomSatPair(const vector &dihedrals, + const vector &dominant, const vector &satellite) { + + double dom_to_sat; + double dom_dot_product = 0; + double sat_dot_product = 0; + + // If the point is in between the dominant and satellite reference + // suites, then the dot product between the vectors (point - dominant) + // and (satellite - dominant) and the dot product between the vectors + // (point - satellite) and (dominant - satellite) should both be + // positive, i.e. the cosine of the angles is positive. 
+ for (uint i = 1; i <= 4; ++i) { + + dom_to_sat = satellite[i] - dominant[i]; + dom_dot_product += (dihedrals[i] - dominant[i]) * dom_to_sat; + // sat_dot_product += (dihedrals[i] - satellite[i]) * sat_to_dom + // sat_to_dom = -dom_to_sat + sat_dot_product += (satellite[i] - dihedrals[i]) * dom_to_sat; + + } + + return dom_dot_product > 0 && sat_dot_product > 0; + + } // isBetweenDomSatPair() + void RnaSuite::printBackboneAtoms() const { size_t i_plus; @@ -678,7 +938,7 @@ namespace loos { if (alpha_atoms.empty()) { - cout << "Warning: backbone atoms are empty" << endl; + cerr << "Warning: backbone atoms are empty" << endl; return; } @@ -722,27 +982,20 @@ namespace loos { cout << "\n ==== Printing backbone dihedrals ====\n" << endl; - if (alpha.empty()) { + if (suite_dihedrals.empty()) { - cout << "Warning: backbone dihedrals are empty" << endl; + cerr << "Warning: backbone dihedrals are empty" << endl; return; } - for (size_t i = 0; i < N_continuous_group; ++i) { - - for (size_t j = 0; j < N_residue[i]; ++j) { - - cout << boost::format( - "%5d %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") - % gamma_atoms[i][j][0]->resid() - % gamma_atoms[i][j][0]->resname() % delta[i][j] - % epsilon[i][j] % zeta[i][j] % alpha[i][j] % beta[i][j] - % gamma[i][j] % delta[i][j + 1]; - - } - - } + for (size_t i = 0; i < N_suite; ++i) + cout << boost::format( + "%5d %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") + % suite_resids[i] % suite_resnames[i] % suite_dihedrals[i][0] + % suite_dihedrals[i][1] % suite_dihedrals[i][2] + % suite_dihedrals[i][3] % suite_dihedrals[i][4] + % suite_dihedrals[i][5] % suite_dihedrals[i][6]; } // printBackboneDihedrals() @@ -750,69 +1003,55 @@ namespace loos { cout << "\n ==== Printing reference suites ====\n" << endl; - if (reference_suite_dihedrals.empty()) { + if (reference_dihedrals.empty()) { - cout << "Warning: reference suites are empty" << endl; + cerr << "Warning: reference suites are empty" << endl; return; } - for (size_t i = 0; i < 
N_reference_ddg; ++i) { - - for (size_t j = 0; j < N_reference_suite[i]; ++j) { - + for (size_t i = 0; i < N_reference_ddg; ++i) + for (size_t j = 0; j < N_reference_suite[i]; ++j) cout << boost::format( "%2s %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") - % reference_suite_names[i][j] % reference_suite_ddgs[i] - % reference_suite_dihedrals[i][j][0] - % reference_suite_dihedrals[i][j][1] - % reference_suite_dihedrals[i][j][2] - % reference_suite_dihedrals[i][j][3] - % reference_suite_dihedrals[i][j][4] - % reference_suite_dihedrals[i][j][5] - % reference_suite_dihedrals[i][j][6]; - - } - - } + % reference_names[i][j] % reference_ddgs[i] + % reference_dihedrals[i][j][0] + % reference_dihedrals[i][j][1] + % reference_dihedrals[i][j][2] + % reference_dihedrals[i][j][3] + % reference_dihedrals[i][j][4] + % reference_dihedrals[i][j][5] + % reference_dihedrals[i][j][6]; } // printReferenceSuites() void RnaSuite::printSuites() const { - uint suite_counter = 0; - cout << "\n ==== Printing suites ====\n" << endl; if (suite_names.empty()) { - cout << "Warning: suites are empty" << endl; + cerr << "Warning: suites are empty" << endl; return; } - for (size_t i = 0; i < N_continuous_group; ++i) { - - for (size_t j = 0; j < N_residue[i]; ++j) { - - cout << boost::format("%5d %3s %2s %3s %8.6f %7.3f %7.3f %7.3f " - "%7.3f %7.3f %7.3f %7.3f\n") % gamma_atoms[i][j][0]->resid() - % gamma_atoms[i][j][0]->resname() - % suite_names[suite_counter] % suite_ddg[suite_counter] - % suiteness[suite_counter] % delta[i][j] % epsilon[i][j] - % zeta[i][j] % alpha[i][j] % beta[i][j] % gamma[i][j] - % delta[i][j + 1]; - - ++suite_counter; - - } - - } + for (size_t i = 0; i < N_suite; ++i) + cout << boost::format("%5d %3s %2s %3s %8.6f %7.3f %7.3f %7.3f " + "%7.3f %7.3f %7.3f %7.3f\n") % suite_resids[i] + % suite_resnames[i] % suite_names[i] % suite_ddgs[i] + % suiteness[i] % suite_dihedrals[i][0] % suite_dihedrals[i][1] + % suite_dihedrals[i][2] % suite_dihedrals[i][3] + % 
suite_dihedrals[i][4] % suite_dihedrals[i][5] + % suite_dihedrals[i][6]; - } // printReferenceSuites() + } // printSuites() void RnaSuite::setSuitenessCutoff(const double suiteness_cutoff_) { + suiteness_cutoff = suiteness_cutoff_; + } // setSuitenessCutoff() } + diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index 8375cc040..b3d68e2d1 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -41,16 +41,17 @@ namespace loos { public: + RnaSuite(const AtomicGroup &group, const string suite_defintion, + const double suiteness_cutoff_); + + RnaSuite(const AtomicGroup &group, const string suite_definition); + RnaSuite(const AtomicGroup &group, const double suiteness_cutoff_); RnaSuite(const AtomicGroup &group); RnaSuite(); - //! Method to assign residues to a delta(i-1), delta, gamma index - size_t assignDDGIndex(double dihedral, vector &min, - vector &max, uint increment, uint &ddg_index); - //! Method to assign residues to backbone suites from Richardson et al. /** * This method assigns residues to one of the 46 backbone suites @@ -58,7 +59,7 @@ namespace loos { * residue is defined from delta of the previous residue to delta of * the current residue. */ - void assignRichardsonSuites(); + void assignSuitenameSuites(); //! Method to calculate backbone dihedrals for each RNA residue /** @@ -67,28 +68,9 @@ namespace loos { */ void calculateBackboneDihedrals(); - //! Calculate a dihedral in deg from 4 atoms in the range [0, 360] - double calculateDihedral(const AtomicGroup &group); - - //! Method to check the size of a vector of continuous groups - void checkContinuousGroupSize( - const vector> &group_vector, - const size_t target_size, const string dihedral_name) const; - - //! Method to check the size of a vector of residues - void checkResidueSize(const vector &residue_vector, - const size_t target_size, const string dihedral_name, - const size_t group_index) const; - //! 
Method to define suites used for assignment from an existing scheme void defineSuites(const string suite_definition); - //! Method to define suites used for assignment from a file - void defineSuitesFromFile(const string filename); - - //! Method to define suites used for assignment from suitename - void defineSuitesFromSuitename(); - //! Method to extract RNA backbone atoms from an AtomicGroup /** * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', @@ -96,12 +78,26 @@ namespace loos { */ void extractRnaBackboneAtoms(const AtomicGroup &group); + //! Method to return the current indices into delta delta gamma clusters + vector getSuiteDDGs() const; + + //! Method to return the current backbone dihedrals + vector> getSuiteDihedrals() const; + + //! Method to return the current assigned suite names + vector getSuiteNames() const; + + //! Method to return the suite residue indices + vector getSuiteResids() const; + + //! Method to return the suite residue names + vector getSuiteResnames() const; + //! Method to return the cutoff for the suiteness score of non-outliers double getSuitenessCutoff() const; - //! Calculate a scaled hyperellipsoid distance between two points - double hyperellipsoidDist(vector &dihedrals, - vector &reference, uint first_index, uint last_index); + //! Method to return the current suiteness scores + vector getSuitenessScores() const; //! Method to print groups of backbone atoms for each dihedral void printBackboneAtoms() const; @@ -120,16 +116,53 @@ namespace loos { private: + //! Method to assign residues to a delta(i-1), delta, gamma index + size_t assignDDGIndex(double dihedral, vector &min, + vector &max, uint increment, uint &ddg_index); + + //! Calculate a dihedral in deg from 4 atoms in the range [0, 360] + double calculateDihedral(const AtomicGroup &group); + + //! 
Method to check the size of a vector of continuous groups + void checkContinuousGroupSize( + const vector> &group_vector, + const size_t target_size, const string dihedral_name) const; + + //! Method to check the size of a vector of residues + void checkResidueSize(const vector &residue_vector, + const size_t target_size, const string dihedral_name, + const size_t group_index) const; + + //! Method to define suites used for assignment from a file + void defineSuitesFromFile(const string filename); + + //! Method to define suites used for assignment from suitename + void defineSuitesFromSuitename(); + + //! Method to test whether a point is in between two reference points + bool isBetweenDomSatPair(const vector &dihedrals, + const vector &dominant, const vector &satellite); + + //! Calculate a scaled hyperellipsoid distance between two points + double hyperellipsoidDist(const vector &dihedrals, + const vector &reference, const vector &width, + uint first_index, uint last_index); + // Reference suites used for assignment - vector>> reference_suite_dihedrals; - vector> reference_suite_names; - vector reference_suite_ddgs; + vector>> reference_dihedrals; + vector> reference_names; + vector reference_ddgs; + vector> dominant_suites; // Widths used to scale each dihedral dimension vector dihedral_width; - // Satellite widths used to scale overlapping clusters - vector satellite_width; + // Alternative widths used to scale dominant-satellite pairs + vector> dominant_width; + vector> satellite_width; + + // Index into dominant-satellite pair widths + vector> dom_sat_pair_index; // Boundaries for allowed regions of delta(i-1), delta, and gamma vector delta_min; @@ -150,19 +183,14 @@ namespace loos { vector> epsilon_atoms; vector> zeta_atoms; - // Vector of vectors of backbone dihedrals - vector> alpha; - vector> beta; - vector> gamma; - vector> delta; - vector> epsilon; - vector> zeta; - - // Output: suite name (composed of a number-like character for the - // 5' 
hemi-nucleotide and a letter-like character for the - // 3' hemi-nucleotide) and suiteness score + // Suite residue ids, residue names, and dihedrals + vector suite_resids; + vector suite_resnames; + vector> suite_dihedrals; + + // Assigned suite names, ddg indices, and suiteness scores vector suite_names; - vector suite_ddg; + vector suite_ddgs; vector suiteness; // Other internal variables diff --git a/src/RnaSuite.i b/src/RnaSuite.i new file mode 100644 index 000000000..444b78aed --- /dev/null +++ b/src/RnaSuite.i @@ -0,0 +1,27 @@ +/* + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008, Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +%header %{ +#include +%} + +%include "RnaSuite.hpp" + diff --git a/src/loos.i b/src/loos.i index c251fb2b2..adbec630f 100644 --- a/src/loos.i +++ b/src/loos.i @@ -52,7 +52,7 @@ namespace loos { %template(DoubleVectorMatrix) std::vector< std::vector >; %template(IntVector) std::vector; %template(UIntVector) std::vector; - +%template(StringVector) std::vector; %include "exceptions.i" @@ -81,3 +81,4 @@ namespace loos { %include "gro.i" %include "utils_structural.i" %include "Weights.i" +%include "RnaSuite.i" From d596c36fd9d4ba074f82725fdb932ff4d1e8662e Mon Sep 17 00:00:00 2001 From: "Chapin E. 
Cavender" Date: Wed, 15 Apr 2020 04:05:22 -0400 Subject: [PATCH 07/41] Read reference suite definitions from file; remove C++11 features --- Tools/suitename_definitions.dat | 82 ++++++ src/RnaSuite.cpp | 459 ++++++++++++++++---------------- src/RnaSuite.hpp | 47 ++-- 3 files changed, 338 insertions(+), 250 deletions(-) create mode 100644 Tools/suitename_definitions.dat diff --git a/Tools/suitename_definitions.dat b/Tools/suitename_definitions.dat new file mode 100644 index 000000000..1969a283f --- /dev/null +++ b/Tools/suitename_definitions.dat @@ -0,0 +1,82 @@ +# Reference suite dihedrals +# Name DDG Delta-1 Epsilon Zeta Alpha Beta Gamma Delta +suite 1a 33p 81.495 212.250 288.831 294.967 173.990 53.550 81.035 +suite 1m 33p 83.513 218.120 291.593 292.247 222.300 58.067 86.093 +suite 1L 33p 85.664 245.014 268.257 303.879 138.164 61.950 79.457 +suite &a 33p 82.112 190.682 264.945 295.967 181.839 51.455 81.512 +suite 7a 33p 83.414 217.400 222.006 302.856 160.719 49.097 82.444 +suite 3a 33p 85.072 216.324 173.276 289.320 164.132 45.876 84.956 +suite 9a 33p 83.179 210.347 121.474 288.568 157.268 49.347 81.047 +suite 1g 33p 80.888 218.636 290.735 167.447 159.565 51.326 85.213 +suite 7d 33p 83.856 238.750 256.875 69.562 170.200 52.800 85.287 +suite 3d 33p 85.295 244.085 203.815 65.880 181.130 54.680 86.035 +suite 5d 33p 79.671 202.471 63.064 68.164 143.450 49.664 82.757 +suite 3g 33p 84.000 195.000 146.000 170.000 170.000 52.000 84.000 +suite 1e 33t 80.514 200.545 280.510 249.314 82.662 167.890 85.507 +suite 1c 33t 80.223 196.591 291.299 153.060 194.379 179.061 83.648 +suite 1f 33t 81.395 203.030 294.445 172.195 138.540 175.565 84.470 +suite 5j 33t 87.417 223.558 80.175 66.667 109.150 176.475 83.833 +suite 5n 33t 86.055 246.502 100.392 73.595 213.752 183.395 85.483 +suite 33m +suite 1b 32p 84.215 215.014 288.672 300.420 177.476 58.307 144.841 +suite 1[ 32p 82.731 220.463 288.665 296.983 221.654 54.213 143.771 +suite 3b 32p 84.700 226.400 168.336 292.771 177.629 48.629 
147.950 +suite 1z 32p 83.358 206.042 277.567 195.700 161.600 50.750 145.258 +suite 5z 32p 82.614 206.440 52.524 163.669 148.421 50.176 147.590 +suite 7p 32p 84.285 236.600 220.400 68.300 200.122 53.693 145.730 +suite 5p 32p 84.457 213.286 69.086 75.500 156.671 57.486 147.686 +suite 1t 32t 81.200 199.243 288.986 180.286 194.743 178.200 147.386 +suite 5q 32t 82.133 204.933 69.483 63.417 115.233 176.283 145.733 +suite 1o 32m 83.977 216.508 287.192 297.254 225.154 293.738 150.677 +suite 7r 32m 84.606 232.856 248.125 63.269 181.975 295.744 149.744 +suite 5r 32m 83.000 196.900 65.350 60.150 138.425 292.550 154.275 +suite 2a 23p 145.399 260.339 288.756 288.444 192.733 53.097 84.067 +suite 4a 23p 146.275 259.783 169.958 298.450 169.583 50.908 83.967 +suite 0a 23p 149.286 223.159 139.421 284.559 158.107 47.900 84.424 +suite #a 23p 148.006 191.944 146.231 289.288 150.781 42.419 84.956 +suite 4g 23p 148.028 256.922 165.194 204.961 165.194 49.383 82.983 +suite 6g 23p 145.337 262.869 79.588 203.863 189.688 58.000 84.900 +suite 8d 23p 148.992 270.596 240.892 62.225 176.271 53.600 87.262 +suite 4d 23p 149.822 249.956 187.678 80.433 198.133 61.000 89.378 +suite 6d 23p 146.922 241.222 88.894 59.344 160.683 52.333 83.417 +suite 2g 23p 141.900 258.383 286.517 178.267 165.217 48.350 84.783 +suite 2h 23t 147.782 260.712 290.424 296.200 177.282 175.594 86.565 +suite 4n 23t 143.722 227.256 203.789 73.856 216.733 194.444 80.911 +suite 0i 23t 148.717 274.683 100.283 80.600 248.133 181.817 82.600 +suite 6n 23t 150.311 268.383 84.972 63.811 191.483 176.644 85.600 +suite 6j 23t 141.633 244.100 66.056 71.667 122.167 182.200 83.622 +suite 0k 23m 149.070 249.780 111.520 278.370 207.780 287.820 86.650 +suite 2[ 22p 146.383 259.402 291.275 291.982 210.048 54.412 147.760 +suite 4b 22p 145.256 244.622 162.822 294.159 171.630 45.900 145.804 +suite 0b 22p 147.593 248.421 112.086 274.943 164.764 56.843 146.264 +suite 4p 22p 150.077 260.246 213.785 71.900 207.638 56.715 148.131 +suite 6p 22p 146.415 
257.831 89.597 67.923 173.051 55.513 147.623 +suite 2z 22p 142.900 236.550 268.800 180.783 185.133 54.467 143.350 +suite 4s 22t 149.863 247.562 170.488 277.938 84.425 176.413 148.087 +suite 2u 22t 143.940 258.200 298.240 279.640 183.680 183.080 145.120 +suite 2o 22m 147.342 256.475 295.508 287.408 194.525 293.725 150.458 +# Default widths for hyperellipsoid distance +# Delta-1 Epsilon Zeta Alpha Beta Gamma Delta +width 28.000 60.000 55.000 50.000 70.000 35.000 28.000 +# Dominant-satellite pairs. Must come after suite and width +# Satname Domname DihedralSatwidthDomwidthDihedralSatwidthDomwidth +domsat 1m 1a 4 32.000 64.000 +domsat 1L 1a 1 18.000 70.000 4 18.000 70.000 +domsat &a 1a 1 20.000 60.000 2 20.000 60.000 +domsat 1f 1c 4 47.000 65.000 +domsat 1[ 1b 4 34.000 56.000 +domsat 4a 0a 1 40.000 50.000 2 40.000 50.000 +domsat #a 0a 1 26.000 36.000 2 26.000 36.000 +domsat 0i 6n 4 60.000 60.000 +domsat 6j 6n 4 60.000 60.000 +# Filter ranges +# Min Max +delta 60.000 105.000 +delta 125.000 165.000 +epsilon 155.000 310.000 +zeta 25.000 335.000 +alpha 25.000 335.000 +beta 50.000 290.000 +gamma 20.000 95.000 +gamma 140.000 215.000 +gamma 260.000 335.000 diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp index 02429b556..7a3bdf006 100644 --- a/src/RnaSuite.cpp +++ b/src/RnaSuite.cpp @@ -194,8 +194,8 @@ namespace loos { // Filter on epsilon. Values outside of this range are // indicative of a misfit sugar pucker. 
- if (suite_dihedrals[i][1] < filter_min[0] - || suite_dihedrals[i][1] > filter_max[0]) { + if (suite_dihedrals[i][1] < ezab_min[0] + || suite_dihedrals[i][1] > ezab_max[0]) { suite_names.push_back("!e"); suite_ddgs.push_back("!!!"); @@ -205,8 +205,8 @@ namespace loos { } // Filter on zeta - if (suite_dihedrals[i][2] < filter_min[1] - || suite_dihedrals[i][2] > filter_max[1]) { + if (suite_dihedrals[i][2] < ezab_min[1] + || suite_dihedrals[i][2] > ezab_max[1]) { suite_names.push_back("!z"); suite_ddgs.push_back("!!!"); @@ -216,8 +216,8 @@ namespace loos { } // Filter on alpha - if (suite_dihedrals[i][3] < filter_min[2] - || suite_dihedrals[i][3] > filter_max[2]) { + if (suite_dihedrals[i][3] < ezab_min[2] + || suite_dihedrals[i][3] > ezab_max[2]) { suite_names.push_back("!a"); suite_ddgs.push_back("!!!"); @@ -227,8 +227,8 @@ namespace loos { } // Filter on beta - if (suite_dihedrals[i][4] < filter_min[3] - || suite_dihedrals[i][4] > filter_max[3]) { + if (suite_dihedrals[i][4] < ezab_min[3] + || suite_dihedrals[i][4] > ezab_max[3]) { suite_names.push_back("!b"); suite_ddgs.push_back("!!!"); @@ -411,14 +411,14 @@ namespace loos { } // calculateDihedral() void RnaSuite::checkContinuousGroupSize( - const vector> &group_vector, + const vector > &group_vector, const size_t target_size, const string dihedral_name) const { if (group_vector.size() != target_size) { cerr << boost::format("Error: different number of continuous " - "groups for alpha (%d) and %s (%d)\n") % target_size - % dihedral_name % group_vector.size(); + "groups for alpha (%d) and %s (%d)") % target_size + % dihedral_name % group_vector.size() << endl; throw(LOOSError()); } @@ -433,234 +433,240 @@ namespace loos { if (residue_vector.size() != target_size) { cerr << boost::format("Error: different number of residues in " - "continuous group %d for alpha (%d) and %s (%d)\n") - % group_index % target_size % dihedral_name - % residue_vector.size(); + "continuous group %d for alpha (%d) and %s (%d)") % 
group_index + % target_size % dihedral_name % residue_vector.size() << endl; + throw(LOOSError()); } } // checkResidueSize() - void RnaSuite::defineSuites(const string suite_definition) { + void RnaSuite::defineSuites(const string& suite_definition) { + // Clear vectors for reference suites reference_dihedrals.clear(); reference_names.clear(); reference_ddgs.clear(); + dihedral_width.clear(); + dominant_suites.clear(); + dom_sat_pair_index.clear(); + dominant_width.clear(); + satellite_width.clear(); + delta_min.clear(); + delta_max.clear(); + gamma_min.clear(); + gamma_max.clear(); + ezab_min = vector(4); + ezab_max = vector(4); + N_reference_ddg = 0; + N_reference_suite.clear(); - if (suite_definition == "suitename" - || suite_definition == "richardson") defineSuitesFromSuitename(); + if (suite_definition == "suitename") + defineSuitesFromFile("suitename_definitions.dat"); - else { + else defineSuitesFromFile(suite_definition); - cerr << boost::format("%s is not a recognized suite definition\n") - % suite_definition; - cerr << "Must be one of: suitename" << endl; - throw(LOOSError()); + } // defineSuites() - } + void RnaSuite::defineSuitesFromFile(const string& filename) { - } // defineSuites() + size_t ddg_index; + size_t dom_index; + size_t sat_index; + size_t position; + string field; + string line; + string record; + vector dihedrals(7); - void RnaSuite::defineSuitesFromFile(const string filename) { + // Store dominant-satellite pairs + vector domsat_ddg; + vector domsat_dom; + vector domsat_sat; + vector > domsat_dihedral; + vector > domsat_dom_width; + vector > domsat_sat_width; - // TODO read suite definitions from file - cerr << "Reading suite definitions from a file is not yet supported\n" - "Go yell at Chapin" << endl; + // Read file contents + ifstream ifs(filename.c_str()); + if (!ifs) throw(FileOpenError(filename)); - } // defineSuitesFromFile() + while (getline(ifs, line)) { - void RnaSuite::defineSuitesFromSuitename() { - - size_t suite_size; - - 
// Means of dihedral angles - reference_dihedrals = { - { // ddg index 0: C3' C3' plus - { 81.495, 212.250, 288.831, 294.967, 173.990, 53.550, 81.035}, - { 83.513, 218.120, 291.593, 292.247, 222.300, 58.067, 86.093}, - { 85.664, 245.014, 268.257, 303.879, 138.164, 61.950, 79.457}, - { 82.112, 190.682, 264.945, 295.967, 181.839, 51.455, 81.512}, - { 83.414, 217.400, 222.006, 302.856, 160.719, 49.097, 82.444}, - { 85.072, 216.324, 173.276, 289.320, 164.132, 45.876, 84.956}, - { 83.179, 210.347, 121.474, 288.568, 157.268, 49.347, 81.047}, - { 80.888, 218.636, 290.735, 167.447, 159.565, 51.326, 85.213}, - { 83.856, 238.750, 256.875, 69.562, 170.200, 52.800, 85.287}, - { 85.295, 244.085, 203.815, 65.880, 181.130, 54.680, 86.035}, - { 79.671, 202.471, 63.064, 68.164, 143.450, 49.664, 82.757}, - { 84.000, 195.000, 146.000, 170.000, 170.000, 52.000, 84.000} - }, { // ddg index 1: C3' C3' trans - { 80.514, 200.545, 280.510, 249.314, 82.662, 167.890, 85.507}, - { 80.223, 196.591, 291.299, 153.060, 194.379, 179.061, 83.648}, - { 81.395, 203.030, 294.445, 172.195, 138.540, 175.565, 84.470}, - { 87.417, 223.558, 80.175, 66.667, 109.150, 176.475, 83.833}, - { 86.055, 246.502, 100.392, 73.595, 213.752, 183.395, 85.483} - }, { // ddg index 2: C3' C3' minus - }, { // ddg index 3: C3' C2' plus - { 84.215, 215.014, 288.672, 300.420, 177.476, 58.307, 144.841}, - { 82.731, 220.463, 288.665, 296.983, 221.654, 54.213, 143.771}, - { 84.700, 226.400, 168.336, 292.771, 177.629, 48.629, 147.950}, - { 83.358, 206.042, 277.567, 195.700, 161.600, 50.750, 145.258}, - { 82.614, 206.440, 52.524, 163.669, 148.421, 50.176, 147.590}, - { 84.285, 236.600, 220.400, 68.300, 200.122, 53.693, 145.730}, - { 84.457, 213.286, 69.086, 75.500, 156.671, 57.486, 147.686} - }, { // ddg index 4: C3' C2' trans - { 81.200, 199.243, 288.986, 180.286, 194.743, 178.200, 147.386}, - { 82.133, 204.933, 69.483, 63.417, 115.233, 176.283, 145.733} - }, { // ddg index 5: C3' C2' minus - { 83.977, 216.508, 287.192, 297.254, 
225.154, 293.738, 150.677}, - { 84.606, 232.856, 248.125, 63.269, 181.975, 295.744, 149.744}, - { 83.000, 196.900, 65.350, 60.150, 138.425, 292.550, 154.275} - }, { // ddg index 6: C2' C3' plus - {145.399, 260.339, 288.756, 288.444, 192.733, 53.097, 84.067}, - {146.275, 259.783, 169.958, 298.450, 169.583, 50.908, 83.967}, - {149.286, 223.159, 139.421, 284.559, 158.107, 47.900, 84.424}, - {148.006, 191.944, 146.231, 289.288, 150.781, 42.419, 84.956}, - {148.028, 256.922, 165.194, 204.961, 165.194, 49.383, 82.983}, - {145.337, 262.869, 79.588, 203.863, 189.688, 58.000, 84.900}, - {148.992, 270.596, 240.892, 62.225, 176.271, 53.600, 87.262}, - {149.822, 249.956, 187.678, 80.433, 198.133, 61.000, 89.378}, - {146.922, 241.222, 88.894, 59.344, 160.683, 52.333, 83.417}, - {141.900, 258.383, 286.517, 178.267, 165.217, 48.350, 84.783} - }, { // ddg index 7: C2' C3' trans - {147.782, 260.712, 290.424, 296.200, 177.282, 175.594, 86.565}, - {143.722, 227.256, 203.789, 73.856, 216.733, 194.444, 80.911}, - {148.717, 274.683, 100.283, 80.600, 248.133, 181.817, 82.600}, - {150.311, 268.383, 84.972, 63.811, 191.483, 176.644, 85.600}, - {141.633, 244.100, 66.056, 71.667, 122.167, 182.200, 83.622} - }, { // ddg index 8: C2' C3' minus - {149.070, 249.780, 111.520, 278.370, 207.780, 287.820, 86.650} - }, { // ddg index 9: C2' C2' plus - {146.383, 259.402, 291.275, 291.982, 210.048, 54.412, 147.760}, - {145.256, 244.622, 162.822, 294.159, 171.630, 45.900, 145.804}, - {147.593, 248.421, 112.086, 274.943, 164.764, 56.843, 146.264}, - {150.077, 260.246, 213.785, 71.900, 207.638, 56.715, 148.131}, - {146.415, 257.831, 89.597, 67.923, 173.051, 55.513, 147.623}, - {142.900, 236.550, 268.800, 180.783, 185.133, 54.467, 143.350} - }, { // ddg index 10: C2' C2' trans - {149.863, 247.562, 170.488, 277.938, 84.425, 176.413, 148.087}, - {143.940, 258.200, 298.240, 279.640, 183.680, 183.080, 145.120} - }, { // ddg index 11: C2' C2' minus - {147.342, 256.475, 295.508, 287.408, 194.525, 293.725, 
150.458} - } }; - - // Get number of ddg clusters and number of suites in each ddg cluster - // Dominant suites lists indices of the dominant suite associated with - // a satellite suite. A value of reference_suite_dihedrals.size() - // that this suite is neither dominant nor satellite. A dominant suite - // will point to its own index. - N_reference_ddg = reference_dihedrals.size(); - N_reference_suite.clear(); - dominant_suites.clear(); - for (size_t i = 0; i < N_reference_ddg; ++i) { + record = parseStringAs(line, 0, 8); + + if (record.empty() || record[0] == '#') continue; + + else if (record == "suite") { + + // Define a reference suite + + // Get delta delta gamma cluster + field = parseStringAs( + line, 16, min((size_t) 8, line.size() - 16)); + ddg_index = N_reference_ddg; + for (size_t i = 0; i < N_reference_ddg; ++i) + if (field == reference_ddgs[i]) { + ddg_index = i; + break; + } + + // This is a new DDG cluster + if (ddg_index == N_reference_ddg) { + reference_ddgs.push_back(field); + reference_dihedrals.push_back(vector >()); + reference_names.push_back(vector()); + ++N_reference_ddg; + N_reference_suite.push_back(0); + } + + // Get suite name + field = parseStringAs(line, 8, 8); + if (field.empty()) continue; + reference_names[ddg_index].push_back(field); + + // Get reference suite dihedrals + dihedrals[0] = parseStringAs(line, 24, 8); + dihedrals[1] = parseStringAs(line, 32, 8); + dihedrals[2] = parseStringAs(line, 40, 8); + dihedrals[3] = parseStringAs(line, 48, 8); + dihedrals[4] = parseStringAs(line, 56, 8); + dihedrals[5] = parseStringAs(line, 64, 8); + dihedrals[6] = parseStringAs(line, 72, 8); + reference_dihedrals[ddg_index].push_back(dihedrals); + + ++N_reference_suite[ddg_index]; + + } else if (record == "width") { + + // Get default widths for hyperellipsoid distance + dihedral_width.push_back(parseStringAs(line, 8, 8)); + dihedral_width.push_back(parseStringAs(line, 16, 8)); + dihedral_width.push_back(parseStringAs(line, 24, 8)); + 
dihedral_width.push_back(parseStringAs(line, 32, 8)); + dihedral_width.push_back(parseStringAs(line, 40, 8)); + dihedral_width.push_back(parseStringAs(line, 48, 8)); + dihedral_width.push_back(parseStringAs(line, 56, 8)); + + } else if (record == "domsat") { + + // Define a dominant-satellite pair + + // Get index of dominant suite + field = parseStringAs(line, 16, 8); + ddg_index = N_reference_ddg; + for (size_t i = 0; i < N_reference_ddg; ++i) { + for (size_t j = 0; j < N_reference_suite[i]; ++j) + if (field == reference_names[i][j]) { + ddg_index = i; + dom_index = j; + break; + } + if (ddg_index != N_reference_ddg) break; + } + + if (ddg_index == N_reference_ddg) { + cerr << boost::format( + "Warning: dominant suite %s was not defined in file %s") + % field % filename << endl; + continue; + } + + // Get index of satellite suite + field = parseStringAs(line, 8, 8); + sat_index = N_reference_suite[ddg_index]; + for (size_t j = 0; j < N_reference_suite[ddg_index]; ++j) + if (field == reference_names[ddg_index][j]) { + sat_index = j; + break; + } + + if (sat_index == N_reference_suite[ddg_index]) { + cerr << boost::format( + "Warning: satellite suite %s was not defined in file %s") + % field % filename << endl; + continue; + } + + domsat_ddg.push_back(ddg_index); + domsat_dom.push_back(dom_index); + domsat_sat.push_back(sat_index); + + // Loop over dihedrals with alternate widths + vector dihedral_indices; + vector dom_width; + vector sat_width; + position = 24; + while (position < line.size()) { + dihedral_indices.push_back( + parseStringAs(line, position, 8)); + sat_width.push_back( + parseStringAs(line, position + 8, 8)); + dom_width.push_back( + parseStringAs(line, position + 16, 8)); + position += 24; + } + domsat_dihedral.push_back(dihedral_indices); + domsat_dom_width.push_back(dom_width); + domsat_sat_width.push_back(sat_width); + + } else if (record == "delta") { + + delta_min.push_back(parseStringAs(line, 8, 8)); + delta_max.push_back(parseStringAs(line, 
16, 8)); + + } else if (record == "epsilon") { + + ezab_min[0] = parseStringAs(line, 8, 8); + ezab_max[0] = parseStringAs(line, 16, 8); + + } else if (record == "zeta") { + + ezab_min[1] = parseStringAs(line, 8, 8); + ezab_max[1] = parseStringAs(line, 16, 8); + + } else if (record == "alpha") { - suite_size = reference_dihedrals[i].size(); - N_reference_suite.push_back(suite_size); - vector dom_suites(suite_size, suite_size); - dominant_suites.push_back(dom_suites); + ezab_min[2] = parseStringAs(line, 8, 8); + ezab_max[2] = parseStringAs(line, 16, 8); + } else if (record == "beta") { + + ezab_min[3] = parseStringAs(line, 8, 8); + ezab_max[3] = parseStringAs(line, 16, 8); + + } else if (record == "gamma") { + + gamma_min.push_back(parseStringAs(line, 8, 8)); + gamma_max.push_back(parseStringAs(line, 16, 8)); + + } else cerr << boost::format( + "Warning: Unrecognized record %s in suite definition from %s") + % record % filename << endl; + + } // Loop over lines in file + + // Construct vectors for dominant-satellite pairs + for (size_t i = 0; i < N_reference_ddg; ++i) { + dominant_suites.push_back( + vector(N_reference_suite[i], N_reference_suite[i])); + dom_sat_pair_index.push_back( + vector(N_reference_suite[i], domsat_dihedral.size())); } - // 1m, 1L, and &a are satellites of 1a - dominant_suites[0][0] = 0; - dominant_suites[0][1] = 0; - dominant_suites[0][2] = 0; - dominant_suites[0][3] = 0; - - // 1f is a satellite of 1c - dominant_suites[1][1] = 1; - dominant_suites[1][2] = 1; - - // 1[ is a satellite of 1b - dominant_suites[3][0] = 0; - dominant_suites[3][1] = 0; - - // 4a and #a are satellites of 0a - dominant_suites[6][2] = 2; - dominant_suites[6][1] = 2; - dominant_suites[6][3] = 2; - - // 0i nd 6j are satellites of 6n - dominant_suites[7][3] = 3; - dominant_suites[7][2] = 3; - dominant_suites[7][4] = 3; - - // Two-character suite name - reference_names = { - {"1a", "1m", "1L", "&a", "7a", "3a", "9a", "1g", "7d", "3d", "5d", - "3g"}, - {"1e", "1c", "1f", 
"5j", "5n"}, - { }, - {"1b", "1[", "3b", "1z", "5z", "7p", "5p"}, - {"1t", "5q"}, - {"1o", "7r", "5r"}, - {"2a", "4a", "0a", "#a", "4g", "6g", "8d", "4d", "6d", "2g"}, - {"2h", "4n", "0i", "6n", "6j"}, - {"0k"}, - {"2[", "4b", "0b", "4p", "6p", "2z"}, - {"4s", "2u"}, - {"2o"} - }; - - // Widths used to scale each dihedral dimension - dihedral_width = {28.0, 60.0, 55.0, 50.0, 70.0, 35.0, 28.0}; - - // Alternative widths used to scale dominant-satellite pairs - dominant_width = { - {28.0, 60.0, 55.0, 50.0, 64.0, 35.0, 28.0}, - {28.0, 70.0, 55.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 60.0, 60.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 65.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 56.0, 35.0, 28.0}, - {28.0, 50.0, 50.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 36.0, 36.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0} - }; - - satellite_width = { - {28.0, 60.0, 55.0, 50.0, 32.0, 35.0, 28.0}, - {28.0, 18.0, 55.0, 50.0, 18.0, 35.0, 28.0}, - {28.0, 20.0, 20.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 47.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 34.0, 35.0, 28.0}, - {28.0, 40.0, 40.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 26.0, 26.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0}, - }; - - // Index into dominant-satellite pair widths - dom_sat_pair_index = { - {9, 0, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9}, - {9, 9, 3, 9, 9}, - { }, - {9, 4, 9, 9, 9, 9, 9}, - {9, 9}, - {9, 9, 9}, - {9, 5, 9, 6, 9, 9, 9, 9, 9, 9}, - {9, 9, 7, 9, 8}, - {9}, - {9, 9, 9, 9, 9, 9}, - {9, 9}, - {9} - }; - - // Delta(i-1), delta, gamma index. Delta can be C3' endo ("3") or - // C2' endo ("2"). Gamma can be plus ("p"), trans ("t"), or minus ("m"). 
- reference_ddgs = {"33p", "33t", "33m", "32p", "32t", "32m", "23p", - "23t", "23m", "22p", "22t", "22m"}; - - // Boundaries for allowed regions of delta(i-1), delta, and gamma - delta_min = { 60.0, 125.0}; - delta_max = {105.0, 165.0}; - gamma_min = { 20.0, 140.0, 260.0}; - gamma_max = { 95.0, 215.0, 335.0}; - - // Boundaries used to filter suites based on epsilon, zeta, alpha, beta - filter_min = {155.0, 25.0, 25.0, 50.0}; - filter_max = {310.0, 335.0, 335.0, 290.0}; - - } // defineSuitesFromSuitename() + for (size_t i = 0; i < domsat_dihedral.size(); ++i) { + dominant_suites[domsat_ddg[i]][domsat_dom[i]] = domsat_dom[i]; + dominant_suites[domsat_ddg[i]][domsat_sat[i]] = domsat_dom[i]; + dom_sat_pair_index[domsat_ddg[i]][domsat_sat[i]] = i; + dominant_width.push_back(dihedral_width); + satellite_width.push_back(dihedral_width); + for (size_t j = 0; j < domsat_dihedral[i].size(); ++j) { + dominant_width[i][domsat_dihedral[i][j]] = domsat_dom_width[i][j]; + satellite_width[i][domsat_dihedral[i][j]] = domsat_sat_width[i][j]; + } + } + + } // defineSuitesFromFile() void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { @@ -697,18 +703,19 @@ namespace loos { // Extract all RNA backbone atoms (P, O5', C5', C4', C3', and O3') into // one AtomicGroup. 
Use raw string literal R"()" to avoid escaping " AtomicGroup backbone = selectAtoms(group, - R"(name =~ "^(P|C[345]'|O[35]')$")"); + "(name =~ \"^(P|C[345]'|O[35]')$\")"); // Split by resid and loop over residues - for (AtomicGroup residue : backbone.splitByResidue()) { + vector backbone_residues = backbone.splitByResidue(); + for (size_t i = 0; i < backbone_residues.size(); ++i) { // Select RNA backbone atoms from residue - residue_p = selectAtoms(residue, R"(name == "P")"); - residue_o5p = selectAtoms(residue, R"(name == "O5'")"); - residue_c5p = selectAtoms(residue, R"(name == "C5'")"); - residue_c4p = selectAtoms(residue, R"(name == "C4'")"); - residue_c3p = selectAtoms(residue, R"(name == "C3'")"); - residue_o3p = selectAtoms(residue, R"(name == "O3'")"); + residue_p = selectAtoms(backbone_residues[i], "(name == \"P\")"); + residue_o5p = selectAtoms(backbone_residues[i], "(name == \"O5'\")"); + residue_c5p = selectAtoms(backbone_residues[i], "(name == \"C5'\")"); + residue_c4p = selectAtoms(backbone_residues[i], "(name == \"C4'\")"); + residue_c3p = selectAtoms(backbone_residues[i], "(name == \"C3'\")"); + residue_o3p = selectAtoms(backbone_residues[i], "(name == \"O3'\")"); // If any atom besides P is missing, skip this residue and start a // new continuous group @@ -859,7 +866,7 @@ namespace loos { return suite_ddgs; } // getSuiteDDGs() - vector> RnaSuite::getSuiteDihedrals() const { + vector > RnaSuite::getSuiteDihedrals() const { return suite_dihedrals; } // getSuiteDihedrals() diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index b3d68e2d1..05ad1b7c5 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -69,7 +69,7 @@ namespace loos { void calculateBackboneDihedrals(); //! Method to define suites used for assignment from an existing scheme - void defineSuites(const string suite_definition); + void defineSuites(const string& suite_definition); //! 
Method to extract RNA backbone atoms from an AtomicGroup /** @@ -82,7 +82,7 @@ namespace loos { vector getSuiteDDGs() const; //! Method to return the current backbone dihedrals - vector> getSuiteDihedrals() const; + vector > getSuiteDihedrals() const; //! Method to return the current assigned suite names vector getSuiteNames() const; @@ -125,7 +125,7 @@ namespace loos { //! Method to check the size of a vector of continuous groups void checkContinuousGroupSize( - const vector> &group_vector, + const vector > &group_vector, const size_t target_size, const string dihedral_name) const; //! Method to check the size of a vector of residues @@ -134,10 +134,7 @@ namespace loos { const size_t group_index) const; //! Method to define suites used for assignment from a file - void defineSuitesFromFile(const string filename); - - //! Method to define suites used for assignment from suitename - void defineSuitesFromSuitename(); + void defineSuitesFromFile(const string& filename); //! Method to test whether a point is in between two reference points bool isBetweenDomSatPair(const vector &dihedrals, @@ -149,20 +146,22 @@ namespace loos { uint first_index, uint last_index); // Reference suites used for assignment - vector>> reference_dihedrals; - vector> reference_names; + vector >> reference_dihedrals; + vector > reference_names; vector reference_ddgs; - vector> dominant_suites; // Widths used to scale each dihedral dimension vector dihedral_width; - // Alternative widths used to scale dominant-satellite pairs - vector> dominant_width; - vector> satellite_width; + // Indices of dominant-satellite pairs + vector > dominant_suites; // Index into dominant-satellite pair widths - vector> dom_sat_pair_index; + vector > dom_sat_pair_index; + + // Alternative widths used to scale dominant-satellite pairs + vector > dominant_width; + vector > satellite_width; // Boundaries for allowed regions of delta(i-1), delta, and gamma vector delta_min; @@ -170,23 +169,23 @@ namespace loos { vector 
gamma_min; vector gamma_max; - // Boundaries used to filter suites based on epsilon, zeta, alpha, beta - vector filter_min; - vector filter_max; + // Boundaries for allowed regions of epsilon, zeta, alpha, beta + vector ezab_min; + vector ezab_max; // Vector of continuous groups, composed of vectors of AtomicGroups // for each residue within a continuous group - vector> alpha_atoms; - vector> beta_atoms; - vector> gamma_atoms; - vector> delta_atoms; - vector> epsilon_atoms; - vector> zeta_atoms; + vector > alpha_atoms; + vector > beta_atoms; + vector > gamma_atoms; + vector > delta_atoms; + vector > epsilon_atoms; + vector > zeta_atoms; // Suite residue ids, residue names, and dihedrals vector suite_resids; vector suite_resnames; - vector> suite_dihedrals; + vector > suite_dihedrals; // Assigned suite names, ddg indices, and suiteness scores vector suite_names; From 976cf0c11f4f17c80089c802001dd845f0c10ce1 Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Tue, 14 Jan 2020 11:10:13 -0500 Subject: [PATCH 08/41] Wrote first pass of dihedral tool. While loop is not finished. Need to add 'tag' processing for each dihedral. --- Tools/dihedrals.cpp | 224 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 Tools/dihedrals.cpp diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp new file mode 100644 index 000000000..b9f4d3edb --- /dev/null +++ b/Tools/dihedrals.cpp @@ -0,0 +1,224 @@ +/* + dihedrals + + Computes the dihedral angle between each set of four atoms specified. +*/ + +/* + + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008-2020 Tod D. 
Romo & Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace loos; +namespace opts = loos::OptionsFramework; +namespace po = loos::OptionsFramework::po; + +typedef vector vGroup; + +// @cond TOOL_INTERNAL + +string fullHelpMessage() { + string s = "XXX"; + return s; +} + +// these determine where the string containing the dihedral selections is split +string quartet_delim = "|"; +string atom_delim = ","; + +// C++ 11 regex split +// https://stackoverflow.com/questions/9435385/split-a-string-using-c11 +vector split(const string &input, const string ®ex) { + // passing -1 as the submatch index parameter performs splitting + regex re(regex); + sregex_token_iterator first{input.begin(), input.end(), re, -1}, last; + return {first, last}; +} + +// split of split strings +vector> deep_split(const string &input, + const string &outer_regex, + const string &inner_regex) { + vector> vector_of_frags; + vector outer_frags = split(input, outer_regex); + // split the outer fragments and push these vectors into return object + for (string &frag : outer_frags) { + vector_of_frags.push_back(move(split(frag, inner_regex))); + } + return vector_of_frags; +} +class ToolOptions : public opts::OptionsPackage { +public: + ToolOptions() : 
dihedral_sel_strings(""), pdb(""), dihedral_sels{} {}; + + void addGeneric(po::options_description &o) { + o.add_options()("dihedral-sel-strings,D", + po::value(&dihedral_sel_strings)->default_value(""), + "Ordered quartets of selection strings; each quartet is " + "delimited by '" + + quartet_delim + ", and each string within by '" + + atom_delim + + "'.")("pdb", po::value(&pdb)->default_value(""), + "Prefix to write PDBs for each dihedral selected " + "from frame 1 of provided multi-traj."); + } + + string print() const { + ostringstream oss; + oss << boost::format("dihedral-sel-strings=%s,pdb=%s") % + dihedral_sel_strings % pdb; + } + + bool postConditions(po::variables_map &map) { + dihedral_sels = deep_split(dihedral_sel_strings, quartet_delim, atom_delim); + for (auto d : dihedral_sels) { + if (d.size() != 4) { + throw(LOOSError("The following selection did not split to a quartet of " + "selections:\n")); + for (auto s : d) + cout cout << s << "\t"; + cout << "\n"; + return false; + } + } + return true; + } + + vector> dihedral_sels; + string dihedral_sel_strings; + string pdb; +}; + +// takes an atomic group for scope, and a vector of vectors of sel-strings. +// Corrects order of discovery of each dihedral, and returns atomic group of +// dihedrals. +vector sels_to_dihedralAGs(const vector> &dihedral_sels, + const AtomicGroup &scope) { + // append to this for return later. + vGroup dihedralAGs; + for (auto dSels : dihedral_sels) { + // first get a set of AGs that have all the atoms of the dihedral in them + // They are likely to be in the order of the selection matched first + // i.e. all the matches for selection 1, then all for 2, and so forth. + AtomicGroup outoforder_dihedralType; + for (auto sel : dSels) { + outoforder_dihedralType += selectAtoms(scope, sel); + } + // This separates all non-connected atoms into separate atomic groups. 
+ vGroup dihedralTypeVector = outoforder_dihedralType.splitByMolecule(); + // reorder them here to match that provided by user + // it may turn out this is unnecessary, + // but the return order of selectAtoms calls is not specified. + for (auto oo_D : dihedralTypeVector) { + AtomicGroup reordered; + for (auto sel : dSels) { + reordered += SelectAtoms(oo_D, sel); + } + oo_D = move(reordered); + } + dihedralAGs.push_back(move(dihedralTypeVector)); + } + return dihedralAGs; +} + +int main(int argc, char *argv[]) { + string header = invocationHeader(argc, argv); + + opts::BasicOptions *bopts = new opts::BasicOptions(fullHelpMessage()); + opts::BasicSelection *sopts = new opts::BasicSelection("all"); + opts::MultiTrajOptions *mtopts = new opts::MultiTrajOptions; + ToolOptions *topts = new ToolOptions; + + opts::AggregateOptions options; + options.add(bopts).add(sopts).add(mtopts).add(topts); + if (!options.parse(argc, argv)) + exit(-1); + + // set up system for looping. Load coords from frame 0 into scope. + AtomicGroup model = mtopts->model; + AtomicGroup scope = selectAtoms(model, sopts->selection); + pTraj traj = mtopts->trajectory; + traj->updateGroupCoords(model); + + // figure out what dihedrals to track + vector dihedrals = sels_to_dihedralAGs(topts->dihedral_sels, scope); + + // if verbosity, and no pdbs were requested, then print each atomic group + // found for each atom in each dihedral to stderr. 
+ if (bopts->verbosity > 0) { + if (topts->pdb.empty()) { + cerr << header; + cerr << "# Following are the tab-delimited dihedral class selection " + "strings and the atomic groups each produced:\n"; + for (uint i = 0; i < topts->dihedral_sels.size(); i++) { + cerr << i << "\t"; + for (auto sel : topts->dihedral_sels.at(i)) { + cerr << sel << "\t"; + } + cerr << "\n["; + for (auto ag : dihedrals.at(i)) { + cerr << ag << ","; + } + cerr << "\b]\n"; + } + } + } + + // if PDB name string was given, write PDBs to indexed files by that prefix + if (!topts->pdb.empty()) { + for (uint i = 0; i < dihedrals.size(); i++) { + for (uint j = 0; j < dihedrals.at(i).size(); i++) { + PDB pdb = PDB::fromAtomicGroup(dihedrals[i, j]); + pdb.remarks().add(to_string(j) + " from: " + dihedral_sels.at(i)); + ofstream pdbFile; + pdbFile.open(topts->pdb + "_" + to_string(i) + "_" + to_string(j) + + ".pdb"); + pdbFile << pdb; + pdbFile.close(); + } + } + PDB scopePDB = PDB::fromAtomicGroup(scope); + ofstream scopeFile; + scopeFile.open(topts->pdb + "_scope.pdb"); + scope.remarks().add(header); + scopeFile << scopePDB; + } + + cout << header; + cout << "#\t"; + for (auto selset : dihedral_sels){ + for (auto sel : selset){ + cout << sel << ","; + } + } + // Trajectory Loop here. + while (traj->readFrame()) { + traj->updateGroupCoords(model); + for (auto ) + } +} \ No newline at end of file From 990448d06f9cb7f3916db9bd94a5198c03768945 Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Tue, 14 Jan 2020 23:23:01 -0500 Subject: [PATCH 09/41] added 'tags' flag and parsing. Need to add tags to reporting. 
--- Tools/dihedrals.cpp | 46 ++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index b9f4d3edb..50cf1135d 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -48,8 +48,8 @@ string fullHelpMessage() { } // these determine where the string containing the dihedral selections is split -string quartet_delim = "|"; -string atom_delim = ","; +const string quartet_delim = "|"; +const string atom_delim = ","; // C++ 11 regex split // https://stackoverflow.com/questions/9435385/split-a-string-using-c11 @@ -74,24 +74,26 @@ vector> deep_split(const string &input, } class ToolOptions : public opts::OptionsPackage { public: - ToolOptions() : dihedral_sel_strings(""), pdb(""), dihedral_sels{} {}; - + ToolOptions() + : dihedral_sel_strings(""), pdb(""), tags(""), dihedral_sels{} {}; + // clang-format off void addGeneric(po::options_description &o) { - o.add_options()("dihedral-sel-strings,D", - po::value(&dihedral_sel_strings)->default_value(""), - "Ordered quartets of selection strings; each quartet is " - "delimited by '" + - quartet_delim + ", and each string within by '" + - atom_delim + - "'.")("pdb", po::value(&pdb)->default_value(""), - "Prefix to write PDBs for each dihedral selected " - "from frame 1 of provided multi-traj."); + o.add_options() + ("dihedral-sel-strings,D", po::value(&dihedral_sel_strings)->default_value(""), + "Ordered quartets of selection strings; each quartet is delimited by '" + + quartet_delim + "', and each string within by '" + + atom_delim + "'.") + ("pdb", po::value(&pdb)->default_value(""), + "Prefix to write PDBs for each dihedral selected from frame 1 of provided multi-traj.") + ("tags,T", po::value(&tags)->default_value(""), + "String of tags for each class of dihedral, separated by a '" + atom_delim + "'."); } + // clang-format on string print() const { ostringstream oss; - oss << boost::format("dihedral-sel-strings=%s,pdb=%s") % - 
dihedral_sel_strings % pdb; + oss << boost::format("dihedral-sel-strings=%s,pdb=%s,tags=%s") % + dihedral_sel_strings % pdb % tags; } bool postConditions(po::variables_map &map) { @@ -112,6 +114,7 @@ class ToolOptions : public opts::OptionsPackage { vector> dihedral_sels; string dihedral_sel_strings; string pdb; + string tags; }; // takes an atomic group for scope, and a vector of vectors of sel-strings. @@ -168,6 +171,11 @@ int main(int argc, char *argv[]) { // figure out what dihedrals to track vector dihedrals = sels_to_dihedralAGs(topts->dihedral_sels, scope); + // if tags provided, split those into vector. + if (topts->tags) + vector vtags = split(topts->tags, atom_delim) + + // if verbosity, and no pdbs were requested, then print each atomic group // found for each atom in each dihedral to stderr. if (bopts->verbosity > 0) { @@ -208,17 +216,17 @@ int main(int argc, char *argv[]) { scope.remarks().add(header); scopeFile << scopePDB; } - + cout << header; cout << "#\t"; - for (auto selset : dihedral_sels){ - for (auto sel : selset){ + for (auto selset : dihedral_sels) { + for (auto sel : selset) { cout << sel << ","; } } // Trajectory Loop here. while (traj->readFrame()) { traj->updateGroupCoords(model); - for (auto ) + for (auto) } } \ No newline at end of file From 95b054b54101b2d78cbb009e1984968281793610 Mon Sep 17 00:00:00 2001 From: louis Date: Wed, 15 Jan 2020 18:17:23 -0500 Subject: [PATCH 10/41] Fixed type statement errors for dihedralAGs in function sels_to_dihedralAGs, and fixed variable substitution in options_description args. 
--- Tools/dihedrals.cpp | 63 ++++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index 50cf1135d..264808a62 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -38,8 +38,6 @@ using namespace loos; namespace opts = loos::OptionsFramework; namespace po = loos::OptionsFramework::po; -typedef vector vGroup; - // @cond TOOL_INTERNAL string fullHelpMessage() { @@ -53,9 +51,9 @@ const string atom_delim = ","; // C++ 11 regex split // https://stackoverflow.com/questions/9435385/split-a-string-using-c11 -vector split(const string &input, const string ®ex) { +vector split(const string &input, const string ®ular_expression) { // passing -1 as the submatch index parameter performs splitting - regex re(regex); + regex re(regular_expression); sregex_token_iterator first{input.begin(), input.end(), re, -1}, last; return {first, last}; } @@ -77,16 +75,17 @@ class ToolOptions : public opts::OptionsPackage { ToolOptions() : dihedral_sel_strings(""), pdb(""), tags(""), dihedral_sels{} {}; // clang-format off - void addGeneric(po::options_description &o) { + void addGeneric(po::options_description& o) { o.add_options() ("dihedral-sel-strings,D", po::value(&dihedral_sel_strings)->default_value(""), - "Ordered quartets of selection strings; each quartet is delimited by '" + ("Ordered quartets of selection strings; each quartet is delimited by '" + quartet_delim + "', and each string within by '" - + atom_delim + "'.") + + atom_delim + "'.").c_str()) ("pdb", po::value(&pdb)->default_value(""), "Prefix to write PDBs for each dihedral selected from frame 1 of provided multi-traj.") ("tags,T", po::value(&tags)->default_value(""), - "String of tags for each class of dihedral, separated by a '" + atom_delim + "'."); + ("String of tags for each class of dihedral, separated by a '" + atom_delim + "'.").c_str()) + ; } // clang-format on @@ -103,7 +102,7 @@ class ToolOptions : public 
opts::OptionsPackage { throw(LOOSError("The following selection did not split to a quartet of " "selections:\n")); for (auto s : d) - cout cout << s << "\t"; + cout << s << "\t"; cout << "\n"; return false; } @@ -120,10 +119,11 @@ class ToolOptions : public opts::OptionsPackage { // takes an atomic group for scope, and a vector of vectors of sel-strings. // Corrects order of discovery of each dihedral, and returns atomic group of // dihedrals. -vector sels_to_dihedralAGs(const vector> &dihedral_sels, - const AtomicGroup &scope) { +vector> +sels_to_dihedralAGs(const vector> &dihedral_sels, + const AtomicGroup &scope) { // append to this for return later. - vGroup dihedralAGs; + vector> dihedralAGs; for (auto dSels : dihedral_sels) { // first get a set of AGs that have all the atoms of the dihedral in them // They are likely to be in the order of the selection matched first @@ -133,15 +133,25 @@ vector sels_to_dihedralAGs(const vector> &dihedral_sels, outoforder_dihedralType += selectAtoms(scope, sel); } // This separates all non-connected atoms into separate atomic groups. - vGroup dihedralTypeVector = outoforder_dihedralType.splitByMolecule(); + vector dihedralTypeVector = + outoforder_dihedralType.splitByMolecule(); // reorder them here to match that provided by user // it may turn out this is unnecessary, // but the return order of selectAtoms calls is not specified. 
for (auto oo_D : dihedralTypeVector) { - AtomicGroup reordered; - for (auto sel : dSels) { - reordered += SelectAtoms(oo_D, sel); + if (oo_D.size() != 4) { + ostringstream oss; + oss << "WARNING: dihedral specification found " << oo_D.size(); + oss << " atoms, not 4 in selection string set: \n\t"; + for (auto sel : dSels) + oss << sel << ", "; + oss << "\b\n"; + throw(LOOSError(oss.str())); } + AtomicGroup reordered; + for (auto sel : dSels) + reordered += selectAtoms(oo_D, sel); + oo_D = move(reordered); } dihedralAGs.push_back(move(dihedralTypeVector)); @@ -169,12 +179,19 @@ int main(int argc, char *argv[]) { traj->updateGroupCoords(model); // figure out what dihedrals to track - vector dihedrals = sels_to_dihedralAGs(topts->dihedral_sels, scope); + vector> dihedrals = + sels_to_dihedralAGs(topts->dihedral_sels, scope); // if tags provided, split those into vector. - if (topts->tags) - vector vtags = split(topts->tags, atom_delim) - + if (topts->tags.empty()) { + vector vtags = split(topts->tags, atom_delim); + for (uint i = 0; i < vtags.size(); i++) { + vtags[i] += to_string(dihedrals[i][0][0]->resid()); + } + } else { + for (auto vAG : dihedrals) { + } + } // if verbosity, and no pdbs were requested, then print each atomic group // found for each atom in each dihedral to stderr. 
@@ -202,7 +219,7 @@ int main(int argc, char *argv[]) { for (uint i = 0; i < dihedrals.size(); i++) { for (uint j = 0; j < dihedrals.at(i).size(); i++) { PDB pdb = PDB::fromAtomicGroup(dihedrals[i, j]); - pdb.remarks().add(to_string(j) + " from: " + dihedral_sels.at(i)); + pdb.remarks().add(to_string(j) + " from: " + topts->dihedral_sels.at(i)); ofstream pdbFile; pdbFile.open(topts->pdb + "_" + to_string(i) + "_" + to_string(j) + ".pdb"); @@ -213,13 +230,13 @@ int main(int argc, char *argv[]) { PDB scopePDB = PDB::fromAtomicGroup(scope); ofstream scopeFile; scopeFile.open(topts->pdb + "_scope.pdb"); - scope.remarks().add(header); + scopePDB.remarks().add(header); scopeFile << scopePDB; } cout << header; cout << "#\t"; - for (auto selset : dihedral_sels) { + for (auto selset : topts->dihedral_sels) { for (auto sel : selset) { cout << sel << ","; } From b0943ba8c607a79509948b9af1471509aa15062b Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Thu, 16 Jan 2020 01:04:23 -0500 Subject: [PATCH 11/41] Updated to using tags as file names and writing each dihedral to its own TS file. 
--- Tools/dihedrals.cpp | 87 +++++++++++++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 22 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index 264808a62..b0a95d19e 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -48,10 +48,13 @@ string fullHelpMessage() { // these determine where the string containing the dihedral selections is split const string quartet_delim = "|"; const string atom_delim = ","; +const string tag_delim = "_"; +const string fsuffix = ".out" -// C++ 11 regex split -// https://stackoverflow.com/questions/9435385/split-a-string-using-c11 -vector split(const string &input, const string ®ular_expression) { + // C++ 11 regex split + // https://stackoverflow.com/questions/9435385/split-a-string-using-c11 + vector + split(const string &input, const string ®ular_expression) { // passing -1 as the submatch index parameter performs splitting regex re(regular_expression); sregex_token_iterator first{input.begin(), input.end(), re, -1}, last; @@ -73,7 +76,8 @@ vector> deep_split(const string &input, class ToolOptions : public opts::OptionsPackage { public: ToolOptions() - : dihedral_sel_strings(""), pdb(""), tags(""), dihedral_sels{} {}; + : dihedral_sel_strings(""), pdb(""), tags(""), dihedral_sels{}, + prefix("dihedral"){}; // clang-format off void addGeneric(po::options_description& o) { o.add_options() @@ -85,14 +89,17 @@ class ToolOptions : public opts::OptionsPackage { "Prefix to write PDBs for each dihedral selected from frame 1 of provided multi-traj.") ("tags,T", po::value(&tags)->default_value(""), ("String of tags for each class of dihedral, separated by a '" + atom_delim + "'.").c_str()) + ("prefix,p", po::value(&prefix)->default_value("dihedral"), + "Prefix for file names for each monitored dihedral.") ; } // clang-format on string print() const { ostringstream oss; - oss << boost::format("dihedral-sel-strings=%s,pdb=%s,tags=%s") % - dihedral_sel_strings % pdb % tags; + oss << 
boost::format("dihedral-sel-strings=%s,pdb=%s,tags=%s,prefix=%s") % + dihedral_sel_strings % pdb % tags % prefix; + return (oss.str()); } bool postConditions(po::variables_map &map) { @@ -114,6 +121,7 @@ class ToolOptions : public opts::OptionsPackage { string dihedral_sel_strings; string pdb; string tags; + string prefix; }; // takes an atomic group for scope, and a vector of vectors of sel-strings. @@ -163,7 +171,8 @@ int main(int argc, char *argv[]) { string header = invocationHeader(argc, argv); opts::BasicOptions *bopts = new opts::BasicOptions(fullHelpMessage()); - opts::BasicSelection *sopts = new opts::BasicSelection("all"); + opts::BasicSelection *sopts = + new opts::BasicSelection("backbone && !hydrogen"); opts::MultiTrajOptions *mtopts = new opts::MultiTrajOptions; ToolOptions *topts = new ToolOptions; @@ -182,14 +191,44 @@ int main(int argc, char *argv[]) { vector> dihedrals = sels_to_dihedralAGs(topts->dihedral_sels, scope); - // if tags provided, split those into vector. + // make tags, either from scratch or by adding to user appended tags. + vector> vv_fileOutputs; if (topts->tags.empty()) { - vector vtags = split(topts->tags, atom_delim); - for (uint i = 0; i < vtags.size(); i++) { - vtags[i] += to_string(dihedrals[i][0][0]->resid()); + int resid; + for (auto dihedralType : dihedrals) { + vector v_fileOutputs; + for (auto dihedral : dihedralType) { + resid = dihedral[0]->resid(); + string tag; + for (auto patom : dihedral) { + // put a residue number with the name for each atom not from residue + // of atom zero. 
+ if (resid != patom->resid()) + tag = tag_delim + to_string(patom->resid()) + patom->name(); + else + tag = tag_delim + patom->name(); + } + ofstream dihedral_outFile(topts->prefix + tag_delim + tag + fsuffix); + dihedral_outFile << "# " << header << "\n"; + v_fileOutputs.push_back(move(dihedral_outFile)); + } + vv_fileOutputs.push_back(move(v_fileOutputs)); } } else { - for (auto vAG : dihedrals) { + vector user_tags = split(topts->tags, atom_delim); + for (uint i = 0; i < user_tags.size(); i++) { + vector v_fileOutputs; + for (auto dihedral : dihedrals.at(i)) { + string tag = user_tags.at(i); + tag += tag_delim + dihedral[0]->resid(); + for (auto patom : dihedral) + tag += tag_delim + patom->name(); // append atom names to tag with + // tag delimiter + ofstream dihedral_outFile(topts->prefix + tag_delim + tag + fsuffix); + dihedral_outFile << "# " << header << "\n"; + v_fileOutputs.push_back(move(dihedral_outFile)); + } + vv_fileOutputs.push_back(move(v_fileOutputs)); } } @@ -197,7 +236,7 @@ int main(int argc, char *argv[]) { // found for each atom in each dihedral to stderr. 
if (bopts->verbosity > 0) { if (topts->pdb.empty()) { - cerr << header; + cerr << "# " << header << "\n"; cerr << "# Following are the tab-delimited dihedral class selection " "strings and the atomic groups each produced:\n"; for (uint i = 0; i < topts->dihedral_sels.size(); i++) { @@ -219,7 +258,8 @@ int main(int argc, char *argv[]) { for (uint i = 0; i < dihedrals.size(); i++) { for (uint j = 0; j < dihedrals.at(i).size(); i++) { PDB pdb = PDB::fromAtomicGroup(dihedrals[i, j]); - pdb.remarks().add(to_string(j) + " from: " + topts->dihedral_sels.at(i)); + pdb.remarks().add(to_string(j) + + " from: " + topts->dihedral_sels.at(i)); ofstream pdbFile; pdbFile.open(topts->pdb + "_" + to_string(i) + "_" + to_string(j) + ".pdb"); @@ -234,16 +274,19 @@ int main(int argc, char *argv[]) { scopeFile << scopePDB; } - cout << header; - cout << "#\t"; - for (auto selset : topts->dihedral_sels) { - for (auto sel : selset) { - cout << sel << ","; - } - } // Trajectory Loop here. + double dihedral_angle; while (traj->readFrame()) { traj->updateGroupCoords(model); - for (auto) + for (uint dtIndex = 0; dtIndex < dihedrals.size(); dtIndex++) { + for (uint dIndex = 0; dIndex < dihedrals[dtIndex].size(); dIndex++) { + dihedral_angle = Math::torsion(dihedrals[dtIndex, dIndex][0]->coords(), + dihedrals[dtIndex, dIndex][1]->coords(), + dihedrals[dtIndex, dIndex][2]->coords(), + dihedrals[dtIndex, dIndex][3]->coords()); + vv_fileOutputs[dtIndex, dIndex] << traj->currentFrame << "\t" + << dihedral_angle << "\n";:w + } + } } } \ No newline at end of file From d16614883be19bce36ca48b49c125b5c945b108f Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Thu, 16 Jan 2020 01:08:52 -0500 Subject: [PATCH 12/41] Updated sconscript and gitignore in prep for build tests. 
--- .gitignore | 1 + Tools/SConscript | 1 + 2 files changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 652a3bf8d..8c85f59d7 100644 --- a/.gitignore +++ b/.gitignore @@ -259,6 +259,7 @@ Tools/coverlap Tools/subsetter Tools/xy_rdf Tools/model-select +Tools/dihedrals Packages/ElasticNetworks/heavy-ca Packages/ElasticNetworks/psf-masses Packages/ElasticNetworks/vsa diff --git a/Tools/SConscript b/Tools/SConscript index 325a2a35b..a63050d1f 100644 --- a/Tools/SConscript +++ b/Tools/SConscript @@ -36,6 +36,7 @@ apps = apps + ' traj2pdb merge-traj center-molecule contact-time perturb-structu apps = apps + ' big-svd kurskew periodic_box area_per_lipid residue-contact-map' apps = apps + ' cross-dist fcontacts serialize-selection transition_contacts fixdcd smooth-traj membrane_map packing_score' apps = apps + ' mops dibmops xtcinfo model-meta-stats verap lipid_survival multi-rmsds rms-overlap' +apps = apps + ' dihedrals' list = [] From bd259bcd3b03128b58d0006d6c3eabaaa6a14bc7 Mon Sep 17 00:00:00 2001 From: louis Date: Thu, 16 Jan 2020 12:02:41 -0500 Subject: [PATCH 13/41] Added file closure loop after trajectory loop. --- Tools/dihedrals.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index b0a95d19e..c48c5bae6 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -289,4 +289,8 @@ int main(int argc, char *argv[]) { } } } + // close all these output files now that we're done looping over traj + for (auto v_fileOutputs : vv_fileOutputs) + for (auto ofs : v_fileOutputs) + ofs.close(); } \ No newline at end of file From b2f6603f59101c6c14ad2ec3c47e85e92423463b Mon Sep 17 00:00:00 2001 From: louis Date: Thu, 16 Jan 2020 12:43:08 -0500 Subject: [PATCH 14/41] Changed delimiter character. 
--- Tools/dihedrals.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index c48c5bae6..ef86329ee 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -46,7 +46,7 @@ string fullHelpMessage() { } // these determine where the string containing the dihedral selections is split -const string quartet_delim = "|"; +const string quartet_delim = ":"; const string atom_delim = ","; const string tag_delim = "_"; const string fsuffix = ".out" From 4ba0b190b362c68331d68c094ba05b494f17ab6d Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Mon, 20 Jan 2020 16:00:49 -0500 Subject: [PATCH 15/41] Fixed up gitignore. Changed outfiles to shared_ptrs to outfiles. --- .gitignore | 8 ++++++++ Tools/dihedrals.cpp | 49 +++++++++++++++++++++++---------------------- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index 8c85f59d7..d09cf2c3c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ ### Some files to ignore in LOOS +# ignore mac crap +**.DS_Store + # Object files *.o *.os @@ -58,6 +61,11 @@ GTAGS # VSCode cruft **.vscode +# stuff from sublime text +**.sublime-project +**.sublime-workspace +**.clang_complete + # JSON files .*.json diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index ef86329ee..d4ddec970 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -49,7 +49,7 @@ string fullHelpMessage() { const string quartet_delim = ":"; const string atom_delim = ","; const string tag_delim = "_"; -const string fsuffix = ".out" +const string fsuffix = ".out"; // C++ 11 regex split // https://stackoverflow.com/questions/9435385/split-a-string-using-c11 @@ -192,11 +192,11 @@ int main(int argc, char *argv[]) { sels_to_dihedralAGs(topts->dihedral_sels, scope); // make tags, either from scratch or by adding to user appended tags. 
- vector> vv_fileOutputs; + vector>> vv_filePtrs; if (topts->tags.empty()) { int resid; for (auto dihedralType : dihedrals) { - vector v_fileOutputs; + vector> v_filePtrs; for (auto dihedral : dihedralType) { resid = dihedral[0]->resid(); string tag; @@ -208,27 +208,27 @@ int main(int argc, char *argv[]) { else tag = tag_delim + patom->name(); } - ofstream dihedral_outFile(topts->prefix + tag_delim + tag + fsuffix); - dihedral_outFile << "# " << header << "\n"; - v_fileOutputs.push_back(move(dihedral_outFile)); + auto p_ofstream = make_shared(topts->prefix + tag_delim + tag + fsuffix); + *(p_ofstream) << "# " << header << "\n"; + v_filePtrs.push_back(p_ofstream); } - vv_fileOutputs.push_back(move(v_fileOutputs)); + vv_filePtrs.push_back(move(v_filePtrs)); } } else { vector user_tags = split(topts->tags, atom_delim); for (uint i = 0; i < user_tags.size(); i++) { - vector v_fileOutputs; + vector> v_filePtrs; for (auto dihedral : dihedrals.at(i)) { string tag = user_tags.at(i); - tag += tag_delim + dihedral[0]->resid(); + tag += tag_delim + to_string(dihedral[0]->resid()); for (auto patom : dihedral) tag += tag_delim + patom->name(); // append atom names to tag with // tag delimiter - ofstream dihedral_outFile(topts->prefix + tag_delim + tag + fsuffix); - dihedral_outFile << "# " << header << "\n"; - v_fileOutputs.push_back(move(dihedral_outFile)); + auto p_ofstream = make_shared(topts->prefix + tag_delim + tag + fsuffix); + *p_ofstream << "# " << header << "\n"; + v_filePtrs.push_back(p_ofstream); } - vv_fileOutputs.push_back(move(v_fileOutputs)); + vv_filePtrs.push_back(move(v_filePtrs)); } } @@ -257,9 +257,9 @@ int main(int argc, char *argv[]) { if (!topts->pdb.empty()) { for (uint i = 0; i < dihedrals.size(); i++) { for (uint j = 0; j < dihedrals.at(i).size(); i++) { - PDB pdb = PDB::fromAtomicGroup(dihedrals[i, j]); + PDB pdb = PDB::fromAtomicGroup(dihedrals[i][j]); pdb.remarks().add(to_string(j) + - " from: " + topts->dihedral_sels.at(i)); + " from: " + 
(topts->dihedral_sels.at(i).at(j))); ofstream pdbFile; pdbFile.open(topts->pdb + "_" + to_string(i) + "_" + to_string(j) + ".pdb"); @@ -280,17 +280,18 @@ int main(int argc, char *argv[]) { traj->updateGroupCoords(model); for (uint dtIndex = 0; dtIndex < dihedrals.size(); dtIndex++) { for (uint dIndex = 0; dIndex < dihedrals[dtIndex].size(); dIndex++) { - dihedral_angle = Math::torsion(dihedrals[dtIndex, dIndex][0]->coords(), - dihedrals[dtIndex, dIndex][1]->coords(), - dihedrals[dtIndex, dIndex][2]->coords(), - dihedrals[dtIndex, dIndex][3]->coords()); - vv_fileOutputs[dtIndex, dIndex] << traj->currentFrame << "\t" - << dihedral_angle << "\n";:w + dihedral_angle = Math::torsion(dihedrals[dtIndex][dIndex][0]->coords(), + dihedrals[dtIndex][dIndex][1]->coords(), + dihedrals[dtIndex][dIndex][2]->coords(), + dihedrals[dtIndex][dIndex][3]->coords()); + vv_filePtrs[dtIndex, dIndex] << traj->currentFrame << "\t" + << dihedral_angle << "\n"; } } } // close all these output files now that we're done looping over traj - for (auto v_fileOutputs : vv_fileOutputs) - for (auto ofs : v_fileOutputs) - ofs.close(); + for (auto v_fps: vv_filePtrs) + for (auto p_ofs : v_fps) + p_ofs->close(); + } \ No newline at end of file From 3d34aefc4c0b9f9ff96e192974f99f43ec45e625 Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Mon, 20 Jan 2020 17:37:00 -0500 Subject: [PATCH 16/41] fixed issues from pointer ostreams. Fixed move constructor warning. Builds; now to testing. 
--- Tools/dihedrals.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index d4ddec970..3ebc89889 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -69,14 +69,14 @@ vector> deep_split(const string &input, vector outer_frags = split(input, outer_regex); // split the outer fragments and push these vectors into return object for (string &frag : outer_frags) { - vector_of_frags.push_back(move(split(frag, inner_regex))); + vector_of_frags.push_back(split(frag, inner_regex)); } return vector_of_frags; } class ToolOptions : public opts::OptionsPackage { public: ToolOptions() - : dihedral_sel_strings(""), pdb(""), tags(""), dihedral_sels{}, + : dihedral_sels{}, dihedral_sel_strings(""), pdb(""), tags(""), prefix("dihedral"){}; // clang-format off void addGeneric(po::options_description& o) { @@ -284,7 +284,7 @@ int main(int argc, char *argv[]) { dihedrals[dtIndex][dIndex][1]->coords(), dihedrals[dtIndex][dIndex][2]->coords(), dihedrals[dtIndex][dIndex][3]->coords()); - vv_filePtrs[dtIndex, dIndex] << traj->currentFrame << "\t" + *(vv_filePtrs[dtIndex][dIndex]) << traj->currentFrame() << "\t" << dihedral_angle << "\n"; } } From 50f50c9777bbb7b1e566ae7b624f3ae32d428ad8 Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Wed, 22 Jan 2020 00:15:00 -0500 Subject: [PATCH 17/41] Changed bad sized group operation to erase clause in dihedralTypesConstruction. New runtime errors introduced, old ones squashed. 
--- Tools/dihedrals.cpp | 82 +++++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 26 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index 3ebc89889..1f7c6c085 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -51,10 +51,9 @@ const string atom_delim = ","; const string tag_delim = "_"; const string fsuffix = ".out"; - // C++ 11 regex split - // https://stackoverflow.com/questions/9435385/split-a-string-using-c11 - vector - split(const string &input, const string ®ular_expression) { +// C++ 11 regex split +// https://stackoverflow.com/questions/9435385/split-a-string-using-c11 +vector split(const string &input, const string ®ular_expression) { // passing -1 as the submatch index parameter performs splitting regex re(regular_expression); sregex_token_iterator first{input.begin(), input.end(), re, -1}, last; @@ -76,7 +75,7 @@ vector> deep_split(const string &input, class ToolOptions : public opts::OptionsPackage { public: ToolOptions() - : dihedral_sels{}, dihedral_sel_strings(""), pdb(""), tags(""), + : dihedral_sels{}, dihedral_sel_strings(""), pdb(""), tags(""), prefix("dihedral"){}; // clang-format off void addGeneric(po::options_description& o) { @@ -146,22 +145,52 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // reorder them here to match that provided by user // it may turn out this is unnecessary, // but the return order of selectAtoms calls is not specified. - for (auto oo_D : dihedralTypeVector) { - if (oo_D.size() != 4) { - ostringstream oss; - oss << "WARNING: dihedral specification found " << oo_D.size(); - oss << " atoms, not 4 in selection string set: \n\t"; - for (auto sel : dSels) - oss << sel << ", "; - oss << "\b\n"; - throw(LOOSError(oss.str())); - } - AtomicGroup reordered; - for (auto sel : dSels) - reordered += selectAtoms(oo_D, sel); + // Remove any AGs that didn't manage to contain four atoms after the split. 
+ dihedralTypeVector.erase(remove_if( + dihedralTypeVector.begin(), dihedralTypeVector.end(), + // lambda that filters by incorrectly sized AGs, emitting warnings as it + // goes. + [&](AtomicGroup& oo_D) -> bool { + if (oo_D.size() != 4) { + cerr << "WARNING: dihedral specification found " << oo_D.size(); + cerr << " atoms, not 4 in selection string set: \n\t"; + for (auto sel : dSels) + cerr << sel << ", "; + cerr << "\b\n"; + cerr << "Offending group: \n"; + cerr << oo_D; + cerr << "\nDROPPING THIS GROUP AND PROCEEDING.\n"; + return true; + } else { + AtomicGroup reordered; + for (auto sel : dSels) + reordered += selectAtoms(oo_D, sel); - oo_D = move(reordered); - } + oo_D = move(reordered); + return false; + } + })); + + // for (auto oo_D : dihedralTypeVector) { + // if (oo_D.size() != 4) { + // ostringstream oss; + // oss << "WARNING: dihedral specification found " << oo_D.size(); + // oss << " atoms, not 4 in selection string set: \n\t"; + // for (auto sel : dSels) + // oss << sel << ", "; + // oss << "\b\n"; + // oss << "Offending group: \n"; + // oss << oo_D; + // oss << "\nDROPPING THIS GROUP AND PROCEEDING.\n"; + + // throw(LOOSError(oss.str())); + // } + // AtomicGroup reordered; + // for (auto sel : dSels) + // reordered += selectAtoms(oo_D, sel); + + // oo_D = move(reordered); + // } dihedralAGs.push_back(move(dihedralTypeVector)); } return dihedralAGs; @@ -208,7 +237,8 @@ int main(int argc, char *argv[]) { else tag = tag_delim + patom->name(); } - auto p_ofstream = make_shared(topts->prefix + tag_delim + tag + fsuffix); + auto p_ofstream = + make_shared(topts->prefix + tag_delim + tag + fsuffix); *(p_ofstream) << "# " << header << "\n"; v_filePtrs.push_back(p_ofstream); } @@ -224,7 +254,8 @@ int main(int argc, char *argv[]) { for (auto patom : dihedral) tag += tag_delim + patom->name(); // append atom names to tag with // tag delimiter - auto p_ofstream = make_shared(topts->prefix + tag_delim + tag + fsuffix); + auto p_ofstream = + 
make_shared(topts->prefix + tag_delim + tag + fsuffix); *p_ofstream << "# " << header << "\n"; v_filePtrs.push_back(p_ofstream); } @@ -284,14 +315,13 @@ int main(int argc, char *argv[]) { dihedrals[dtIndex][dIndex][1]->coords(), dihedrals[dtIndex][dIndex][2]->coords(), dihedrals[dtIndex][dIndex][3]->coords()); - *(vv_filePtrs[dtIndex][dIndex]) << traj->currentFrame() << "\t" - << dihedral_angle << "\n"; + *(vv_filePtrs[dtIndex][dIndex]) + << traj->currentFrame() << "\t" << dihedral_angle << "\n"; } } } // close all these output files now that we're done looping over traj - for (auto v_fps: vv_filePtrs) + for (auto v_fps : vv_filePtrs) for (auto p_ofs : v_fps) p_ofs->close(); - } \ No newline at end of file From 5896423d5c35fb3908c1566056f5a105af28d08a Mon Sep 17 00:00:00 2001 From: louis Date: Wed, 22 Jan 2020 17:50:10 -0500 Subject: [PATCH 18/41] updating ending loop. Not finished. --- Tools/dihedrals.cpp | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index 1f7c6c085..668dab354 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -150,7 +150,7 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, dihedralTypeVector.begin(), dihedralTypeVector.end(), // lambda that filters by incorrectly sized AGs, emitting warnings as it // goes. 
- [&](AtomicGroup& oo_D) -> bool { + [&](AtomicGroup &oo_D) -> bool { if (oo_D.size() != 4) { cerr << "WARNING: dihedral specification found " << oo_D.size(); cerr << " atoms, not 4 in selection string set: \n\t"; @@ -170,27 +170,6 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, return false; } })); - - // for (auto oo_D : dihedralTypeVector) { - // if (oo_D.size() != 4) { - // ostringstream oss; - // oss << "WARNING: dihedral specification found " << oo_D.size(); - // oss << " atoms, not 4 in selection string set: \n\t"; - // for (auto sel : dSels) - // oss << sel << ", "; - // oss << "\b\n"; - // oss << "Offending group: \n"; - // oss << oo_D; - // oss << "\nDROPPING THIS GROUP AND PROCEEDING.\n"; - - // throw(LOOSError(oss.str())); - // } - // AtomicGroup reordered; - // for (auto sel : dSels) - // reordered += selectAtoms(oo_D, sel); - - // oo_D = move(reordered); - // } dihedralAGs.push_back(move(dihedralTypeVector)); } return dihedralAGs; From 6390c5f520fddbe9801a6f16b880437dba484c88 Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Thu, 23 Jan 2020 00:31:42 -0500 Subject: [PATCH 19/41] in erase-remove_if final arg to erase was omitted, causing failure to remove incorrectly numbered groups. Also added function pointer to dihedral size checker to make it responsive to verbosity. Program appears to be working. Checking for miscalculations... --- Tools/dihedrals.cpp | 136 +++++++++++++++++++++++++++++++------------- 1 file changed, 96 insertions(+), 40 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index 668dab354..d20761af4 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -128,7 +128,48 @@ class ToolOptions : public opts::OptionsPackage { // dihedrals. vector> sels_to_dihedralAGs(const vector> &dihedral_sels, - const AtomicGroup &scope) { + const AtomicGroup &scope, const int verbosity) { + + // pick whether to puke up atomic group when group has wrong num elts. 
+ bool (*chkDihedralSize)(AtomicGroup &, vector); + if (verbosity > 0) { + chkDihedralSize = [](AtomicGroup &oo_D, vector sels) -> bool { + if (oo_D.size() != 4) { + cerr << "WARNING: dihedral specification found " << oo_D.size(); + cerr << " atoms, not 4 in selection string set: \n\t"; + for (auto sel : sels) + cerr << sel << ", "; + cerr << "\b\b\n"; + cerr << "Offending group: \n"; + cerr << oo_D; + cerr << "\nDROPPING THIS GROUP AND PROCEEDING.\n"; + return true; + } else { + AtomicGroup reordered; + for (auto sel : sels) + reordered += selectAtoms(oo_D, sel); + + oo_D = move(reordered); + cerr << "included group of size: " << to_string(reordered.size()) + << "\n"; + return false; + } + }; + } else { + chkDihedralSize = [](AtomicGroup &oo_D, vector sels) -> bool { + if (oo_D.size() != 4) + return true; + else { + AtomicGroup reordered; + for (auto sel : sels) + reordered += selectAtoms(oo_D, sel); + + oo_D = move(reordered); + return false; + } + }; + } + // append to this for return later. vector> dihedralAGs; for (auto dSels : dihedral_sels) { @@ -140,37 +181,46 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, outoforder_dihedralType += selectAtoms(scope, sel); } // This separates all non-connected atoms into separate atomic groups. - vector dihedralTypeVector = + vector dihedralInstances = outoforder_dihedralType.splitByMolecule(); // reorder them here to match that provided by user // it may turn out this is unnecessary, // but the return order of selectAtoms calls is not specified. - // Remove any AGs that didn't manage to contain four atoms after the split. - dihedralTypeVector.erase(remove_if( - dihedralTypeVector.begin(), dihedralTypeVector.end(), - // lambda that filters by incorrectly sized AGs, emitting warnings as it - // goes. 
- [&](AtomicGroup &oo_D) -> bool { - if (oo_D.size() != 4) { - cerr << "WARNING: dihedral specification found " << oo_D.size(); - cerr << " atoms, not 4 in selection string set: \n\t"; - for (auto sel : dSels) - cerr << sel << ", "; - cerr << "\b\n"; - cerr << "Offending group: \n"; - cerr << oo_D; - cerr << "\nDROPPING THIS GROUP AND PROCEEDING.\n"; - return true; - } else { - AtomicGroup reordered; - for (auto sel : dSels) - reordered += selectAtoms(oo_D, sel); - - oo_D = move(reordered); - return false; - } - })); - dihedralAGs.push_back(move(dihedralTypeVector)); + // Remove any AGs that didn't manage to contain four atoms after the + // split. + dihedralInstances.erase( + remove_if(dihedralInstances.begin(), dihedralInstances.end(), + [&](AtomicGroup &oo_D) -> bool { + return (*chkDihedralSize)(oo_D, dSels); + }), + // lambda filters incorrectly sized AGs, warning for each such AG. + // [&](AtomicGroup &oo_D) -> bool { + // if (oo_D.size() != 4) { + // if (verbosity > 0) { + // cerr << "WARNING: dihedral specification found " + // << oo_D.size(); + // cerr << " atoms, not 4 in selection string set: \n\t"; + // for (auto sel : dSels) + // cerr << sel << ", "; + // cerr << "\b\b\n"; + // cerr << "Offending group: \n"; + // cerr << oo_D; + // cerr << "\nDROPPING THIS GROUP AND PROCEEDING.\n"; + // } + // return true; + // } else { + // AtomicGroup reordered; + // for (auto sel : dSels) + // reordered += selectAtoms(oo_D, sel); + + // oo_D = move(reordered); + // cerr << "included group of size: " + // << to_string(reordered.size()) << "\n"; + // return false; + // } + // }), + dihedralInstances.end()); + dihedralAGs.push_back(move(dihedralInstances)); } return dihedralAGs; } @@ -197,7 +247,7 @@ int main(int argc, char *argv[]) { // figure out what dihedrals to track vector> dihedrals = - sels_to_dihedralAGs(topts->dihedral_sels, scope); + sels_to_dihedralAGs(topts->dihedral_sels, scope, bopts->verbosity); // make tags, either from scratch or by adding to user 
appended tags. vector>> vv_filePtrs; @@ -266,10 +316,16 @@ int main(int argc, char *argv[]) { // if PDB name string was given, write PDBs to indexed files by that prefix if (!topts->pdb.empty()) { for (uint i = 0; i < dihedrals.size(); i++) { - for (uint j = 0; j < dihedrals.at(i).size(); i++) { - PDB pdb = PDB::fromAtomicGroup(dihedrals[i][j]); - pdb.remarks().add(to_string(j) + - " from: " + (topts->dihedral_sels.at(i).at(j))); + for (uint j = 0; j < (dihedrals.at(i)).size(); j++) { + PDB pdb = PDB::fromAtomicGroup(dihedrals.at(i).at(j)); + string rmks = to_string(j) + " from: "; + for (auto sel : topts->dihedral_sels.at(i)) + rmks += sel + ", "; + + rmks += "\b\b"; + + pdb.remarks().add(rmks); + ofstream pdbFile; pdbFile.open(topts->pdb + "_" + to_string(i) + "_" + to_string(j) + ".pdb"); @@ -288,13 +344,13 @@ int main(int argc, char *argv[]) { double dihedral_angle; while (traj->readFrame()) { traj->updateGroupCoords(model); - for (uint dtIndex = 0; dtIndex < dihedrals.size(); dtIndex++) { - for (uint dIndex = 0; dIndex < dihedrals[dtIndex].size(); dIndex++) { - dihedral_angle = Math::torsion(dihedrals[dtIndex][dIndex][0]->coords(), - dihedrals[dtIndex][dIndex][1]->coords(), - dihedrals[dtIndex][dIndex][2]->coords(), - dihedrals[dtIndex][dIndex][3]->coords()); - *(vv_filePtrs[dtIndex][dIndex]) + for (uint typeIdx = 0; typeIdx < dihedrals.size(); typeIdx++) { + for (uint dIdx = 0; dIdx < dihedrals.at(typeIdx).size(); dIdx++) { + dihedral_angle = Math::torsion(dihedrals[typeIdx][dIdx][0]->coords(), + dihedrals[typeIdx][dIdx][1]->coords(), + dihedrals[typeIdx][dIdx][2]->coords(), + dihedrals[typeIdx][dIdx][3]->coords()); + *(vv_filePtrs[typeIdx][dIdx]) << traj->currentFrame() << "\t" << dihedral_angle << "\n"; } } From 62da65837092d199bd76bd8d7ddbe0499bf46609 Mon Sep 17 00:00:00 2001 From: louis Date: Thu, 23 Jan 2020 15:25:55 -0500 Subject: [PATCH 20/41] added small functionality to swap \' out of filenames. 
--- Tools/dihedrals.cpp | 66 +++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index d20761af4..3d9d4156a 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -27,6 +27,7 @@ */ #include +#include #include #include #include @@ -76,7 +77,7 @@ class ToolOptions : public opts::OptionsPackage { public: ToolOptions() : dihedral_sels{}, dihedral_sel_strings(""), pdb(""), tags(""), - prefix("dihedral"){}; + prefix("dihedral"), quotes("p") {}; // clang-format off void addGeneric(po::options_description& o) { o.add_options() @@ -89,15 +90,17 @@ class ToolOptions : public opts::OptionsPackage { ("tags,T", po::value(&tags)->default_value(""), ("String of tags for each class of dihedral, separated by a '" + atom_delim + "'.").c_str()) ("prefix,p", po::value(&prefix)->default_value("dihedral"), - "Prefix for file names for each monitored dihedral.") + "Prefix for file names for each monitored dihedral."), + ("swap-single-quotes,Q", po::value("es)->default_value("p"), + "Swap single quote character in tags for some alternative. Provide single quote if no change desired..") ; } // clang-format on string print() const { ostringstream oss; - oss << boost::format("dihedral-sel-strings=%s,pdb=%s,tags=%s,prefix=%s") % - dihedral_sel_strings % pdb % tags % prefix; + oss << boost::format("dihedral-sel-strings=%s,pdb=%s,tags=%s,prefix=%s,quotes=%s") % + dihedral_sel_strings % pdb % tags % prefix % quotes; return (oss.str()); } @@ -121,6 +124,7 @@ class ToolOptions : public opts::OptionsPackage { string pdb; string tags; string prefix; + string quotes; }; // takes an atomic group for scope, and a vector of vectors of sel-strings. @@ -188,38 +192,12 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // but the return order of selectAtoms calls is not specified. // Remove any AGs that didn't manage to contain four atoms after the // split. 
- dihedralInstances.erase( - remove_if(dihedralInstances.begin(), dihedralInstances.end(), - [&](AtomicGroup &oo_D) -> bool { - return (*chkDihedralSize)(oo_D, dSels); - }), - // lambda filters incorrectly sized AGs, warning for each such AG. - // [&](AtomicGroup &oo_D) -> bool { - // if (oo_D.size() != 4) { - // if (verbosity > 0) { - // cerr << "WARNING: dihedral specification found " - // << oo_D.size(); - // cerr << " atoms, not 4 in selection string set: \n\t"; - // for (auto sel : dSels) - // cerr << sel << ", "; - // cerr << "\b\b\n"; - // cerr << "Offending group: \n"; - // cerr << oo_D; - // cerr << "\nDROPPING THIS GROUP AND PROCEEDING.\n"; - // } - // return true; - // } else { - // AtomicGroup reordered; - // for (auto sel : dSels) - // reordered += selectAtoms(oo_D, sel); - - // oo_D = move(reordered); - // cerr << "included group of size: " - // << to_string(reordered.size()) << "\n"; - // return false; - // } - // }), - dihedralInstances.end()); + dihedralInstances.erase(remove_if(dihedralInstances.begin(), + dihedralInstances.end(), + [&](AtomicGroup &oo_D) -> bool { + return (*chkDihedralSize)(oo_D, dSels); + }), + dihedralInstances.end()); dihedralAGs.push_back(move(dihedralInstances)); } return dihedralAGs; @@ -249,8 +227,9 @@ int main(int argc, char *argv[]) { vector> dihedrals = sels_to_dihedralAGs(topts->dihedral_sels, scope, bopts->verbosity); - // make tags, either from scratch or by adding to user appended tags. + // make file names, either from scratch or by adding to user appended tags. vector>> vv_filePtrs; + // if user supplied tags for file names, use those with reduced dihedral name info. if (topts->tags.empty()) { int resid; for (auto dihedralType : dihedrals) { @@ -261,10 +240,12 @@ int main(int argc, char *argv[]) { for (auto patom : dihedral) { // put a residue number with the name for each atom not from residue // of atom zero. 
+ string name = patom->name(); + boost::replace_all(name, "\'", topts->quotes); if (resid != patom->resid()) - tag = tag_delim + to_string(patom->resid()) + patom->name(); + tag = tag_delim + to_string(patom->resid()) + name; else - tag = tag_delim + patom->name(); + tag = tag_delim + name; } auto p_ofstream = make_shared(topts->prefix + tag_delim + tag + fsuffix); @@ -280,8 +261,11 @@ int main(int argc, char *argv[]) { for (auto dihedral : dihedrals.at(i)) { string tag = user_tags.at(i); tag += tag_delim + to_string(dihedral[0]->resid()); - for (auto patom : dihedral) - tag += tag_delim + patom->name(); // append atom names to tag with + for (auto patom : dihedral){ + string name = patom->name(); + boost::replace_all(name, "\'", topts->quotes); + tag += tag_delim + name; // append atom names to tag with + } // tag delimiter auto p_ofstream = make_shared(topts->prefix + tag_delim + tag + fsuffix); From 716d82e75712ac09de8cf040a9d3397650089ce8 Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Fri, 24 Jan 2020 17:35:31 -0500 Subject: [PATCH 21/41] removed unneeded reordering check --- Tools/dihedrals.cpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index 3d9d4156a..2a9a50a1f 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -148,29 +148,15 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, cerr << oo_D; cerr << "\nDROPPING THIS GROUP AND PROCEEDING.\n"; return true; - } else { - AtomicGroup reordered; - for (auto sel : sels) - reordered += selectAtoms(oo_D, sel); - - oo_D = move(reordered); - cerr << "included group of size: " << to_string(reordered.size()) - << "\n"; + } else return false; - } }; } else { chkDihedralSize = [](AtomicGroup &oo_D, vector sels) -> bool { if (oo_D.size() != 4) return true; - else { - AtomicGroup reordered; - for (auto sel : sels) - reordered += selectAtoms(oo_D, sel); - - oo_D = move(reordered); + else return false; - } }; } From 
8d8842afe406284446185fb3cb4843f5cda64e71 Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Fri, 24 Jan 2020 20:23:24 -0500 Subject: [PATCH 22/41] Added fullhelp message, made sure the -Q flag was added. --- Tools/dihedrals.cpp | 134 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 113 insertions(+), 21 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index 2a9a50a1f..df27a8489 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -41,16 +41,109 @@ namespace po = loos::OptionsFramework::po; // @cond TOOL_INTERNAL -string fullHelpMessage() { - string s = "XXX"; - return s; -} - // these determine where the string containing the dihedral selections is split const string quartet_delim = ":"; const string atom_delim = ","; const string tag_delim = "_"; const string fsuffix = ".out"; + +// clang-format off +const string msg = +"This tool is designed to allow the tracking of classes of dihedral angles \n" +"specified by atom selection. Unlike the torsion tool, also in LOOS, this tool \n" +"is designed to track the dihedral angle between chemically connected groups of \n" +"four atoms. The original intention for the tool was to monitor classes of \n" +"customarily defined dihedrals that might be defined for a large number of \n" +"residues, without having to write a separate command line for each such \n" +"dihedral. For example, you could use this tool to monitor all of the phi \n" +"backbone dihedrals in a protein, making only one pass through the trajectory as\n" +" you did so. The tool creates a file name for each dihedral angle chosen for \n" +"monitoring, and writes the frame number and the angle out in two columns, \n" +"separated by white space, for each frame provided to the tool. How these names \n" +"are created, how many classes of dihedral to monitor, and what frames to \n" +"consider from the input trajectory(ies) are all configurable. Because it \n" +"handles output through a number of out files, i. 
\n" +" \n" +"The --selection flag controls the scope of the search for dihedrals to monitor.\n" +" So in the aforementioned protein example, if you only wanted to monitor the \n" +"phi of the first five residues of some protein, you would provide a selection \n" +"string like 'resid < 6' (assuming of course that your protein's residues are \n" +"the first such in the overall list of residues, which is commonly the case). \n" +" \n" +"several of the flags are from LOOS classes devoted to providing basic tool \n" +"functionality, and they work the same as in other tools. For example, \n" +"trajectories are read using a MultiTrajectory, and so the skip, stride, and \n" +"range flags all do what they do for multi-trajectory based tools. This is also \n" +"why you can provide an arbitrary number of trajectories to this tool, and it \n" +"will gracefully treat them as one long trajectory. \n" +" \n" +"The --dihedral-sel-strings flag is obligate. It should be a string that \n" +"provides a list of atom selections in quartets separated by a '"+atom_delim+"'. Each \n" +"selection string should grab only one atom so that each quartet selects four \n" +"atoms, in the order that you would like them fed to the loos::Math::torsion() \n" +"function. If you'd like to monitor multiple types of dihedral, even if it's the\n" +" same dihedral across different residues (for example, chi, the glycosidic \n" +"dihedral in nucleic acids) you can include multiple quartets by interspersing \n" +"'"+quartet_delim+"' between each quartet. 
For example, to select the chi dihedral in nucleic \n" +"acids you could write: \n" +" \n" +" --dihedral-sel-strings $\'name == \"O4'\""+atom_delim+" name == \"C1'\""+atom_delim+" name == \"N9\""+atom_delim+" \\\n" +"name == \"C4\" "+quartet_delim+" name == \"O4'\""+atom_delim+" name == \"C1'\""+atom_delim+" name == \"N1\""+atom_delim+" name == \"C2\"\' \n" +" \n" +"Noting that the four selection strings before the '"+quartet_delim+"' are for purine chis, and \n" +"the four after are for pyrimidine chis. In the case of nucleic acids, which \n" +"usually have the \"\'\" character in the atom name, it can be very helpful to \n" +"put the arguments to this tool in a config file. See the LOOS online docs for \n" +"how to go about that. \n" +" \n" +"The --pdb flag is for debugging. If you want to use it, provide a prefix by \n" +"which to name the reported pdb files. It takes the first frame of the multi-\n" +"trajectory and writes out the scope, and each four atom sequence it found as \n" +"separate PDB files, prefixed with the provided argument. For each PDB created \n" +"thus, it numbers the files first by dihedral class, then by which element in \n" +"the class it is. So if you provide the 'test' as an argument, your PDBs might \n" +"look like: \n" +" \n" +" test_x_y.pdb \n" +" \n" +"Where the contents will be the yth dihedral of type x found. To get a nice \n" +"visual representation of how the selection went, I like to say 'pymol *.pdb' in\n" +" the subdirectory I made for this analysis, then show all as 'sticks/licorice',\n" +" and overwrite that setting for just the scope with 'lines'. This makes it \n" +"patently clear where the dihedrals being tracked will be in the molecule. \n" +" \n" +"The --tags option is for providing tags that correspond to each class of \n" +"dihedrals monitored by each quartet. 
Each tag provides an infix name that \n" +"corresponds to the selection string that is in that position in the '"+atom_delim+"' \n" +"separated list of --dihedral-sel-strings. For the chi example: \n" +" \n" +" --tags 'chi_R,chi_Y' \n" +" \n" +"Since the first of the two quartets corresponds to purines and the second to \n" +"pyrimidines. If you do provide this argument, it needs to have the same number \n" +"of ',' separated strings as you've provided quartets above. If you elect not to\n" +" provide it, then a tag is fabricated from the residue name, resid, and each of\n" +" the atoms selected, separated by '"+tag_delim+"'. If some of the atoms cross into another \n" +"residue, those atom names will have the resid of that neighboring residue \n" +"appearing after the name. If you do provide tags, then the tag, followed by the\n" +" resid of the first atom, then the names of the atoms in that particular \n" +"dihedral will be the filename instead, also separated by '"+tag_delim+"'. In the chi \n" +"example, because of the tags provided, an output file might look like the \n" +"following: \n" +" \n" +" roc_chi_R_1_O4p_C1p_N9_C4" +fsuffix+" \n" +" \n" +"Note that the primes have been replaced by the letter p, which can be changed \n" +"(even back to _shudder_ a \') if the user specifies the --swap-single-quotes \n" +"flag. \n" +" \n" +"The --prefix flag is a string that precedes all the dihedral time series file \n" +"names (aside from the output caused by --pdbs) This permits exclusive names for\n" +" different runs of the program and helps keep things organized. 
I often use a \n" +"system specifying prefix.\n" +; +// clang-format on + // C++ 11 regex split // https://stackoverflow.com/questions/9435385/split-a-string-using-c11 @@ -90,10 +183,9 @@ class ToolOptions : public opts::OptionsPackage { ("tags,T", po::value(&tags)->default_value(""), ("String of tags for each class of dihedral, separated by a '" + atom_delim + "'.").c_str()) ("prefix,p", po::value(&prefix)->default_value("dihedral"), - "Prefix for file names for each monitored dihedral."), + "Prefix for file names for each monitored dihedral.") ("swap-single-quotes,Q", po::value("es)->default_value("p"), - "Swap single quote character in tags for some alternative. Provide single quote if no change desired..") - ; + "Swap single quote character in outfile names for some alternative. Provide single quote if no change desired."); } // clang-format on @@ -137,23 +229,23 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // pick whether to puke up atomic group when group has wrong num elts. 
bool (*chkDihedralSize)(AtomicGroup &, vector); if (verbosity > 0) { - chkDihedralSize = [](AtomicGroup &oo_D, vector sels) -> bool { - if (oo_D.size() != 4) { - cerr << "WARNING: dihedral specification found " << oo_D.size(); + chkDihedralSize = [](AtomicGroup &dihedralAG, vector sels) -> bool { + if (dihedralAG.size() != 4) { + cerr << "WARNING: dihedral specification found " << dihedralAG.size(); cerr << " atoms, not 4 in selection string set: \n\t"; for (auto sel : sels) cerr << sel << ", "; cerr << "\b\b\n"; cerr << "Offending group: \n"; - cerr << oo_D; + cerr << dihedralAG; cerr << "\nDROPPING THIS GROUP AND PROCEEDING.\n"; return true; } else return false; }; } else { - chkDihedralSize = [](AtomicGroup &oo_D, vector sels) -> bool { - if (oo_D.size() != 4) + chkDihedralSize = [](AtomicGroup &dihedralAG, vector sels) -> bool { + if (dihedralAG.size() != 4) return true; else return false; @@ -166,13 +258,13 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // first get a set of AGs that have all the atoms of the dihedral in them // They are likely to be in the order of the selection matched first // i.e. all the matches for selection 1, then all for 2, and so forth. - AtomicGroup outoforder_dihedralType; + AtomicGroup dihedralTypes; for (auto sel : dSels) { - outoforder_dihedralType += selectAtoms(scope, sel); + dihedralTypes += selectAtoms(scope, sel); } // This separates all non-connected atoms into separate atomic groups. vector dihedralInstances = - outoforder_dihedralType.splitByMolecule(); + dihedralTypes.splitByMolecule(); // reorder them here to match that provided by user // it may turn out this is unnecessary, // but the return order of selectAtoms calls is not specified. @@ -180,8 +272,8 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // split. 
dihedralInstances.erase(remove_if(dihedralInstances.begin(), dihedralInstances.end(), - [&](AtomicGroup &oo_D) -> bool { - return (*chkDihedralSize)(oo_D, dSels); + [&](AtomicGroup &dihedralAG) -> bool { + return (*chkDihedralSize)(dihedralAG, dSels); }), dihedralInstances.end()); dihedralAGs.push_back(move(dihedralInstances)); @@ -192,7 +284,7 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, int main(int argc, char *argv[]) { string header = invocationHeader(argc, argv); - opts::BasicOptions *bopts = new opts::BasicOptions(fullHelpMessage()); + opts::BasicOptions *bopts = new opts::BasicOptions(msg); opts::BasicSelection *sopts = new opts::BasicSelection("backbone && !hydrogen"); opts::MultiTrajOptions *mtopts = new opts::MultiTrajOptions; @@ -222,7 +314,7 @@ int main(int argc, char *argv[]) { vector> v_filePtrs; for (auto dihedral : dihedralType) { resid = dihedral[0]->resid(); - string tag; + string tag(dihedral[0]->resname() + to_string(resid)); for (auto patom : dihedral) { // put a residue number with the name for each atom not from residue // of atom zero. From 678d777146c1743f8f5ce975d1b9b066d3ab81ab Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Fri, 31 Jan 2020 09:54:32 -0500 Subject: [PATCH 23/41] brought sels_to_dihedrals back from 9a904337 to re-include reordering check. --- Tools/dihedrals.cpp | 59 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index df27a8489..8fc138007 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -229,29 +229,72 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // pick whether to puke up atomic group when group has wrong num elts. 
bool (*chkDihedralSize)(AtomicGroup &, vector); if (verbosity > 0) { - chkDihedralSize = [](AtomicGroup &dihedralAG, vector sels) -> bool { - if (dihedralAG.size() != 4) { - cerr << "WARNING: dihedral specification found " << dihedralAG.size(); + chkDihedralSize = [](AtomicGroup &oo_D, vector sels) -> bool { + if (oo_D.size() != 4) { + cerr << "WARNING: dihedral specification found " << oo_D.size(); cerr << " atoms, not 4 in selection string set: \n\t"; for (auto sel : sels) cerr << sel << ", "; cerr << "\b\b\n"; cerr << "Offending group: \n"; - cerr << dihedralAG; + cerr << oo_D; cerr << "\nDROPPING THIS GROUP AND PROCEEDING.\n"; return true; - } else + } else { + AtomicGroup reordered; + for (auto sel : sels) + reordered += selectAtoms(oo_D, sel); + + oo_D = move(reordered); + cerr << "included group of size: " << to_string(reordered.size()) + << "\n"; return false; + } }; } else { - chkDihedralSize = [](AtomicGroup &dihedralAG, vector sels) -> bool { - if (dihedralAG.size() != 4) + chkDihedralSize = [](AtomicGroup &oo_D, vector sels) -> bool { + if (oo_D.size() != 4) return true; - else + else { + AtomicGroup reordered; + for (auto sel : sels) + reordered += selectAtoms(oo_D, sel); + + oo_D = move(reordered); return false; + } }; } + // append to this for return later. + vector> dihedralAGs; + for (auto dSels : dihedral_sels) { + // first get a set of AGs that have all the atoms of the dihedral in them + // They are likely to be in the order of the selection matched first + // i.e. all the matches for selection 1, then all for 2, and so forth. + AtomicGroup outoforder_dihedralType; + for (auto sel : dSels) { + outoforder_dihedralType += selectAtoms(scope, sel); + } + // This separates all non-connected atoms into separate atomic groups. 
+ vector dihedralInstances = + outoforder_dihedralType.splitByMolecule(); + // reorder them here to match that provided by user + // it may turn out this is unnecessary, + // but the return order of selectAtoms calls is not specified. + // Remove any AGs that didn't manage to contain four atoms after the + // split. + dihedralInstances.erase(remove_if(dihedralInstances.begin(), + dihedralInstances.end(), + [&](AtomicGroup &oo_D) -> bool { + return (*chkDihedralSize)(oo_D, dSels); + }), + dihedralInstances.end()); + dihedralAGs.push_back(move(dihedralInstances)); + } + return dihedralAGs; +} + // append to this for return later. vector> dihedralAGs; for (auto dSels : dihedral_sels) { From 9252e3ab739665093b9d4ba6ccc5f38685bcbb06 Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Fri, 31 Jan 2020 10:54:08 -0500 Subject: [PATCH 24/41] Adjusted reordering to work on compiled selectors instead of selstrings. --- Tools/dihedrals.cpp | 105 +++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 55 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index 8fc138007..340a70b3b 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -26,8 +26,8 @@ along with this program. If not, see . 
*/ -#include #include +#include #include #include #include @@ -46,7 +46,7 @@ const string quartet_delim = ":"; const string atom_delim = ","; const string tag_delim = "_"; const string fsuffix = ".out"; - + // clang-format off const string msg = "This tool is designed to allow the tracking of classes of dihedral angles \n" @@ -144,7 +144,6 @@ const string msg = ; // clang-format on - // C++ 11 regex split // https://stackoverflow.com/questions/9435385/split-a-string-using-c11 vector split(const string &input, const string ®ular_expression) { @@ -170,7 +169,7 @@ class ToolOptions : public opts::OptionsPackage { public: ToolOptions() : dihedral_sels{}, dihedral_sel_strings(""), pdb(""), tags(""), - prefix("dihedral"), quotes("p") {}; + prefix("dihedral"), quotes("p"){}; // clang-format off void addGeneric(po::options_description& o) { o.add_options() @@ -191,7 +190,8 @@ class ToolOptions : public opts::OptionsPackage { string print() const { ostringstream oss; - oss << boost::format("dihedral-sel-strings=%s,pdb=%s,tags=%s,prefix=%s,quotes=%s") % + oss << boost::format( + "dihedral-sel-strings=%s,pdb=%s,tags=%s,prefix=%s,quotes=%s") % dihedral_sel_strings % pdb % tags % prefix % quotes; return (oss.str()); } @@ -226,24 +226,45 @@ vector> sels_to_dihedralAGs(const vector> &dihedral_sels, const AtomicGroup &scope, const int verbosity) { - // pick whether to puke up atomic group when group has wrong num elts. - bool (*chkDihedralSize)(AtomicGroup &, vector); + // Map all the sel strings to kernels in style of utils.cpp: selectAtoms + vector> selectorClasses; + for (auto sel_class : dihedral_sels) { + vector selectorClass; + for (auto sel : sel_class) { + Parser parser; + parser.parse(sel); + try { + parser.parse(sel); + } catch (ParseError e) { + throw(ParseError("Error in parsing '" + sel + "' ... 
" + e.what())); + } + + KernelSelector selector(parser.kernel()); + selectorClass.push_back(move(selector)); + } + selectorClasses.push_back(selectorClass); + } + // In this if-else, pick whether to puke up atomic group when group has wrong num elts. + // Use function pointer strategy to elide test in loop. + bool (*chkDihedralSize)(AtomicGroup &, vector&, const vector& selstrs); if (verbosity > 0) { - chkDihedralSize = [](AtomicGroup &oo_D, vector sels) -> bool { + chkDihedralSize = [](AtomicGroup &oo_D, vector &sels, + const vector &selstrs) -> bool { + // Puke if (oo_D.size() != 4) { cerr << "WARNING: dihedral specification found " << oo_D.size(); cerr << " atoms, not 4 in selection string set: \n\t"; - for (auto sel : sels) - cerr << sel << ", "; + for (auto i = 0; i < selstrs.size(); i++) + cerr << selstrs[i] << ", "; cerr << "\b\b\n"; cerr << "Offending group: \n"; cerr << oo_D; cerr << "\nDROPPING THIS GROUP AND PROCEEDING.\n"; return true; - } else { + } else { // append AG, correctly reordered. AtomicGroup reordered; for (auto sel : sels) - reordered += selectAtoms(oo_D, sel); + reordered += oo_D.select(sel); oo_D = move(reordered); cerr << "included group of size: " << to_string(reordered.size()) @@ -252,13 +273,14 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, } }; } else { - chkDihedralSize = [](AtomicGroup &oo_D, vector sels) -> bool { + chkDihedralSize = [](AtomicGroup &oo_D, vector &sels, + const vector &selstrs) -> bool { if (oo_D.size() != 4) return true; else { AtomicGroup reordered; for (auto sel : sels) - reordered += selectAtoms(oo_D, sel); + reordered += oo_D.select(sel); oo_D = move(reordered); return false; @@ -268,13 +290,13 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // append to this for return later. 
vector> dihedralAGs; - for (auto dSels : dihedral_sels) { + for (auto i = 0; i < selectorClasses.size(); i++) { // first get a set of AGs that have all the atoms of the dihedral in them // They are likely to be in the order of the selection matched first // i.e. all the matches for selection 1, then all for 2, and so forth. AtomicGroup outoforder_dihedralType; - for (auto sel : dSels) { - outoforder_dihedralType += selectAtoms(scope, sel); + for (auto sel : selectorClasses[i]) { + outoforder_dihedralType += scope.select(sel); } // This separates all non-connected atoms into separate atomic groups. vector dihedralInstances = @@ -284,41 +306,13 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // but the return order of selectAtoms calls is not specified. // Remove any AGs that didn't manage to contain four atoms after the // split. - dihedralInstances.erase(remove_if(dihedralInstances.begin(), - dihedralInstances.end(), - [&](AtomicGroup &oo_D) -> bool { - return (*chkDihedralSize)(oo_D, dSels); - }), - dihedralInstances.end()); - dihedralAGs.push_back(move(dihedralInstances)); - } - return dihedralAGs; -} - - // append to this for return later. - vector> dihedralAGs; - for (auto dSels : dihedral_sels) { - // first get a set of AGs that have all the atoms of the dihedral in them - // They are likely to be in the order of the selection matched first - // i.e. all the matches for selection 1, then all for 2, and so forth. - AtomicGroup dihedralTypes; - for (auto sel : dSels) { - dihedralTypes += selectAtoms(scope, sel); - } - // This separates all non-connected atoms into separate atomic groups. - vector dihedralInstances = - dihedralTypes.splitByMolecule(); - // reorder them here to match that provided by user - // it may turn out this is unnecessary, - // but the return order of selectAtoms calls is not specified. - // Remove any AGs that didn't manage to contain four atoms after the - // split. 
- dihedralInstances.erase(remove_if(dihedralInstances.begin(), - dihedralInstances.end(), - [&](AtomicGroup &dihedralAG) -> bool { - return (*chkDihedralSize)(dihedralAG, dSels); - }), - dihedralInstances.end()); + dihedralInstances.erase( + remove_if(dihedralInstances.begin(), dihedralInstances.end(), + [&](AtomicGroup &oo_D) -> bool { + return (*chkDihedralSize)(oo_D, selectorClasses[i], + dihedral_sels[i]); + }), + dihedralInstances.end()); dihedralAGs.push_back(move(dihedralInstances)); } return dihedralAGs; @@ -350,7 +344,8 @@ int main(int argc, char *argv[]) { // make file names, either from scratch or by adding to user appended tags. vector>> vv_filePtrs; - // if user supplied tags for file names, use those with reduced dihedral name info. + // if user supplied tags for file names, use those with reduced dihedral name + // info. if (topts->tags.empty()) { int resid; for (auto dihedralType : dihedrals) { @@ -382,12 +377,12 @@ int main(int argc, char *argv[]) { for (auto dihedral : dihedrals.at(i)) { string tag = user_tags.at(i); tag += tag_delim + to_string(dihedral[0]->resid()); - for (auto patom : dihedral){ + for (auto patom : dihedral) { string name = patom->name(); boost::replace_all(name, "\'", topts->quotes); tag += tag_delim + name; // append atom names to tag with } - // tag delimiter + // tag delimiter auto p_ofstream = make_shared(topts->prefix + tag_delim + tag + fsuffix); *p_ofstream << "# " << header << "\n"; From 0f4624a733969b0b746c6a0c87f53c4b1ef48053 Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Sun, 2 Feb 2020 15:11:34 -0500 Subject: [PATCH 25/41] This was an erroneous attempt to re-use a compiled selector, which I should not have engaged in. Reverting. 
--- Tools/dihedrals.cpp | 39 +++++++++++++++++++++------------------ Tools/dihedrals.ini | 5 +++++ 2 files changed, 26 insertions(+), 18 deletions(-) create mode 100644 Tools/dihedrals.ini diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index 340a70b3b..8ed67788d 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -227,28 +227,29 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, const AtomicGroup &scope, const int verbosity) { // Map all the sel strings to kernels in style of utils.cpp: selectAtoms - vector> selectorClasses; + vector>> selectorClasses; for (auto sel_class : dihedral_sels) { - vector selectorClass; + vector> selectorClass; + for (auto sel : sel_class) { - Parser parser; - parser.parse(sel); + Parser parser(sel); + try { parser.parse(sel); } catch (ParseError e) { throw(ParseError("Error in parsing '" + sel + "' ... " + e.what())); } - KernelSelector selector(parser.kernel()); - selectorClass.push_back(move(selector)); + shared_ptr sk = make_shared(selector); + selectorClass.push_back(move(sk)); } - selectorClasses.push_back(selectorClass); + selectorClasses.push_back(move(selectorClass)); } // In this if-else, pick whether to puke up atomic group when group has wrong num elts. // Use function pointer strategy to elide test in loop. - bool (*chkDihedralSize)(AtomicGroup &, vector&, const vector& selstrs); + bool (*chkDihedralSize)(AtomicGroup &, vector>&, const vector& selstrs); if (verbosity > 0) { - chkDihedralSize = [](AtomicGroup &oo_D, vector &sels, + chkDihedralSize = [](AtomicGroup &oo_D, vector> &sels, const vector &selstrs) -> bool { // Puke if (oo_D.size() != 4) { @@ -264,7 +265,7 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, } else { // append AG, correctly reordered. 
AtomicGroup reordered; for (auto sel : sels) - reordered += oo_D.select(sel); + reordered += oo_D.select(*sel); oo_D = move(reordered); cerr << "included group of size: " << to_string(reordered.size()) @@ -273,14 +274,15 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, } }; } else { - chkDihedralSize = [](AtomicGroup &oo_D, vector &sels, + chkDihedralSize = [](AtomicGroup &oo_D, vector> &sels, const vector &selstrs) -> bool { + if (oo_D.size() != 4) return true; else { AtomicGroup reordered; for (auto sel : sels) - reordered += oo_D.select(sel); + reordered += oo_D.select(*sel); oo_D = move(reordered); return false; @@ -290,13 +292,14 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // append to this for return later. vector> dihedralAGs; - for (auto i = 0; i < selectorClasses.size(); i++) { + for (uint i = 0; i < selectorClasses.size(); i++) { // first get a set of AGs that have all the atoms of the dihedral in them // They are likely to be in the order of the selection matched first // i.e. all the matches for selection 1, then all for 2, and so forth. AtomicGroup outoforder_dihedralType; - for (auto sel : selectorClasses[i]) { - outoforder_dihedralType += scope.select(sel); + for (auto sel : selectorClasses.at(i)) { + cout << "HERE\n"; + outoforder_dihedralType += scope.select(*sel); } // This separates all non-connected atoms into separate atomic groups. 
vector dihedralInstances = @@ -309,8 +312,8 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, dihedralInstances.erase( remove_if(dihedralInstances.begin(), dihedralInstances.end(), [&](AtomicGroup &oo_D) -> bool { - return (*chkDihedralSize)(oo_D, selectorClasses[i], - dihedral_sels[i]); + return (*chkDihedralSize)(oo_D, selectorClasses.at(i), + dihedral_sels.at(i)); }), dihedralInstances.end()); dihedralAGs.push_back(move(dihedralInstances)); @@ -324,7 +327,7 @@ int main(int argc, char *argv[]) { opts::BasicOptions *bopts = new opts::BasicOptions(msg); opts::BasicSelection *sopts = new opts::BasicSelection("backbone && !hydrogen"); - opts::MultiTrajOptions *mtopts = new opts::MultiTrajOptions; + opts::MultiTrajOptions* mtopts = new opts::MultiTrajOptions; ToolOptions *topts = new ToolOptions; opts::AggregateOptions options; diff --git a/Tools/dihedrals.ini b/Tools/dihedrals.ini new file mode 100644 index 000000000..c4d164cc3 --- /dev/null +++ b/Tools/dihedrals.ini @@ -0,0 +1,5 @@ +dihedral-sel-strings = resid == 1 && name == "O4'", resid == 1 && name == "C1'", resid == 1 && name == "C2'", resid == 1 && name == "C3'" +tags = /Users/louissmith/test_nu1 +prefix = /Users/louissmith/test_nu1 +model = /Users/louissmith/test_nu1/testnu1.prmtop +traj = /Users/louissmith/test_nu1/test_nu1.pdb \ No newline at end of file From ed9bb03df587687bcb163fd6ace02ea02953ec2d Mon Sep 17 00:00:00 2001 From: "Chapin E. 
Cavender" Date: Fri, 17 Apr 2020 14:14:01 -0400 Subject: [PATCH 26/41] Resolved merge conflict in Tools/dihedrals.cpp --- Tools/dihedrals.cpp | 48 ++++++++++++--------------------------------- 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index 8ed67788d..940bf2130 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -226,32 +226,10 @@ vector> sels_to_dihedralAGs(const vector> &dihedral_sels, const AtomicGroup &scope, const int verbosity) { - // Map all the sel strings to kernels in style of utils.cpp: selectAtoms - vector>> selectorClasses; - for (auto sel_class : dihedral_sels) { - vector> selectorClass; - - for (auto sel : sel_class) { - Parser parser(sel); - - try { - parser.parse(sel); - } catch (ParseError e) { - throw(ParseError("Error in parsing '" + sel + "' ... " + e.what())); - } - KernelSelector selector(parser.kernel()); - shared_ptr sk = make_shared(selector); - selectorClass.push_back(move(sk)); - } - selectorClasses.push_back(move(selectorClass)); - } - // In this if-else, pick whether to puke up atomic group when group has wrong num elts. - // Use function pointer strategy to elide test in loop. - bool (*chkDihedralSize)(AtomicGroup &, vector>&, const vector& selstrs); + // pick whether to puke up atomic group when group has wrong num elts. 
+ bool (*chkSizeReorder)(AtomicGroup &, vector); if (verbosity > 0) { - chkDihedralSize = [](AtomicGroup &oo_D, vector> &sels, - const vector &selstrs) -> bool { - // Puke + chkSizeReorder = [](AtomicGroup &oo_D, vector sels) -> bool { if (oo_D.size() != 4) { cerr << "WARNING: dihedral specification found " << oo_D.size(); cerr << " atoms, not 4 in selection string set: \n\t"; @@ -274,9 +252,7 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, } }; } else { - chkDihedralSize = [](AtomicGroup &oo_D, vector> &sels, - const vector &selstrs) -> bool { - + chkSizeReorder = [](AtomicGroup &oo_D, vector sels) -> bool { if (oo_D.size() != 4) return true; else { @@ -309,18 +285,18 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // but the return order of selectAtoms calls is not specified. // Remove any AGs that didn't manage to contain four atoms after the // split. - dihedralInstances.erase( - remove_if(dihedralInstances.begin(), dihedralInstances.end(), - [&](AtomicGroup &oo_D) -> bool { - return (*chkDihedralSize)(oo_D, selectorClasses.at(i), - dihedral_sels.at(i)); - }), - dihedralInstances.end()); + dihedralInstances.erase(remove_if(dihedralInstances.begin(), + dihedralInstances.end(), + [&](AtomicGroup &oo_D) -> bool { + return (*chkSizeReorder)(oo_D, dSels); + }), + dihedralInstances.end()); dihedralAGs.push_back(move(dihedralInstances)); } return dihedralAGs; } + int main(int argc, char *argv[]) { string header = invocationHeader(argc, argv); @@ -462,4 +438,4 @@ int main(int argc, char *argv[]) { for (auto v_fps : vv_filePtrs) for (auto p_ofs : v_fps) p_ofs->close(); -} \ No newline at end of file +} From 38803c9a83713fd21f104924a48c3f4d61ce6bcb Mon Sep 17 00:00:00 2001 From: "Chapin E. 
Cavender" Date: Fri, 17 Apr 2020 14:16:49 -0400 Subject: [PATCH 27/41] Resolved merge conflict in Tools/dihedrals.ini --- Tools/dihedrals.cpp | 16 +++++++++++----- Tools/dihedrals.ini | 8 ++++---- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index 940bf2130..be33fe555 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include using namespace std; @@ -219,6 +220,10 @@ class ToolOptions : public opts::OptionsPackage { string quotes; }; + +template +bool pruner(ChkF&& f){return f;} + // takes an atomic group for scope, and a vector of vectors of sel-strings. // Corrects order of discovery of each dihedral, and returns atomic group of // dihedrals. @@ -227,9 +232,10 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, const AtomicGroup &scope, const int verbosity) { // pick whether to puke up atomic group when group has wrong num elts. - bool (*chkSizeReorder)(AtomicGroup &, vector); + bool (*chkSizeReorder)(AtomicGroup &, vector&); + if (verbosity > 0) { - chkSizeReorder = [](AtomicGroup &oo_D, vector sels) -> bool { + chkSizeReorder = [](AtomicGroup &oo_D, vector& sels) -> bool { if (oo_D.size() != 4) { cerr << "WARNING: dihedral specification found " << oo_D.size(); cerr << " atoms, not 4 in selection string set: \n\t"; @@ -252,7 +258,7 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, } }; } else { - chkSizeReorder = [](AtomicGroup &oo_D, vector sels) -> bool { + chkSizeReorder = [](AtomicGroup &oo_D, vector& sels) -> bool { if (oo_D.size() != 4) return true; else { @@ -287,9 +293,9 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // split. 
dihedralInstances.erase(remove_if(dihedralInstances.begin(), dihedralInstances.end(), - [&](AtomicGroup &oo_D) -> bool { + pruner([&](AtomicGroup &oo_D) -> bool { return (*chkSizeReorder)(oo_D, dSels); - }), + })), dihedralInstances.end()); dihedralAGs.push_back(move(dihedralInstances)); } diff --git a/Tools/dihedrals.ini b/Tools/dihedrals.ini index c4d164cc3..51e6a7386 100644 --- a/Tools/dihedrals.ini +++ b/Tools/dihedrals.ini @@ -1,5 +1,5 @@ dihedral-sel-strings = resid == 1 && name == "O4'", resid == 1 && name == "C1'", resid == 1 && name == "C2'", resid == 1 && name == "C3'" -tags = /Users/louissmith/test_nu1 -prefix = /Users/louissmith/test_nu1 -model = /Users/louissmith/test_nu1/testnu1.prmtop -traj = /Users/louissmith/test_nu1/test_nu1.pdb \ No newline at end of file +tags = nu1 +prefix = ../test_nu1/test_nu1 +model = ../test_nu1/testnu1.prmtop +traj = ../test_nu1/test_nu1.pdb From a51ca6c20d94be681c1059b62e9ae4684b42c95b Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Sun, 2 Feb 2020 17:04:56 -0500 Subject: [PATCH 28/41] improved fullhelp further --- Tools/dihedrals.cpp | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index be33fe555..b3bfda4d9 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -50,15 +50,19 @@ const string fsuffix = ".out"; // clang-format off const string msg = +"SYNOPSIS\n" +"\n" "This tool is designed to allow the tracking of classes of dihedral angles \n" -"specified by atom selection. Unlike the torsion tool, also in LOOS, this tool \n" -"is designed to track the dihedral angle between chemically connected groups of \n" -"four atoms. The original intention for the tool was to monitor classes of \n" -"customarily defined dihedrals that might be defined for a large number of \n" -"residues, without having to write a separate command line for each such \n" -"dihedral. 
For example, you could use this tool to monitor all of the phi \n" +"specified by atom selection. \n" +"\nDESCRIPTION\n" +"\n" +"Unlike the torsion tool, also in LOOS, this tool is designed to track the\n" +"dihedral angle between chemically connected groups of four atoms. The \n" +"original intention for the tool was to monitor classes of customarily\n" +"defined dihedrals that might exist in a large number of residues in one pass.\n" +"For example, one could use this tool to monitor all of the phi and psi\n" "backbone dihedrals in a protein, making only one pass through the trajectory as\n" -" you did so. The tool creates a file name for each dihedral angle chosen for \n" +"one did so. The tool creates a file name for each dihedral angle chosen for \n" "monitoring, and writes the frame number and the angle out in two columns, \n" "separated by white space, for each frame provided to the tool. How these names \n" "are created, how many classes of dihedral to monitor, and what frames to \n" @@ -142,6 +146,26 @@ const string msg = "names (aside from the output caused by --pdbs) This permits exclusive names for\n" " different runs of the program and helps keep things organized. I often use a \n" "system specifying prefix.\n" +"\n" +"EXAMPLE\n" +"\n" +"dihedrals --dihedral-sel-strings $\'name == \"O4'\""+atom_delim+" name == \"C1'\""+atom_delim+" name == \"N9\""+atom_delim+" \\\n" +"name == \"C4\" "+quartet_delim+" name == \"O4'\""+atom_delim+" name == \"C1'\""+atom_delim+" name == \"N1\""+atom_delim+" name == \"C2\"\' \\\n" +"--tags 'chi_Y,chi_R' --selection 'resid < 6' --prefix nucleicX nucleic.pdb nucleic.dcd\n" +"\n" +"This should do the calculation discussed in the description above. 
In particular\n" +"it will look for dihedrals matching the conventional names for chi from \n" +"purines and pyrimidines, writing each instance of these classes out to different\n" +"output files with names based on --prefix.\n" +"\n" +"POTENTIAL COMPLICATIONS\n" +"\n" +"Using verbosity and the --pdb flag can help diagnose problems with dihedral definitions.\n" +"This is a very good idea to check with all tools, but especially here, where you can get\n" +"results that look right but are not with selection strings that may be subtly off.\n" +"Another thing to bear in mind is that your model needs to have connectivity. You can\n" +"remedy this with the --infer-connectivity flag, but do so with caution. That inference\n" +"can be low quality if you get unlucky with the first structure in your file.\n" ; // clang-format on From b460a66581563e7d7d1d2e69f1acf1ca2d4acf31 Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Sun, 2 Feb 2020 20:22:31 -0500 Subject: [PATCH 29/41] eliminated the template idea. I was mistaken about it building. --- Tools/dihedrals.cpp | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index b3bfda4d9..6f60c120a 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -32,7 +32,6 @@ #include #include #include -#include #include using namespace std; @@ -245,8 +244,6 @@ class ToolOptions : public opts::OptionsPackage { }; -template -bool pruner(ChkF&& f){return f;} // takes an atomic group for scope, and a vector of vectors of sel-strings. 
// Corrects order of discovery of each dihedral, and returns atomic group of @@ -263,8 +260,8 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, if (oo_D.size() != 4) { cerr << "WARNING: dihedral specification found " << oo_D.size(); cerr << " atoms, not 4 in selection string set: \n\t"; - for (auto i = 0; i < selstrs.size(); i++) - cerr << selstrs[i] << ", "; + for (auto sel: sels) + cerr << sel << ", "; cerr << "\b\b\n"; cerr << "Offending group: \n"; cerr << oo_D; @@ -273,7 +270,7 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, } else { // append AG, correctly reordered. AtomicGroup reordered; for (auto sel : sels) - reordered += oo_D.select(*sel); + reordered += selectAtoms(oo_D, sel); oo_D = move(reordered); cerr << "included group of size: " << to_string(reordered.size()) @@ -288,7 +285,7 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, else { AtomicGroup reordered; for (auto sel : sels) - reordered += oo_D.select(*sel); + reordered += selectAtoms(oo_D, sel); oo_D = move(reordered); return false; @@ -298,14 +295,13 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // append to this for return later. vector> dihedralAGs; - for (uint i = 0; i < selectorClasses.size(); i++) { + for (auto dSels : dihedral_sels) { // first get a set of AGs that have all the atoms of the dihedral in them // They are likely to be in the order of the selection matched first // i.e. all the matches for selection 1, then all for 2, and so forth. AtomicGroup outoforder_dihedralType; - for (auto sel : selectorClasses.at(i)) { - cout << "HERE\n"; - outoforder_dihedralType += scope.select(*sel); + for (auto sel : dSels) { + outoforder_dihedralType += selectAtoms(scope, sel); } // This separates all non-connected atoms into separate atomic groups. vector dihedralInstances = @@ -317,9 +313,9 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // split. 
dihedralInstances.erase(remove_if(dihedralInstances.begin(), dihedralInstances.end(), - pruner([&](AtomicGroup &oo_D) -> bool { + [&dSels, chkSizeReorder](AtomicGroup &oo_D) -> bool { return (*chkSizeReorder)(oo_D, dSels); - })), + }), dihedralInstances.end()); dihedralAGs.push_back(move(dihedralInstances)); } From 6eab193355d65a2e1332889106ef20fdeded50bd Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Sun, 2 Feb 2020 20:55:38 -0500 Subject: [PATCH 30/41] touched up fullhelp. Added infer-connectivity. --- Tools/dihedrals.cpp | 63 +++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index 6f60c120a..0c21ef1ca 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -148,9 +148,9 @@ const string msg = "\n" "EXAMPLE\n" "\n" -"dihedrals --dihedral-sel-strings $\'name == \"O4'\""+atom_delim+" name == \"C1'\""+atom_delim+" name == \"N9\""+atom_delim+" \\\n" +"dihedrals \\\n--dihedral-sel-strings $\'name == \"O4'\""+atom_delim+" name == \"C1'\""+atom_delim+" name == \"N9\""+atom_delim+" \\\n" "name == \"C4\" "+quartet_delim+" name == \"O4'\""+atom_delim+" name == \"C1'\""+atom_delim+" name == \"N1\""+atom_delim+" name == \"C2\"\' \\\n" -"--tags 'chi_Y,chi_R' --selection 'resid < 6' --prefix nucleicX nucleic.pdb nucleic.dcd\n" +"--tags 'chi_Y,chi_R' --selection 'resid < 6' --prefix nucX nuc.pdb nuc.dcd\n" "\n" "This should do the calculation discussed in the description above. 
In particular\n" "it will look for dihedrals matching the conventional names for chi from \n" @@ -159,12 +159,16 @@ const string msg = "\n" "POTENTIAL COMPLICATIONS\n" "\n" -"Using verbosity and the --pdb flag can help diagnose problems with dihedral definitions.\n" -"This is a very good idea to check with all tools, but especially here, where you can get\n" -"results that look right but are not with selection strings that may be subtly off.\n" -"Another thing to bear in mind is that your model needs to have connectivity. You can\n" -"remedy this with the --infer-connectivity flag, but do so with caution. That inference\n" -"can be low quality if you get unlucky with the first structure in your file.\n" +"Verbosity and the --pdb flag help diagnose problems with dihedral selections.\n" +"This is a very good thing to check with all tools, but especially here, where \n" +"results could look right but be wrong with selection strings that are subtly off.\n" +"\n" +"Another thing to bear in mind is that the model needs connectivity. One can\n" +"remedy this with the --infer-connectivity flag, but use caution. That inference\n" +"can be low quality if one gets unlucky with the first frame in the file, since\n" +"it is based on how far apart atoms are from one another. Regardless of what\n" +"is provided for this flag, if connectivity information is found then none will\n" +"be inferrd.\n" ; // clang-format on @@ -201,12 +205,15 @@ class ToolOptions : public opts::OptionsPackage { ("Ordered quartets of selection strings; each quartet is delimited by '" + quartet_delim + "', and each string within by '" + atom_delim + "'.").c_str()) + ("infer-connectivity", po::value(&bondlength)->default_value(-1), + "Infer connectivity using provided distance for models lacking this. ALERT: uses hard distance cutoff on first frame of traj to infer connectivity. 
Only does this for values greater than zero.") ("pdb", po::value(&pdb)->default_value(""), "Prefix to write PDBs for each dihedral selected from frame 1 of provided multi-traj.") ("tags,T", po::value(&tags)->default_value(""), ("String of tags for each class of dihedral, separated by a '" + atom_delim + "'.").c_str()) ("prefix,p", po::value(&prefix)->default_value("dihedral"), "Prefix for file names for each monitored dihedral.") + ("swap-single-quotes,Q", po::value("es)->default_value("p"), "Swap single quote character in outfile names for some alternative. Provide single quote if no change desired."); } @@ -214,9 +221,9 @@ class ToolOptions : public opts::OptionsPackage { string print() const { ostringstream oss; - oss << boost::format( - "dihedral-sel-strings=%s,pdb=%s,tags=%s,prefix=%s,quotes=%s") % - dihedral_sel_strings % pdb % tags % prefix % quotes; + oss << boost::format("dihedral-sel-strings=%s,pdb=%s,tags=%s,prefix=%s," + "quotes=%s,bondlength=%d") % + dihedral_sel_strings % pdb % tags % prefix % quotes % bondlength; return (oss.str()); } @@ -241,10 +248,9 @@ class ToolOptions : public opts::OptionsPackage { string tags; string prefix; string quotes; + float bondlength; }; - - // takes an atomic group for scope, and a vector of vectors of sel-strings. // Corrects order of discovery of each dihedral, and returns atomic group of // dihedrals. @@ -253,14 +259,14 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, const AtomicGroup &scope, const int verbosity) { // pick whether to puke up atomic group when group has wrong num elts. 
- bool (*chkSizeReorder)(AtomicGroup &, vector&); + bool (*chkSizeReorder)(AtomicGroup &, vector &); if (verbosity > 0) { - chkSizeReorder = [](AtomicGroup &oo_D, vector& sels) -> bool { + chkSizeReorder = [](AtomicGroup &oo_D, vector &sels) -> bool { if (oo_D.size() != 4) { cerr << "WARNING: dihedral specification found " << oo_D.size(); cerr << " atoms, not 4 in selection string set: \n\t"; - for (auto sel: sels) + for (auto sel : sels) cerr << sel << ", "; cerr << "\b\b\n"; cerr << "Offending group: \n"; @@ -279,7 +285,8 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, } }; } else { - chkSizeReorder = [](AtomicGroup &oo_D, vector& sels) -> bool { + + chkSizeReorder = [](AtomicGroup &oo_D, vector &sels) -> bool { if (oo_D.size() != 4) return true; else { @@ -311,25 +318,24 @@ sels_to_dihedralAGs(const vector> &dihedral_sels, // but the return order of selectAtoms calls is not specified. // Remove any AGs that didn't manage to contain four atoms after the // split. - dihedralInstances.erase(remove_if(dihedralInstances.begin(), - dihedralInstances.end(), - [&dSels, chkSizeReorder](AtomicGroup &oo_D) -> bool { - return (*chkSizeReorder)(oo_D, dSels); - }), - dihedralInstances.end()); + dihedralInstances.erase( + remove_if(dihedralInstances.begin(), dihedralInstances.end(), + [&dSels, chkSizeReorder](AtomicGroup &oo_D) -> bool { + return (*chkSizeReorder)(oo_D, dSels); + }), + dihedralInstances.end()); dihedralAGs.push_back(move(dihedralInstances)); } return dihedralAGs; } - int main(int argc, char *argv[]) { string header = invocationHeader(argc, argv); opts::BasicOptions *bopts = new opts::BasicOptions(msg); opts::BasicSelection *sopts = new opts::BasicSelection("backbone && !hydrogen"); - opts::MultiTrajOptions* mtopts = new opts::MultiTrajOptions; + opts::MultiTrajOptions *mtopts = new opts::MultiTrajOptions; ToolOptions *topts = new ToolOptions; opts::AggregateOptions options; @@ -339,6 +345,13 @@ int main(int argc, char *argv[]) { // set up system for 
looping. Load coords from frame 0 into scope. AtomicGroup model = mtopts->model; + if (model.hasBonds()) { + } else if (topts->bondlength > 0) + model.findBonds(topts->bondlength); + else + throw(LOOSError( + "Model does not appear to have chemical connectivity, and " + "infer-connectivity has not been set to a positive value.\n")); AtomicGroup scope = selectAtoms(model, sopts->selection); pTraj traj = mtopts->trajectory; traj->updateGroupCoords(model); From 2e555d7a78c11c37415e3a6df476dd9e61131e27 Mon Sep 17 00:00:00 2001 From: Louis Smith Date: Thu, 13 Feb 2020 09:52:12 -0500 Subject: [PATCH 31/41] Getting rid of travis.yml in compliance with PR comment. --- .travis.yml | 50 -------------------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index ac148f0b6..000000000 --- a/.travis.yml +++ /dev/null @@ -1,50 +0,0 @@ -language: cpp - -branches: - only: - - master - -matrix: - include: - - os: linux - - os: osx - osx_image: xcode10.1 - - -#addons: -# apt: -# packages: -# - scons -# - libboost-all-dev -# - libboost-regex-dev -# - libatlas-base-dev -# - libnetcdf-dev -# - swig -# - python3-dev -# - python3-numpy -# - python3-scipy -# - libeigen3-dev -# -#before_install: -# - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update ; fi -## - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install boost; fi -## - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install netcdf; fi -# - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install swig; fi -# - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install doxygen; fi -# - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install graphviz; fi -# - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install scons; fi -# - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install eigen; fi - -install: - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O 
miniconda.sh; - elif [[ "$TRAVIS_OS_NAME" == "osx" ]]; then - wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh; - fi - - bash miniconda.sh -b -p $HOME/miniconda - - source "$HOME/miniconda/etc/profile.d/conda.sh" - - hash -r - - conda config --set always_yes yes --set changeps1 no - -script: - - ./conda_build.sh loos 1 From 1fbb3cb70e44b8b034a417beee535071e9f18691 Mon Sep 17 00:00:00 2001 From: "Chapin E. Cavender" Date: Sat, 21 Mar 2020 00:19:18 -0400 Subject: [PATCH 32/41] Header for RnaSuite class --- src/RnaSuite.hpp | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 src/RnaSuite.hpp diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp new file mode 100644 index 000000000..dcb4d3178 --- /dev/null +++ b/src/RnaSuite.hpp @@ -0,0 +1,75 @@ +/* + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008, Tod D. Romo, Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#if !defined(LOOS_RNASUITE_HPP) +#define LOOS_RNASUITE_HPP + +#include +#include +#include +#include + +namespace loos { + + //! Class for assigning backbone suites to an RNA + /** + * This class acts on an AtomicGroup and assigns backbone suites (as + * defined in Richardson et al. 
(2008) RNA 14, 465-481) to any RNA residues + * present. It also calculates the "suiteness" score that describes how + * well the residue fits into its assigned suite. + */ + class RnaSuite { + public: + + RnaSuite(const AtomicGroup &group, const double &suiteness_tolerance); + + RnaSuite(const AtomicGroup &group); + + RnaSuite(); + + //! Method to extract RNA backbone atoms from an AtomicGroup + /** + * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', + * and O3') and splits them into AtomicGroups by residue id. + */ + void extractRnaBackboneAtoms(); + + //! Method to calculate backbone dihedrals for each RNA residue + /** + * This methods calculates the six RNA backbone dihedrals (i.e. alpha, + * beta, gamma, delta, epsilon, and zeta) for each residue. + */ + void calculateBackboneDihedrals(); + + private: + vector> alpha_atoms; + vector> beta_atoms; + vector> gamma_atoms; + vector> delta_atoms; + vector> epsilon_atoms; + vector> zeta_atoms; + double suiteness_tolerance; + + }; +} + +#endif + From 11f01c871987c67fa1ac69dc6d3a7b6df6aea8d5 Mon Sep 17 00:00:00 2001 From: "Chapin E. 
Cavender" Date: Tue, 24 Mar 2020 19:02:53 -0400 Subject: [PATCH 33/41] Method to extract RNA backbone atoms; skeleton of tool --- .gitignore | 2 + Tools/SConscript | 2 +- Tools/rna_suites.cpp | 132 ++++++++++++++++++ src/RnaSuite.cpp | 309 +++++++++++++++++++++++++++++++++++++++++++ src/RnaSuite.hpp | 59 ++++++--- src/SConscript | 2 + src/loos.hpp | 1 + 7 files changed, 491 insertions(+), 16 deletions(-) create mode 100644 Tools/rna_suites.cpp create mode 100644 src/RnaSuite.cpp diff --git a/.gitignore b/.gitignore index d09cf2c3c..666a37b11 100644 --- a/.gitignore +++ b/.gitignore @@ -155,6 +155,8 @@ Tools/coverlap Tools/subsetter Tools/xy_rdf Tools/model-select +Tools/dihedrals +Tools/rna_suites Packages/Clustering/** !Packages/Clustering/*.cpp !Packages/Clustering/*.hpp diff --git a/Tools/SConscript b/Tools/SConscript index a63050d1f..8afe06e72 100644 --- a/Tools/SConscript +++ b/Tools/SConscript @@ -36,7 +36,7 @@ apps = apps + ' traj2pdb merge-traj center-molecule contact-time perturb-structu apps = apps + ' big-svd kurskew periodic_box area_per_lipid residue-contact-map' apps = apps + ' cross-dist fcontacts serialize-selection transition_contacts fixdcd smooth-traj membrane_map packing_score' apps = apps + ' mops dibmops xtcinfo model-meta-stats verap lipid_survival multi-rmsds rms-overlap' -apps = apps + ' dihedrals' +apps = apps + ' dihedrals rna_suites' list = [] diff --git a/Tools/rna_suites.cpp b/Tools/rna_suites.cpp new file mode 100644 index 000000000..0053e44bb --- /dev/null +++ b/Tools/rna_suites.cpp @@ -0,0 +1,132 @@ +/* + rna_suites.cpp + + Assigns backbone suites to RNAs based on backbone dihedrals + + Chapin E. Cavender 2020-03 +*/ + +/* + + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008-2020 Tod D. 
Romo & Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +using namespace std; +using namespace loos; + +namespace opts = loos::OptionsFramework; +namespace po = loos::OptionsFramework::po; + +string fullHelpMessage(void) { + + string full_help_message = +"\n" +" SYNOPSIS\n" +"\n" +" Assigns backbone suites to RNAs based on backbone dihedrals\n" +"\n" +" DESCRIPTION\n" +"\n" +" This tool\n" +"\n" +" EXAMPLES\n" +"\n" +" rna_suites\n" + ; + + return full_help_message; + +} + +class ToolOptions : public opts::OptionsPackage { + +public: + + ToolOptions() {} + + void addGeneric(po::options_description& o) { + + o.add_options() + ("suiteness_cutoff,c", + po::value(&suiteness_cutoff)->default_value(0.01), + "Cutoff for the suiteness score of non-outliers") + ; + + } + + string print() const { + + ostringstream oss; + oss << boost::format( + "suiteness_cutoff=%f" + ) % suiteness_cutoff; + return (oss.str()); + + } + + double suiteness_cutoff; + +}; // ToolOptions + +// Tool functions + +int main(int argc, char *argv[]) { + + // Get command-line input + string header = invocationHeader(argc, argv); + + // Set up tool options + opts::BasicOptions *bopts = new opts::BasicOptions(fullHelpMessage()); + opts::BasicSelection *sopts = new opts::BasicSelection("!hydrogen"); + opts::TrajectoryWithFrameIndices *tropts = + new 
opts::TrajectoryWithFrameIndices; + ToolOptions *topts = new ToolOptions; + + opts::AggregateOptions options; + options.add(bopts).add(sopts).add(tropts).add(topts); + if (!options.parse(argc, argv)) + exit(-1); + + // Assign tool options to variables + const double suiteness_cutoff = topts->suiteness_cutoff; + + // Print command-line input + cout << "# " << header << "\n"; + + // Do some error-checking on tool options + + // Build LOOS system and generate atom selection + AtomicGroup model = tropts->model; + pTraj traj = tropts->trajectory; + vector indices = tropts->frameList(); +// AtomicGroup rna_atoms = selectAtoms(model, topts->selection); + + // Number of frames in trajectory + const uint N_frame = indices.size(); + + // Create RNASuite object from RNA atoms + RnaSuite rna_suite = RnaSuite(model, suiteness_cutoff); + + // Print dihedrals + rna_suite.printBackboneAtoms(); + +} diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp new file mode 100644 index 000000000..1906f6c2b --- /dev/null +++ b/src/RnaSuite.cpp @@ -0,0 +1,309 @@ +/* + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008, Tod D. Romo, Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include + +using namespace std; + +namespace loos { + + // Constructors + + RnaSuite::RnaSuite(const AtomicGroup &group, + const double suiteness_cutoff_) { + + extractRnaBackboneAtoms(group); + suiteness_cutoff = suiteness_cutoff_; + + } + + RnaSuite::RnaSuite(const AtomicGroup &group) { + + extractRnaBackboneAtoms(group); + suiteness_cutoff = 0.01; + + } + + RnaSuite::RnaSuite() { + suiteness_cutoff = 0.01; + } + + // Methods + + void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { + + std::vector continuous_alpha_atoms; + std::vector continuous_beta_atoms; + std::vector continuous_gamma_atoms; + std::vector continuous_delta_atoms; + std::vector continuous_epsilon_atoms; + std::vector continuous_zeta_atoms; + AtomicGroup dihedral_atoms; + AtomicGroup residue_p; + AtomicGroup residue_o5p; + AtomicGroup residue_c5p; + AtomicGroup residue_c4p; + AtomicGroup residue_c3p; + AtomicGroup residue_o3p; + AtomicGroup prev_residue_c4p; + AtomicGroup prev_residue_c3p; + AtomicGroup prev_residue_o3p; + int current_resid = -2; + + // True if this is the initial residue in a continuous group + bool first_res = true; + + // Clear vector of vectors of AtomicGroups for each backbone dihedral + alpha_atoms.clear(); + beta_atoms.clear(); + gamma_atoms.clear(); + delta_atoms.clear(); + epsilon_atoms.clear(); + zeta_atoms.clear(); + + // Extract all RNA backbone atoms (P, O5', C5', C4', C3', and O3') into + // one AtomicGroup. 
Use raw string literal R"()" to avoid escaping " + AtomicGroup backbone = selectAtoms(group, + R"(name =~ "^(P|C[345]'|O[35]')$")"); + + // Split by resid and loop over residues + for (AtomicGroup residue : backbone.splitByResidue()) { + + // Select RNA backbone atoms from residue + residue_p = selectAtoms(residue, R"(name == "P")"); + residue_o5p = selectAtoms(residue, R"(name == "O5'")"); + residue_c5p = selectAtoms(residue, R"(name == "C5'")"); + residue_c4p = selectAtoms(residue, R"(name == "C4'")"); + residue_c3p = selectAtoms(residue, R"(name == "C3'")"); + residue_o3p = selectAtoms(residue, R"(name == "O3'")"); + + // If any atom besides P is missing, skip this residue and start a + // new continuous group + if (residue_o5p.size() != 1 || residue_c5p.size() != 1 || + residue_c4p.size() != 1 || residue_c3p.size() != 1 || + residue_o3p.size() != 1) { + + first_res = true; + continue; + + } + + // If the resid is not sequential, this is not a continuous group + if (residue_p.size() != 1 + || residue_p[0]->resid() != current_resid + 1) first_res = true; + + if (first_res) { + + first_res = false; + + // Record any previous continuous group + if (continuous_alpha_atoms.size() != 0) { + + alpha_atoms.push_back(continuous_alpha_atoms); + beta_atoms.push_back(continuous_beta_atoms); + gamma_atoms.push_back(continuous_gamma_atoms); + delta_atoms.push_back(continuous_delta_atoms); + epsilon_atoms.push_back(continuous_epsilon_atoms); + zeta_atoms.push_back(continuous_zeta_atoms); + + } + + // Clear vectors of AtomicGroups for this continuous groups + continuous_alpha_atoms.clear(); + continuous_beta_atoms.clear(); + continuous_gamma_atoms.clear(); + continuous_delta_atoms.clear(); + continuous_epsilon_atoms.clear(); + continuous_zeta_atoms.clear(); + + // Record delta for this initial residue + dihedral_atoms = residue_c5p; + dihedral_atoms.append(residue_c4p); + dihedral_atoms.append(residue_c3p); + dihedral_atoms.append(residue_o3p); + 
continuous_delta_atoms.push_back(dihedral_atoms); + + } else { + + // Record backbone dihedrals for the remainder of the suite, + // i.e. epsilon and zeta of the previous residue and alpha, + // beta, gamma, and delta of the current residue + dihedral_atoms = prev_residue_c4p; + dihedral_atoms.append(prev_residue_c3p); + dihedral_atoms.append(prev_residue_o3p); + dihedral_atoms.append(residue_p); + continuous_epsilon_atoms.push_back(dihedral_atoms); + dihedral_atoms = prev_residue_c3p; + dihedral_atoms.append(prev_residue_o3p); + dihedral_atoms.append(residue_p); + dihedral_atoms.append(residue_o5p); + continuous_zeta_atoms.push_back(dihedral_atoms); + dihedral_atoms = prev_residue_o3p; + dihedral_atoms.append(residue_p); + dihedral_atoms.append(residue_o5p); + dihedral_atoms.append(residue_c5p); + continuous_alpha_atoms.push_back(dihedral_atoms); + dihedral_atoms = residue_p; + dihedral_atoms.append(residue_o5p); + dihedral_atoms.append(residue_c5p); + dihedral_atoms.append(residue_c4p); + continuous_beta_atoms.push_back(dihedral_atoms); + dihedral_atoms = residue_o5p; + dihedral_atoms.append(residue_c5p); + dihedral_atoms.append(residue_c4p); + dihedral_atoms.append(residue_c3p); + continuous_gamma_atoms.push_back(dihedral_atoms); + dihedral_atoms = residue_c5p; + dihedral_atoms.append(residue_c4p); + dihedral_atoms.append(residue_c3p); + dihedral_atoms.append(residue_o3p); + continuous_delta_atoms.push_back(dihedral_atoms); + + } + + // Save C4', C3', and O3' for dihedrals in the next residue + prev_residue_c4p = residue_c4p; + prev_residue_c3p = residue_c3p; + prev_residue_o3p = residue_o3p; + + // Update resid + current_resid = residue_o5p[0]->resid(); + + } // loop over residues + + // Record any previous continuous group + if (continuous_alpha_atoms.size() != 0) { + + alpha_atoms.push_back(continuous_alpha_atoms); + beta_atoms.push_back(continuous_beta_atoms); + gamma_atoms.push_back(continuous_gamma_atoms); + delta_atoms.push_back(continuous_delta_atoms); + 
epsilon_atoms.push_back(continuous_epsilon_atoms); + zeta_atoms.push_back(continuous_zeta_atoms); + + } + + } // extractRnaBackboneAtoms() + + double RnaSuite::getSuitenessCutoff() const { + return suiteness_cutoff; + } // getSuitenessCutoff() + + void RnaSuite::printBackboneAtoms() const { + + uint continuous_counter; + uint residue_counter; + + cout << boost::format("Sizes %d %d %d %d %d %d\n") % alpha_atoms.size() + % beta_atoms.size() % gamma_atoms.size() % delta_atoms.size() + % epsilon_atoms.size() % zeta_atoms.size(); + + continuous_counter = 0; + for (std::vector continuous_atoms : alpha_atoms) { + continuous_counter++; + cout << boost::format("Alpha %d Size %d\n") % continuous_counter + % continuous_atoms.size(); + residue_counter = 0; + for (AtomicGroup residue_atoms : continuous_atoms) { + residue_counter++; + cout << boost::format("Alpha %d %d\n") % continuous_counter + % residue_counter; + cout << residue_atoms << endl; + } + } + + continuous_counter = 0; + for (std::vector continuous_atoms : beta_atoms) { + continuous_counter++; + cout << boost::format("Beta %d Size %d\n") % continuous_counter + % continuous_atoms.size(); + residue_counter = 0; + for (AtomicGroup residue_atoms : continuous_atoms) { + residue_counter++; + cout << boost::format("Beta %d %d\n") % continuous_counter + % residue_counter; + cout << residue_atoms << endl; + } + } + + continuous_counter = 0; + for (std::vector continuous_atoms : gamma_atoms) { + continuous_counter++; + cout << boost::format("Gamma %d Size %d\n") % continuous_counter + % continuous_atoms.size(); + residue_counter = 0; + for (AtomicGroup residue_atoms : continuous_atoms) { + residue_counter++; + cout << boost::format("Gamma %d %d\n") % continuous_counter + % residue_counter; + cout << residue_atoms << endl; + } + } + + continuous_counter = 0; + for (std::vector continuous_atoms : delta_atoms) { + continuous_counter++; + cout << boost::format("Delta %d Size %d\n") % continuous_counter + % 
continuous_atoms.size(); + residue_counter = 0; + for (AtomicGroup residue_atoms : continuous_atoms) { + residue_counter++; + cout << boost::format("Delta %d %d\n") % continuous_counter + % residue_counter; + cout << residue_atoms << endl; + } + } + + continuous_counter = 0; + for (std::vector continuous_atoms : epsilon_atoms) { + continuous_counter++; + cout << boost::format("Epsilon %d Size %d\n") % continuous_counter + % continuous_atoms.size(); + residue_counter = 0; + for (AtomicGroup residue_atoms : continuous_atoms) { + residue_counter++; + cout << boost::format("Epsilon %d %d\n") % continuous_counter + % residue_counter; + cout << residue_atoms << endl; + } + } + + continuous_counter = 0; + for (std::vector continuous_atoms : zeta_atoms) { + continuous_counter++; + cout << boost::format("Zeta %d Size %d\n") % continuous_counter + % continuous_atoms.size(); + residue_counter = 0; + for (AtomicGroup residue_atoms : continuous_atoms) { + residue_counter++; + cout << boost::format("Zeta %d %d\n") % continuous_counter + % residue_counter; + cout << residue_atoms << endl; + } + } + + } // printBackboneAtoms() + + void RnaSuite::setSuitenessCutoff(const double suiteness_cutoff_) { + suiteness_cutoff = suiteness_cutoff_; + } // setSuitenessCutoff() + +} diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index dcb4d3178..f6cb50bf8 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -23,9 +23,9 @@ #define LOOS_RNASUITE_HPP #include -#include #include -#include + +using namespace std; namespace loos { @@ -37,36 +37,65 @@ namespace loos { * well the residue fits into its assigned suite. */ class RnaSuite { + public: - RnaSuite(const AtomicGroup &group, const double &suiteness_tolerance); + RnaSuite(const AtomicGroup &group, const double suiteness_cutoff_); RnaSuite(const AtomicGroup &group); RnaSuite(); - //! Method to extract RNA backbone atoms from an AtomicGroup + //! Method to assign residues to backbone suites from Richardson et al. 
/** - * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', - * and O3') and splits them into AtomicGroups by residue id. + * This method assigns residues to one of the 46 backbone suites + * defined in Richardson et al. (2008) RNA 14, 465-481. The suite of a + * residue is defined from delta of the previous residue to delta of + * the current residue. */ - void extractRnaBackboneAtoms(); + void assignRichardsonSuites(); //! Method to calculate backbone dihedrals for each RNA residue /** - * This methods calculates the six RNA backbone dihedrals (i.e. alpha, + * This method calculates the six RNA backbone dihedrals (i.e. alpha, * beta, gamma, delta, epsilon, and zeta) for each residue. */ void calculateBackboneDihedrals(); + //! Method to extract RNA backbone atoms from an AtomicGroup + /** + * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', + * and O3') and splits them into AtomicGroups by residue id. + */ + void extractRnaBackboneAtoms(const AtomicGroup &group); + + //! Method to return the cutoff for the suiteness score of non-outliers + double getSuitenessCutoff() const; + + //! Method to print groups of backbone atoms for each dihedral + void printBackboneAtoms() const; + + //! 
Method to set the cutoff for the suiteness score of non-outliers + void setSuitenessCutoff(const double suiteness_cutoff_); + private: - vector> alpha_atoms; - vector> beta_atoms; - vector> gamma_atoms; - vector> delta_atoms; - vector> epsilon_atoms; - vector> zeta_atoms; - double suiteness_tolerance; + + std::vector> alpha_atoms; + std::vector> beta_atoms; + std::vector> gamma_atoms; + std::vector> delta_atoms; + std::vector> epsilon_atoms; + std::vector> zeta_atoms; + std::vector> alpha; + std::vector> beta; + std::vector> gamma; + std::vector> delta; + std::vector> epsilon; + std::vector> zeta; + std::vector suite_name_hemi5; + std::vector suite_name_hemi3; + std::vector suiteness; + double suiteness_cutoff; }; } diff --git a/src/SConscript b/src/SConscript index f3274eeab..5d1ae37cf 100644 --- a/src/SConscript +++ b/src/SConscript @@ -41,6 +41,7 @@ apps = apps + ' charmm.cpp AtomicNumberDeducer.cpp OptionsFramework.cpp revision apps = apps + ' utils_random.cpp utils_structural.cpp LineReader.cpp xtcwriter.cpp alignment.cpp MultiTraj.cpp' apps = apps + ' index_range_parser.cpp' apps = apps + ' Weights.cpp' +apps = apps + ' RnaSuite.cpp' if (env['HAS_NETCDF']): apps = apps + ' amber_netcdf.cpp' @@ -73,6 +74,7 @@ hdr = hdr + ' xdr.hpp xtc.hpp gro.hpp trr.hpp exceptions.hpp MatrixOps.hpp sorti hdr = hdr + ' Simplex.hpp charmm.hpp AtomicNumberDeducer.hpp OptionsFramework.hpp' hdr = hdr + ' utils_random.hpp utils_structural.hpp LineReader.hpp xtcwriter.hpp' hdr = hdr + ' trajwriter.hpp MultiTraj.hpp index_range_parser.hpp' +hdr = hdr + ' RnaSuite.hpp' if (env['HAS_NETCDF']): hdr = hdr + ' amber_netcdf.hpp' diff --git a/src/loos.hpp b/src/loos.hpp index df91f206e..8e00197a3 100644 --- a/src/loos.hpp +++ b/src/loos.hpp @@ -114,6 +114,7 @@ #include #include +#include #endif From 526aad48b3da42971076afe2b018b19b83c71471 Mon Sep 17 00:00:00 2001 From: "Chapin E. 
Cavender" Date: Thu, 26 Mar 2020 00:36:25 -0400 Subject: [PATCH 34/41] Method to calculate backbone dihedrals --- Tools/rna_suites.cpp | 15 ++- src/RnaSuite.cpp | 258 +++++++++++++++++++++++++++++-------------- src/RnaSuite.hpp | 26 +++++ 3 files changed, 213 insertions(+), 86 deletions(-) diff --git a/Tools/rna_suites.cpp b/Tools/rna_suites.cpp index 0053e44bb..5c470c3e3 100644 --- a/Tools/rna_suites.cpp +++ b/Tools/rna_suites.cpp @@ -118,15 +118,26 @@ int main(int argc, char *argv[]) { AtomicGroup model = tropts->model; pTraj traj = tropts->trajectory; vector indices = tropts->frameList(); -// AtomicGroup rna_atoms = selectAtoms(model, topts->selection); + AtomicGroup rna_atoms = selectAtoms(model, sopts->selection); // Number of frames in trajectory const uint N_frame = indices.size(); // Create RNASuite object from RNA atoms - RnaSuite rna_suite = RnaSuite(model, suiteness_cutoff); + RnaSuite rna_suite = RnaSuite(rna_atoms, suiteness_cutoff); // Print dihedrals rna_suite.printBackboneAtoms(); + // Loop over trajectory + for (vector::iterator i = indices.begin(); i != indices.end(); i++) { + + traj->readFrame(*i); + traj->updateGroupCoords(model); + + rna_suite.calculateBackboneDihedrals(); + rna_suite.printBackboneDihedrals(); + + } + } diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp index 1906f6c2b..77591e2f9 100644 --- a/src/RnaSuite.cpp +++ b/src/RnaSuite.cpp @@ -25,7 +25,9 @@ using namespace std; namespace loos { - // Constructors + // |------------------------------------------------------------------------ + // | Constructors + // |------------------------------------------------------------------------ RnaSuite::RnaSuite(const AtomicGroup &group, const double suiteness_cutoff_) { @@ -46,7 +48,99 @@ namespace loos { suiteness_cutoff = 0.01; } - // Methods + // |------------------------------------------------------------------------ + // | Methods + // |------------------------------------------------------------------------ + + void 
RnaSuite::calculateBackboneDihedrals() { + + // Clear vector of vectors of doubles for each backbone dihedral + alpha.clear(); + beta.clear(); + gamma.clear(); + delta.clear(); + epsilon.clear(); + zeta.clear(); + + for (size_t i = 0; i < N_continuous_group; i++) { + + std::vector continuous_alpha(N_residue[i]); + std::vector continuous_beta(N_residue[i]); + std::vector continuous_gamma(N_residue[i]); + std::vector continuous_delta(N_residue[i] + 1); + std::vector continuous_epsilon(N_residue[i]); + std::vector continuous_zeta(N_residue[i]); + + for (size_t j = 0; j < N_residue[i]; j++) { + + continuous_alpha[j] = Math::torsion( + alpha_atoms[i][j][0], alpha_atoms[i][j][1], + alpha_atoms[i][j][2], alpha_atoms[i][j][3]); + continuous_beta[j] = Math::torsion( + beta_atoms[i][j][0], beta_atoms[i][j][1], + beta_atoms[i][j][2], beta_atoms[i][j][3]); + continuous_gamma[j] = Math::torsion( + gamma_atoms[i][j][0], gamma_atoms[i][j][1], + gamma_atoms[i][j][2], gamma_atoms[i][j][3]); + continuous_delta[j] = Math::torsion( + delta_atoms[i][j][0], delta_atoms[i][j][1], + delta_atoms[i][j][2], delta_atoms[i][j][3]); + continuous_epsilon[j] = Math::torsion( + epsilon_atoms[i][j][0], epsilon_atoms[i][j][1], + epsilon_atoms[i][j][2], epsilon_atoms[i][j][3]); + continuous_zeta[j] = Math::torsion( + zeta_atoms[i][j][0], zeta_atoms[i][j][1], + zeta_atoms[i][j][2], zeta_atoms[i][j][3]); + + } + + continuous_delta[N_residue[i]] = Math::torsion( + delta_atoms[i][N_residue[i]][0], + delta_atoms[i][N_residue[i]][1], + delta_atoms[i][N_residue[i]][2], + delta_atoms[i][N_residue[i]][3]); + + alpha.push_back(continuous_alpha); + beta.push_back(continuous_beta); + gamma.push_back(continuous_gamma); + delta.push_back(continuous_delta); + epsilon.push_back(continuous_epsilon); + zeta.push_back(continuous_zeta); + + } + + } // calculateBackboneDihedrals() + + void RnaSuite::checkContinuousGroupSize( + const std::vector> &group_vector, + const size_t target_size, const string dihedral_name) const 
{ + + if (group_vector.size() != target_size) { + + cout << boost::format("Error: different number of continuous " + "groups for alpha (%d) and %s (%d)\n") % target_size + % dihedral_name % group_vector.size(); + throw(LOOSError()); + + } + + } // checkContinuousGroupSize() + + void RnaSuite::checkResidueSize( + const std::vector &residue_vector, + const size_t target_size, const string dihedral_name, + const size_t group_index) const { + + if (residue_vector.size() != target_size) { + + cout << boost::format("Error: different number of residues in " + "continuous group %d for alpha (%d) and %s (%d)\n") + % group_index % target_size % dihedral_name + % residue_vector.size(); + + } + + } // checkResidueSize() void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { @@ -201,6 +295,32 @@ namespace loos { } + // Get number of continuous groups and check that all dihedral groups + // have same size + N_continuous_group = alpha_atoms.size(); + checkContinuousGroupSize(beta_atoms, N_continuous_group, "beta"); + checkContinuousGroupSize(gamma_atoms, N_continuous_group, "gamma"); + checkContinuousGroupSize(delta_atoms, N_continuous_group, "delta"); + checkContinuousGroupSize(epsilon_atoms, N_continuous_group, "epsilon"); + checkContinuousGroupSize(zeta_atoms, N_continuous_group, "zeta"); + + // Get number of residues in each continuous group and check that these + // are consistent across backbone dihedrals. Delta should have one + // additional residue per continuous group. 
+ size_t residue_size; + + for (size_t i = 0; i < N_continuous_group; i++) { + + residue_size = alpha_atoms[i].size(); + checkResidueSize(beta_atoms[i], residue_size, "beta", i + 1); + checkResidueSize(gamma_atoms[i], residue_size, "gamma", i + 1); + checkResidueSize(delta_atoms[i], residue_size + 1, "delta", i + 1); + checkResidueSize(epsilon_atoms[i], residue_size, "epsilon", i + 1); + checkResidueSize(zeta_atoms[i], residue_size, "zeta", i + 1); + N_residue.push_back(residue_size); + + } + } // extractRnaBackboneAtoms() double RnaSuite::getSuitenessCutoff() const { @@ -209,98 +329,68 @@ namespace loos { void RnaSuite::printBackboneAtoms() const { - uint continuous_counter; - uint residue_counter; - - cout << boost::format("Sizes %d %d %d %d %d %d\n") % alpha_atoms.size() - % beta_atoms.size() % gamma_atoms.size() % delta_atoms.size() - % epsilon_atoms.size() % zeta_atoms.size(); - - continuous_counter = 0; - for (std::vector continuous_atoms : alpha_atoms) { - continuous_counter++; - cout << boost::format("Alpha %d Size %d\n") % continuous_counter - % continuous_atoms.size(); - residue_counter = 0; - for (AtomicGroup residue_atoms : continuous_atoms) { - residue_counter++; - cout << boost::format("Alpha %d %d\n") % continuous_counter - % residue_counter; - cout << residue_atoms << endl; - } - } + size_t i_plus; + size_t j_plus; - continuous_counter = 0; - for (std::vector continuous_atoms : beta_atoms) { - continuous_counter++; - cout << boost::format("Beta %d Size %d\n") % continuous_counter - % continuous_atoms.size(); - residue_counter = 0; - for (AtomicGroup residue_atoms : continuous_atoms) { - residue_counter++; - cout << boost::format("Beta %d %d\n") % continuous_counter - % residue_counter; - cout << residue_atoms << endl; - } - } + cout << boost::format("Number of continuous groups: %d\n") + % N_continuous_group; - continuous_counter = 0; - for (std::vector continuous_atoms : gamma_atoms) { - continuous_counter++; - cout << boost::format("Gamma %d Size 
%d\n") % continuous_counter - % continuous_atoms.size(); - residue_counter = 0; - for (AtomicGroup residue_atoms : continuous_atoms) { - residue_counter++; - cout << boost::format("Gamma %d %d\n") % continuous_counter - % residue_counter; - cout << residue_atoms << endl; - } - } + if (N_continuous_group == 0) return; - continuous_counter = 0; - for (std::vector continuous_atoms : delta_atoms) { - continuous_counter++; - cout << boost::format("Delta %d Size %d\n") % continuous_counter - % continuous_atoms.size(); - residue_counter = 0; - for (AtomicGroup residue_atoms : continuous_atoms) { - residue_counter++; - cout << boost::format("Delta %d %d\n") % continuous_counter - % residue_counter; - cout << residue_atoms << endl; - } - } + for (size_t i = 0; i < N_continuous_group; i++) { + + i_plus = i + 1; + cout << boost::format("Continuous group %d has %d residues\n") + % i_plus % N_residue[i]; + + for (size_t j = 0; j < N_residue[i]; j++) { + + j_plus = j + 1; + cout << boost::format("Delta %d %d\n") % i_plus % j_plus; + cout << delta_atoms[i][j] << endl; + cout << boost::format("Epsilon %d %d\n") % i_plus % j_plus; + cout << epsilon_atoms[i][j] << endl; + cout << boost::format("Zeta %d %d\n") % i_plus % j_plus; + cout << zeta_atoms[i][j] << endl; + cout << boost::format("Alpha %d %d\n") % i_plus % j_plus; + cout << alpha_atoms[i][j] << endl; + cout << boost::format("Beta %d %d\n") % i_plus % j_plus; + cout << beta_atoms[i][j] << endl; + cout << boost::format("Gamma %d %d\n") % i_plus % j_plus; + cout << gamma_atoms[i][j] << endl; - continuous_counter = 0; - for (std::vector continuous_atoms : epsilon_atoms) { - continuous_counter++; - cout << boost::format("Epsilon %d Size %d\n") % continuous_counter - % continuous_atoms.size(); - residue_counter = 0; - for (AtomicGroup residue_atoms : continuous_atoms) { - residue_counter++; - cout << boost::format("Epsilon %d %d\n") % continuous_counter - % residue_counter; - cout << residue_atoms << endl; } + + cout << 
boost::format("Delta %d %d\n") % i_plus + % (N_residue[i] + 1); + cout << delta_atoms[i][N_residue[i]] << endl; + } - continuous_counter = 0; - for (std::vector continuous_atoms : zeta_atoms) { - continuous_counter++; - cout << boost::format("Zeta %d Size %d\n") % continuous_counter - % continuous_atoms.size(); - residue_counter = 0; - for (AtomicGroup residue_atoms : continuous_atoms) { - residue_counter++; - cout << boost::format("Zeta %d %d\n") % continuous_counter - % residue_counter; - cout << residue_atoms << endl; + } // printBackboneAtoms() + + void RnaSuite::printBackboneDihedrals() const { + + if (alpha.empty()) return; + + for (size_t i = 0; i < N_continuous_group; i++) { + + for (size_t j = 0; j < N_residue[i]; j++) { + + cout << boost::format("%4d %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") + % gamma_atoms[i][j][0]->resid() % delta[i][j] + % epsilon[i][j] % zeta[i][j] % alpha[i][j] % beta[i][j] + % gamma[i][j]; + } + + cout << boost::format("%4d %8.3f\n") + % delta_atoms[i][N_residue[i]][0]->resid() + % delta[i][N_residue[i]]; + } - } // printBackboneAtoms() + } // printBackboneDihedrals() void RnaSuite::setSuitenessCutoff(const double suiteness_cutoff_) { suiteness_cutoff = suiteness_cutoff_; diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index f6cb50bf8..29d0d829e 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -24,6 +24,7 @@ #include #include +#include using namespace std; @@ -62,6 +63,16 @@ namespace loos { */ void calculateBackboneDihedrals(); + //! Method to check the size of a vector of continuous groups + void checkContinuousGroupSize( + const std::vector> &group_vector, + const size_t target_size, const string dihedral_name) const; + + //! Method to check the size of a vector of residues + void checkResidueSize(const std::vector &residue_vector, + const size_t target_size, const string dihedral_name, + const size_t group_index) const; + //! 
Method to extract RNA backbone atoms from an AtomicGroup /** * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', @@ -75,26 +86,41 @@ namespace loos { //! Method to print groups of backbone atoms for each dihedral void printBackboneAtoms() const; + //! Method to print backbone dihedrals for each residue + void printBackboneDihedrals() const; + //! Method to set the cutoff for the suiteness score of non-outliers void setSuitenessCutoff(const double suiteness_cutoff_); private: + // Vector of continuous groups, composed of vectors of AtomicGroups + // for each residue within a continuous group std::vector> alpha_atoms; std::vector> beta_atoms; std::vector> gamma_atoms; std::vector> delta_atoms; std::vector> epsilon_atoms; std::vector> zeta_atoms; + + // Vector of vectors of backbone dihedrals std::vector> alpha; std::vector> beta; std::vector> gamma; std::vector> delta; std::vector> epsilon; std::vector> zeta; + + // Output: suite name (composed of a number-like character for the + // 5' hemi-nucleotide and a letter-like character for the + // 3' hemi-nucleotide) and suiteness score std::vector suite_name_hemi5; std::vector suite_name_hemi3; std::vector suiteness; + + // Other internal variables + size_t N_continuous_group = 0; + vector N_residue; double suiteness_cutoff; }; From 8ad12490ce163217cb19e998469b91cebb3f33b3 Mon Sep 17 00:00:00 2001 From: "Chapin E. 
Cavender" Date: Fri, 27 Mar 2020 19:06:50 -0400 Subject: [PATCH 35/41] (Untested) Assign ddg_index, reference suites from suitename --- src/RnaSuite.cpp | 397 ++++++++++++++++++++++++++++++++++++++++------- src/RnaSuite.hpp | 77 ++++++--- 2 files changed, 397 insertions(+), 77 deletions(-) diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp index 77591e2f9..2bf64c694 100644 --- a/src/RnaSuite.cpp +++ b/src/RnaSuite.cpp @@ -30,7 +30,7 @@ namespace loos { // |------------------------------------------------------------------------ RnaSuite::RnaSuite(const AtomicGroup &group, - const double suiteness_cutoff_) { + const double suiteness_cutoff_) { extractRnaBackboneAtoms(group); suiteness_cutoff = suiteness_cutoff_; @@ -52,6 +52,86 @@ namespace loos { // | Methods // |------------------------------------------------------------------------ + void RnaSuite::assignRichardsonSuites() { + + bool outlier; + size_t N_delta = delta_min.size(); + size_t N_gamma = gamma_min.size(); + size_t N_dg = N_delta * N_gamma; + uint suite_counter = 0; + vector suite(7); + + // Index into delta, delta(j+1), gamma clusters + uint ddg_index; + + if (alpha.empty()) { + + cout << "Warning: backbone dihedrals are empty" << endl; + return; + + } + + // Initialize vectors of suite names and suiteness scores + suite_names.clear() + suiteness.clear() + suite_names.reserve(N_suite); + suiteness.reserve(N_suite); + + for (size_t i = 0; i < N_continuous_group; ++i) { + + for (size_t j = 0; j < N_residue[i]; ++j) { + + suite = {delta[i][j], epsilon[i][j], zeta[i][j], alpha[i][j], + beta[i][j], gamma[i][j], delta[i][j + 1]}; + + // Assign delta(j-1), delta, gamma index. These 3 dihedrals have + // 12 clusters that are independent of the other 4 dihedrals. + ddg_index = 0; + + // Filter on 5' delta. Values outside of this range are + // indicative of incorrect stereochemistry in the ribose. 
+ if (filterDDG(suite[0], delta_min, delta_max, N_dg, ddg_index) + == N_delta) { + + suite_names[suite_counter] = "!d"; + suiteness[suite_counter] = 0.0; + continue; + + } + + // Filter on 3' delta + if (filterDDG(suite[6], delta_min, delta_max, N_gamma, ddg_index) + == N_delta) { + + suite_names[suite_counter] = "!d"; + suiteness[suite_counter] = 0.0; + continue; + + } + + // Filter on gamma + if (filterDDG(suite[5], gamma_min, gamma_max, 1, ddg_index) + == N_gamma) { + + suite_names[suite_counter] = "!g"; + suiteness[suite_counter] = 0.0; + continue; + + } + + // Filter on epsilon. Values outside of this range are + // indicative of a misfit sugar pucker. + + // Get 4D scaled hyperellipsoid distance + + suite_counter++; + + } // loop over residues + + } // loop over continuous groups + + } // assignRichardsonSuites() + void RnaSuite::calculateBackboneDihedrals() { // Clear vector of vectors of doubles for each backbone dihedral @@ -62,43 +142,28 @@ namespace loos { epsilon.clear(); zeta.clear(); - for (size_t i = 0; i < N_continuous_group; i++) { - - std::vector continuous_alpha(N_residue[i]); - std::vector continuous_beta(N_residue[i]); - std::vector continuous_gamma(N_residue[i]); - std::vector continuous_delta(N_residue[i] + 1); - std::vector continuous_epsilon(N_residue[i]); - std::vector continuous_zeta(N_residue[i]); - - for (size_t j = 0; j < N_residue[i]; j++) { - - continuous_alpha[j] = Math::torsion( - alpha_atoms[i][j][0], alpha_atoms[i][j][1], - alpha_atoms[i][j][2], alpha_atoms[i][j][3]); - continuous_beta[j] = Math::torsion( - beta_atoms[i][j][0], beta_atoms[i][j][1], - beta_atoms[i][j][2], beta_atoms[i][j][3]); - continuous_gamma[j] = Math::torsion( - gamma_atoms[i][j][0], gamma_atoms[i][j][1], - gamma_atoms[i][j][2], gamma_atoms[i][j][3]); - continuous_delta[j] = Math::torsion( - delta_atoms[i][j][0], delta_atoms[i][j][1], - delta_atoms[i][j][2], delta_atoms[i][j][3]); - continuous_epsilon[j] = Math::torsion( - epsilon_atoms[i][j][0], 
epsilon_atoms[i][j][1], - epsilon_atoms[i][j][2], epsilon_atoms[i][j][3]); - continuous_zeta[j] = Math::torsion( - zeta_atoms[i][j][0], zeta_atoms[i][j][1], - zeta_atoms[i][j][2], zeta_atoms[i][j][3]); + for (size_t i = 0; i < N_continuous_group; ++i) { + + vector continuous_alpha(N_residue[i]); + vector continuous_beta(N_residue[i]); + vector continuous_gamma(N_residue[i]); + vector continuous_delta(N_residue[i] + 1); + vector continuous_epsilon(N_residue[i]); + vector continuous_zeta(N_residue[i]); + + for (size_t j = 0; j < N_residue[i]; ++j) { + + continuous_alpha[j] = calculateDihedral(alpha_atoms[i][j]); + continuous_beta[j] = calculateDihedral(beta_atoms[i][j]); + continuous_gamma[j] = calculateDihedral(gamma_atoms[i][j]); + continuous_delta[j] = calculateDihedral(delta_atoms[i][j]); + continuous_epsilon[j] = calculateDihedral(epsilon_atoms[i][j]); + continuous_zeta[j] = calculateDihedral(zeta_atoms[i][j]); } - continuous_delta[N_residue[i]] = Math::torsion( - delta_atoms[i][N_residue[i]][0], - delta_atoms[i][N_residue[i]][1], - delta_atoms[i][N_residue[i]][2], - delta_atoms[i][N_residue[i]][3]); + continuous_delta[N_residue[i]] = + calculateDihedral(delta_atoms[i][N_residue[i]]); alpha.push_back(continuous_alpha); beta.push_back(continuous_beta); @@ -111,8 +176,16 @@ namespace loos { } // calculateBackboneDihedrals() + double RnaSuite::calculateDihedral(const AtomicGroup &group) { + + double dihedral = Math::torsion(group[0], group[1], group[2], group[3]) + if (dihedral < 0.0) dihedral += 360.0; + return dihedral; + + } // calculateDihedral() + void RnaSuite::checkContinuousGroupSize( - const std::vector> &group_vector, + const vector> &group_vector, const size_t target_size, const string dihedral_name) const { if (group_vector.size() != target_size) { @@ -127,7 +200,7 @@ namespace loos { } // checkContinuousGroupSize() void RnaSuite::checkResidueSize( - const std::vector &residue_vector, + const vector &residue_vector, const size_t target_size, const 
string dihedral_name, const size_t group_index) const { @@ -142,14 +215,152 @@ namespace loos { } // checkResidueSize() + void defineSuites(const string suite_definition) { + + reference_suites.clear(); + + if (suite_definition == "suitename" + || suite_definition == "richardson") defineSuitesFromSuitename(); + + else { + + cout << boost::format("%s is not a recognized suite definition\n") + % suite_definition; + cout << "Must be one of: suitename" << endl; + throw(LOOSError()); + + } + + } // defineSuites() + + void defineSuitesFromFile(const string suite_definition_filename) { + + // TODO read suite definitions from file + cout << "Reading suite definitions from a file is not yet supported\n" + "Go yell at Chapin" << endl; + + } // defineSuitesFromFile() + + void defineSuitesFromSuitename() { + + // Means of dihedral angles + reference_suite_dihedrals = { + { // ddg index 0: C3' C3' plus + { 81.495, 212.250, 288.831, 294.967, 173.990, 53.550, 81.035}, + { 83.513, 218.120, 291.593, 292.247, 222.300, 58.067, 86.093}, + { 85.664, 245.014, 268.257, 303.879, 138.164, 61.950, 79.457}, + { 82.112, 190.682, 264.945, 295.967, 181.839, 51.455, 81.512}, + { 83.414, 217.400, 222.006, 302.856, 160.719, 49.097, 82.444}, + { 85.072, 216.324, 173.276, 289.320, 164.132, 45.876, 84.956}, + { 83.179, 210.347, 121.474, 288.568, 157.268, 49.347, 81.047}, + { 80.888, 218.636, 290.735, 167.447, 159.565, 51.326, 85.213}, + { 83.856, 238.750, 256.875, 69.562, 170.200, 52.800, 85.287}, + { 85.295, 244.085, 203.815, 65.880, 181.130, 54.680, 86.035}, + { 79.671, 202.471, 63.064, 68.164, 143.450, 49.664, 82.757}, + { 84.000, 195.000, 146.000, 170.000, 170.000, 52.000, 84.000} + }, { // ddg index 1: C3' C3' trans + { 80.514, 200.545, 280.510, 249.314, 82.662, 167.890, 85.507}, + { 80.223, 196.591, 291.299, 153.060, 194.379, 179.061, 83.648}, + { 81.395, 203.030, 294.445, 172.195, 138.540, 175.565, 84.470}, + { 87.417, 223.558, 80.175, 66.667, 109.150, 176.475, 83.833}, + { 86.055, 246.502, 
100.392, 73.595, 213.752, 183.395, 85.483} + }, { // ddg index 2: C3' C3' minus + }, { // ddg index 3: C3' C2' plus + { 84.215, 215.014, 288.672, 300.420, 177.476, 58.307, 144.841}, + { 82.731, 220.463, 288.665, 296.983, 221.654, 54.213, 143.771}, + { 84.700, 226.400, 168.336, 292.771, 177.629, 48.629, 147.950}, + { 83.358, 206.042, 277.567, 195.700, 161.600, 50.750, 145.258}, + { 82.614, 206.440, 52.524, 163.669, 148.421, 50.176, 147.590}, + { 84.285, 236.600, 220.400, 68.300, 200.122, 53.693, 145.730}, + { 84.457, 213.286, 69.086, 75.500, 156.671, 57.486, 147.686} + }, { // ddg index 4: C3' C2' trans + { 81.200, 199.243, 288.986, 180.286, 194.743, 178.200, 147.386}, + { 82.133, 204.933, 69.483, 63.417, 115.233, 176.283, 145.733} + }, { // ddg index 5: C3' C2' minus + { 83.977, 216.508, 287.192, 297.254, 225.154, 293.738, 150.677}, + { 84.606, 232.856, 248.125, 63.269, 181.975, 295.744, 149.744}, + { 83.000, 196.900, 65.350, 60.150, 138.425, 292.550, 154.275} + }, { // ddg index 6: C2' C3' plus + {145.399, 260.339, 288.756, 288.444, 192.733, 53.097, 84.067}, + {146.275, 259.783, 169.958, 298.450, 169.583, 50.908, 83.967}, + {149.286, 223.159, 139.421, 284.559, 158.107, 47.900, 84.424}, + {148.006, 191.944, 146.231, 289.288, 150.781, 42.419, 84.956}, + {148.028, 256.922, 165.194, 204.961, 165.194, 49.383, 82.983}, + {145.337, 262.869, 79.588, 203.863, 189.688, 58.000, 84.900}, + {148.992, 270.596, 240.892, 62.225, 176.271, 53.600, 87.262}, + {149.822, 249.956, 187.678, 80.433, 198.133, 61.000, 89.378}, + {146.922, 241.222, 88.894, 59.344, 160.683, 52.333, 83.417}, + {141.900, 258.383, 286.517, 178.267, 165.217, 48.350, 84.783} + }, { // ddg index 7: C2' C3' trans + {147.782, 260.712, 290.424, 296.200, 177.282, 175.594, 86.565}, + {143.722, 227.256, 203.789, 73.856, 216.733, 194.444, 80.911}, + {148.717, 274.683, 100.283, 80.600, 248.133, 181.817, 82.600}, + {150.311, 268.383, 84.972, 63.811, 191.483, 176.644, 85.600}, + {141.633, 244.100, 66.056, 71.667, 122.167, 
182.200, 83.622} + }, { // ddg index 8: C2' C3' minus + {149.070, 249.780, 111.520, 278.370, 207.780, 287.820, 86.650} + }, { // ddg index 9: C2' C2' plus + {146.383, 259.402, 291.275, 291.982, 210.048, 54.412, 147.760}, + {145.256, 244.622, 162.822, 294.159, 171.630, 45.900, 145.804}, + {147.593, 248.421, 112.086, 274.943, 164.764, 56.843, 146.264}, + {150.077, 260.246, 213.785, 71.900, 207.638, 56.715, 148.131}, + {146.415, 257.831, 89.597, 67.923, 173.051, 55.513, 147.623}, + {142.900, 236.550, 268.800, 180.783, 185.133, 54.467, 143.350} + }, { // ddg index 10: C2' C2' trans + {149.863, 247.562, 170.488, 277.938, 84.425, 176.413, 148.087}, + {143.940, 258.200, 298.240, 279.640, 183.680, 183.080, 145.120} + }, { // ddg index 11: C2' C2' minus + {147.342, 256.475, 295.508, 287.408, 194.525, 293.725, 150.458} + }; + + // Two-character suite name + reference_suite_names = { + {"1a", "1m", "1L", "&a", "7a", "3a", "9a", "1g", "7d", "3d", "5d", + "3g"}, + {"1e", "1c", "1f", "5j", "5n"}, + { }, + {"1b", "1[", "3b", "1z", "5z", "7p", "5p"}, + {"1t", "5q"}, + {"1o", "7r", "5r"}, + {"2a", "4a", "0a", "#a", "4g", "6g", "8d", "4d", "6d", "2g"}, + {"2h", "4n", "0i", "6n", "6j"}, + {"0k"}, + {"2[", "4b", "0b", "4p", "6p", "2z"}, + {"4s", "2u"}, + {"2o"} + }; + + // Delta(i-1), delta, gamma index. Delta can be C3' endo ("3") or + // C2' endo ("2"). Gamma can be plus ("p"), trans ("t"), or minus ("m"). 
+ reference_suite_ddgs = {"33p", "33t", "33m", "32p", "32t", "32m", "23p", + "23t", "23m", "22p", "22t", "22m"}; + + // Widths used to scale each dihedral dimension + dihedral_width = {28.0, 60.0, 55.0, 50.0, 70.0, 35.0, 28.0}; + + // Satellite widths used to scale overlapping clusters + satellite_width = {50.0, 50.0, 45.0, 60.0}; + + // Boundaries for allowed regions of delta(i-1), delta, and gamma + delta_min = { 60.0, 125.0}; + delta_max = {105.0, 165.0}; + gamma_min = { 20.0, 140.0, 260.0}; + gamma_max = { 95.0, 215.0, 335.0}; + + // Boundaries used to filter suites based on epsilon, zeta, alpha, beta + filter_min = {155.0, 25.0, 25.0, 50.0}; + filter_max = {310.0, 335.0, 335.0, 290.0}; + + } // defineSuitesFromSuitename() + void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { - std::vector continuous_alpha_atoms; - std::vector continuous_beta_atoms; - std::vector continuous_gamma_atoms; - std::vector continuous_delta_atoms; - std::vector continuous_epsilon_atoms; - std::vector continuous_zeta_atoms; + vector continuous_alpha_atoms; + vector continuous_beta_atoms; + vector continuous_gamma_atoms; + vector continuous_delta_atoms; + vector continuous_epsilon_atoms; + vector continuous_zeta_atoms; AtomicGroup dihedral_atoms; AtomicGroup residue_p; AtomicGroup residue_o5p; @@ -308,8 +519,9 @@ namespace loos { // are consistent across backbone dihedrals. Delta should have one // additional residue per continuous group. 
size_t residue_size; + N_suite = 0; - for (size_t i = 0; i < N_continuous_group; i++) { + for (size_t i = 0; i < N_continuous_group; ++i) { residue_size = alpha_atoms[i].size(); checkResidueSize(beta_atoms[i], residue_size, "beta", i + 1); @@ -318,11 +530,31 @@ namespace loos { checkResidueSize(epsilon_atoms[i], residue_size, "epsilon", i + 1); checkResidueSize(zeta_atoms[i], residue_size, "zeta", i + 1); N_residue.push_back(residue_size); + N_suite += residue_size; } } // extractRnaBackboneAtoms() + size_t RnaSuite::filterDDG(dihedral, vector &min, + vector &max, uint increment, uint ddg_index) { + + size_t i = 0; + while (i < min.size()) { + + if (dihedral >= min[i] && dihedral <= max[i]) { + + ddg_index += i * increment; + return i; + + } + + ++i; + + } + + } // filterDDG() + double RnaSuite::getSuitenessCutoff() const { return suiteness_cutoff; } // getSuitenessCutoff() @@ -332,38 +564,45 @@ namespace loos { size_t i_plus; size_t j_plus; + cout << "\n ==== Printing backbone atoms ====\n" << endl; + + if (N_continuous_group == 0) { + + cout << "Warning: backbone atoms are empty" << endl; + return; + + } + cout << boost::format("Number of continuous groups: %d\n") % N_continuous_group; - if (N_continuous_group == 0) return; - - for (size_t i = 0; i < N_continuous_group; i++) { + for (size_t i = 0; i < N_continuous_group; ++i) { i_plus = i + 1; cout << boost::format("Continuous group %d has %d residues\n") % i_plus % N_residue[i]; - for (size_t j = 0; j < N_residue[i]; j++) { + for (size_t j = 0; j < N_residue[i]; ++j) { j_plus = j + 1; cout << boost::format("Delta %d %d\n") % i_plus % j_plus; - cout << delta_atoms[i][j] << endl; + cout << delta_atoms[i][j] << "\n"; cout << boost::format("Epsilon %d %d\n") % i_plus % j_plus; - cout << epsilon_atoms[i][j] << endl; + cout << epsilon_atoms[i][j] << "\n"; cout << boost::format("Zeta %d %d\n") % i_plus % j_plus; - cout << zeta_atoms[i][j] << endl; + cout << zeta_atoms[i][j] << "\n"; cout << boost::format("Alpha %d 
%d\n") % i_plus % j_plus; - cout << alpha_atoms[i][j] << endl; + cout << alpha_atoms[i][j] << "\n"; cout << boost::format("Beta %d %d\n") % i_plus % j_plus; - cout << beta_atoms[i][j] << endl; + cout << beta_atoms[i][j] << "\n"; cout << boost::format("Gamma %d %d\n") % i_plus % j_plus; - cout << gamma_atoms[i][j] << endl; + cout << gamma_atoms[i][j] << "\n"; } cout << boost::format("Delta %d %d\n") % i_plus % (N_residue[i] + 1); - cout << delta_atoms[i][N_residue[i]] << endl; + cout << delta_atoms[i][N_residue[i]] << "\n"; } @@ -371,13 +610,21 @@ namespace loos { void RnaSuite::printBackboneDihedrals() const { - if (alpha.empty()) return; + cout << "\n ==== Printing backbone dihedrals ====\n" << endl; + + if (alpha.empty()) { + + cout << "Warning: backbone dihedrals are empty" << endl; + return; + + } - for (size_t i = 0; i < N_continuous_group; i++) { + for (size_t i = 0; i < N_continuous_group; ++i) { - for (size_t j = 0; j < N_residue[i]; j++) { + for (size_t j = 0; j < N_residue[i]; ++j) { - cout << boost::format("%4d %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") + cout << boost::format( + "%4d %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") % gamma_atoms[i][j][0]->resid() % delta[i][j] % epsilon[i][j] % zeta[i][j] % alpha[i][j] % beta[i][j] % gamma[i][j]; @@ -392,6 +639,38 @@ namespace loos { } // printBackboneDihedrals() + void RnaSuite::printReferenceSuites() const { + + cout << "\n ==== Printing reference suites ====\n" << endl; + + if (reference_suite_dihedrals.empty()) { + + cout << "Warning: reference suites are empty" << endl; + return; + + } + + for (size_t i = 0; i < reference_suite_dihedrals.size(); ++i) { + + for (size_t j = 0; j < reference_suite_dihedrals[i].size(); ++j) { + + cout << boost::format( + "%2s %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") + % reference_suite_names[i][j] % reference_suite_ddg[i][j] + % reference_suite_dihedrals[i][j][0] + % reference_suite_dihedrals[i][j][1] + % reference_suite_dihedrals[i][j][2] + % 
reference_suite_dihedrals[i][j][3] + % reference_suite_dihedrals[i][j][4] + % reference_suite_dihedrals[i][j][5] + % reference_suite_dihedrals[i][j][6]; + + } + + } + + } // printReferenceSuites() + void RnaSuite::setSuitenessCutoff(const double suiteness_cutoff_) { suiteness_cutoff = suiteness_cutoff_; } // setSuitenessCutoff() diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index 29d0d829e..da416feeb 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -63,16 +63,28 @@ namespace loos { */ void calculateBackboneDihedrals(); + //! Calculate a dihedral in deg from 4 atoms in the range [0, 360] + double calculateDihedral(const AtomicGroup &group); + //! Method to check the size of a vector of continuous groups void checkContinuousGroupSize( - const std::vector> &group_vector, + const vector> &group_vector, const size_t target_size, const string dihedral_name) const; //! Method to check the size of a vector of residues - void checkResidueSize(const std::vector &residue_vector, + void checkResidueSize(const vector &residue_vector, const size_t target_size, const string dihedral_name, const size_t group_index) const; + //! Method to define suites used for assignment from an existing scheme + void defineSuites(const string suite_definition); + + //! Method to define suites used for assignment from a file + void defineSuitesFromFile(const string suite_definition_filename); + + //! Method to define suites used for assignment from suitename + void defineSuitesFromSuitename(); + //! Method to extract RNA backbone atoms from an AtomicGroup /** * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', @@ -80,6 +92,10 @@ namespace loos { */ void extractRnaBackboneAtoms(const AtomicGroup &group); + //! Method to assign residues to a delta(i-1), delta, gamma index + size_t RnaSuite::filterDDG(dihedral, vector &min, + vector &max, uint increment, uint ddg_index); + //! 
Method to return the cutoff for the suiteness score of non-outliers double getSuitenessCutoff() const; @@ -89,41 +105,66 @@ namespace loos { //! Method to print backbone dihedrals for each residue void printBackboneDihedrals() const; + //! Method to print reference suite names and mean dihedrals + void printReferenceSuites() const; + //! Method to set the cutoff for the suiteness score of non-outliers void setSuitenessCutoff(const double suiteness_cutoff_); private: + // Reference suites used for assignment + vector>> reference_suite_dihedrals; + vector> reference_suite_names; + vector reference_suite_ddgs; + + // Widths used to scale each dihedral dimension + vector dihedral_width(7); + + // Satellite widths used to scale overlapping clusters + vector satellite_width(4); + + // Boundaries for allowed regions of delta(i-1), delta, and gamma + vector delta_min; + vector delta_max; + vector gamma_min; + vector gamma_max; + + // Boundaries used to filter suites based on epsilon, zeta, alpha, beta + vector filter_min(4); + vector filter_max(4); + // Vector of continuous groups, composed of vectors of AtomicGroups // for each residue within a continuous group - std::vector> alpha_atoms; - std::vector> beta_atoms; - std::vector> gamma_atoms; - std::vector> delta_atoms; - std::vector> epsilon_atoms; - std::vector> zeta_atoms; + vector> alpha_atoms; + vector> beta_atoms; + vector> gamma_atoms; + vector> delta_atoms; + vector> epsilon_atoms; + vector> zeta_atoms; // Vector of vectors of backbone dihedrals - std::vector> alpha; - std::vector> beta; - std::vector> gamma; - std::vector> delta; - std::vector> epsilon; - std::vector> zeta; + vector> alpha; + vector> beta; + vector> gamma; + vector> delta; + vector> epsilon; + vector> zeta; // Output: suite name (composed of a number-like character for the // 5' hemi-nucleotide and a letter-like character for the // 3' hemi-nucleotide) and suiteness score - std::vector suite_name_hemi5; - std::vector suite_name_hemi3; - 
std::vector suiteness; + vector suite_names; + vector suiteness; // Other internal variables size_t N_continuous_group = 0; vector N_residue; + size_t N_suite; double suiteness_cutoff; - }; + }; // RnaSuite class + } #endif From 58b9cf7fdfd63d90a98f71a916516f800af15f72 Mon Sep 17 00:00:00 2001 From: "Chapin E. Cavender" Date: Mon, 30 Mar 2020 01:07:18 -0400 Subject: [PATCH 36/41] Assignment of delta(i-1), delta, gamma index and closest cluster --- Tools/rna_suites.cpp | 6 ++ src/RnaSuite.cpp | 242 ++++++++++++++++++++++++++++++++++--------- src/RnaSuite.hpp | 30 ++++-- 3 files changed, 217 insertions(+), 61 deletions(-) diff --git a/Tools/rna_suites.cpp b/Tools/rna_suites.cpp index 5c470c3e3..a44249621 100644 --- a/Tools/rna_suites.cpp +++ b/Tools/rna_suites.cpp @@ -126,6 +126,10 @@ int main(int argc, char *argv[]) { // Create RNASuite object from RNA atoms RnaSuite rna_suite = RnaSuite(rna_atoms, suiteness_cutoff); + // Define reference suites from suitename + rna_suite.defineSuites("suitename"); + rna_suite.printReferenceSuites(); + // Print dihedrals rna_suite.printBackboneAtoms(); @@ -137,6 +141,8 @@ int main(int argc, char *argv[]) { rna_suite.calculateBackboneDihedrals(); rna_suite.printBackboneDihedrals(); + rna_suite.assignRichardsonSuites(); + rna_suite.printSuites(); } diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp index 2bf64c694..eebb021bc 100644 --- a/src/RnaSuite.cpp +++ b/src/RnaSuite.cpp @@ -52,18 +52,45 @@ namespace loos { // | Methods // |------------------------------------------------------------------------ + size_t RnaSuite::assignDDGIndex(double dihedral, vector &min, + vector &max, uint increment, uint &ddg_index) { + + size_t i = 0; + while (i < min.size()) { + + if (dihedral >= min[i] && dihedral <= max[i]) { + + ddg_index += i * increment; + return i; + + } + + ++i; + + } + + return i; + + } // assignDDGIndex() + void RnaSuite::assignRichardsonSuites() { - bool outlier; size_t N_delta = delta_min.size(); size_t N_gamma = 
gamma_min.size(); size_t N_dg = N_delta * N_gamma; - uint suite_counter = 0; vector suite(7); - // Index into delta, delta(j+1), gamma clusters + // Index into delta(i-1), delta, gamma clusters uint ddg_index; + // Scaled 4D hyperellipsoid distance in epsilon, zeta, alpha, beta + double dist_ezab; + + // Closest scaled 4D hyperellipsoid distance to a cluster and index of + // the associated cluster + double min_dist_ezab; + size_t min_dist_ezab_index; + if (alpha.empty()) { cout << "Warning: backbone dihedrals are empty" << endl; @@ -72,9 +99,11 @@ namespace loos { } // Initialize vectors of suite names and suiteness scores - suite_names.clear() - suiteness.clear() + suite_names.clear(); + suite_ddg.clear(); + suiteness.clear(); suite_names.reserve(N_suite); + suite_ddg.reserve(N_suite); suiteness.reserve(N_suite); for (size_t i = 0; i < N_continuous_group; ++i) { @@ -90,41 +119,113 @@ namespace loos { // Filter on 5' delta. Values outside of this range are // indicative of incorrect stereochemistry in the ribose. 
- if (filterDDG(suite[0], delta_min, delta_max, N_dg, ddg_index) - == N_delta) { + if (assignDDGIndex(suite[0], delta_min, delta_max, N_dg, + ddg_index) == N_delta) { - suite_names[suite_counter] = "!d"; - suiteness[suite_counter] = 0.0; + suite_names.push_back("!d"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); continue; } // Filter on 3' delta - if (filterDDG(suite[6], delta_min, delta_max, N_gamma, ddg_index) - == N_delta) { + if (assignDDGIndex(suite[6], delta_min, delta_max, N_gamma, + ddg_index) == N_delta) { - suite_names[suite_counter] = "!d"; - suiteness[suite_counter] = 0.0; + suite_names.push_back("!d"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); continue; } // Filter on gamma - if (filterDDG(suite[5], gamma_min, gamma_max, 1, ddg_index) - == N_gamma) { + if (assignDDGIndex(suite[5], gamma_min, gamma_max, 1, ddg_index) + == N_gamma) { - suite_names[suite_counter] = "!g"; - suiteness[suite_counter] = 0.0; + suite_names.push_back("!g"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // If there are no clusters associated with this ddg_index, then + // this is an outlier + if (N_reference_suite[ddg_index] == 0) { + + suite_names.push_back("!!"); + suite_ddg.push_back(reference_suite_ddgs[ddg_index]); + suiteness.push_back(0.0); continue; } // Filter on epsilon. Values outside of this range are // indicative of a misfit sugar pucker. 
+ if (suite[1] < filter_min[0] || suite[1] > filter_max[0]) { + + suite_names.push_back("!e"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // Filter on zeta + if (suite[2] < filter_min[1] || suite[2] > filter_max[1]) { + + suite_names.push_back("!z"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // Filter on alpha + if (suite[3] < filter_min[2] || suite[3] > filter_max[2]) { + + suite_names.push_back("!a"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // Filter on beta + if (suite[4] < filter_min[3] || suite[4] > filter_max[3]) { + + suite_names.push_back("!b"); + suite_ddg.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + cout << boost::format("%d %d %d") % i % j % ddg_index << endl; + // Find closest cluster in epsilon, zeta, alpha, beta + // Largest distance in 7D is 688.66 + min_dist_ezab = 999.0; + for (size_t k = 0; k < N_reference_suite[ddg_index]; ++k) { - // Get 4D scaled hyperellipsoid distance + // Get 4D scaled hyperellipsoid distance + dist_ezab = hyperellipsoidDist(suite, + reference_suite_dihedrals[ddg_index][k], 1, 4); - suite_counter++; + if (dist_ezab < min_dist_ezab) { + + min_dist_ezab = dist_ezab; + min_dist_ezab_index = k; + + } + + } + + suite_names.push_back( + reference_suite_names[ddg_index][min_dist_ezab_index]); + suite_ddg.push_back(reference_suite_ddgs[ddg_index]); + suiteness.push_back(1.0); } // loop over residues @@ -178,7 +279,7 @@ namespace loos { double RnaSuite::calculateDihedral(const AtomicGroup &group) { - double dihedral = Math::torsion(group[0], group[1], group[2], group[3]) + double dihedral = Math::torsion(group[0], group[1], group[2], group[3]); if (dihedral < 0.0) dihedral += 360.0; return dihedral; @@ -215,9 +316,11 @@ namespace loos { } // checkResidueSize() - void defineSuites(const string suite_definition) { + void RnaSuite::defineSuites(const string suite_definition) { - 
reference_suites.clear(); + reference_suite_dihedrals.clear(); + reference_suite_names.clear(); + reference_suite_ddgs.clear(); if (suite_definition == "suitename" || suite_definition == "richardson") defineSuitesFromSuitename(); @@ -233,7 +336,7 @@ namespace loos { } // defineSuites() - void defineSuitesFromFile(const string suite_definition_filename) { + void RnaSuite::defineSuitesFromFile(const string filename) { // TODO read suite definitions from file cout << "Reading suite definitions from a file is not yet supported\n" @@ -241,7 +344,7 @@ namespace loos { } // defineSuitesFromFile() - void defineSuitesFromSuitename() { + void RnaSuite::defineSuitesFromSuitename() { // Means of dihedral angles reference_suite_dihedrals = { @@ -311,7 +414,7 @@ namespace loos { {143.940, 258.200, 298.240, 279.640, 183.680, 183.080, 145.120} }, { // ddg index 11: C2' C2' minus {147.342, 256.475, 295.508, 287.408, 194.525, 293.725, 150.458} - }; + } }; // Two-character suite name reference_suite_names = { @@ -351,6 +454,12 @@ namespace loos { filter_min = {155.0, 25.0, 25.0, 50.0}; filter_max = {310.0, 335.0, 335.0, 290.0}; + // Get number of ddg clusters and number of suites in each ddg cluster + N_reference_ddg = reference_suite_dihedrals.size(); + N_reference_suite.clear(); + for (size_t i = 0; i < N_reference_ddg; ++i) + N_reference_suite.push_back(reference_suite_dihedrals[i].size()); + } // defineSuitesFromSuitename() void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { @@ -372,6 +481,7 @@ namespace loos { AtomicGroup prev_residue_c3p; AtomicGroup prev_residue_o3p; int current_resid = -2; + size_t residue_size; // True if this is the initial residue in a continuous group bool first_res = true; @@ -518,7 +628,7 @@ namespace loos { // Get number of residues in each continuous group and check that these // are consistent across backbone dihedrals. Delta should have one // additional residue per continuous group. 
- size_t residue_size; + N_residue.clear(); N_suite = 0; for (size_t i = 0; i < N_continuous_group; ++i) { @@ -536,28 +646,28 @@ namespace loos { } // extractRnaBackboneAtoms() - size_t RnaSuite::filterDDG(dihedral, vector &min, - vector &max, uint increment, uint ddg_index) { - - size_t i = 0; - while (i < min.size()) { + double RnaSuite::getSuitenessCutoff() const { + return suiteness_cutoff; + } // getSuitenessCutoff() - if (dihedral >= min[i] && dihedral <= max[i]) { + double RnaSuite::hyperellipsoidDist(vector &dihedrals, + vector &reference, uint first_index, uint last_index) { - ddg_index += i * increment; - return i; + double unscaled_diff; + double sum_scaled_powers = 0.0; - } + for (uint i = first_index; i <= last_index; ++i) { - ++i; + unscaled_diff = abs(dihedrals[i] - reference[i]); + // suitename program does not wrap unscaled coordinates + // if (unscaled_diff > 180.0) unscaled_diff = 360.0 - unscaled_diff; + sum_scaled_powers += pow(unscaled_diff / dihedral_width[i], 3.0); } - } // filterDDG() + return cbrt(sum_scaled_powers); - double RnaSuite::getSuitenessCutoff() const { - return suiteness_cutoff; - } // getSuitenessCutoff() + } // hyperellipsoidDist4() void RnaSuite::printBackboneAtoms() const { @@ -566,7 +676,7 @@ namespace loos { cout << "\n ==== Printing backbone atoms ====\n" << endl; - if (N_continuous_group == 0) { + if (alpha_atoms.empty()) { cout << "Warning: backbone atoms are empty" << endl; return; @@ -624,17 +734,14 @@ namespace loos { for (size_t j = 0; j < N_residue[i]; ++j) { cout << boost::format( - "%4d %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") - % gamma_atoms[i][j][0]->resid() % delta[i][j] + "%5d %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") + % gamma_atoms[i][j][0]->resid() + % gamma_atoms[i][j][0]->resname() % delta[i][j] % epsilon[i][j] % zeta[i][j] % alpha[i][j] % beta[i][j] - % gamma[i][j]; + % gamma[i][j] % delta[i][j + 1]; } - cout << boost::format("%4d %8.3f\n") - % delta_atoms[i][N_residue[i]][0]->resid() - % 
delta[i][N_residue[i]]; - } } // printBackboneDihedrals() @@ -650,13 +757,13 @@ namespace loos { } - for (size_t i = 0; i < reference_suite_dihedrals.size(); ++i) { + for (size_t i = 0; i < N_reference_ddg; ++i) { - for (size_t j = 0; j < reference_suite_dihedrals[i].size(); ++j) { + for (size_t j = 0; j < N_reference_suite[i]; ++j) { cout << boost::format( "%2s %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") - % reference_suite_names[i][j] % reference_suite_ddg[i][j] + % reference_suite_names[i][j] % reference_suite_ddgs[i] % reference_suite_dihedrals[i][j][0] % reference_suite_dihedrals[i][j][1] % reference_suite_dihedrals[i][j][2] @@ -671,6 +778,39 @@ namespace loos { } // printReferenceSuites() + void RnaSuite::printSuites() const { + + uint suite_counter = 0; + + cout << "\n ==== Printing suites ====\n" << endl; + + if (suite_names.empty()) { + + cout << "Warning: suites are empty" << endl; + return; + + } + + for (size_t i = 0; i < N_continuous_group; ++i) { + + for (size_t j = 0; j < N_residue[i]; ++j) { + + cout << boost::format("%5d %3s %2s %3s %8.6f %7.3f %7.3f %7.3f " + "%7.3f %7.3f %7.3f %7.3f\n") % gamma_atoms[i][j][0]->resid() + % gamma_atoms[i][j][0]->resname() + % suite_names[suite_counter] % suite_ddg[suite_counter] + % suiteness[suite_counter] % delta[i][j] % epsilon[i][j] + % zeta[i][j] % alpha[i][j] % beta[i][j] % gamma[i][j] + % delta[i][j + 1]; + + ++suite_counter; + + } + + } + + } // printReferenceSuites() + void RnaSuite::setSuitenessCutoff(const double suiteness_cutoff_) { suiteness_cutoff = suiteness_cutoff_; } // setSuitenessCutoff() diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index da416feeb..8375cc040 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -47,6 +47,10 @@ namespace loos { RnaSuite(); + //! Method to assign residues to a delta(i-1), delta, gamma index + size_t assignDDGIndex(double dihedral, vector &min, + vector &max, uint increment, uint &ddg_index); + //! 
Method to assign residues to backbone suites from Richardson et al. /** * This method assigns residues to one of the 46 backbone suites @@ -80,7 +84,7 @@ namespace loos { void defineSuites(const string suite_definition); //! Method to define suites used for assignment from a file - void defineSuitesFromFile(const string suite_definition_filename); + void defineSuitesFromFile(const string filename); //! Method to define suites used for assignment from suitename void defineSuitesFromSuitename(); @@ -92,13 +96,13 @@ namespace loos { */ void extractRnaBackboneAtoms(const AtomicGroup &group); - //! Method to assign residues to a delta(i-1), delta, gamma index - size_t RnaSuite::filterDDG(dihedral, vector &min, - vector &max, uint increment, uint ddg_index); - //! Method to return the cutoff for the suiteness score of non-outliers double getSuitenessCutoff() const; + //! Calculate a scaled hyperellipsoid distance between two points + double hyperellipsoidDist(vector &dihedrals, + vector &reference, uint first_index, uint last_index); + //! Method to print groups of backbone atoms for each dihedral void printBackboneAtoms() const; @@ -108,6 +112,9 @@ namespace loos { //! Method to print reference suite names and mean dihedrals void printReferenceSuites() const; + //! Method to print suite names, suiteness scores, and dihedrals + void printSuites() const; + //! 
Method to set the cutoff for the suiteness score of non-outliers void setSuitenessCutoff(const double suiteness_cutoff_); @@ -119,10 +126,10 @@ namespace loos { vector reference_suite_ddgs; // Widths used to scale each dihedral dimension - vector dihedral_width(7); + vector dihedral_width; // Satellite widths used to scale overlapping clusters - vector satellite_width(4); + vector satellite_width; // Boundaries for allowed regions of delta(i-1), delta, and gamma vector delta_min; @@ -131,8 +138,8 @@ namespace loos { vector gamma_max; // Boundaries used to filter suites based on epsilon, zeta, alpha, beta - vector filter_min(4); - vector filter_max(4); + vector filter_min; + vector filter_max; // Vector of continuous groups, composed of vectors of AtomicGroups // for each residue within a continuous group @@ -155,10 +162,13 @@ namespace loos { // 5' hemi-nucleotide and a letter-like character for the // 3' hemi-nucleotide) and suiteness score vector suite_names; + vector suite_ddg; vector suiteness; // Other internal variables - size_t N_continuous_group = 0; + size_t N_reference_ddg; + vector N_reference_suite; + size_t N_continuous_group; vector N_residue; size_t N_suite; double suiteness_cutoff; From 28267d858b1479472bda88f0918be5ca3a45bde9 Mon Sep 17 00:00:00 2001 From: "Chapin E. 
Cavender" Date: Wed, 8 Apr 2020 19:00:28 -0400 Subject: [PATCH 37/41] Suite assignment and SWIG interface; agrees with suitename for PDB 1S72 --- Tools/rna_suites.cpp | 33 ++- src/RnaSuite.cpp | 651 +++++++++++++++++++++++++++++-------------- src/RnaSuite.hpp | 116 +++++--- src/RnaSuite.i | 27 ++ src/loos.i | 3 +- 5 files changed, 567 insertions(+), 263 deletions(-) create mode 100644 src/RnaSuite.i diff --git a/Tools/rna_suites.cpp b/Tools/rna_suites.cpp index a44249621..46b620ac8 100644 --- a/Tools/rna_suites.cpp +++ b/Tools/rna_suites.cpp @@ -120,30 +120,39 @@ int main(int argc, char *argv[]) { vector indices = tropts->frameList(); AtomicGroup rna_atoms = selectAtoms(model, sopts->selection); - // Number of frames in trajectory - const uint N_frame = indices.size(); - // Create RNASuite object from RNA atoms RnaSuite rna_suite = RnaSuite(rna_atoms, suiteness_cutoff); - - // Define reference suites from suitename - rna_suite.defineSuites("suitename"); - rna_suite.printReferenceSuites(); + vector suite_resids = rna_suite.getSuiteResids(); + vector suite_resnames = rna_suite.getSuiteResnames(); + //rna_suite.printReferenceSuites(); // Print dihedrals - rna_suite.printBackboneAtoms(); + //rna_suite.printBackboneAtoms(); // Loop over trajectory - for (vector::iterator i = indices.begin(); i != indices.end(); i++) { + vector suite_names; + vector suite_ddgs; + vector suiteness; + uint t = 0; + for (vector::iterator i = indices.begin(); i != indices.end(); ++i) { traj->readFrame(*i); traj->updateGroupCoords(model); rna_suite.calculateBackboneDihedrals(); - rna_suite.printBackboneDihedrals(); - rna_suite.assignRichardsonSuites(); - rna_suite.printSuites(); + rna_suite.assignSuitenameSuites(); + suite_names = rna_suite.getSuiteNames(); + suite_ddgs = rna_suite.getSuiteDDGs(); + suiteness = rna_suite.getSuitenessScores(); + + for (uint j = 0; j < suite_resids.size(); ++j) + cout << boost::format("%5d %5d %3s %2s %2s %8.6f") % t + % suite_resids[j] % suite_resnames[j] % 
suite_names[j] + % suite_ddgs[j] % suiteness[j] << endl; + + ++t; } } + diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp index eebb021bc..02429b556 100644 --- a/src/RnaSuite.cpp +++ b/src/RnaSuite.cpp @@ -29,23 +29,46 @@ namespace loos { // | Constructors // |------------------------------------------------------------------------ - RnaSuite::RnaSuite(const AtomicGroup &group, + RnaSuite::RnaSuite(const AtomicGroup &group, const string suite_definition, const double suiteness_cutoff_) { + suiteness_cutoff = suiteness_cutoff_; + defineSuites(suite_definition); extractRnaBackboneAtoms(group); + + } + + RnaSuite::RnaSuite(const AtomicGroup &group, + const string suite_definition) { + + suiteness_cutoff = 0.01; + defineSuites(suite_definition); + extractRnaBackboneAtoms(group); + + } + + RnaSuite::RnaSuite(const AtomicGroup &group, + const double suiteness_cutoff_) { + suiteness_cutoff = suiteness_cutoff_; + defineSuites("suitename"); + extractRnaBackboneAtoms(group); } RnaSuite::RnaSuite(const AtomicGroup &group) { - extractRnaBackboneAtoms(group); suiteness_cutoff = 0.01; + defineSuites("suitename"); + extractRnaBackboneAtoms(group); } RnaSuite::RnaSuite() { + suiteness_cutoff = 0.01; + defineSuites("suitename"); + } // |------------------------------------------------------------------------ @@ -73,7 +96,7 @@ namespace loos { } // assignDDGIndex() - void RnaSuite::assignRichardsonSuites() { + void RnaSuite::assignSuitenameSuites() { size_t N_delta = delta_min.size(); size_t N_gamma = gamma_min.size(); @@ -89,190 +112,292 @@ namespace loos { // Closest scaled 4D hyperellipsoid distance to a cluster and index of // the associated cluster double min_dist_ezab; - size_t min_dist_ezab_index; + size_t min_index; + + // Closest scaled 4D hyperellipsoid distance to a dominant cluster + double dom_min_dist_ezab; + size_t dom_min_index; - if (alpha.empty()) { + // Closest scaled 4D hyperellipsoid distance to a non-dominant cluster + double sat_min_dist_ezab; + size_t 
sat_min_index; - cout << "Warning: backbone dihedrals are empty" << endl; + // Index into vector of widths for pair of dominant-satellite clusters + size_t dom_sat_index; + + // Number of clusters this dinucleotide could belong to + uint candidates; + + // Scaled 7D hyperellipsoid distance + double dist_7; + + // Index of the assigned suite + size_t assigned_suite_index; + + // Goodness-of-fit for assigned suite + double suiteness_score; + + if (suite_dihedrals.empty()) { + + cerr << "Warning: backbone dihedrals are empty" << endl; return; } // Initialize vectors of suite names and suiteness scores suite_names.clear(); - suite_ddg.clear(); + suite_ddgs.clear(); suiteness.clear(); suite_names.reserve(N_suite); - suite_ddg.reserve(N_suite); + suite_ddgs.reserve(N_suite); suiteness.reserve(N_suite); - for (size_t i = 0; i < N_continuous_group; ++i) { + for (size_t i = 0; i < N_suite; ++i) { - for (size_t j = 0; j < N_residue[i]; ++j) { + // Assign delta(j-1), delta, gamma index. These 3 dihedrals have + // 12 clusters that are independent of the other 4 dihedrals. + ddg_index = 0; - suite = {delta[i][j], epsilon[i][j], zeta[i][j], alpha[i][j], - beta[i][j], gamma[i][j], delta[i][j + 1]}; + // Filter on 5' delta. Values outside of this range are + // indicative of incorrect stereochemistry in the ribose. + if (assignDDGIndex(suite_dihedrals[i][0], delta_min, delta_max, + N_dg, ddg_index) == N_delta) { - // Assign delta(j-1), delta, gamma index. These 3 dihedrals have - // 12 clusters that are independent of the other 4 dihedrals. - ddg_index = 0; + suite_names.push_back("!d"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // Filter on 5' delta. Values outside of this range are - // indicative of incorrect stereochemistry in the ribose. 
- if (assignDDGIndex(suite[0], delta_min, delta_max, N_dg, - ddg_index) == N_delta) { + } - suite_names.push_back("!d"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + // Filter on 3' delta + if (assignDDGIndex(suite_dihedrals[i][6], delta_min, delta_max, + N_gamma, ddg_index) == N_delta) { - } + suite_names.push_back("!d"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // Filter on 3' delta - if (assignDDGIndex(suite[6], delta_min, delta_max, N_gamma, - ddg_index) == N_delta) { + } - suite_names.push_back("!d"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + // Filter on gamma + if (assignDDGIndex(suite_dihedrals[i][5], gamma_min, gamma_max, 1, + ddg_index) == N_gamma) { - } + suite_names.push_back("!g"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // Filter on gamma - if (assignDDGIndex(suite[5], gamma_min, gamma_max, 1, ddg_index) - == N_gamma) { + } - suite_names.push_back("!g"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + // Filter on epsilon. Values outside of this range are + // indicative of a misfit sugar pucker. + if (suite_dihedrals[i][1] < filter_min[0] + || suite_dihedrals[i][1] > filter_max[0]) { - } + suite_names.push_back("!e"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // If there are no clusters associated with this ddg_index, then - // this is an outlier - if (N_reference_suite[ddg_index] == 0) { + } - suite_names.push_back("!!"); - suite_ddg.push_back(reference_suite_ddgs[ddg_index]); - suiteness.push_back(0.0); - continue; + // Filter on zeta + if (suite_dihedrals[i][2] < filter_min[1] + || suite_dihedrals[i][2] > filter_max[1]) { - } + suite_names.push_back("!z"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // Filter on epsilon. Values outside of this range are - // indicative of a misfit sugar pucker. 
- if (suite[1] < filter_min[0] || suite[1] > filter_max[0]) { + } - suite_names.push_back("!e"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + // Filter on alpha + if (suite_dihedrals[i][3] < filter_min[2] + || suite_dihedrals[i][3] > filter_max[2]) { - } + suite_names.push_back("!a"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // Filter on zeta - if (suite[2] < filter_min[1] || suite[2] > filter_max[1]) { + } - suite_names.push_back("!z"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + // Filter on beta + if (suite_dihedrals[i][4] < filter_min[3] + || suite_dihedrals[i][4] > filter_max[3]) { - } + suite_names.push_back("!b"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; - // Filter on alpha - if (suite[3] < filter_min[2] || suite[3] > filter_max[2]) { + } + + // If there are no clusters associated with this ddg_index, then + // this is an outlier + if (N_reference_suite[ddg_index] == 0) { - suite_names.push_back("!a"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + suite_names.push_back("!!"); + suite_ddgs.push_back(reference_ddgs[ddg_index]); + suiteness.push_back(0.0); + continue; + + } + + // Find closest cluster in epsilon, zeta, alpha, beta + // Largest distance in 7D is 688.66^3, so 10^9 should be safe + min_dist_ezab = 999999999.0; + dom_min_dist_ezab = 999999999.0; + sat_min_dist_ezab = 999999999.0; + min_index = N_reference_suite[ddg_index]; + dom_min_index = N_reference_suite[ddg_index]; + sat_min_index = N_reference_suite[ddg_index]; + candidates = 0; + + for (size_t j = 0; j < N_reference_suite[ddg_index]; ++j) { + + // Get 4D scaled hyperellipsoid distance + dist_ezab = hyperellipsoidDist(suite_dihedrals[i], + reference_dihedrals[ddg_index][j], dihedral_width, 1, 4); + + // Get closest cluster + if (dist_ezab < min_dist_ezab) { + + min_dist_ezab = dist_ezab; + min_index = j; } - // Filter on beta - if (suite[4] < 
filter_min[3] || suite[4] > filter_max[3]) { + // Get closest non-dominant cluster + if (dominant_suites[ddg_index][j] != j + && dist_ezab < sat_min_dist_ezab) { - suite_names.push_back("!b"); - suite_ddg.push_back("!!!"); - suiteness.push_back(0.0); - continue; + sat_min_dist_ezab = dist_ezab; + sat_min_index = j; } - cout << boost::format("%d %d %d") % i % j % ddg_index << endl; - // Find closest cluster in epsilon, zeta, alpha, beta - // Largest distance in 7D is 688.66 - min_dist_ezab = 999.0; - for (size_t k = 0; k < N_reference_suite[ddg_index]; ++k) { + // If 4D distance < 1, this reference suite is a candidate + if (dist_ezab < 1) { - // Get 4D scaled hyperellipsoid distance - dist_ezab = hyperellipsoidDist(suite, - reference_suite_dihedrals[ddg_index][k], 1, 4); + ++candidates; - if (dist_ezab < min_dist_ezab) { + // Is this candidate a dominant cluster? + if (dominant_suites[ddg_index][j] == j) { - min_dist_ezab = dist_ezab; - min_dist_ezab_index = k; + dom_min_dist_ezab = dist_ezab; + dom_min_index = j; } } + } // loop over reference suites + + // Assign membership to a reference suite + + // If there are multiple candidates, and the two canidates are + // a dominant-satellite pair, then reweight distances + if (candidates > 1 && dom_min_index != N_reference_suite[ddg_index] + && sat_min_index != N_reference_suite[ddg_index] + && dominant_suites[ddg_index][sat_min_index] == dom_min_index) { + + // Is the DNMP in between the dominant and satellite suites? 
+ if (isBetweenDomSatPair(suite_dihedrals[i], + reference_dihedrals[ddg_index][dom_min_index], + reference_dihedrals[ddg_index][sat_min_index])) { + + // Rescale distances from point to dominant and satellite + // suites by ratio of distances from suite centers to + // boundary plane and assign to closest of the two + dom_sat_index = dom_sat_pair_index[ddg_index][sat_min_index]; + if (hyperellipsoidDist(suite_dihedrals[i], + reference_dihedrals[ddg_index][sat_min_index], + satellite_width[dom_sat_index], 1, 4) + <= hyperellipsoidDist(suite_dihedrals[i], + reference_dihedrals[ddg_index][dom_min_index], + dominant_width[dom_sat_index], 1, 4)) + + assigned_suite_index = sat_min_index; + + else assigned_suite_index = dom_min_index; + + + } + + else { + + // Assign to closer of dominant or satellite suite + if (sat_min_dist_ezab <= dom_min_dist_ezab) + assigned_suite_index = sat_min_index; + else assigned_suite_index = dom_min_index; + + } + + } + + // If there is zero or one candidate or multiple candidates but no + // dominant-satellite pair, then assign to the closest suite + else assigned_suite_index = min_index; + + // Make a final decision on whether this is an outlier using 7D + // hyperellipsoid distance + dist_7 = hyperellipsoidDist(suite_dihedrals[i], + reference_dihedrals[ddg_index][assigned_suite_index], + dihedral_width, 0, 6); + + if (dist_7 < 1) { + suite_names.push_back( - reference_suite_names[ddg_index][min_dist_ezab_index]); - suite_ddg.push_back(reference_suite_ddgs[ddg_index]); - suiteness.push_back(1.0); + reference_names[ddg_index][assigned_suite_index]); + suite_ddgs.push_back(reference_ddgs[ddg_index]); + suiteness_score = (1 + cos(M_PI * cbrt(dist_7))) / 2.0; + if (suiteness_score < suiteness_cutoff) + suiteness_score = suiteness_cutoff; + suiteness.push_back(suiteness_score); + + } else { - } // loop over residues + suite_names.push_back("!!"); + suite_ddgs.push_back(reference_ddgs[ddg_index]); + suiteness.push_back(0.0); - } // loop over 
continuous groups + } + + } // loop over suites - } // assignRichardsonSuites() + } // assignSuitenameSuites() void RnaSuite::calculateBackboneDihedrals() { - // Clear vector of vectors of doubles for each backbone dihedral - alpha.clear(); - beta.clear(); - gamma.clear(); - delta.clear(); - epsilon.clear(); - zeta.clear(); + double prev_delta; + vector suite(7); + + // Clear vector of doubles for suite backbone dihedrals + suite_dihedrals.clear(); for (size_t i = 0; i < N_continuous_group; ++i) { - vector continuous_alpha(N_residue[i]); - vector continuous_beta(N_residue[i]); - vector continuous_gamma(N_residue[i]); - vector continuous_delta(N_residue[i] + 1); - vector continuous_epsilon(N_residue[i]); - vector continuous_zeta(N_residue[i]); + prev_delta = calculateDihedral(delta_atoms[i][0]); for (size_t j = 0; j < N_residue[i]; ++j) { - continuous_alpha[j] = calculateDihedral(alpha_atoms[i][j]); - continuous_beta[j] = calculateDihedral(beta_atoms[i][j]); - continuous_gamma[j] = calculateDihedral(gamma_atoms[i][j]); - continuous_delta[j] = calculateDihedral(delta_atoms[i][j]); - continuous_epsilon[j] = calculateDihedral(epsilon_atoms[i][j]); - continuous_zeta[j] = calculateDihedral(zeta_atoms[i][j]); + suite[0] = prev_delta; + suite[1] = calculateDihedral(epsilon_atoms[i][j]); + suite[2] = calculateDihedral(zeta_atoms[i][j]); + suite[3] = calculateDihedral(alpha_atoms[i][j]); + suite[4] = calculateDihedral(beta_atoms[i][j]); + suite[5] = calculateDihedral(gamma_atoms[i][j]); + prev_delta = calculateDihedral(delta_atoms[i][j + 1]); + suite[6] = prev_delta; + suite_dihedrals.push_back(suite); } - continuous_delta[N_residue[i]] = - calculateDihedral(delta_atoms[i][N_residue[i]]); - - alpha.push_back(continuous_alpha); - beta.push_back(continuous_beta); - gamma.push_back(continuous_gamma); - delta.push_back(continuous_delta); - epsilon.push_back(continuous_epsilon); - zeta.push_back(continuous_zeta); - } } // calculateBackboneDihedrals() @@ -291,7 +416,7 @@ namespace 
loos { if (group_vector.size() != target_size) { - cout << boost::format("Error: different number of continuous " + cerr << boost::format("Error: different number of continuous " "groups for alpha (%d) and %s (%d)\n") % target_size % dihedral_name % group_vector.size(); throw(LOOSError()); @@ -307,7 +432,7 @@ namespace loos { if (residue_vector.size() != target_size) { - cout << boost::format("Error: different number of residues in " + cerr << boost::format("Error: different number of residues in " "continuous group %d for alpha (%d) and %s (%d)\n") % group_index % target_size % dihedral_name % residue_vector.size(); @@ -318,18 +443,18 @@ namespace loos { void RnaSuite::defineSuites(const string suite_definition) { - reference_suite_dihedrals.clear(); - reference_suite_names.clear(); - reference_suite_ddgs.clear(); + reference_dihedrals.clear(); + reference_names.clear(); + reference_ddgs.clear(); if (suite_definition == "suitename" || suite_definition == "richardson") defineSuitesFromSuitename(); else { - cout << boost::format("%s is not a recognized suite definition\n") + cerr << boost::format("%s is not a recognized suite definition\n") % suite_definition; - cout << "Must be one of: suitename" << endl; + cerr << "Must be one of: suitename" << endl; throw(LOOSError()); } @@ -339,15 +464,17 @@ namespace loos { void RnaSuite::defineSuitesFromFile(const string filename) { // TODO read suite definitions from file - cout << "Reading suite definitions from a file is not yet supported\n" + cerr << "Reading suite definitions from a file is not yet supported\n" "Go yell at Chapin" << endl; } // defineSuitesFromFile() void RnaSuite::defineSuitesFromSuitename() { + size_t suite_size; + // Means of dihedral angles - reference_suite_dihedrals = { + reference_dihedrals = { { // ddg index 0: C3' C3' plus { 81.495, 212.250, 288.831, 294.967, 173.990, 53.550, 81.035}, { 83.513, 218.120, 291.593, 292.247, 222.300, 58.067, 86.093}, @@ -416,8 +543,49 @@ namespace loos { {147.342, 
256.475, 295.508, 287.408, 194.525, 293.725, 150.458} } }; + // Get number of ddg clusters and number of suites in each ddg cluster + // Dominant suites lists indices of the dominant suite associated with + // a satellite suite. A value of reference_suite_dihedrals.size() + // that this suite is neither dominant nor satellite. A dominant suite + // will point to its own index. + N_reference_ddg = reference_dihedrals.size(); + N_reference_suite.clear(); + dominant_suites.clear(); + for (size_t i = 0; i < N_reference_ddg; ++i) { + + suite_size = reference_dihedrals[i].size(); + N_reference_suite.push_back(suite_size); + vector dom_suites(suite_size, suite_size); + dominant_suites.push_back(dom_suites); + + } + + // 1m, 1L, and &a are satellites of 1a + dominant_suites[0][0] = 0; + dominant_suites[0][1] = 0; + dominant_suites[0][2] = 0; + dominant_suites[0][3] = 0; + + // 1f is a satellite of 1c + dominant_suites[1][1] = 1; + dominant_suites[1][2] = 1; + + // 1[ is a satellite of 1b + dominant_suites[3][0] = 0; + dominant_suites[3][1] = 0; + + // 4a and #a are satellites of 0a + dominant_suites[6][2] = 2; + dominant_suites[6][1] = 2; + dominant_suites[6][3] = 2; + + // 0i nd 6j are satellites of 6n + dominant_suites[7][3] = 3; + dominant_suites[7][2] = 3; + dominant_suites[7][4] = 3; + // Two-character suite name - reference_suite_names = { + reference_names = { {"1a", "1m", "1L", "&a", "7a", "3a", "9a", "1g", "7d", "3d", "5d", "3g"}, {"1e", "1c", "1f", "5j", "5n"}, @@ -433,16 +601,54 @@ namespace loos { {"2o"} }; - // Delta(i-1), delta, gamma index. Delta can be C3' endo ("3") or - // C2' endo ("2"). Gamma can be plus ("p"), trans ("t"), or minus ("m"). 
- reference_suite_ddgs = {"33p", "33t", "33m", "32p", "32t", "32m", "23p", - "23t", "23m", "22p", "22t", "22m"}; - // Widths used to scale each dihedral dimension dihedral_width = {28.0, 60.0, 55.0, 50.0, 70.0, 35.0, 28.0}; - // Satellite widths used to scale overlapping clusters - satellite_width = {50.0, 50.0, 45.0, 60.0}; + // Alternative widths used to scale dominant-satellite pairs + dominant_width = { + {28.0, 60.0, 55.0, 50.0, 64.0, 35.0, 28.0}, + {28.0, 70.0, 55.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 60.0, 60.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 65.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 56.0, 35.0, 28.0}, + {28.0, 50.0, 50.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 36.0, 36.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0} + }; + + satellite_width = { + {28.0, 60.0, 55.0, 50.0, 32.0, 35.0, 28.0}, + {28.0, 18.0, 55.0, 50.0, 18.0, 35.0, 28.0}, + {28.0, 20.0, 20.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 47.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 34.0, 35.0, 28.0}, + {28.0, 40.0, 40.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 26.0, 26.0, 50.0, 70.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0}, + {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0}, + }; + + // Index into dominant-satellite pair widths + dom_sat_pair_index = { + {9, 0, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9}, + {9, 9, 3, 9, 9}, + { }, + {9, 4, 9, 9, 9, 9, 9}, + {9, 9}, + {9, 9, 9}, + {9, 5, 9, 6, 9, 9, 9, 9, 9, 9}, + {9, 9, 7, 9, 8}, + {9}, + {9, 9, 9, 9, 9, 9}, + {9, 9}, + {9} + }; + + // Delta(i-1), delta, gamma index. Delta can be C3' endo ("3") or + // C2' endo ("2"). Gamma can be plus ("p"), trans ("t"), or minus ("m"). 
+ reference_ddgs = {"33p", "33t", "33m", "32p", "32t", "32m", "23p", + "23t", "23m", "22p", "22t", "22m"}; // Boundaries for allowed regions of delta(i-1), delta, and gamma delta_min = { 60.0, 125.0}; @@ -454,12 +660,6 @@ namespace loos { filter_min = {155.0, 25.0, 25.0, 50.0}; filter_max = {310.0, 335.0, 335.0, 290.0}; - // Get number of ddg clusters and number of suites in each ddg cluster - N_reference_ddg = reference_suite_dihedrals.size(); - N_reference_suite.clear(); - for (size_t i = 0; i < N_reference_ddg; ++i) - N_reference_suite.push_back(reference_suite_dihedrals[i].size()); - } // defineSuitesFromSuitename() void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { @@ -629,7 +829,8 @@ namespace loos { // are consistent across backbone dihedrals. Delta should have one // additional residue per continuous group. N_residue.clear(); - N_suite = 0; + suite_resids.clear(); + suite_resnames.clear(); for (size_t i = 0; i < N_continuous_group; ++i) { @@ -640,18 +841,51 @@ namespace loos { checkResidueSize(epsilon_atoms[i], residue_size, "epsilon", i + 1); checkResidueSize(zeta_atoms[i], residue_size, "zeta", i + 1); N_residue.push_back(residue_size); - N_suite += residue_size; + + for (size_t j = 0; j < residue_size; ++j) { + + suite_resids.push_back(gamma_atoms[i][j][0]->resid()); + suite_resnames.push_back(gamma_atoms[i][j][0]->resname()); + + } } + N_suite = suite_resids.size(); + } // extractRnaBackboneAtoms() + vector RnaSuite::getSuiteDDGs() const { + return suite_ddgs; + } // getSuiteDDGs() + + vector> RnaSuite::getSuiteDihedrals() const { + return suite_dihedrals; + } // getSuiteDihedrals() + + vector RnaSuite::getSuiteNames() const { + return suite_names; + } // getSuiteNames() + + vector RnaSuite::getSuiteResids() const { + return suite_resids; + } // getSuiteResids() + + vector RnaSuite::getSuiteResnames() const { + return suite_resnames; + } // getSuiteResnames() + double RnaSuite::getSuitenessCutoff() const { return suiteness_cutoff; } // 
getSuitenessCutoff() - double RnaSuite::hyperellipsoidDist(vector &dihedrals, - vector &reference, uint first_index, uint last_index) { + vector RnaSuite::getSuitenessScores() const { + return suiteness; + } // getSuitenessScores() + + double RnaSuite::hyperellipsoidDist(const vector &dihedrals, + const vector &reference, const vector &width, + uint first_index, uint last_index) { double unscaled_diff; double sum_scaled_powers = 0.0; @@ -661,14 +895,40 @@ namespace loos { unscaled_diff = abs(dihedrals[i] - reference[i]); // suitename program does not wrap unscaled coordinates // if (unscaled_diff > 180.0) unscaled_diff = 360.0 - unscaled_diff; - sum_scaled_powers += pow(unscaled_diff / dihedral_width[i], 3.0); + sum_scaled_powers += pow(unscaled_diff / width[i], 3.0); } - return cbrt(sum_scaled_powers); + return sum_scaled_powers; } // hyperellipsoidDist4() + bool RnaSuite::isBetweenDomSatPair(const vector &dihedrals, + const vector &dominant, const vector &satellite) { + + double dom_to_sat; + double dom_dot_product = 0; + double sat_dot_product = 0; + + // If the point is in between the dominant and satellite reference + // suites, then the dot product between the vectors (point - dominant) + // and (satellite - dominant) and the dot product between the vectors + // (point - satellite) and (dominant - satellite) should both be + // positive, i.e. the cosine of the angles is positive. 
+ for (uint i = 1; i <= 4; ++i) { + + dom_to_sat = satellite[i] - dominant[i]; + dom_dot_product += (dihedrals[i] - dominant[i]) * dom_to_sat; + // sat_dot_product += (dihedrals[i] - satellite[i]) * sat_to_dom + // sat_to_dom = -dom_to_sat + sat_dot_product += (satellite[i] - dihedrals[i]) * dom_to_sat; + + } + + return dom_dot_product > 0 && sat_dot_product > 0; + + } // isBetweenDomSatPair() + void RnaSuite::printBackboneAtoms() const { size_t i_plus; @@ -678,7 +938,7 @@ namespace loos { if (alpha_atoms.empty()) { - cout << "Warning: backbone atoms are empty" << endl; + cerr << "Warning: backbone atoms are empty" << endl; return; } @@ -722,27 +982,20 @@ namespace loos { cout << "\n ==== Printing backbone dihedrals ====\n" << endl; - if (alpha.empty()) { + if (suite_dihedrals.empty()) { - cout << "Warning: backbone dihedrals are empty" << endl; + cerr << "Warning: backbone dihedrals are empty" << endl; return; } - for (size_t i = 0; i < N_continuous_group; ++i) { - - for (size_t j = 0; j < N_residue[i]; ++j) { - - cout << boost::format( - "%5d %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") - % gamma_atoms[i][j][0]->resid() - % gamma_atoms[i][j][0]->resname() % delta[i][j] - % epsilon[i][j] % zeta[i][j] % alpha[i][j] % beta[i][j] - % gamma[i][j] % delta[i][j + 1]; - - } - - } + for (size_t i = 0; i < N_suite; ++i) + cout << boost::format( + "%5d %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") + % suite_resids[i] % suite_resnames[i] % suite_dihedrals[i][0] + % suite_dihedrals[i][1] % suite_dihedrals[i][2] + % suite_dihedrals[i][3] % suite_dihedrals[i][4] + % suite_dihedrals[i][5] % suite_dihedrals[i][6]; } // printBackboneDihedrals() @@ -750,69 +1003,55 @@ namespace loos { cout << "\n ==== Printing reference suites ====\n" << endl; - if (reference_suite_dihedrals.empty()) { + if (reference_dihedrals.empty()) { - cout << "Warning: reference suites are empty" << endl; + cerr << "Warning: reference suites are empty" << endl; return; } - for (size_t i = 0; i < 
N_reference_ddg; ++i) { - - for (size_t j = 0; j < N_reference_suite[i]; ++j) { - + for (size_t i = 0; i < N_reference_ddg; ++i) + for (size_t j = 0; j < N_reference_suite[i]; ++j) cout << boost::format( "%2s %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") - % reference_suite_names[i][j] % reference_suite_ddgs[i] - % reference_suite_dihedrals[i][j][0] - % reference_suite_dihedrals[i][j][1] - % reference_suite_dihedrals[i][j][2] - % reference_suite_dihedrals[i][j][3] - % reference_suite_dihedrals[i][j][4] - % reference_suite_dihedrals[i][j][5] - % reference_suite_dihedrals[i][j][6]; - - } - - } + % reference_names[i][j] % reference_ddgs[i] + % reference_dihedrals[i][j][0] + % reference_dihedrals[i][j][1] + % reference_dihedrals[i][j][2] + % reference_dihedrals[i][j][3] + % reference_dihedrals[i][j][4] + % reference_dihedrals[i][j][5] + % reference_dihedrals[i][j][6]; } // printReferenceSuites() void RnaSuite::printSuites() const { - uint suite_counter = 0; - cout << "\n ==== Printing suites ====\n" << endl; if (suite_names.empty()) { - cout << "Warning: suites are empty" << endl; + cerr << "Warning: suites are empty" << endl; return; } - for (size_t i = 0; i < N_continuous_group; ++i) { - - for (size_t j = 0; j < N_residue[i]; ++j) { - - cout << boost::format("%5d %3s %2s %3s %8.6f %7.3f %7.3f %7.3f " - "%7.3f %7.3f %7.3f %7.3f\n") % gamma_atoms[i][j][0]->resid() - % gamma_atoms[i][j][0]->resname() - % suite_names[suite_counter] % suite_ddg[suite_counter] - % suiteness[suite_counter] % delta[i][j] % epsilon[i][j] - % zeta[i][j] % alpha[i][j] % beta[i][j] % gamma[i][j] - % delta[i][j + 1]; - - ++suite_counter; - - } - - } + for (size_t i = 0; i < N_suite; ++i) + cout << boost::format("%5d %3s %2s %3s %8.6f %7.3f %7.3f %7.3f " + "%7.3f %7.3f %7.3f %7.3f\n") % suite_resids[i] + % suite_resnames[i] % suite_names[i] % suite_ddgs[i] + % suiteness[i] % suite_dihedrals[i][0] % suite_dihedrals[i][1] + % suite_dihedrals[i][2] % suite_dihedrals[i][3] + % 
suite_dihedrals[i][4] % suite_dihedrals[i][5] + % suite_dihedrals[i][6]; - } // printReferenceSuites() + } // printSuites() void RnaSuite::setSuitenessCutoff(const double suiteness_cutoff_) { + suiteness_cutoff = suiteness_cutoff_; + } // setSuitenessCutoff() } + diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index 8375cc040..b3d68e2d1 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -41,16 +41,17 @@ namespace loos { public: + RnaSuite(const AtomicGroup &group, const string suite_defintion, + const double suiteness_cutoff_); + + RnaSuite(const AtomicGroup &group, const string suite_definition); + RnaSuite(const AtomicGroup &group, const double suiteness_cutoff_); RnaSuite(const AtomicGroup &group); RnaSuite(); - //! Method to assign residues to a delta(i-1), delta, gamma index - size_t assignDDGIndex(double dihedral, vector &min, - vector &max, uint increment, uint &ddg_index); - //! Method to assign residues to backbone suites from Richardson et al. /** * This method assigns residues to one of the 46 backbone suites @@ -58,7 +59,7 @@ namespace loos { * residue is defined from delta of the previous residue to delta of * the current residue. */ - void assignRichardsonSuites(); + void assignSuitenameSuites(); //! Method to calculate backbone dihedrals for each RNA residue /** @@ -67,28 +68,9 @@ namespace loos { */ void calculateBackboneDihedrals(); - //! Calculate a dihedral in deg from 4 atoms in the range [0, 360] - double calculateDihedral(const AtomicGroup &group); - - //! Method to check the size of a vector of continuous groups - void checkContinuousGroupSize( - const vector> &group_vector, - const size_t target_size, const string dihedral_name) const; - - //! Method to check the size of a vector of residues - void checkResidueSize(const vector &residue_vector, - const size_t target_size, const string dihedral_name, - const size_t group_index) const; - //! 
Method to define suites used for assignment from an existing scheme void defineSuites(const string suite_definition); - //! Method to define suites used for assignment from a file - void defineSuitesFromFile(const string filename); - - //! Method to define suites used for assignment from suitename - void defineSuitesFromSuitename(); - //! Method to extract RNA backbone atoms from an AtomicGroup /** * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', @@ -96,12 +78,26 @@ namespace loos { */ void extractRnaBackboneAtoms(const AtomicGroup &group); + //! Method to return the current indices into delta delta gamma clusters + vector getSuiteDDGs() const; + + //! Method to return the current backbone dihedrals + vector> getSuiteDihedrals() const; + + //! Method to return the current assigned suite names + vector getSuiteNames() const; + + //! Method to return the suite residue indices + vector getSuiteResids() const; + + //! Method to return the suite residue names + vector getSuiteResnames() const; + //! Method to return the cutoff for the suiteness score of non-outliers double getSuitenessCutoff() const; - //! Calculate a scaled hyperellipsoid distance between two points - double hyperellipsoidDist(vector &dihedrals, - vector &reference, uint first_index, uint last_index); + //! Method to return the current suiteness scores + vector getSuitenessScores() const; //! Method to print groups of backbone atoms for each dihedral void printBackboneAtoms() const; @@ -120,16 +116,53 @@ namespace loos { private: + //! Method to assign residues to a delta(i-1), delta, gamma index + size_t assignDDGIndex(double dihedral, vector &min, + vector &max, uint increment, uint &ddg_index); + + //! Calculate a dihedral in deg from 4 atoms in the range [0, 360] + double calculateDihedral(const AtomicGroup &group); + + //! 
Method to check the size of a vector of continuous groups + void checkContinuousGroupSize( + const vector> &group_vector, + const size_t target_size, const string dihedral_name) const; + + //! Method to check the size of a vector of residues + void checkResidueSize(const vector &residue_vector, + const size_t target_size, const string dihedral_name, + const size_t group_index) const; + + //! Method to define suites used for assignment from a file + void defineSuitesFromFile(const string filename); + + //! Method to define suites used for assignment from suitename + void defineSuitesFromSuitename(); + + //! Method to test whether a point is in between two reference points + bool isBetweenDomSatPair(const vector &dihedrals, + const vector &dominant, const vector &satellite); + + //! Calculate a scaled hyperellipsoid distance between two points + double hyperellipsoidDist(const vector &dihedrals, + const vector &reference, const vector &width, + uint first_index, uint last_index); + // Reference suites used for assignment - vector>> reference_suite_dihedrals; - vector> reference_suite_names; - vector reference_suite_ddgs; + vector>> reference_dihedrals; + vector> reference_names; + vector reference_ddgs; + vector> dominant_suites; // Widths used to scale each dihedral dimension vector dihedral_width; - // Satellite widths used to scale overlapping clusters - vector satellite_width; + // Alternative widths used to scale dominant-satellite pairs + vector> dominant_width; + vector> satellite_width; + + // Index into dominant-satellite pair widths + vector> dom_sat_pair_index; // Boundaries for allowed regions of delta(i-1), delta, and gamma vector delta_min; @@ -150,19 +183,14 @@ namespace loos { vector> epsilon_atoms; vector> zeta_atoms; - // Vector of vectors of backbone dihedrals - vector> alpha; - vector> beta; - vector> gamma; - vector> delta; - vector> epsilon; - vector> zeta; - - // Output: suite name (composed of a number-like character for the - // 5' 
hemi-nucleotide and a letter-like character for the - // 3' hemi-nucleotide) and suiteness score + // Suite residue ids, residue names, and dihedrals + vector suite_resids; + vector suite_resnames; + vector> suite_dihedrals; + + // Assigned suite names, ddg indices, and suiteness scores vector suite_names; - vector suite_ddg; + vector suite_ddgs; vector suiteness; // Other internal variables diff --git a/src/RnaSuite.i b/src/RnaSuite.i new file mode 100644 index 000000000..444b78aed --- /dev/null +++ b/src/RnaSuite.i @@ -0,0 +1,27 @@ +/* + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008, Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +%header %{ +#include +%} + +%include "RnaSuite.hpp" + diff --git a/src/loos.i b/src/loos.i index c251fb2b2..adbec630f 100644 --- a/src/loos.i +++ b/src/loos.i @@ -52,7 +52,7 @@ namespace loos { %template(DoubleVectorMatrix) std::vector< std::vector >; %template(IntVector) std::vector; %template(UIntVector) std::vector; - +%template(StringVector) std::vector; %include "exceptions.i" @@ -81,3 +81,4 @@ namespace loos { %include "gro.i" %include "utils_structural.i" %include "Weights.i" +%include "RnaSuite.i" From 1c8c2b37b049cd5daa67c5e3fc3649cf9b5d3dbf Mon Sep 17 00:00:00 2001 From: "Chapin E. 
Cavender" Date: Wed, 15 Apr 2020 04:05:22 -0400 Subject: [PATCH 38/41] Read reference suite definitions from file; remove C++11 features --- Tools/suitename_definitions.dat | 82 ++++++ src/RnaSuite.cpp | 459 ++++++++++++++++---------------- src/RnaSuite.hpp | 47 ++-- 3 files changed, 338 insertions(+), 250 deletions(-) create mode 100644 Tools/suitename_definitions.dat diff --git a/Tools/suitename_definitions.dat b/Tools/suitename_definitions.dat new file mode 100644 index 000000000..1969a283f --- /dev/null +++ b/Tools/suitename_definitions.dat @@ -0,0 +1,82 @@ +# Reference suite dihedrals +# Name DDG Delta-1 Epsilon Zeta Alpha Beta Gamma Delta +suite 1a 33p 81.495 212.250 288.831 294.967 173.990 53.550 81.035 +suite 1m 33p 83.513 218.120 291.593 292.247 222.300 58.067 86.093 +suite 1L 33p 85.664 245.014 268.257 303.879 138.164 61.950 79.457 +suite &a 33p 82.112 190.682 264.945 295.967 181.839 51.455 81.512 +suite 7a 33p 83.414 217.400 222.006 302.856 160.719 49.097 82.444 +suite 3a 33p 85.072 216.324 173.276 289.320 164.132 45.876 84.956 +suite 9a 33p 83.179 210.347 121.474 288.568 157.268 49.347 81.047 +suite 1g 33p 80.888 218.636 290.735 167.447 159.565 51.326 85.213 +suite 7d 33p 83.856 238.750 256.875 69.562 170.200 52.800 85.287 +suite 3d 33p 85.295 244.085 203.815 65.880 181.130 54.680 86.035 +suite 5d 33p 79.671 202.471 63.064 68.164 143.450 49.664 82.757 +suite 3g 33p 84.000 195.000 146.000 170.000 170.000 52.000 84.000 +suite 1e 33t 80.514 200.545 280.510 249.314 82.662 167.890 85.507 +suite 1c 33t 80.223 196.591 291.299 153.060 194.379 179.061 83.648 +suite 1f 33t 81.395 203.030 294.445 172.195 138.540 175.565 84.470 +suite 5j 33t 87.417 223.558 80.175 66.667 109.150 176.475 83.833 +suite 5n 33t 86.055 246.502 100.392 73.595 213.752 183.395 85.483 +suite 33m +suite 1b 32p 84.215 215.014 288.672 300.420 177.476 58.307 144.841 +suite 1[ 32p 82.731 220.463 288.665 296.983 221.654 54.213 143.771 +suite 3b 32p 84.700 226.400 168.336 292.771 177.629 48.629 
147.950 +suite 1z 32p 83.358 206.042 277.567 195.700 161.600 50.750 145.258 +suite 5z 32p 82.614 206.440 52.524 163.669 148.421 50.176 147.590 +suite 7p 32p 84.285 236.600 220.400 68.300 200.122 53.693 145.730 +suite 5p 32p 84.457 213.286 69.086 75.500 156.671 57.486 147.686 +suite 1t 32t 81.200 199.243 288.986 180.286 194.743 178.200 147.386 +suite 5q 32t 82.133 204.933 69.483 63.417 115.233 176.283 145.733 +suite 1o 32m 83.977 216.508 287.192 297.254 225.154 293.738 150.677 +suite 7r 32m 84.606 232.856 248.125 63.269 181.975 295.744 149.744 +suite 5r 32m 83.000 196.900 65.350 60.150 138.425 292.550 154.275 +suite 2a 23p 145.399 260.339 288.756 288.444 192.733 53.097 84.067 +suite 4a 23p 146.275 259.783 169.958 298.450 169.583 50.908 83.967 +suite 0a 23p 149.286 223.159 139.421 284.559 158.107 47.900 84.424 +suite #a 23p 148.006 191.944 146.231 289.288 150.781 42.419 84.956 +suite 4g 23p 148.028 256.922 165.194 204.961 165.194 49.383 82.983 +suite 6g 23p 145.337 262.869 79.588 203.863 189.688 58.000 84.900 +suite 8d 23p 148.992 270.596 240.892 62.225 176.271 53.600 87.262 +suite 4d 23p 149.822 249.956 187.678 80.433 198.133 61.000 89.378 +suite 6d 23p 146.922 241.222 88.894 59.344 160.683 52.333 83.417 +suite 2g 23p 141.900 258.383 286.517 178.267 165.217 48.350 84.783 +suite 2h 23t 147.782 260.712 290.424 296.200 177.282 175.594 86.565 +suite 4n 23t 143.722 227.256 203.789 73.856 216.733 194.444 80.911 +suite 0i 23t 148.717 274.683 100.283 80.600 248.133 181.817 82.600 +suite 6n 23t 150.311 268.383 84.972 63.811 191.483 176.644 85.600 +suite 6j 23t 141.633 244.100 66.056 71.667 122.167 182.200 83.622 +suite 0k 23m 149.070 249.780 111.520 278.370 207.780 287.820 86.650 +suite 2[ 22p 146.383 259.402 291.275 291.982 210.048 54.412 147.760 +suite 4b 22p 145.256 244.622 162.822 294.159 171.630 45.900 145.804 +suite 0b 22p 147.593 248.421 112.086 274.943 164.764 56.843 146.264 +suite 4p 22p 150.077 260.246 213.785 71.900 207.638 56.715 148.131 +suite 6p 22p 146.415 
257.831 89.597 67.923 173.051 55.513 147.623 +suite 2z 22p 142.900 236.550 268.800 180.783 185.133 54.467 143.350 +suite 4s 22t 149.863 247.562 170.488 277.938 84.425 176.413 148.087 +suite 2u 22t 143.940 258.200 298.240 279.640 183.680 183.080 145.120 +suite 2o 22m 147.342 256.475 295.508 287.408 194.525 293.725 150.458 +# Default widths for hyperellipsoid distance +# Delta-1 Epsilon Zeta Alpha Beta Gamma Delta +width 28.000 60.000 55.000 50.000 70.000 35.000 28.000 +# Dominant-satellite pairs. Must come after suite and width +# Satname Domname DihedralSatwidthDomwidthDihedralSatwidthDomwidth +domsat 1m 1a 4 32.000 64.000 +domsat 1L 1a 1 18.000 70.000 4 18.000 70.000 +domsat &a 1a 1 20.000 60.000 2 20.000 60.000 +domsat 1f 1c 4 47.000 65.000 +domsat 1[ 1b 4 34.000 56.000 +domsat 4a 0a 1 40.000 50.000 2 40.000 50.000 +domsat #a 0a 1 26.000 36.000 2 26.000 36.000 +domsat 0i 6n 4 60.000 60.000 +domsat 6j 6n 4 60.000 60.000 +# Filter ranges +# Min Max +delta 60.000 105.000 +delta 125.000 165.000 +epsilon 155.000 310.000 +zeta 25.000 335.000 +alpha 25.000 335.000 +beta 50.000 290.000 +gamma 20.000 95.000 +gamma 140.000 215.000 +gamma 260.000 335.000 diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp index 02429b556..7a3bdf006 100644 --- a/src/RnaSuite.cpp +++ b/src/RnaSuite.cpp @@ -194,8 +194,8 @@ namespace loos { // Filter on epsilon. Values outside of this range are // indicative of a misfit sugar pucker. 
- if (suite_dihedrals[i][1] < filter_min[0] - || suite_dihedrals[i][1] > filter_max[0]) { + if (suite_dihedrals[i][1] < ezab_min[0] + || suite_dihedrals[i][1] > ezab_max[0]) { suite_names.push_back("!e"); suite_ddgs.push_back("!!!"); @@ -205,8 +205,8 @@ namespace loos { } // Filter on zeta - if (suite_dihedrals[i][2] < filter_min[1] - || suite_dihedrals[i][2] > filter_max[1]) { + if (suite_dihedrals[i][2] < ezab_min[1] + || suite_dihedrals[i][2] > ezab_max[1]) { suite_names.push_back("!z"); suite_ddgs.push_back("!!!"); @@ -216,8 +216,8 @@ namespace loos { } // Filter on alpha - if (suite_dihedrals[i][3] < filter_min[2] - || suite_dihedrals[i][3] > filter_max[2]) { + if (suite_dihedrals[i][3] < ezab_min[2] + || suite_dihedrals[i][3] > ezab_max[2]) { suite_names.push_back("!a"); suite_ddgs.push_back("!!!"); @@ -227,8 +227,8 @@ namespace loos { } // Filter on beta - if (suite_dihedrals[i][4] < filter_min[3] - || suite_dihedrals[i][4] > filter_max[3]) { + if (suite_dihedrals[i][4] < ezab_min[3] + || suite_dihedrals[i][4] > ezab_max[3]) { suite_names.push_back("!b"); suite_ddgs.push_back("!!!"); @@ -411,14 +411,14 @@ namespace loos { } // calculateDihedral() void RnaSuite::checkContinuousGroupSize( - const vector> &group_vector, + const vector > &group_vector, const size_t target_size, const string dihedral_name) const { if (group_vector.size() != target_size) { cerr << boost::format("Error: different number of continuous " - "groups for alpha (%d) and %s (%d)\n") % target_size - % dihedral_name % group_vector.size(); + "groups for alpha (%d) and %s (%d)") % target_size + % dihedral_name % group_vector.size() << endl; throw(LOOSError()); } @@ -433,234 +433,240 @@ namespace loos { if (residue_vector.size() != target_size) { cerr << boost::format("Error: different number of residues in " - "continuous group %d for alpha (%d) and %s (%d)\n") - % group_index % target_size % dihedral_name - % residue_vector.size(); + "continuous group %d for alpha (%d) and %s (%d)") % 
group_index + % target_size % dihedral_name % residue_vector.size() << endl; + throw(LOOSError()); } } // checkResidueSize() - void RnaSuite::defineSuites(const string suite_definition) { + void RnaSuite::defineSuites(const string& suite_definition) { + // Clear vectors for reference suites reference_dihedrals.clear(); reference_names.clear(); reference_ddgs.clear(); + dihedral_width.clear(); + dominant_suites.clear(); + dom_sat_pair_index.clear(); + dominant_width.clear(); + satellite_width.clear(); + delta_min.clear(); + delta_max.clear(); + gamma_min.clear(); + gamma_max.clear(); + ezab_min = vector(4); + ezab_max = vector(4); + N_reference_ddg = 0; + N_reference_suite.clear(); - if (suite_definition == "suitename" - || suite_definition == "richardson") defineSuitesFromSuitename(); + if (suite_definition == "suitename") + defineSuitesFromFile("suitename_definitions.dat"); - else { + else defineSuitesFromFile(suite_definition); - cerr << boost::format("%s is not a recognized suite definition\n") - % suite_definition; - cerr << "Must be one of: suitename" << endl; - throw(LOOSError()); + } // defineSuites() - } + void RnaSuite::defineSuitesFromFile(const string& filename) { - } // defineSuites() + size_t ddg_index; + size_t dom_index; + size_t sat_index; + size_t position; + string field; + string line; + string record; + vector dihedrals(7); - void RnaSuite::defineSuitesFromFile(const string filename) { + // Store dominant-satellite pairs + vector domsat_ddg; + vector domsat_dom; + vector domsat_sat; + vector > domsat_dihedral; + vector > domsat_dom_width; + vector > domsat_sat_width; - // TODO read suite definitions from file - cerr << "Reading suite definitions from a file is not yet supported\n" - "Go yell at Chapin" << endl; + // Read file contents + ifstream ifs(filename.c_str()); + if (!ifs) throw(FileOpenError(filename)); - } // defineSuitesFromFile() + while (getline(ifs, line)) { - void RnaSuite::defineSuitesFromSuitename() { - - size_t suite_size; - - 
// Means of dihedral angles - reference_dihedrals = { - { // ddg index 0: C3' C3' plus - { 81.495, 212.250, 288.831, 294.967, 173.990, 53.550, 81.035}, - { 83.513, 218.120, 291.593, 292.247, 222.300, 58.067, 86.093}, - { 85.664, 245.014, 268.257, 303.879, 138.164, 61.950, 79.457}, - { 82.112, 190.682, 264.945, 295.967, 181.839, 51.455, 81.512}, - { 83.414, 217.400, 222.006, 302.856, 160.719, 49.097, 82.444}, - { 85.072, 216.324, 173.276, 289.320, 164.132, 45.876, 84.956}, - { 83.179, 210.347, 121.474, 288.568, 157.268, 49.347, 81.047}, - { 80.888, 218.636, 290.735, 167.447, 159.565, 51.326, 85.213}, - { 83.856, 238.750, 256.875, 69.562, 170.200, 52.800, 85.287}, - { 85.295, 244.085, 203.815, 65.880, 181.130, 54.680, 86.035}, - { 79.671, 202.471, 63.064, 68.164, 143.450, 49.664, 82.757}, - { 84.000, 195.000, 146.000, 170.000, 170.000, 52.000, 84.000} - }, { // ddg index 1: C3' C3' trans - { 80.514, 200.545, 280.510, 249.314, 82.662, 167.890, 85.507}, - { 80.223, 196.591, 291.299, 153.060, 194.379, 179.061, 83.648}, - { 81.395, 203.030, 294.445, 172.195, 138.540, 175.565, 84.470}, - { 87.417, 223.558, 80.175, 66.667, 109.150, 176.475, 83.833}, - { 86.055, 246.502, 100.392, 73.595, 213.752, 183.395, 85.483} - }, { // ddg index 2: C3' C3' minus - }, { // ddg index 3: C3' C2' plus - { 84.215, 215.014, 288.672, 300.420, 177.476, 58.307, 144.841}, - { 82.731, 220.463, 288.665, 296.983, 221.654, 54.213, 143.771}, - { 84.700, 226.400, 168.336, 292.771, 177.629, 48.629, 147.950}, - { 83.358, 206.042, 277.567, 195.700, 161.600, 50.750, 145.258}, - { 82.614, 206.440, 52.524, 163.669, 148.421, 50.176, 147.590}, - { 84.285, 236.600, 220.400, 68.300, 200.122, 53.693, 145.730}, - { 84.457, 213.286, 69.086, 75.500, 156.671, 57.486, 147.686} - }, { // ddg index 4: C3' C2' trans - { 81.200, 199.243, 288.986, 180.286, 194.743, 178.200, 147.386}, - { 82.133, 204.933, 69.483, 63.417, 115.233, 176.283, 145.733} - }, { // ddg index 5: C3' C2' minus - { 83.977, 216.508, 287.192, 297.254, 
225.154, 293.738, 150.677}, - { 84.606, 232.856, 248.125, 63.269, 181.975, 295.744, 149.744}, - { 83.000, 196.900, 65.350, 60.150, 138.425, 292.550, 154.275} - }, { // ddg index 6: C2' C3' plus - {145.399, 260.339, 288.756, 288.444, 192.733, 53.097, 84.067}, - {146.275, 259.783, 169.958, 298.450, 169.583, 50.908, 83.967}, - {149.286, 223.159, 139.421, 284.559, 158.107, 47.900, 84.424}, - {148.006, 191.944, 146.231, 289.288, 150.781, 42.419, 84.956}, - {148.028, 256.922, 165.194, 204.961, 165.194, 49.383, 82.983}, - {145.337, 262.869, 79.588, 203.863, 189.688, 58.000, 84.900}, - {148.992, 270.596, 240.892, 62.225, 176.271, 53.600, 87.262}, - {149.822, 249.956, 187.678, 80.433, 198.133, 61.000, 89.378}, - {146.922, 241.222, 88.894, 59.344, 160.683, 52.333, 83.417}, - {141.900, 258.383, 286.517, 178.267, 165.217, 48.350, 84.783} - }, { // ddg index 7: C2' C3' trans - {147.782, 260.712, 290.424, 296.200, 177.282, 175.594, 86.565}, - {143.722, 227.256, 203.789, 73.856, 216.733, 194.444, 80.911}, - {148.717, 274.683, 100.283, 80.600, 248.133, 181.817, 82.600}, - {150.311, 268.383, 84.972, 63.811, 191.483, 176.644, 85.600}, - {141.633, 244.100, 66.056, 71.667, 122.167, 182.200, 83.622} - }, { // ddg index 8: C2' C3' minus - {149.070, 249.780, 111.520, 278.370, 207.780, 287.820, 86.650} - }, { // ddg index 9: C2' C2' plus - {146.383, 259.402, 291.275, 291.982, 210.048, 54.412, 147.760}, - {145.256, 244.622, 162.822, 294.159, 171.630, 45.900, 145.804}, - {147.593, 248.421, 112.086, 274.943, 164.764, 56.843, 146.264}, - {150.077, 260.246, 213.785, 71.900, 207.638, 56.715, 148.131}, - {146.415, 257.831, 89.597, 67.923, 173.051, 55.513, 147.623}, - {142.900, 236.550, 268.800, 180.783, 185.133, 54.467, 143.350} - }, { // ddg index 10: C2' C2' trans - {149.863, 247.562, 170.488, 277.938, 84.425, 176.413, 148.087}, - {143.940, 258.200, 298.240, 279.640, 183.680, 183.080, 145.120} - }, { // ddg index 11: C2' C2' minus - {147.342, 256.475, 295.508, 287.408, 194.525, 293.725, 
150.458} - } }; - - // Get number of ddg clusters and number of suites in each ddg cluster - // Dominant suites lists indices of the dominant suite associated with - // a satellite suite. A value of reference_suite_dihedrals.size() - // that this suite is neither dominant nor satellite. A dominant suite - // will point to its own index. - N_reference_ddg = reference_dihedrals.size(); - N_reference_suite.clear(); - dominant_suites.clear(); - for (size_t i = 0; i < N_reference_ddg; ++i) { + record = parseStringAs(line, 0, 8); + + if (record.empty() || record[0] == '#') continue; + + else if (record == "suite") { + + // Define a reference suite + + // Get delta delta gamma cluster + field = parseStringAs( + line, 16, min((size_t) 8, line.size() - 16)); + ddg_index = N_reference_ddg; + for (size_t i = 0; i < N_reference_ddg; ++i) + if (field == reference_ddgs[i]) { + ddg_index = i; + break; + } + + // This is a new DDG cluster + if (ddg_index == N_reference_ddg) { + reference_ddgs.push_back(field); + reference_dihedrals.push_back(vector >()); + reference_names.push_back(vector()); + ++N_reference_ddg; + N_reference_suite.push_back(0); + } + + // Get suite name + field = parseStringAs(line, 8, 8); + if (field.empty()) continue; + reference_names[ddg_index].push_back(field); + + // Get reference suite dihedrals + dihedrals[0] = parseStringAs(line, 24, 8); + dihedrals[1] = parseStringAs(line, 32, 8); + dihedrals[2] = parseStringAs(line, 40, 8); + dihedrals[3] = parseStringAs(line, 48, 8); + dihedrals[4] = parseStringAs(line, 56, 8); + dihedrals[5] = parseStringAs(line, 64, 8); + dihedrals[6] = parseStringAs(line, 72, 8); + reference_dihedrals[ddg_index].push_back(dihedrals); + + ++N_reference_suite[ddg_index]; + + } else if (record == "width") { + + // Get default widths for hyperellipsoid distance + dihedral_width.push_back(parseStringAs(line, 8, 8)); + dihedral_width.push_back(parseStringAs(line, 16, 8)); + dihedral_width.push_back(parseStringAs(line, 24, 8)); + 
dihedral_width.push_back(parseStringAs(line, 32, 8)); + dihedral_width.push_back(parseStringAs(line, 40, 8)); + dihedral_width.push_back(parseStringAs(line, 48, 8)); + dihedral_width.push_back(parseStringAs(line, 56, 8)); + + } else if (record == "domsat") { + + // Define a dominant-satellite pair + + // Get index of dominant suite + field = parseStringAs(line, 16, 8); + ddg_index = N_reference_ddg; + for (size_t i = 0; i < N_reference_ddg; ++i) { + for (size_t j = 0; j < N_reference_suite[i]; ++j) + if (field == reference_names[i][j]) { + ddg_index = i; + dom_index = j; + break; + } + if (ddg_index != N_reference_ddg) break; + } + + if (ddg_index == N_reference_ddg) { + cerr << boost::format( + "Warning: dominant suite %s was not defined in file %s") + % field % filename << endl; + continue; + } + + // Get index of satellite suite + field = parseStringAs(line, 8, 8); + sat_index = N_reference_suite[ddg_index]; + for (size_t j = 0; j < N_reference_suite[ddg_index]; ++j) + if (field == reference_names[ddg_index][j]) { + sat_index = j; + break; + } + + if (sat_index == N_reference_suite[ddg_index]) { + cerr << boost::format( + "Warning: satellite suite %s was not defined in file %s") + % field % filename << endl; + continue; + } + + domsat_ddg.push_back(ddg_index); + domsat_dom.push_back(dom_index); + domsat_sat.push_back(sat_index); + + // Loop over dihedrals with alternate widths + vector dihedral_indices; + vector dom_width; + vector sat_width; + position = 24; + while (position < line.size()) { + dihedral_indices.push_back( + parseStringAs(line, position, 8)); + sat_width.push_back( + parseStringAs(line, position + 8, 8)); + dom_width.push_back( + parseStringAs(line, position + 16, 8)); + position += 24; + } + domsat_dihedral.push_back(dihedral_indices); + domsat_dom_width.push_back(dom_width); + domsat_sat_width.push_back(sat_width); + + } else if (record == "delta") { + + delta_min.push_back(parseStringAs(line, 8, 8)); + delta_max.push_back(parseStringAs(line, 
16, 8)); + + } else if (record == "epsilon") { + + ezab_min[0] = parseStringAs(line, 8, 8); + ezab_max[0] = parseStringAs(line, 16, 8); + + } else if (record == "zeta") { + + ezab_min[1] = parseStringAs(line, 8, 8); + ezab_max[1] = parseStringAs(line, 16, 8); + + } else if (record == "alpha") { - suite_size = reference_dihedrals[i].size(); - N_reference_suite.push_back(suite_size); - vector dom_suites(suite_size, suite_size); - dominant_suites.push_back(dom_suites); + ezab_min[2] = parseStringAs(line, 8, 8); + ezab_max[2] = parseStringAs(line, 16, 8); + } else if (record == "beta") { + + ezab_min[3] = parseStringAs(line, 8, 8); + ezab_max[3] = parseStringAs(line, 16, 8); + + } else if (record == "gamma") { + + gamma_min.push_back(parseStringAs(line, 8, 8)); + gamma_max.push_back(parseStringAs(line, 16, 8)); + + } else cerr << boost::format( + "Warning: Unrecognized record %s in suite definition from %s") + % record % filename << endl; + + } // Loop over lines in file + + // Construct vectors for dominant-satellite pairs + for (size_t i = 0; i < N_reference_ddg; ++i) { + dominant_suites.push_back( + vector(N_reference_suite[i], N_reference_suite[i])); + dom_sat_pair_index.push_back( + vector(N_reference_suite[i], domsat_dihedral.size())); } - // 1m, 1L, and &a are satellites of 1a - dominant_suites[0][0] = 0; - dominant_suites[0][1] = 0; - dominant_suites[0][2] = 0; - dominant_suites[0][3] = 0; - - // 1f is a satellite of 1c - dominant_suites[1][1] = 1; - dominant_suites[1][2] = 1; - - // 1[ is a satellite of 1b - dominant_suites[3][0] = 0; - dominant_suites[3][1] = 0; - - // 4a and #a are satellites of 0a - dominant_suites[6][2] = 2; - dominant_suites[6][1] = 2; - dominant_suites[6][3] = 2; - - // 0i nd 6j are satellites of 6n - dominant_suites[7][3] = 3; - dominant_suites[7][2] = 3; - dominant_suites[7][4] = 3; - - // Two-character suite name - reference_names = { - {"1a", "1m", "1L", "&a", "7a", "3a", "9a", "1g", "7d", "3d", "5d", - "3g"}, - {"1e", "1c", "1f", 
"5j", "5n"}, - { }, - {"1b", "1[", "3b", "1z", "5z", "7p", "5p"}, - {"1t", "5q"}, - {"1o", "7r", "5r"}, - {"2a", "4a", "0a", "#a", "4g", "6g", "8d", "4d", "6d", "2g"}, - {"2h", "4n", "0i", "6n", "6j"}, - {"0k"}, - {"2[", "4b", "0b", "4p", "6p", "2z"}, - {"4s", "2u"}, - {"2o"} - }; - - // Widths used to scale each dihedral dimension - dihedral_width = {28.0, 60.0, 55.0, 50.0, 70.0, 35.0, 28.0}; - - // Alternative widths used to scale dominant-satellite pairs - dominant_width = { - {28.0, 60.0, 55.0, 50.0, 64.0, 35.0, 28.0}, - {28.0, 70.0, 55.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 60.0, 60.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 65.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 56.0, 35.0, 28.0}, - {28.0, 50.0, 50.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 36.0, 36.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0} - }; - - satellite_width = { - {28.0, 60.0, 55.0, 50.0, 32.0, 35.0, 28.0}, - {28.0, 18.0, 55.0, 50.0, 18.0, 35.0, 28.0}, - {28.0, 20.0, 20.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 47.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 34.0, 35.0, 28.0}, - {28.0, 40.0, 40.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 26.0, 26.0, 50.0, 70.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0}, - {28.0, 60.0, 55.0, 50.0, 60.0, 35.0, 28.0}, - }; - - // Index into dominant-satellite pair widths - dom_sat_pair_index = { - {9, 0, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9}, - {9, 9, 3, 9, 9}, - { }, - {9, 4, 9, 9, 9, 9, 9}, - {9, 9}, - {9, 9, 9}, - {9, 5, 9, 6, 9, 9, 9, 9, 9, 9}, - {9, 9, 7, 9, 8}, - {9}, - {9, 9, 9, 9, 9, 9}, - {9, 9}, - {9} - }; - - // Delta(i-1), delta, gamma index. Delta can be C3' endo ("3") or - // C2' endo ("2"). Gamma can be plus ("p"), trans ("t"), or minus ("m"). 
- reference_ddgs = {"33p", "33t", "33m", "32p", "32t", "32m", "23p", - "23t", "23m", "22p", "22t", "22m"}; - - // Boundaries for allowed regions of delta(i-1), delta, and gamma - delta_min = { 60.0, 125.0}; - delta_max = {105.0, 165.0}; - gamma_min = { 20.0, 140.0, 260.0}; - gamma_max = { 95.0, 215.0, 335.0}; - - // Boundaries used to filter suites based on epsilon, zeta, alpha, beta - filter_min = {155.0, 25.0, 25.0, 50.0}; - filter_max = {310.0, 335.0, 335.0, 290.0}; - - } // defineSuitesFromSuitename() + for (size_t i = 0; i < domsat_dihedral.size(); ++i) { + dominant_suites[domsat_ddg[i]][domsat_dom[i]] = domsat_dom[i]; + dominant_suites[domsat_ddg[i]][domsat_sat[i]] = domsat_dom[i]; + dom_sat_pair_index[domsat_ddg[i]][domsat_sat[i]] = i; + dominant_width.push_back(dihedral_width); + satellite_width.push_back(dihedral_width); + for (size_t j = 0; j < domsat_dihedral[i].size(); ++j) { + dominant_width[i][domsat_dihedral[i][j]] = domsat_dom_width[i][j]; + satellite_width[i][domsat_dihedral[i][j]] = domsat_sat_width[i][j]; + } + } + + } // defineSuitesFromFile() void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { @@ -697,18 +703,19 @@ namespace loos { // Extract all RNA backbone atoms (P, O5', C5', C4', C3', and O3') into // one AtomicGroup. 
Use raw string literal R"()" to avoid escaping " AtomicGroup backbone = selectAtoms(group, - R"(name =~ "^(P|C[345]'|O[35]')$")"); + "(name =~ \"^(P|C[345]'|O[35]')$\")"); // Split by resid and loop over residues - for (AtomicGroup residue : backbone.splitByResidue()) { + vector backbone_residues = backbone.splitByResidue(); + for (size_t i = 0; i < backbone_residues.size(); ++i) { // Select RNA backbone atoms from residue - residue_p = selectAtoms(residue, R"(name == "P")"); - residue_o5p = selectAtoms(residue, R"(name == "O5'")"); - residue_c5p = selectAtoms(residue, R"(name == "C5'")"); - residue_c4p = selectAtoms(residue, R"(name == "C4'")"); - residue_c3p = selectAtoms(residue, R"(name == "C3'")"); - residue_o3p = selectAtoms(residue, R"(name == "O3'")"); + residue_p = selectAtoms(backbone_residues[i], "(name == \"P\")"); + residue_o5p = selectAtoms(backbone_residues[i], "(name == \"O5'\")"); + residue_c5p = selectAtoms(backbone_residues[i], "(name == \"C5'\")"); + residue_c4p = selectAtoms(backbone_residues[i], "(name == \"C4'\")"); + residue_c3p = selectAtoms(backbone_residues[i], "(name == \"C3'\")"); + residue_o3p = selectAtoms(backbone_residues[i], "(name == \"O3'\")"); // If any atom besides P is missing, skip this residue and start a // new continuous group @@ -859,7 +866,7 @@ namespace loos { return suite_ddgs; } // getSuiteDDGs() - vector> RnaSuite::getSuiteDihedrals() const { + vector > RnaSuite::getSuiteDihedrals() const { return suite_dihedrals; } // getSuiteDihedrals() diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index b3d68e2d1..05ad1b7c5 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -69,7 +69,7 @@ namespace loos { void calculateBackboneDihedrals(); //! Method to define suites used for assignment from an existing scheme - void defineSuites(const string suite_definition); + void defineSuites(const string& suite_definition); //! 
Method to extract RNA backbone atoms from an AtomicGroup /** @@ -82,7 +82,7 @@ namespace loos { vector getSuiteDDGs() const; //! Method to return the current backbone dihedrals - vector> getSuiteDihedrals() const; + vector > getSuiteDihedrals() const; //! Method to return the current assigned suite names vector getSuiteNames() const; @@ -125,7 +125,7 @@ namespace loos { //! Method to check the size of a vector of continuous groups void checkContinuousGroupSize( - const vector> &group_vector, + const vector > &group_vector, const size_t target_size, const string dihedral_name) const; //! Method to check the size of a vector of residues @@ -134,10 +134,7 @@ namespace loos { const size_t group_index) const; //! Method to define suites used for assignment from a file - void defineSuitesFromFile(const string filename); - - //! Method to define suites used for assignment from suitename - void defineSuitesFromSuitename(); + void defineSuitesFromFile(const string& filename); //! Method to test whether a point is in between two reference points bool isBetweenDomSatPair(const vector &dihedrals, @@ -149,20 +146,22 @@ namespace loos { uint first_index, uint last_index); // Reference suites used for assignment - vector>> reference_dihedrals; - vector> reference_names; + vector >> reference_dihedrals; + vector > reference_names; vector reference_ddgs; - vector> dominant_suites; // Widths used to scale each dihedral dimension vector dihedral_width; - // Alternative widths used to scale dominant-satellite pairs - vector> dominant_width; - vector> satellite_width; + // Indices of dominant-satellite pairs + vector > dominant_suites; // Index into dominant-satellite pair widths - vector> dom_sat_pair_index; + vector > dom_sat_pair_index; + + // Alternative widths used to scale dominant-satellite pairs + vector > dominant_width; + vector > satellite_width; // Boundaries for allowed regions of delta(i-1), delta, and gamma vector delta_min; @@ -170,23 +169,23 @@ namespace loos { vector 
gamma_min; vector gamma_max; - // Boundaries used to filter suites based on epsilon, zeta, alpha, beta - vector filter_min; - vector filter_max; + // Boundaries for allowed regions of epsilon, zeta, alpha, beta + vector ezab_min; + vector ezab_max; // Vector of continuous groups, composed of vectors of AtomicGroups // for each residue within a continuous group - vector> alpha_atoms; - vector> beta_atoms; - vector> gamma_atoms; - vector> delta_atoms; - vector> epsilon_atoms; - vector> zeta_atoms; + vector > alpha_atoms; + vector > beta_atoms; + vector > gamma_atoms; + vector > delta_atoms; + vector > epsilon_atoms; + vector > zeta_atoms; // Suite residue ids, residue names, and dihedrals vector suite_resids; vector suite_resnames; - vector> suite_dihedrals; + vector > suite_dihedrals; // Assigned suite names, ddg indices, and suiteness scores vector suite_names; From 282dd0c029cf651fa38e58b277023b2020301c98 Mon Sep 17 00:00:00 2001 From: "Chapin E. Cavender" Date: Tue, 28 Jul 2020 17:16:04 -0400 Subject: [PATCH 39/41] Moved file containing default suite definitions to new share directory --- share/suitename_definitions.dat | 82 +++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 share/suitename_definitions.dat diff --git a/share/suitename_definitions.dat b/share/suitename_definitions.dat new file mode 100644 index 000000000..1969a283f --- /dev/null +++ b/share/suitename_definitions.dat @@ -0,0 +1,82 @@ +# Reference suite dihedrals +# Name DDG Delta-1 Epsilon Zeta Alpha Beta Gamma Delta +suite 1a 33p 81.495 212.250 288.831 294.967 173.990 53.550 81.035 +suite 1m 33p 83.513 218.120 291.593 292.247 222.300 58.067 86.093 +suite 1L 33p 85.664 245.014 268.257 303.879 138.164 61.950 79.457 +suite &a 33p 82.112 190.682 264.945 295.967 181.839 51.455 81.512 +suite 7a 33p 83.414 217.400 222.006 302.856 160.719 49.097 82.444 +suite 3a 33p 85.072 216.324 173.276 289.320 164.132 45.876 84.956 +suite 9a 33p 83.179 210.347 121.474 288.568 157.268 
49.347 81.047 +suite 1g 33p 80.888 218.636 290.735 167.447 159.565 51.326 85.213 +suite 7d 33p 83.856 238.750 256.875 69.562 170.200 52.800 85.287 +suite 3d 33p 85.295 244.085 203.815 65.880 181.130 54.680 86.035 +suite 5d 33p 79.671 202.471 63.064 68.164 143.450 49.664 82.757 +suite 3g 33p 84.000 195.000 146.000 170.000 170.000 52.000 84.000 +suite 1e 33t 80.514 200.545 280.510 249.314 82.662 167.890 85.507 +suite 1c 33t 80.223 196.591 291.299 153.060 194.379 179.061 83.648 +suite 1f 33t 81.395 203.030 294.445 172.195 138.540 175.565 84.470 +suite 5j 33t 87.417 223.558 80.175 66.667 109.150 176.475 83.833 +suite 5n 33t 86.055 246.502 100.392 73.595 213.752 183.395 85.483 +suite 33m +suite 1b 32p 84.215 215.014 288.672 300.420 177.476 58.307 144.841 +suite 1[ 32p 82.731 220.463 288.665 296.983 221.654 54.213 143.771 +suite 3b 32p 84.700 226.400 168.336 292.771 177.629 48.629 147.950 +suite 1z 32p 83.358 206.042 277.567 195.700 161.600 50.750 145.258 +suite 5z 32p 82.614 206.440 52.524 163.669 148.421 50.176 147.590 +suite 7p 32p 84.285 236.600 220.400 68.300 200.122 53.693 145.730 +suite 5p 32p 84.457 213.286 69.086 75.500 156.671 57.486 147.686 +suite 1t 32t 81.200 199.243 288.986 180.286 194.743 178.200 147.386 +suite 5q 32t 82.133 204.933 69.483 63.417 115.233 176.283 145.733 +suite 1o 32m 83.977 216.508 287.192 297.254 225.154 293.738 150.677 +suite 7r 32m 84.606 232.856 248.125 63.269 181.975 295.744 149.744 +suite 5r 32m 83.000 196.900 65.350 60.150 138.425 292.550 154.275 +suite 2a 23p 145.399 260.339 288.756 288.444 192.733 53.097 84.067 +suite 4a 23p 146.275 259.783 169.958 298.450 169.583 50.908 83.967 +suite 0a 23p 149.286 223.159 139.421 284.559 158.107 47.900 84.424 +suite #a 23p 148.006 191.944 146.231 289.288 150.781 42.419 84.956 +suite 4g 23p 148.028 256.922 165.194 204.961 165.194 49.383 82.983 +suite 6g 23p 145.337 262.869 79.588 203.863 189.688 58.000 84.900 +suite 8d 23p 148.992 270.596 240.892 62.225 176.271 53.600 87.262 +suite 4d 23p 149.822 
249.956 187.678 80.433 198.133 61.000 89.378 +suite 6d 23p 146.922 241.222 88.894 59.344 160.683 52.333 83.417 +suite 2g 23p 141.900 258.383 286.517 178.267 165.217 48.350 84.783 +suite 2h 23t 147.782 260.712 290.424 296.200 177.282 175.594 86.565 +suite 4n 23t 143.722 227.256 203.789 73.856 216.733 194.444 80.911 +suite 0i 23t 148.717 274.683 100.283 80.600 248.133 181.817 82.600 +suite 6n 23t 150.311 268.383 84.972 63.811 191.483 176.644 85.600 +suite 6j 23t 141.633 244.100 66.056 71.667 122.167 182.200 83.622 +suite 0k 23m 149.070 249.780 111.520 278.370 207.780 287.820 86.650 +suite 2[ 22p 146.383 259.402 291.275 291.982 210.048 54.412 147.760 +suite 4b 22p 145.256 244.622 162.822 294.159 171.630 45.900 145.804 +suite 0b 22p 147.593 248.421 112.086 274.943 164.764 56.843 146.264 +suite 4p 22p 150.077 260.246 213.785 71.900 207.638 56.715 148.131 +suite 6p 22p 146.415 257.831 89.597 67.923 173.051 55.513 147.623 +suite 2z 22p 142.900 236.550 268.800 180.783 185.133 54.467 143.350 +suite 4s 22t 149.863 247.562 170.488 277.938 84.425 176.413 148.087 +suite 2u 22t 143.940 258.200 298.240 279.640 183.680 183.080 145.120 +suite 2o 22m 147.342 256.475 295.508 287.408 194.525 293.725 150.458 +# Default widths for hyperellipsoid distance +# Delta-1 Epsilon Zeta Alpha Beta Gamma Delta +width 28.000 60.000 55.000 50.000 70.000 35.000 28.000 +# Dominant-satellite pairs. 
Must come after suite and width +# Satname Domname DihedralSatwidthDomwidthDihedralSatwidthDomwidth +domsat 1m 1a 4 32.000 64.000 +domsat 1L 1a 1 18.000 70.000 4 18.000 70.000 +domsat &a 1a 1 20.000 60.000 2 20.000 60.000 +domsat 1f 1c 4 47.000 65.000 +domsat 1[ 1b 4 34.000 56.000 +domsat 4a 0a 1 40.000 50.000 2 40.000 50.000 +domsat #a 0a 1 26.000 36.000 2 26.000 36.000 +domsat 0i 6n 4 60.000 60.000 +domsat 6j 6n 4 60.000 60.000 +# Filter ranges +# Min Max +delta 60.000 105.000 +delta 125.000 165.000 +epsilon 155.000 310.000 +zeta 25.000 335.000 +alpha 25.000 335.000 +beta 50.000 290.000 +gamma 20.000 95.000 +gamma 140.000 215.000 +gamma 260.000 335.000 From 272c05cc7031003e963dcb901d28b7f31ed8fb52 Mon Sep 17 00:00:00 2001 From: "Chapin E. Cavender" Date: Wed, 5 Aug 2020 22:41:46 -0400 Subject: [PATCH 40/41] Add fullhelp; make path to suite definitions file a required positional argument --- SConstruct | 4 +- Tools/rna_suites.cpp | 113 ++++++++++++++++++++++++++++---- Tools/suitename_definitions.dat | 82 ----------------------- share/SConscript | 37 +++++++++++ src/RnaSuite.cpp | 42 +++--------- src/RnaSuite.hpp | 54 ++++++++++----- 6 files changed, 187 insertions(+), 145 deletions(-) delete mode 100644 Tools/suitename_definitions.dat create mode 100644 share/SConscript diff --git a/SConstruct b/SConstruct index 6983df113..c95ec6002 100644 --- a/SConstruct +++ b/SConstruct @@ -269,7 +269,9 @@ else: loos_tools = SConscript('Tools/SConscript') -loos_core = loos + loos_scripts +loos_share = SConscript('share/SConscript') + +loos_core = loos + loos_scripts + loos_share # Automatically setup build targets based on package_list diff --git a/Tools/rna_suites.cpp b/Tools/rna_suites.cpp index 46b620ac8..cccd2c10f 100644 --- a/Tools/rna_suites.cpp +++ b/Tools/rna_suites.cpp @@ -39,19 +39,104 @@ namespace po = loos::OptionsFramework::po; string fullHelpMessage(void) { string full_help_message = +"SYNOPSIS\n" +" Assign backbone suites to RNAs based on backbone dihedrals.\n" 
"\n" -" SYNOPSIS\n" +"DESCRIPTION\n" +" The goal of this tool is to assign continuous RNA dinucleotides to a\n" +" cluster called a \"suite\" based on the conformation of backbone dihedrals.\n" +" The idea comes from Richardson et al. (2008) RNA 14, 465-481. The\n" +" dinucleotide for a residue runs from delta (C5'-C4'-C3'-O3') of the previous\n" +" residue to delta of the current residue, encompassing seven continuous\n" +" dihedrals. A suite is a pre-defined cluster in this 7D space, named by a\n" +" two-character string. Examples are \"1a\" or \"5z\".\n" "\n" -" Assigns backbone suites to RNAs based on backbone dihedrals\n" +" The first step is to search the given selection for RNA backbone atoms, i.e.\n" +" atoms named \"P\", \"O5'\", \"C5'\", \"C4'\", \"C3'\", or \"O3'\". These atoms are\n" +" split by residue. Valid dinucleotides are sets of delta-to-delta backbone\n" +" atoms with sequential resids. Once the set of valid dinucleotides is\n" +" determined, the tool will loop over the trajectory and assign each\n" +" dinucleotide to a suite for each frame.\n" "\n" -" DESCRIPTION\n" +" Suite assignment occurs in two stages. The clusters are well-separated in\n" +" the 3D subspace of delta(i-1), delta, and gamma. So the first stage is to\n" +" assign each delta to one of two ranges of values consistent with either a\n" +" C3'-endo (3) or C2'-endo (2) sugar pucker and to assign gamma to one of\n" +" three ranges of values: gauche plus (p), gauche minus (m), or trans (t). The\n" +" result is a three-character string called a ddg index. Examples are \"33p\"\n" +" or \"23t\". Then, the dinucleotide is assigned to one of a possible set of\n" +" suites associated with its ddg index based on a scaled hyperellipsoid\n" +" distance in the dual 4D subspace of epsilon, zeta, alpha, and beta.\n" "\n" -" This tool\n" +" Some suites have overlapping hyperellipsoids of different sizes. 
The wider\n" +" suite is called a dominant suite, and the narrower suite is called a\n" +" satellite suite. These cases are handled by rescaling the hyperellipsoid\n" +" distance along the dimensions in which the overlap occurs.\n" "\n" -" EXAMPLES\n" +" If a dinucleotide doesn't fit into one of the allowed ranges for a dihedral,\n" +" it is assigned as an outlier and given a suite name \"!s\", where \"s\" is the\n" +" first character of the name of the deviant dihedral, e.g. \"!a\" for a bad\n" +" alpha. If the dinucleotide is not close to any of the reference suites, it \n" +" is also assigned as an outlier and given a suite name \"!!\".\n" "\n" -" rna_suites\n" - ; +" After assignment, each dinucleotide is given a goodness-of-fit score called \n" +" the suiteness based on the scaled 7D hyperellipsoid distance to its assigned\n" +" suite. A suiteness of one indicates that the dinucleotide is at the cluster\n" +" center. Lower suiteness indicates that the dinucleotide is farther from the\n" +" cluster center. An outlier has a suiteness of zero, and assigned\n" +" dinucleotides have a minimum suiteness score (set by the -c option) to\n" +" differentiate them from outliers.\n" +"\n" +" It is necessary to specify a path to a file containing definitions for the\n" +" reference suites on the command-line. The format is explained in the next\n" +" section. An example of the format that implements the suites as defined in\n" +" the software suitename (Richardson et al. (2008) RNA 14, 465-481) is\n" +" included as $PREFIX/share/suitename_definitions.dat, where $PREFIX is the\n" +" PREFIX argument given to \"scons install\". The default scons prefix is\n" +" PREFIX=/opt/LOOS. 
This suitename_defintions.dat file should be sufficient\n" +" for typical users, but you must specify the path to it as the first\n" +" positional argument.\n" +"\n" +"SUITE DEFINITON FILE FORMAT\n" +" Each line in the file is parsed as a record containing fields with a width\n" +" of eight characters. Blank lines and lines beginning with \"#\" are ignored.\n" +" The first field specifies the type of record and must be one of \"suite\",\n" +" \"width\", \"domsat\", \"delta\", \"epsilon\", \"zeta\", \"alpha\", \"beta\", or \"gamma\".\n" +" These records and their associated fields are described below.\n" +"\n" +" suite name ddg delta(i-1) epsilon zeta alpha beta gamma delta(i)\n" +" Define a reference suite with suite name given in field 2, ddg index\n" +" given in field 3, and dihedrals of the cluster center given in fields 4\n" +" through 10.\n" +"\n" +" width delta(i-1) epsilon zeta alpha beta gamma delta\n" +" Define default widths for scaled hyperellipsoid distances.\n" +"\n" +" domsat sat_name dom_name dihedral_index sat_width dom_width\n" +" Define dominant-satellite pair with name of satellite suite in field 2,\n" +" name of dominant suite in field 3, index of dihedral dimension with\n" +" altered width in field 4, width of that dimension for satellite suite\n" +" in field 5, and width of that dimension for dominant suite in field 6.\n" +" Additional dimensions and widths can be specified in fields 7 through 9,\n" +" fields 10 through 12, etc.\n" +"\n" +" dihedral min max\n" +" Define allowed ranges for a dihedral. \"dihedral\" can be one of \"delta\",\n" +" \"epsilon\", \"zeta\", \"alpha\", \"beta\", or \"gamma\". 
The minimum value\n" +" is given in field 2 and maximum value in field 3.\n" +"\n" +"EXAMPLES\n" +" rna_suites $CONDA_PREFIX/share/suitename_defintions.dat foo.pdb foo.dcd\n" +" Assign backbone suites using the install prefix from a conda install.\n" +"\n" +" rna_suites -s 'resid <= 10' $CONDA_PREFIX/share/suitename_defintions.dat \\\n" +" foo.pdb foo.dcd\n" +" Assign backbone suites only for the first 10 residues.\n" +"\n" +" rna_suites -c 0.001 $CONDA_PREFIX/share/suitename_defintions.dat \\\n" +" foo.pdb foo.dcd\n" +" Assign backbone suites using a minimum suiteness of 0.001 for\n" +" non-outliers.\n"; return full_help_message; @@ -97,12 +182,14 @@ int main(int argc, char *argv[]) { // Set up tool options opts::BasicOptions *bopts = new opts::BasicOptions(fullHelpMessage()); opts::BasicSelection *sopts = new opts::BasicSelection("!hydrogen"); + opts::RequiredArguments *ropts = new opts::RequiredArguments; + ropts->addArgument("suite_def", "suite_definition_file"); opts::TrajectoryWithFrameIndices *tropts = new opts::TrajectoryWithFrameIndices; ToolOptions *topts = new ToolOptions; opts::AggregateOptions options; - options.add(bopts).add(sopts).add(tropts).add(topts); + options.add(bopts).add(sopts).add(ropts).add(tropts).add(topts); if (!options.parse(argc, argv)) exit(-1); @@ -110,9 +197,7 @@ int main(int argc, char *argv[]) { const double suiteness_cutoff = topts->suiteness_cutoff; // Print command-line input - cout << "# " << header << "\n"; - - // Do some error-checking on tool options + cout << "# " << header << endl; // Build LOOS system and generate atom selection AtomicGroup model = tropts->model; @@ -121,7 +206,8 @@ int main(int argc, char *argv[]) { AtomicGroup rna_atoms = selectAtoms(model, sopts->selection); // Create RNASuite object from RNA atoms - RnaSuite rna_suite = RnaSuite(rna_atoms, suiteness_cutoff); + string suite_definition = ropts->value("suite_def"); + RnaSuite rna_suite = RnaSuite(rna_atoms, suite_definition, suiteness_cutoff); vector 
suite_resids = rna_suite.getSuiteResids(); vector suite_resnames = rna_suite.getSuiteResnames(); //rna_suite.printReferenceSuites(); @@ -129,6 +215,9 @@ int main(int argc, char *argv[]) { // Print dihedrals //rna_suite.printBackboneAtoms(); + // Print column headers + cout << "# Frame Resid Resname Suite DDG_index Suiteness" << endl; + // Loop over trajectory vector suite_names; vector suite_ddgs; diff --git a/Tools/suitename_definitions.dat b/Tools/suitename_definitions.dat deleted file mode 100644 index 1969a283f..000000000 --- a/Tools/suitename_definitions.dat +++ /dev/null @@ -1,82 +0,0 @@ -# Reference suite dihedrals -# Name DDG Delta-1 Epsilon Zeta Alpha Beta Gamma Delta -suite 1a 33p 81.495 212.250 288.831 294.967 173.990 53.550 81.035 -suite 1m 33p 83.513 218.120 291.593 292.247 222.300 58.067 86.093 -suite 1L 33p 85.664 245.014 268.257 303.879 138.164 61.950 79.457 -suite &a 33p 82.112 190.682 264.945 295.967 181.839 51.455 81.512 -suite 7a 33p 83.414 217.400 222.006 302.856 160.719 49.097 82.444 -suite 3a 33p 85.072 216.324 173.276 289.320 164.132 45.876 84.956 -suite 9a 33p 83.179 210.347 121.474 288.568 157.268 49.347 81.047 -suite 1g 33p 80.888 218.636 290.735 167.447 159.565 51.326 85.213 -suite 7d 33p 83.856 238.750 256.875 69.562 170.200 52.800 85.287 -suite 3d 33p 85.295 244.085 203.815 65.880 181.130 54.680 86.035 -suite 5d 33p 79.671 202.471 63.064 68.164 143.450 49.664 82.757 -suite 3g 33p 84.000 195.000 146.000 170.000 170.000 52.000 84.000 -suite 1e 33t 80.514 200.545 280.510 249.314 82.662 167.890 85.507 -suite 1c 33t 80.223 196.591 291.299 153.060 194.379 179.061 83.648 -suite 1f 33t 81.395 203.030 294.445 172.195 138.540 175.565 84.470 -suite 5j 33t 87.417 223.558 80.175 66.667 109.150 176.475 83.833 -suite 5n 33t 86.055 246.502 100.392 73.595 213.752 183.395 85.483 -suite 33m -suite 1b 32p 84.215 215.014 288.672 300.420 177.476 58.307 144.841 -suite 1[ 32p 82.731 220.463 288.665 296.983 221.654 54.213 143.771 -suite 3b 32p 84.700 226.400 
168.336 292.771 177.629 48.629 147.950 -suite 1z 32p 83.358 206.042 277.567 195.700 161.600 50.750 145.258 -suite 5z 32p 82.614 206.440 52.524 163.669 148.421 50.176 147.590 -suite 7p 32p 84.285 236.600 220.400 68.300 200.122 53.693 145.730 -suite 5p 32p 84.457 213.286 69.086 75.500 156.671 57.486 147.686 -suite 1t 32t 81.200 199.243 288.986 180.286 194.743 178.200 147.386 -suite 5q 32t 82.133 204.933 69.483 63.417 115.233 176.283 145.733 -suite 1o 32m 83.977 216.508 287.192 297.254 225.154 293.738 150.677 -suite 7r 32m 84.606 232.856 248.125 63.269 181.975 295.744 149.744 -suite 5r 32m 83.000 196.900 65.350 60.150 138.425 292.550 154.275 -suite 2a 23p 145.399 260.339 288.756 288.444 192.733 53.097 84.067 -suite 4a 23p 146.275 259.783 169.958 298.450 169.583 50.908 83.967 -suite 0a 23p 149.286 223.159 139.421 284.559 158.107 47.900 84.424 -suite #a 23p 148.006 191.944 146.231 289.288 150.781 42.419 84.956 -suite 4g 23p 148.028 256.922 165.194 204.961 165.194 49.383 82.983 -suite 6g 23p 145.337 262.869 79.588 203.863 189.688 58.000 84.900 -suite 8d 23p 148.992 270.596 240.892 62.225 176.271 53.600 87.262 -suite 4d 23p 149.822 249.956 187.678 80.433 198.133 61.000 89.378 -suite 6d 23p 146.922 241.222 88.894 59.344 160.683 52.333 83.417 -suite 2g 23p 141.900 258.383 286.517 178.267 165.217 48.350 84.783 -suite 2h 23t 147.782 260.712 290.424 296.200 177.282 175.594 86.565 -suite 4n 23t 143.722 227.256 203.789 73.856 216.733 194.444 80.911 -suite 0i 23t 148.717 274.683 100.283 80.600 248.133 181.817 82.600 -suite 6n 23t 150.311 268.383 84.972 63.811 191.483 176.644 85.600 -suite 6j 23t 141.633 244.100 66.056 71.667 122.167 182.200 83.622 -suite 0k 23m 149.070 249.780 111.520 278.370 207.780 287.820 86.650 -suite 2[ 22p 146.383 259.402 291.275 291.982 210.048 54.412 147.760 -suite 4b 22p 145.256 244.622 162.822 294.159 171.630 45.900 145.804 -suite 0b 22p 147.593 248.421 112.086 274.943 164.764 56.843 146.264 -suite 4p 22p 150.077 260.246 213.785 71.900 207.638 56.715 
148.131 -suite 6p 22p 146.415 257.831 89.597 67.923 173.051 55.513 147.623 -suite 2z 22p 142.900 236.550 268.800 180.783 185.133 54.467 143.350 -suite 4s 22t 149.863 247.562 170.488 277.938 84.425 176.413 148.087 -suite 2u 22t 143.940 258.200 298.240 279.640 183.680 183.080 145.120 -suite 2o 22m 147.342 256.475 295.508 287.408 194.525 293.725 150.458 -# Default widths for hyperellipsoid distance -# Delta-1 Epsilon Zeta Alpha Beta Gamma Delta -width 28.000 60.000 55.000 50.000 70.000 35.000 28.000 -# Dominant-satellite pairs. Must come after suite and width -# Satname Domname DihedralSatwidthDomwidthDihedralSatwidthDomwidth -domsat 1m 1a 4 32.000 64.000 -domsat 1L 1a 1 18.000 70.000 4 18.000 70.000 -domsat &a 1a 1 20.000 60.000 2 20.000 60.000 -domsat 1f 1c 4 47.000 65.000 -domsat 1[ 1b 4 34.000 56.000 -domsat 4a 0a 1 40.000 50.000 2 40.000 50.000 -domsat #a 0a 1 26.000 36.000 2 26.000 36.000 -domsat 0i 6n 4 60.000 60.000 -domsat 6j 6n 4 60.000 60.000 -# Filter ranges -# Min Max -delta 60.000 105.000 -delta 125.000 165.000 -epsilon 155.000 310.000 -zeta 25.000 335.000 -alpha 25.000 335.000 -beta 50.000 290.000 -gamma 20.000 95.000 -gamma 140.000 215.000 -gamma 260.000 335.000 diff --git a/share/SConscript b/share/SConscript new file mode 100644 index 000000000..b51bf167a --- /dev/null +++ b/share/SConscript @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +# This file is part of LOOS. +# +# LOOS (Lightweight Object-Oriented Structure library) +# Copyright (c) 2008, Tod D. Romo +# Department of Biochemistry and Biophysics +# School of Medicine & Dentistry, University of Rochester +# +# This package (LOOS) is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation under version 3 of the License. 
+# +# This package is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import os +import sys + +Import('env') +Import('loos') + +clone = env.Clone() +clone.Prepend(LIBS = [loos]) + +files = 'suitename_definitions.dat' +PREFIX = env['PREFIX'] +share_path = os.path.join(PREFIX, "share") +shared_files = env.Install(share_path, Split(files)) + +shared_list = Split(files) + +Return('shared_list') diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp index 7a3bdf006..1b2259309 100644 --- a/src/RnaSuite.cpp +++ b/src/RnaSuite.cpp @@ -47,27 +47,9 @@ namespace loos { } - RnaSuite::RnaSuite(const AtomicGroup &group, - const double suiteness_cutoff_) { - - suiteness_cutoff = suiteness_cutoff_; - defineSuites("suitename"); - extractRnaBackboneAtoms(group); - - } - - RnaSuite::RnaSuite(const AtomicGroup &group) { - - suiteness_cutoff = 0.01; - defineSuites("suitename"); - extractRnaBackboneAtoms(group); - - } - RnaSuite::RnaSuite() { suiteness_cutoff = 0.01; - defineSuites("suitename"); } @@ -461,15 +443,7 @@ namespace loos { N_reference_ddg = 0; N_reference_suite.clear(); - if (suite_definition == "suitename") - defineSuitesFromFile("suitename_definitions.dat"); - - else defineSuitesFromFile(suite_definition); - - } // defineSuites() - - void RnaSuite::defineSuitesFromFile(const string& filename) { - + // Temporary variables for parsing lines from the definition file size_t ddg_index; size_t dom_index; size_t sat_index; @@ -488,8 +462,8 @@ namespace loos { vector > domsat_sat_width; // Read file contents - ifstream ifs(filename.c_str()); - if (!ifs) throw(FileOpenError(filename)); + ifstream ifs(suite_definition.c_str()); + if (!ifs) throw(FileOpenError(suite_definition)); while (getline(ifs, 
line)) { @@ -568,7 +542,7 @@ namespace loos { if (ddg_index == N_reference_ddg) { cerr << boost::format( "Warning: dominant suite %s was not defined in file %s") - % field % filename << endl; + % field % suite_definition << endl; continue; } @@ -584,7 +558,7 @@ namespace loos { if (sat_index == N_reference_suite[ddg_index]) { cerr << boost::format( "Warning: satellite suite %s was not defined in file %s") - % field % filename << endl; + % field % suite_definition << endl; continue; } @@ -642,7 +616,7 @@ namespace loos { } else cerr << boost::format( "Warning: Unrecognized record %s in suite definition from %s") - % record % filename << endl; + % record % suite_definition << endl; } // Loop over lines in file @@ -666,7 +640,7 @@ namespace loos { } } - } // defineSuitesFromFile() + } // defineSuites() void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { @@ -701,7 +675,7 @@ namespace loos { zeta_atoms.clear(); // Extract all RNA backbone atoms (P, O5', C5', C4', C3', and O3') into - // one AtomicGroup. Use raw string literal R"()" to avoid escaping " + // one AtomicGroup AtomicGroup backbone = selectAtoms(group, "(name =~ \"^(P|C[345]'|O[35]')$\")"); diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp index 05ad1b7c5..3da6f2762 100644 --- a/src/RnaSuite.hpp +++ b/src/RnaSuite.hpp @@ -32,10 +32,12 @@ namespace loos { //! Class for assigning backbone suites to an RNA /** - * This class acts on an AtomicGroup and assigns backbone suites (as - * defined in Richardson et al. (2008) RNA 14, 465-481) to any RNA residues - * present. It also calculates the "suiteness" score that describes how - * well the residue fits into its assigned suite. + * This class acts on an AtomicGroup and assigns backbone suites to any RNA + * residues present. It also calculates the "suiteness" score that + * describes how well the residue fits into its assigned suite. The + * constructor requires that the user specifies a path to a file defining + * reference suites. 
The suites from Richardson et al. (2008) RNA 14, + * 465-481 are included in $LOOS/share/suitename_definitions.dat */ class RnaSuite { @@ -46,18 +48,13 @@ namespace loos { RnaSuite(const AtomicGroup &group, const string suite_definition); - RnaSuite(const AtomicGroup &group, const double suiteness_cutoff_); - - RnaSuite(const AtomicGroup &group); - RnaSuite(); //! Method to assign residues to backbone suites from Richardson et al. /** - * This method assigns residues to one of the 46 backbone suites - * defined in Richardson et al. (2008) RNA 14, 465-481. The suite of a - * residue is defined from delta of the previous residue to delta of - * the current residue. + * This method assigns residues to one of the reference suites defined + * in the constructor. The suite of a residue is defined from delta of + * the previous residue to delta of the current residue. */ void assignSuitenameSuites(); @@ -68,7 +65,35 @@ namespace loos { */ void calculateBackboneDihedrals(); - //! Method to define suites used for assignment from an existing scheme + //! Method to define suites used for assignment + /** + * This method defines reference suites. The argument must be a path to + * a file containing records consisting of fields with a width of eight + * characters. An example file for the suites defined in + * Richardson et al. (2008) RNA 14, 465-481 is included in + * $LOOS/share/suitename_definitions.dat. Records can be: + * + * suite name ddg delta(i-1) epsilon zeta alpha beta gamma delta(i) + * Define a reference suite with name given in field 2, ddg_index + * given in field 3, and dihedrals of the cluster center given in + * fields 4 through 10. + * + * width delta(i-1) epsilon zeta alpha beta gamma delta + * Define default widths for scaled hyperellipsoid distances. 
+ * + * domsat sat_name dom_name dihedral_index sat_width dom_width + * Define dominant-satellite pair with name of satellite suite in + * field 2, name of dominant suite in field 3, index of dihedral + * dimension with altered width in field 4, width of that dimension + * for satellite suite in field 5, and width of that dimension for + * dominant suite in field 6. Additional dimensions and width can + * be specified in fields 7 through 9, fields 10 through 12, etc. + * + * dihedral min max + * Define allowed ranges for a dihedral. "dihedral" can be one of + * "delta", "epsilon", "zeta", "alpha", "beta", or "gamma". The + * minimum value is given in field 2 and maximum value in field 3. + */ void defineSuites(const string& suite_definition); //! Method to extract RNA backbone atoms from an AtomicGroup @@ -133,9 +158,6 @@ namespace loos { const size_t target_size, const string dihedral_name, const size_t group_index) const; - //! Method to define suites used for assignment from a file - void defineSuitesFromFile(const string& filename); - //! Method to test whether a point is in between two reference points bool isBetweenDomSatPair(const vector &dihedrals, const vector &dominant, const vector &satellite); From 2d2ceadf5323ac1322a96b0d447d90c00c8e5c41 Mon Sep 17 00:00:00 2001 From: "Chapin E. 
Cavender" Date: Thu, 6 Aug 2020 15:15:56 -0400 Subject: [PATCH 41/41] Restore .travis.yml; fix scons conda install and update rna_suites fullhelp --- .travis.yml | 50 ++++++++++++++++++++++++++++++++++++++++++++ Tools/rna_suites.cpp | 17 ++++++++------- share/SConscript | 7 ++++++- 3 files changed, 65 insertions(+), 9 deletions(-) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..ac148f0b6 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,50 @@ +language: cpp + +branches: + only: + - master + +matrix: + include: + - os: linux + - os: osx + osx_image: xcode10.1 + + +#addons: +# apt: +# packages: +# - scons +# - libboost-all-dev +# - libboost-regex-dev +# - libatlas-base-dev +# - libnetcdf-dev +# - swig +# - python3-dev +# - python3-numpy +# - python3-scipy +# - libeigen3-dev +# +#before_install: +# - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update ; fi +## - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install boost; fi +## - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install netcdf; fi +# - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install swig; fi +# - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install doxygen; fi +# - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install graphviz; fi +# - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install scons; fi +# - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install eigen; fi + +install: + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then + wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; + elif [[ "$TRAVIS_OS_NAME" == "osx" ]]; then + wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh; + fi + - bash miniconda.sh -b -p $HOME/miniconda + - source "$HOME/miniconda/etc/profile.d/conda.sh" + - hash -r + - conda config --set always_yes yes --set changeps1 no + +script: + - ./conda_build.sh loos 1 diff --git a/Tools/rna_suites.cpp b/Tools/rna_suites.cpp index 
cccd2c10f..efb7cd531 100644 --- a/Tools/rna_suites.cpp +++ b/Tools/rna_suites.cpp @@ -91,11 +91,12 @@ string fullHelpMessage(void) { " reference suites on the command-line. The format is explained in the next\n" " section. An example of the format that implements the suites as defined in\n" " the software suitename (Richardson et al. (2008) RNA 14, 465-481) is\n" -" included as $PREFIX/share/suitename_definitions.dat, where $PREFIX is the\n" -" PREFIX argument given to \"scons install\". The default scons prefix is\n" -" PREFIX=/opt/LOOS. This suitename_defintions.dat file should be sufficient\n" -" for typical users, but you must specify the path to it as the first\n" -" positional argument.\n" +" included as share/suitename_definitions.dat in the top-level directory of\n" +" the LOOS source tree. If installing within a conda environment, this file\n" +" can also be found in $CONDA_PREFIX/share/loos/suitename_definitions.dat;\n" +" otherwise, it can be found in $LOOS/share/suitename_definitions.dat. 
The\n" +" suitename_definitions.dat file should be sufficient for typical users, but\n" +" you must specify the path to it as the first positional argument.\n" "\n" "SUITE DEFINITON FILE FORMAT\n" " Each line in the file is parsed as a record containing fields with a width\n" @@ -126,14 +127,14 @@ string fullHelpMessage(void) { " is given in field 2 and maximum value in field 3.\n" "\n" "EXAMPLES\n" -" rna_suites $CONDA_PREFIX/share/suitename_defintions.dat foo.pdb foo.dcd\n" +" rna_suites $CONDA_PREFIX/share/loos/suitename_definitions.dat foo.pdb foo.dcd\n" " Assign backbone suites using the install prefix from a conda install.\n" "\n" -" rna_suites -s 'resid <= 10' $CONDA_PREFIX/share/suitename_defintions.dat \\\n" +" rna_suites -s 'resid <= 10' $CONDA_PREFIX/share/loos/suitename_definitions.dat \\\n" " foo.pdb foo.dcd\n" " Assign backbone suites only for the first 10 residues.\n" "\n" -" rna_suites -c 0.001 $CONDA_PREFIX/share/suitename_defintions.dat \\\n" +" rna_suites -c 0.001 $CONDA_PREFIX/share/loos/suitename_definitions.dat \\\n" " foo.pdb foo.dcd\n" " Assign backbone suites using a minimum suiteness of 0.001 for\n" " non-outliers.\n"; diff --git a/share/SConscript b/share/SConscript index b51bf167a..0391a753a 100644 --- a/share/SConscript +++ b/share/SConscript @@ -29,7 +29,12 @@ clone.Prepend(LIBS = [loos]) files = 'suitename_definitions.dat' PREFIX = env['PREFIX'] -share_path = os.path.join(PREFIX, "share") + +if env.USING_CONDA: + share_path = os.path.join(PREFIX, "share", "loos") +else: + share_path = os.path.join(PREFIX, "share") + shared_files = env.Install(share_path, Split(files)) shared_list = Split(files)