diff --git a/.gitignore b/.gitignore
index 6af7d83c3..6527c9ce7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,6 +160,7 @@ Tools/xy_rdf
Tools/model-select
Tools/dihedrals
Tools/esp_mesh
+Tools/rna_suites
Packages/Clustering/**
!Packages/Clustering/*.cpp
!Packages/Clustering/*.hpp
@@ -272,6 +273,7 @@ Tools/coverlap
Tools/subsetter
Tools/xy_rdf
Tools/model-select
+Tools/dihedrals
Packages/ElasticNetworks/heavy-ca
Packages/ElasticNetworks/psf-masses
Packages/ElasticNetworks/vsa
diff --git a/SConstruct b/SConstruct
index bfa0bc7f7..8dc54aab4 100644
--- a/SConstruct
+++ b/SConstruct
@@ -274,7 +274,9 @@ else:
loos_tools = SConscript('Tools/SConscript')
-loos_core = loos + loos_scripts
+loos_share = SConscript('share/SConscript')
+
+loos_core = loos + loos_scripts + loos_share
# Automatically setup build targets based on package_list
diff --git a/Tools/SConscript b/Tools/SConscript
index cdb1d562c..0556e9f77 100644
--- a/Tools/SConscript
+++ b/Tools/SConscript
@@ -34,7 +34,8 @@ apps = apps + ' drifter porcupine ramachandran renum-pdb exposure clipper rebond
apps = apps + ' traj2pdb merge-traj center-molecule contact-time perturb-structure coverlap phase-pdb'
apps = apps + ' big-svd kurskew periodic_box area_per_lipid residue-contact-map'
apps = apps + ' cross-dist fcontacts serialize-selection transition_contacts fixdcd smooth-traj membrane_map packing_score'
-apps = apps + ' mops dibmops xtcinfo model-meta-stats verap lipid_survival multi-rmsds rms-overlap esp_mesh dihedrals'
+apps = apps + ' mops dibmops xtcinfo model-meta-stats verap lipid_survival multi-rmsds rms-overlap'
+apps = apps + ' esp_mesh dihedrals rna_suites'
list = []
diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp
index a412e277c..e52988b3f 100644
--- a/Tools/dihedrals.cpp
+++ b/Tools/dihedrals.cpp
@@ -477,4 +477,4 @@ int main(int argc, char *argv[]) {
for (auto v_fps : vv_filePtrs)
for (auto p_ofs : v_fps)
p_ofs->close();
-}
\ No newline at end of file
+}
diff --git a/Tools/rna_suites.cpp b/Tools/rna_suites.cpp
new file mode 100644
index 000000000..efb7cd531
--- /dev/null
+++ b/Tools/rna_suites.cpp
@@ -0,0 +1,248 @@
+/*
+ rna_suites.cpp
+
+ Assigns backbone suites to RNAs based on backbone dihedrals
+
+ Chapin E. Cavender 2020-03
+*/
+
+/*
+
+ This file is part of LOOS.
+
+ LOOS (Lightweight Object-Oriented Structure library)
+ Copyright (c) 2008-2020 Tod D. Romo & Alan Grossfield
+ Department of Biochemistry and Biophysics
+ School of Medicine & Dentistry, University of Rochester
+
+ This package (LOOS) is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation under version 3 of the License.
+
+ This package is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+*/
+
+#include
+
+using namespace std;
+using namespace loos;
+
+namespace opts = loos::OptionsFramework;
+namespace po = loos::OptionsFramework::po;
+
+string fullHelpMessage(void) {
+
+ string full_help_message =
+"SYNOPSIS\n"
+" Assign backbone suites to RNAs based on backbone dihedrals.\n"
+"\n"
+"DESCRIPTION\n"
+" The goal of this tool is to assign continuous RNA dinucleotides to a\n"
+" cluster called a \"suite\" based on the conformation of backbone dihedrals.\n"
+" The idea comes from Richardson et al. (2008) RNA 14, 465-481. The\n"
+" dinucleotide for a residue runs from delta (C5'-C4'-C3'-O3') of the previous\n"
+" residue to delta of the current residue, encompassing seven continuous\n"
+" dihedrals. A suite is a pre-defined cluster in this 7D space, named by a\n"
+" two-character string. Examples are \"1a\" or \"5z\".\n"
+"\n"
+" The first step is to search the given selection for RNA backbone atoms, i.e.\n"
+" atoms named \"P\", \"O5'\", \"C5'\", \"C4'\", \"C3'\", or \"O3'\". These atoms are\n"
+" split by residue. Valid dinucleotides are sets of delta-to-delta backbone\n"
+" atoms with sequential resids. Once the set of valid dinucleotides is\n"
+" determined, the tool will loop over the trajectory and assign each\n"
+" dinucleotide to a suite for each frame.\n"
+"\n"
+" Suite assignment occurs in two stages. The clusters are well-separated in\n"
+" the 3D subspace of delta(i-1), delta, and gamma. So the first stage is to\n"
+" assign each delta to one of two ranges of values consistent with either a\n"
+" C3'-endo (3) or C2'-endo (2) sugar pucker and to assign gamma to one of\n"
+" three ranges of values: gauche plus (p), gauche minus (m), or trans (t). The\n"
+" result is a three-character string called a ddg index. Examples are \"33p\"\n"
+" or \"23t\". Then, the dinucleotide is assigned to one of a possible set of\n"
+" suites associated with its ddg index based on a scaled hyperellipsoid\n"
+" distance in the dual 4D subspace of epsilon, zeta, alpha, and beta.\n"
+"\n"
+" Some suites have overlapping hyperellipsoids of different sizes. The wider\n"
+" suite is called a dominant suite, and the narrower suite is called a\n"
+" satellite suite. These cases are handled by rescaling the hyperellipsoid\n"
+" distance along the dimensions in which the overlap occurs.\n"
+"\n"
+" If a dinucleotide doesn't fit into one of the allowed ranges for a dihedral,\n"
+" it is assigned as an outlier and given a suite name \"!s\", where \"s\" is the\n"
+" first character of the name of the deviant dihedral, e.g. \"!a\" for a bad\n"
+" alpha. If the dinucleotide is not close to any of the reference suites, it \n"
+" is also assigned as an outlier and given a suite name \"!!\".\n"
+"\n"
+" After assignment, each dinucleotide is given a goodness-of-fit score called \n"
+" the suiteness based on the scaled 7D hyperellipsoid distance to its assigned\n"
+" suite. A suiteness of one indicates that the dinucleotide is at the cluster\n"
+" center. Lower suiteness indicates that the dinucleotide is farther from the\n"
+" cluster center. An outlier has a suiteness of zero, and assigned\n"
+" dinucleotides have a minimum suiteness score (set by the -c option) to\n"
+" differentiate them from outliers.\n"
+"\n"
+" It is necessary to specify a path to a file containing definitions for the\n"
+" reference suites on the command-line. The format is explained in the next\n"
+" section. An example of the format that implements the suites as defined in\n"
+" the software suitename (Richardson et al. (2008) RNA 14, 465-481) is\n"
+" included as share/suitename_definitions.dat in the top-level directory of\n"
+" the LOOS source tree. If installing within a conda environment, this file\n"
+" can also be found in $CONDA_PREFIX/share/loos/suitename_definitions.dat;\n"
+" otherwise, it can be found in $LOOS/share/suitename_definitions.dat. The\n"
+" suitename_defintions.dat file should be sufficient for typical users, but\n"
+" you must specify the path to it as the first positional argument.\n"
+"\n"
+"SUITE DEFINITON FILE FORMAT\n"
+" Each line in the file is parsed as a record containing fields with a width\n"
+" of eight characters. Blank lines and lines beginning with \"#\" are ignored.\n"
+" The first field specifies the type of record and must be one of \"suite\",\n"
+" \"width\", \"domsat\", \"delta\", \"epsilon\", \"zeta\", \"alpha\", \"beta\", or \"gamma\".\n"
+" These records and their associated fields are described below.\n"
+"\n"
+" suite name ddg delta(i-1) epsilon zeta alpha beta gamma delta(i)\n"
+" Define a reference suite with suite name given in field 2, ddg index\n"
+" given in field 3, and dihedrals of the cluster center given in fields 4\n"
+" through 10.\n"
+"\n"
+" width delta(i-1) epsilon zeta alpha beta gamma delta\n"
+" Define default widths for scaled hyperellipsoid distances.\n"
+"\n"
+" domsat sat_name dom_name dihedral_index sat_width dom_width\n"
+" Define dominant-satellite pair with name of satellite suite in field 2,\n"
+" name of dominant suite in field 3, index of dihedral dimension with\n"
+" altered width in field 4, width of that dimension for satellite suite\n"
+" in field 5, and width of that dimension for dominant suite in field 6.\n"
+" Additional dimensions and widths can be specified in fields 7 through 9,\n"
+" fields 10 through 12, etc.\n"
+"\n"
+" dihedral min max\n"
+" Define allowed ranges for a dihedral. \"dihedral\" can be one of \"delta\",\n"
+" \"epsilon\", \"zeta\", \"alpha\", \"beta\", or \"gamma\". The minimum value\n"
+" is given in field 2 and maximum value in field 3.\n"
+"\n"
+"EXAMPLES\n"
+" rna_suites $CONDA_PREFIX/share/loos/suitename_defintions.dat foo.pdb foo.dcd\n"
+" Assign backbone suites using the install prefix from a conda install.\n"
+"\n"
+" rna_suites -s 'resid <= 10' $CONDA_PREFIX/share/loos/suitename_defintions.dat \\\n"
+" foo.pdb foo.dcd\n"
+" Assign backbone suites only for the first 10 residues.\n"
+"\n"
+" rna_suites -c 0.001 $CONDA_PREFIX/share/loos/suitename_defintions.dat \\\n"
+" foo.pdb foo.dcd\n"
+" Assign backbone suites using a minimum suiteness of 0.001 for\n"
+" non-outliers.\n";
+
+ return full_help_message;
+
+}
+
+class ToolOptions : public opts::OptionsPackage {
+
+public:
+
+ ToolOptions() {}
+
+ void addGeneric(po::options_description& o) {
+
+ o.add_options()
+ ("suiteness_cutoff,c",
+ po::value(&suiteness_cutoff)->default_value(0.01),
+ "Cutoff for the suiteness score of non-outliers")
+ ;
+
+ }
+
+ string print() const {
+
+ ostringstream oss;
+ oss << boost::format(
+ "suiteness_cutoff=%f"
+ ) % suiteness_cutoff;
+ return (oss.str());
+
+ }
+
+ double suiteness_cutoff;
+
+}; // ToolOptions
+
+// Tool functions
+
+int main(int argc, char *argv[]) {
+
+ // Get command-line input
+ string header = invocationHeader(argc, argv);
+
+ // Set up tool options
+ opts::BasicOptions *bopts = new opts::BasicOptions(fullHelpMessage());
+ opts::BasicSelection *sopts = new opts::BasicSelection("!hydrogen");
+ opts::RequiredArguments *ropts = new opts::RequiredArguments;
+ ropts->addArgument("suite_def", "suite_definition_file");
+ opts::TrajectoryWithFrameIndices *tropts =
+ new opts::TrajectoryWithFrameIndices;
+ ToolOptions *topts = new ToolOptions;
+
+ opts::AggregateOptions options;
+ options.add(bopts).add(sopts).add(ropts).add(tropts).add(topts);
+ if (!options.parse(argc, argv))
+ exit(-1);
+
+ // Assign tool options to variables
+ const double suiteness_cutoff = topts->suiteness_cutoff;
+
+ // Print command-line input
+ cout << "# " << header << endl;
+
+ // Build LOOS system and generate atom selection
+ AtomicGroup model = tropts->model;
+ pTraj traj = tropts->trajectory;
+ vector indices = tropts->frameList();
+ AtomicGroup rna_atoms = selectAtoms(model, sopts->selection);
+
+ // Create RNASuite object from RNA atoms
+ string suite_definition = ropts->value("suite_def");
+ RnaSuite rna_suite = RnaSuite(rna_atoms, suite_definition, suiteness_cutoff);
+ vector suite_resids = rna_suite.getSuiteResids();
+ vector suite_resnames = rna_suite.getSuiteResnames();
+ //rna_suite.printReferenceSuites();
+
+ // Print dihedrals
+ //rna_suite.printBackboneAtoms();
+
+ // Print column headers
+ cout << "# Frame Resid Resname Suite DDG_index Suiteness" << endl;
+
+ // Loop over trajectory
+ vector suite_names;
+ vector suite_ddgs;
+ vector suiteness;
+ uint t = 0;
+ for (vector::iterator i = indices.begin(); i != indices.end(); ++i) {
+
+ traj->readFrame(*i);
+ traj->updateGroupCoords(model);
+
+ rna_suite.calculateBackboneDihedrals();
+ rna_suite.assignSuitenameSuites();
+ suite_names = rna_suite.getSuiteNames();
+ suite_ddgs = rna_suite.getSuiteDDGs();
+ suiteness = rna_suite.getSuitenessScores();
+
+ for (uint j = 0; j < suite_resids.size(); ++j)
+ cout << boost::format("%5d %5d %3s %2s %2s %8.6f") % t
+ % suite_resids[j] % suite_resnames[j] % suite_names[j]
+ % suite_ddgs[j] % suiteness[j] << endl;
+
+ ++t;
+
+ }
+
+}
+
diff --git a/share/SConscript b/share/SConscript
new file mode 100644
index 000000000..0391a753a
--- /dev/null
+++ b/share/SConscript
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# This file is part of LOOS.
+#
+# LOOS (Lightweight Object-Oriented Structure library)
+# Copyright (c) 2008, Tod D. Romo
+# Department of Biochemistry and Biophysics
+# School of Medicine & Dentistry, University of Rochester
+#
+# This package (LOOS) is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation under version 3 of the License.
+#
+# This package is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import os
+import sys
+
+Import('env')
+Import('loos')
+
+clone = env.Clone()
+clone.Prepend(LIBS = [loos])
+
+files = 'suitename_definitions.dat'
+PREFIX = env['PREFIX']
+
+if env.USING_CONDA:
+ share_path = os.path.join(PREFIX, "share", "loos")
+else:
+ share_path = os.path.join(PREFIX, "share")
+
+shared_files = env.Install(share_path, Split(files))
+
+shared_list = Split(files)
+
+Return('shared_list')
diff --git a/share/suitename_definitions.dat b/share/suitename_definitions.dat
new file mode 100644
index 000000000..1969a283f
--- /dev/null
+++ b/share/suitename_definitions.dat
@@ -0,0 +1,82 @@
+# Reference suite dihedrals
+# Name DDG Delta-1 Epsilon Zeta Alpha Beta Gamma Delta
+suite 1a 33p 81.495 212.250 288.831 294.967 173.990 53.550 81.035
+suite 1m 33p 83.513 218.120 291.593 292.247 222.300 58.067 86.093
+suite 1L 33p 85.664 245.014 268.257 303.879 138.164 61.950 79.457
+suite &a 33p 82.112 190.682 264.945 295.967 181.839 51.455 81.512
+suite 7a 33p 83.414 217.400 222.006 302.856 160.719 49.097 82.444
+suite 3a 33p 85.072 216.324 173.276 289.320 164.132 45.876 84.956
+suite 9a 33p 83.179 210.347 121.474 288.568 157.268 49.347 81.047
+suite 1g 33p 80.888 218.636 290.735 167.447 159.565 51.326 85.213
+suite 7d 33p 83.856 238.750 256.875 69.562 170.200 52.800 85.287
+suite 3d 33p 85.295 244.085 203.815 65.880 181.130 54.680 86.035
+suite 5d 33p 79.671 202.471 63.064 68.164 143.450 49.664 82.757
+suite 3g 33p 84.000 195.000 146.000 170.000 170.000 52.000 84.000
+suite 1e 33t 80.514 200.545 280.510 249.314 82.662 167.890 85.507
+suite 1c 33t 80.223 196.591 291.299 153.060 194.379 179.061 83.648
+suite 1f 33t 81.395 203.030 294.445 172.195 138.540 175.565 84.470
+suite 5j 33t 87.417 223.558 80.175 66.667 109.150 176.475 83.833
+suite 5n 33t 86.055 246.502 100.392 73.595 213.752 183.395 85.483
+suite 33m
+suite 1b 32p 84.215 215.014 288.672 300.420 177.476 58.307 144.841
+suite 1[ 32p 82.731 220.463 288.665 296.983 221.654 54.213 143.771
+suite 3b 32p 84.700 226.400 168.336 292.771 177.629 48.629 147.950
+suite 1z 32p 83.358 206.042 277.567 195.700 161.600 50.750 145.258
+suite 5z 32p 82.614 206.440 52.524 163.669 148.421 50.176 147.590
+suite 7p 32p 84.285 236.600 220.400 68.300 200.122 53.693 145.730
+suite 5p 32p 84.457 213.286 69.086 75.500 156.671 57.486 147.686
+suite 1t 32t 81.200 199.243 288.986 180.286 194.743 178.200 147.386
+suite 5q 32t 82.133 204.933 69.483 63.417 115.233 176.283 145.733
+suite 1o 32m 83.977 216.508 287.192 297.254 225.154 293.738 150.677
+suite 7r 32m 84.606 232.856 248.125 63.269 181.975 295.744 149.744
+suite 5r 32m 83.000 196.900 65.350 60.150 138.425 292.550 154.275
+suite 2a 23p 145.399 260.339 288.756 288.444 192.733 53.097 84.067
+suite 4a 23p 146.275 259.783 169.958 298.450 169.583 50.908 83.967
+suite 0a 23p 149.286 223.159 139.421 284.559 158.107 47.900 84.424
+suite #a 23p 148.006 191.944 146.231 289.288 150.781 42.419 84.956
+suite 4g 23p 148.028 256.922 165.194 204.961 165.194 49.383 82.983
+suite 6g 23p 145.337 262.869 79.588 203.863 189.688 58.000 84.900
+suite 8d 23p 148.992 270.596 240.892 62.225 176.271 53.600 87.262
+suite 4d 23p 149.822 249.956 187.678 80.433 198.133 61.000 89.378
+suite 6d 23p 146.922 241.222 88.894 59.344 160.683 52.333 83.417
+suite 2g 23p 141.900 258.383 286.517 178.267 165.217 48.350 84.783
+suite 2h 23t 147.782 260.712 290.424 296.200 177.282 175.594 86.565
+suite 4n 23t 143.722 227.256 203.789 73.856 216.733 194.444 80.911
+suite 0i 23t 148.717 274.683 100.283 80.600 248.133 181.817 82.600
+suite 6n 23t 150.311 268.383 84.972 63.811 191.483 176.644 85.600
+suite 6j 23t 141.633 244.100 66.056 71.667 122.167 182.200 83.622
+suite 0k 23m 149.070 249.780 111.520 278.370 207.780 287.820 86.650
+suite 2[ 22p 146.383 259.402 291.275 291.982 210.048 54.412 147.760
+suite 4b 22p 145.256 244.622 162.822 294.159 171.630 45.900 145.804
+suite 0b 22p 147.593 248.421 112.086 274.943 164.764 56.843 146.264
+suite 4p 22p 150.077 260.246 213.785 71.900 207.638 56.715 148.131
+suite 6p 22p 146.415 257.831 89.597 67.923 173.051 55.513 147.623
+suite 2z 22p 142.900 236.550 268.800 180.783 185.133 54.467 143.350
+suite 4s 22t 149.863 247.562 170.488 277.938 84.425 176.413 148.087
+suite 2u 22t 143.940 258.200 298.240 279.640 183.680 183.080 145.120
+suite 2o 22m 147.342 256.475 295.508 287.408 194.525 293.725 150.458
+# Default widths for hyperellipsoid distance
+# Delta-1 Epsilon Zeta Alpha Beta Gamma Delta
+width 28.000 60.000 55.000 50.000 70.000 35.000 28.000
+# Dominant-satellite pairs. Must come after suite and width
+# Satname Domname DihedralSatwidthDomwidthDihedralSatwidthDomwidth
+domsat 1m 1a 4 32.000 64.000
+domsat 1L 1a 1 18.000 70.000 4 18.000 70.000
+domsat &a 1a 1 20.000 60.000 2 20.000 60.000
+domsat 1f 1c 4 47.000 65.000
+domsat 1[ 1b 4 34.000 56.000
+domsat 4a 0a 1 40.000 50.000 2 40.000 50.000
+domsat #a 0a 1 26.000 36.000 2 26.000 36.000
+domsat 0i 6n 4 60.000 60.000
+domsat 6j 6n 4 60.000 60.000
+# Filter ranges
+# Min Max
+delta 60.000 105.000
+delta 125.000 165.000
+epsilon 155.000 310.000
+zeta 25.000 335.000
+alpha 25.000 335.000
+beta 50.000 290.000
+gamma 20.000 95.000
+gamma 140.000 215.000
+gamma 260.000 335.000
diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp
new file mode 100644
index 000000000..1b2259309
--- /dev/null
+++ b/src/RnaSuite.cpp
@@ -0,0 +1,1038 @@
+/*
+ This file is part of LOOS.
+
+ LOOS (Lightweight Object-Oriented Structure library)
+ Copyright (c) 2008, Tod D. Romo, Alan Grossfield
+ Department of Biochemistry and Biophysics
+ School of Medicine & Dentistry, University of Rochester
+
+ This package (LOOS) is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation under version 3 of the License.
+
+ This package is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+*/
+
+#include
+
+using namespace std;
+
+namespace loos {
+
+ // |------------------------------------------------------------------------
+ // | Constructors
+ // |------------------------------------------------------------------------
+
+ RnaSuite::RnaSuite(const AtomicGroup &group, const string suite_definition,
+ const double suiteness_cutoff_) {
+
+ suiteness_cutoff = suiteness_cutoff_;
+ defineSuites(suite_definition);
+ extractRnaBackboneAtoms(group);
+
+ }
+
+ RnaSuite::RnaSuite(const AtomicGroup &group,
+ const string suite_definition) {
+
+ suiteness_cutoff = 0.01;
+ defineSuites(suite_definition);
+ extractRnaBackboneAtoms(group);
+
+ }
+
+ RnaSuite::RnaSuite() {
+
+ suiteness_cutoff = 0.01;
+
+ }
+
+ // |------------------------------------------------------------------------
+ // | Methods
+ // |------------------------------------------------------------------------
+
+ size_t RnaSuite::assignDDGIndex(double dihedral, vector &min,
+ vector &max, uint increment, uint &ddg_index) {
+
+ size_t i = 0;
+ while (i < min.size()) {
+
+ if (dihedral >= min[i] && dihedral <= max[i]) {
+
+ ddg_index += i * increment;
+ return i;
+
+ }
+
+ ++i;
+
+ }
+
+ return i;
+
+ } // assignDDGIndex()
+
+ void RnaSuite::assignSuitenameSuites() {
+
+ size_t N_delta = delta_min.size();
+ size_t N_gamma = gamma_min.size();
+ size_t N_dg = N_delta * N_gamma;
+ vector suite(7);
+
+ // Index into delta(i-1), delta, gamma clusters
+ uint ddg_index;
+
+ // Scaled 4D hyperellipsoid distance in epsilon, zeta, alpha, beta
+ double dist_ezab;
+
+ // Closest scaled 4D hyperellipsoid distance to a cluster and index of
+ // the associated cluster
+ double min_dist_ezab;
+ size_t min_index;
+
+ // Closest scaled 4D hyperellipsoid distance to a dominant cluster
+ double dom_min_dist_ezab;
+ size_t dom_min_index;
+
+ // Closest scaled 4D hyperellipsoid distance to a non-dominant cluster
+ double sat_min_dist_ezab;
+ size_t sat_min_index;
+
+ // Index into vector of widths for pair of dominant-satellite clusters
+ size_t dom_sat_index;
+
+ // Number of clusters this dinucleotide could belong to
+ uint candidates;
+
+ // Scaled 7D hyperellipsoid distance
+ double dist_7;
+
+ // Index of the assigned suite
+ size_t assigned_suite_index;
+
+ // Goodness-of-fit for assigned suite
+ double suiteness_score;
+
+ if (suite_dihedrals.empty()) {
+
+ cerr << "Warning: backbone dihedrals are empty" << endl;
+ return;
+
+ }
+
+ // Initialize vectors of suite names and suiteness scores
+ suite_names.clear();
+ suite_ddgs.clear();
+ suiteness.clear();
+ suite_names.reserve(N_suite);
+ suite_ddgs.reserve(N_suite);
+ suiteness.reserve(N_suite);
+
+ for (size_t i = 0; i < N_suite; ++i) {
+
+ // Assign delta(j-1), delta, gamma index. These 3 dihedrals have
+ // 12 clusters that are independent of the other 4 dihedrals.
+ ddg_index = 0;
+
+ // Filter on 5' delta. Values outside of this range are
+ // indicative of incorrect stereochemistry in the ribose.
+ if (assignDDGIndex(suite_dihedrals[i][0], delta_min, delta_max,
+ N_dg, ddg_index) == N_delta) {
+
+ suite_names.push_back("!d");
+ suite_ddgs.push_back("!!!");
+ suiteness.push_back(0.0);
+ continue;
+
+ }
+
+ // Filter on 3' delta
+ if (assignDDGIndex(suite_dihedrals[i][6], delta_min, delta_max,
+ N_gamma, ddg_index) == N_delta) {
+
+ suite_names.push_back("!d");
+ suite_ddgs.push_back("!!!");
+ suiteness.push_back(0.0);
+ continue;
+
+ }
+
+ // Filter on gamma
+ if (assignDDGIndex(suite_dihedrals[i][5], gamma_min, gamma_max, 1,
+ ddg_index) == N_gamma) {
+
+ suite_names.push_back("!g");
+ suite_ddgs.push_back("!!!");
+ suiteness.push_back(0.0);
+ continue;
+
+ }
+
+ // Filter on epsilon. Values outside of this range are
+ // indicative of a misfit sugar pucker.
+ if (suite_dihedrals[i][1] < ezab_min[0]
+ || suite_dihedrals[i][1] > ezab_max[0]) {
+
+ suite_names.push_back("!e");
+ suite_ddgs.push_back("!!!");
+ suiteness.push_back(0.0);
+ continue;
+
+ }
+
+ // Filter on zeta
+ if (suite_dihedrals[i][2] < ezab_min[1]
+ || suite_dihedrals[i][2] > ezab_max[1]) {
+
+ suite_names.push_back("!z");
+ suite_ddgs.push_back("!!!");
+ suiteness.push_back(0.0);
+ continue;
+
+ }
+
+ // Filter on alpha
+ if (suite_dihedrals[i][3] < ezab_min[2]
+ || suite_dihedrals[i][3] > ezab_max[2]) {
+
+ suite_names.push_back("!a");
+ suite_ddgs.push_back("!!!");
+ suiteness.push_back(0.0);
+ continue;
+
+ }
+
+ // Filter on beta
+ if (suite_dihedrals[i][4] < ezab_min[3]
+ || suite_dihedrals[i][4] > ezab_max[3]) {
+
+ suite_names.push_back("!b");
+ suite_ddgs.push_back("!!!");
+ suiteness.push_back(0.0);
+ continue;
+
+ }
+
+ // If there are no clusters associated with this ddg_index, then
+ // this is an outlier
+ if (N_reference_suite[ddg_index] == 0) {
+
+ suite_names.push_back("!!");
+ suite_ddgs.push_back(reference_ddgs[ddg_index]);
+ suiteness.push_back(0.0);
+ continue;
+
+ }
+
+ // Find closest cluster in epsilon, zeta, alpha, beta
+ // Largest distance in 7D is 688.66^3, so 10^9 should be safe
+ min_dist_ezab = 999999999.0;
+ dom_min_dist_ezab = 999999999.0;
+ sat_min_dist_ezab = 999999999.0;
+ min_index = N_reference_suite[ddg_index];
+ dom_min_index = N_reference_suite[ddg_index];
+ sat_min_index = N_reference_suite[ddg_index];
+ candidates = 0;
+
+ for (size_t j = 0; j < N_reference_suite[ddg_index]; ++j) {
+
+ // Get 4D scaled hyperellipsoid distance
+ dist_ezab = hyperellipsoidDist(suite_dihedrals[i],
+ reference_dihedrals[ddg_index][j], dihedral_width, 1, 4);
+
+ // Get closest cluster
+ if (dist_ezab < min_dist_ezab) {
+
+ min_dist_ezab = dist_ezab;
+ min_index = j;
+
+ }
+
+ // Get closest non-dominant cluster
+ if (dominant_suites[ddg_index][j] != j
+ && dist_ezab < sat_min_dist_ezab) {
+
+ sat_min_dist_ezab = dist_ezab;
+ sat_min_index = j;
+
+ }
+
+ // If 4D distance < 1, this reference suite is a candidate
+ if (dist_ezab < 1) {
+
+ ++candidates;
+
+ // Is this candidate a dominant cluster?
+ if (dominant_suites[ddg_index][j] == j) {
+
+ dom_min_dist_ezab = dist_ezab;
+ dom_min_index = j;
+
+ }
+
+ }
+
+ } // loop over reference suites
+
+ // Assign membership to a reference suite
+
+ // If there are multiple candidates, and the two canidates are
+ // a dominant-satellite pair, then reweight distances
+ if (candidates > 1 && dom_min_index != N_reference_suite[ddg_index]
+ && sat_min_index != N_reference_suite[ddg_index]
+ && dominant_suites[ddg_index][sat_min_index] == dom_min_index) {
+
+ // Is the DNMP in between the dominant and satellite suites?
+ if (isBetweenDomSatPair(suite_dihedrals[i],
+ reference_dihedrals[ddg_index][dom_min_index],
+ reference_dihedrals[ddg_index][sat_min_index])) {
+
+ // Rescale distances from point to dominant and satellite
+ // suites by ratio of distances from suite centers to
+ // boundary plane and assign to closest of the two
+ dom_sat_index = dom_sat_pair_index[ddg_index][sat_min_index];
+ if (hyperellipsoidDist(suite_dihedrals[i],
+ reference_dihedrals[ddg_index][sat_min_index],
+ satellite_width[dom_sat_index], 1, 4)
+ <= hyperellipsoidDist(suite_dihedrals[i],
+ reference_dihedrals[ddg_index][dom_min_index],
+ dominant_width[dom_sat_index], 1, 4))
+
+ assigned_suite_index = sat_min_index;
+
+ else assigned_suite_index = dom_min_index;
+
+
+ }
+
+ else {
+
+ // Assign to closer of dominant or satellite suite
+ if (sat_min_dist_ezab <= dom_min_dist_ezab)
+ assigned_suite_index = sat_min_index;
+ else assigned_suite_index = dom_min_index;
+
+ }
+
+ }
+
+ // If there is zero or one candidate or multiple candidates but no
+ // dominant-satellite pair, then assign to the closest suite
+ else assigned_suite_index = min_index;
+
+ // Make a final decision on whether this is an outlier using 7D
+ // hyperellipsoid distance
+ dist_7 = hyperellipsoidDist(suite_dihedrals[i],
+ reference_dihedrals[ddg_index][assigned_suite_index],
+ dihedral_width, 0, 6);
+
+ if (dist_7 < 1) {
+
+ suite_names.push_back(
+ reference_names[ddg_index][assigned_suite_index]);
+ suite_ddgs.push_back(reference_ddgs[ddg_index]);
+ suiteness_score = (1 + cos(M_PI * cbrt(dist_7))) / 2.0;
+ if (suiteness_score < suiteness_cutoff)
+ suiteness_score = suiteness_cutoff;
+ suiteness.push_back(suiteness_score);
+
+ } else {
+
+ suite_names.push_back("!!");
+ suite_ddgs.push_back(reference_ddgs[ddg_index]);
+ suiteness.push_back(0.0);
+
+ }
+
+ } // loop over suites
+
+ } // assignSuitenameSuites()
+
+ void RnaSuite::calculateBackboneDihedrals() {
+
+ double prev_delta;
+ vector suite(7);
+
+ // Clear vector of doubles for suite backbone dihedrals
+ suite_dihedrals.clear();
+
+ for (size_t i = 0; i < N_continuous_group; ++i) {
+
+ prev_delta = calculateDihedral(delta_atoms[i][0]);
+
+ for (size_t j = 0; j < N_residue[i]; ++j) {
+
+ suite[0] = prev_delta;
+ suite[1] = calculateDihedral(epsilon_atoms[i][j]);
+ suite[2] = calculateDihedral(zeta_atoms[i][j]);
+ suite[3] = calculateDihedral(alpha_atoms[i][j]);
+ suite[4] = calculateDihedral(beta_atoms[i][j]);
+ suite[5] = calculateDihedral(gamma_atoms[i][j]);
+ prev_delta = calculateDihedral(delta_atoms[i][j + 1]);
+ suite[6] = prev_delta;
+ suite_dihedrals.push_back(suite);
+
+ }
+
+ }
+
+ } // calculateBackboneDihedrals()
+
+ double RnaSuite::calculateDihedral(const AtomicGroup &group) {
+
+ double dihedral = Math::torsion(group[0], group[1], group[2], group[3]);
+ if (dihedral < 0.0) dihedral += 360.0;
+ return dihedral;
+
+ } // calculateDihedral()
+
+ void RnaSuite::checkContinuousGroupSize(
+ const vector > &group_vector,
+ const size_t target_size, const string dihedral_name) const {
+
+ if (group_vector.size() != target_size) {
+
+ cerr << boost::format("Error: different number of continuous "
+ "groups for alpha (%d) and %s (%d)") % target_size
+ % dihedral_name % group_vector.size() << endl;
+ throw(LOOSError());
+
+ }
+
+ } // checkContinuousGroupSize()
+
+ void RnaSuite::checkResidueSize(
+ const vector &residue_vector,
+ const size_t target_size, const string dihedral_name,
+ const size_t group_index) const {
+
+ if (residue_vector.size() != target_size) {
+
+ cerr << boost::format("Error: different number of residues in "
+ "continuous group %d for alpha (%d) and %s (%d)") % group_index
+ % target_size % dihedral_name % residue_vector.size() << endl;
+ throw(LOOSError());
+
+ }
+
+ } // checkResidueSize()
+
+ void RnaSuite::defineSuites(const string& suite_definition) {
+
+ // Clear vectors for reference suites
+ reference_dihedrals.clear();
+ reference_names.clear();
+ reference_ddgs.clear();
+ dihedral_width.clear();
+ dominant_suites.clear();
+ dom_sat_pair_index.clear();
+ dominant_width.clear();
+ satellite_width.clear();
+ delta_min.clear();
+ delta_max.clear();
+ gamma_min.clear();
+ gamma_max.clear();
+ ezab_min = vector(4);
+ ezab_max = vector(4);
+ N_reference_ddg = 0;
+ N_reference_suite.clear();
+
+ // Temporary variables for parsing lines from the definition file
+ size_t ddg_index;
+ size_t dom_index;
+ size_t sat_index;
+ size_t position;
+ string field;
+ string line;
+ string record;
+ vector dihedrals(7);
+
+ // Store dominant-satellite pairs
+ vector domsat_ddg;
+ vector domsat_dom;
+ vector domsat_sat;
+ vector > domsat_dihedral;
+ vector > domsat_dom_width;
+ vector > domsat_sat_width;
+
+ // Read file contents
+ ifstream ifs(suite_definition.c_str());
+ if (!ifs) throw(FileOpenError(suite_definition));
+
+ while (getline(ifs, line)) {
+
+ record = parseStringAs(line, 0, 8);
+
+ if (record.empty() || record[0] == '#') continue;
+
+ else if (record == "suite") {
+
+ // Define a reference suite
+
+ // Get delta delta gamma cluster
+ field = parseStringAs(
+ line, 16, min((size_t) 8, line.size() - 16));
+ ddg_index = N_reference_ddg;
+ for (size_t i = 0; i < N_reference_ddg; ++i)
+ if (field == reference_ddgs[i]) {
+ ddg_index = i;
+ break;
+ }
+
+ // This is a new DDG cluster
+ if (ddg_index == N_reference_ddg) {
+ reference_ddgs.push_back(field);
+ reference_dihedrals.push_back(vector >());
+ reference_names.push_back(vector());
+ ++N_reference_ddg;
+ N_reference_suite.push_back(0);
+ }
+
+ // Get suite name
+ field = parseStringAs(line, 8, 8);
+ if (field.empty()) continue;
+ reference_names[ddg_index].push_back(field);
+
+ // Get reference suite dihedrals
+ dihedrals[0] = parseStringAs(line, 24, 8);
+ dihedrals[1] = parseStringAs(line, 32, 8);
+ dihedrals[2] = parseStringAs(line, 40, 8);
+ dihedrals[3] = parseStringAs(line, 48, 8);
+ dihedrals[4] = parseStringAs(line, 56, 8);
+ dihedrals[5] = parseStringAs(line, 64, 8);
+ dihedrals[6] = parseStringAs(line, 72, 8);
+ reference_dihedrals[ddg_index].push_back(dihedrals);
+
+ ++N_reference_suite[ddg_index];
+
+ } else if (record == "width") {
+
+ // Get default widths for hyperellipsoid distance
+ dihedral_width.push_back(parseStringAs(line, 8, 8));
+ dihedral_width.push_back(parseStringAs(line, 16, 8));
+ dihedral_width.push_back(parseStringAs(line, 24, 8));
+ dihedral_width.push_back(parseStringAs(line, 32, 8));
+ dihedral_width.push_back(parseStringAs(line, 40, 8));
+ dihedral_width.push_back(parseStringAs(line, 48, 8));
+ dihedral_width.push_back(parseStringAs(line, 56, 8));
+
+ } else if (record == "domsat") {
+
+ // Define a dominant-satellite pair
+
+ // Get index of dominant suite
+ field = parseStringAs(line, 16, 8);
+ ddg_index = N_reference_ddg;
+ for (size_t i = 0; i < N_reference_ddg; ++i) {
+ for (size_t j = 0; j < N_reference_suite[i]; ++j)
+ if (field == reference_names[i][j]) {
+ ddg_index = i;
+ dom_index = j;
+ break;
+ }
+ if (ddg_index != N_reference_ddg) break;
+ }
+
+ if (ddg_index == N_reference_ddg) {
+ cerr << boost::format(
+ "Warning: dominant suite %s was not defined in file %s")
+ % field % suite_definition << endl;
+ continue;
+ }
+
+ // Get index of satellite suite
+ field = parseStringAs(line, 8, 8);
+ sat_index = N_reference_suite[ddg_index];
+ for (size_t j = 0; j < N_reference_suite[ddg_index]; ++j)
+ if (field == reference_names[ddg_index][j]) {
+ sat_index = j;
+ break;
+ }
+
+ if (sat_index == N_reference_suite[ddg_index]) {
+ cerr << boost::format(
+ "Warning: satellite suite %s was not defined in file %s")
+ % field % suite_definition << endl;
+ continue;
+ }
+
+ domsat_ddg.push_back(ddg_index);
+ domsat_dom.push_back(dom_index);
+ domsat_sat.push_back(sat_index);
+
+ // Loop over dihedrals with alternate widths
+ vector dihedral_indices;
+ vector dom_width;
+ vector sat_width;
+ position = 24;
+ while (position < line.size()) {
+ dihedral_indices.push_back(
+ parseStringAs(line, position, 8));
+ sat_width.push_back(
+ parseStringAs(line, position + 8, 8));
+ dom_width.push_back(
+ parseStringAs(line, position + 16, 8));
+ position += 24;
+ }
+ domsat_dihedral.push_back(dihedral_indices);
+ domsat_dom_width.push_back(dom_width);
+ domsat_sat_width.push_back(sat_width);
+
+ } else if (record == "delta") {
+
+ delta_min.push_back(parseStringAs(line, 8, 8));
+ delta_max.push_back(parseStringAs(line, 16, 8));
+
+ } else if (record == "epsilon") {
+
+ ezab_min[0] = parseStringAs(line, 8, 8);
+ ezab_max[0] = parseStringAs(line, 16, 8);
+
+ } else if (record == "zeta") {
+
+ ezab_min[1] = parseStringAs(line, 8, 8);
+ ezab_max[1] = parseStringAs(line, 16, 8);
+
+ } else if (record == "alpha") {
+
+ ezab_min[2] = parseStringAs(line, 8, 8);
+ ezab_max[2] = parseStringAs(line, 16, 8);
+
+ } else if (record == "beta") {
+
+ ezab_min[3] = parseStringAs(line, 8, 8);
+ ezab_max[3] = parseStringAs(line, 16, 8);
+
+ } else if (record == "gamma") {
+
+ gamma_min.push_back(parseStringAs(line, 8, 8));
+ gamma_max.push_back(parseStringAs(line, 16, 8));
+
+ } else cerr << boost::format(
+ "Warning: Unrecognized record %s in suite definition from %s")
+ % record % suite_definition << endl;
+
+ } // Loop over lines in file
+
+ // Construct vectors for dominant-satellite pairs
+ for (size_t i = 0; i < N_reference_ddg; ++i) {
+ dominant_suites.push_back(
+ vector(N_reference_suite[i], N_reference_suite[i]));
+ dom_sat_pair_index.push_back(
+ vector(N_reference_suite[i], domsat_dihedral.size()));
+ }
+
+ for (size_t i = 0; i < domsat_dihedral.size(); ++i) {
+ dominant_suites[domsat_ddg[i]][domsat_dom[i]] = domsat_dom[i];
+ dominant_suites[domsat_ddg[i]][domsat_sat[i]] = domsat_dom[i];
+ dom_sat_pair_index[domsat_ddg[i]][domsat_sat[i]] = i;
+ dominant_width.push_back(dihedral_width);
+ satellite_width.push_back(dihedral_width);
+ for (size_t j = 0; j < domsat_dihedral[i].size(); ++j) {
+ dominant_width[i][domsat_dihedral[i][j]] = domsat_dom_width[i][j];
+ satellite_width[i][domsat_dihedral[i][j]] = domsat_sat_width[i][j];
+ }
+ }
+
+ } // defineSuites()
+
+ void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) {
+
+ vector continuous_alpha_atoms;
+ vector continuous_beta_atoms;
+ vector continuous_gamma_atoms;
+ vector continuous_delta_atoms;
+ vector continuous_epsilon_atoms;
+ vector continuous_zeta_atoms;
+ AtomicGroup dihedral_atoms;
+ AtomicGroup residue_p;
+ AtomicGroup residue_o5p;
+ AtomicGroup residue_c5p;
+ AtomicGroup residue_c4p;
+ AtomicGroup residue_c3p;
+ AtomicGroup residue_o3p;
+ AtomicGroup prev_residue_c4p;
+ AtomicGroup prev_residue_c3p;
+ AtomicGroup prev_residue_o3p;
+ int current_resid = -2;
+ size_t residue_size;
+
+ // True if this is the initial residue in a continuous group
+ bool first_res = true;
+
+ // Clear vector of vectors of AtomicGroups for each backbone dihedral
+ alpha_atoms.clear();
+ beta_atoms.clear();
+ gamma_atoms.clear();
+ delta_atoms.clear();
+ epsilon_atoms.clear();
+ zeta_atoms.clear();
+
+ // Extract all RNA backbone atoms (P, O5', C5', C4', C3', and O3') into
+ // one AtomicGroup
+ AtomicGroup backbone = selectAtoms(group,
+ "(name =~ \"^(P|C[345]'|O[35]')$\")");
+
+ // Split by resid and loop over residues
+ vector backbone_residues = backbone.splitByResidue();
+ for (size_t i = 0; i < backbone_residues.size(); ++i) {
+
+ // Select RNA backbone atoms from residue
+ residue_p = selectAtoms(backbone_residues[i], "(name == \"P\")");
+ residue_o5p = selectAtoms(backbone_residues[i], "(name == \"O5'\")");
+ residue_c5p = selectAtoms(backbone_residues[i], "(name == \"C5'\")");
+ residue_c4p = selectAtoms(backbone_residues[i], "(name == \"C4'\")");
+ residue_c3p = selectAtoms(backbone_residues[i], "(name == \"C3'\")");
+ residue_o3p = selectAtoms(backbone_residues[i], "(name == \"O3'\")");
+
+ // If any atom besides P is missing, skip this residue and start a
+ // new continuous group
+ if (residue_o5p.size() != 1 || residue_c5p.size() != 1 ||
+ residue_c4p.size() != 1 || residue_c3p.size() != 1 ||
+ residue_o3p.size() != 1) {
+
+ first_res = true;
+ continue;
+
+ }
+
+ // If the resid is not sequential, this is not a continuous group
+ if (residue_p.size() != 1
+ || residue_p[0]->resid() != current_resid + 1) first_res = true;
+
+ if (first_res) {
+
+ first_res = false;
+
+ // Record any previous continuous group
+ if (continuous_alpha_atoms.size() != 0) {
+
+ alpha_atoms.push_back(continuous_alpha_atoms);
+ beta_atoms.push_back(continuous_beta_atoms);
+ gamma_atoms.push_back(continuous_gamma_atoms);
+ delta_atoms.push_back(continuous_delta_atoms);
+ epsilon_atoms.push_back(continuous_epsilon_atoms);
+ zeta_atoms.push_back(continuous_zeta_atoms);
+
+ }
+
+ // Clear vectors of AtomicGroups for this continuous groups
+ continuous_alpha_atoms.clear();
+ continuous_beta_atoms.clear();
+ continuous_gamma_atoms.clear();
+ continuous_delta_atoms.clear();
+ continuous_epsilon_atoms.clear();
+ continuous_zeta_atoms.clear();
+
+ // Record delta for this initial residue
+ dihedral_atoms = residue_c5p;
+ dihedral_atoms.append(residue_c4p);
+ dihedral_atoms.append(residue_c3p);
+ dihedral_atoms.append(residue_o3p);
+ continuous_delta_atoms.push_back(dihedral_atoms);
+
+ } else {
+
+ // Record backbone dihedrals for the remainder of the suite,
+ // i.e. epsilon and zeta of the previous residue and alpha,
+ // beta, gamma, and delta of the current residue
+ dihedral_atoms = prev_residue_c4p;
+ dihedral_atoms.append(prev_residue_c3p);
+ dihedral_atoms.append(prev_residue_o3p);
+ dihedral_atoms.append(residue_p);
+ continuous_epsilon_atoms.push_back(dihedral_atoms);
+ dihedral_atoms = prev_residue_c3p;
+ dihedral_atoms.append(prev_residue_o3p);
+ dihedral_atoms.append(residue_p);
+ dihedral_atoms.append(residue_o5p);
+ continuous_zeta_atoms.push_back(dihedral_atoms);
+ dihedral_atoms = prev_residue_o3p;
+ dihedral_atoms.append(residue_p);
+ dihedral_atoms.append(residue_o5p);
+ dihedral_atoms.append(residue_c5p);
+ continuous_alpha_atoms.push_back(dihedral_atoms);
+ dihedral_atoms = residue_p;
+ dihedral_atoms.append(residue_o5p);
+ dihedral_atoms.append(residue_c5p);
+ dihedral_atoms.append(residue_c4p);
+ continuous_beta_atoms.push_back(dihedral_atoms);
+ dihedral_atoms = residue_o5p;
+ dihedral_atoms.append(residue_c5p);
+ dihedral_atoms.append(residue_c4p);
+ dihedral_atoms.append(residue_c3p);
+ continuous_gamma_atoms.push_back(dihedral_atoms);
+ dihedral_atoms = residue_c5p;
+ dihedral_atoms.append(residue_c4p);
+ dihedral_atoms.append(residue_c3p);
+ dihedral_atoms.append(residue_o3p);
+ continuous_delta_atoms.push_back(dihedral_atoms);
+
+ }
+
+ // Save C4', C3', and O3' for dihedrals in the next residue
+ prev_residue_c4p = residue_c4p;
+ prev_residue_c3p = residue_c3p;
+ prev_residue_o3p = residue_o3p;
+
+ // Update resid
+ current_resid = residue_o5p[0]->resid();
+
+ } // loop over residues
+
+ // Record any previous continuous group
+ if (continuous_alpha_atoms.size() != 0) {
+
+ alpha_atoms.push_back(continuous_alpha_atoms);
+ beta_atoms.push_back(continuous_beta_atoms);
+ gamma_atoms.push_back(continuous_gamma_atoms);
+ delta_atoms.push_back(continuous_delta_atoms);
+ epsilon_atoms.push_back(continuous_epsilon_atoms);
+ zeta_atoms.push_back(continuous_zeta_atoms);
+
+ }
+
+ // Get number of continuous groups and check that all dihedral groups
+ // have same size
+ N_continuous_group = alpha_atoms.size();
+ checkContinuousGroupSize(beta_atoms, N_continuous_group, "beta");
+ checkContinuousGroupSize(gamma_atoms, N_continuous_group, "gamma");
+ checkContinuousGroupSize(delta_atoms, N_continuous_group, "delta");
+ checkContinuousGroupSize(epsilon_atoms, N_continuous_group, "epsilon");
+ checkContinuousGroupSize(zeta_atoms, N_continuous_group, "zeta");
+
+ // Get number of residues in each continuous group and check that these
+ // are consistent across backbone dihedrals. Delta should have one
+ // additional residue per continuous group.
+ N_residue.clear();
+ suite_resids.clear();
+ suite_resnames.clear();
+
+ for (size_t i = 0; i < N_continuous_group; ++i) {
+
+ residue_size = alpha_atoms[i].size();
+ checkResidueSize(beta_atoms[i], residue_size, "beta", i + 1);
+ checkResidueSize(gamma_atoms[i], residue_size, "gamma", i + 1);
+ checkResidueSize(delta_atoms[i], residue_size + 1, "delta", i + 1);
+ checkResidueSize(epsilon_atoms[i], residue_size, "epsilon", i + 1);
+ checkResidueSize(zeta_atoms[i], residue_size, "zeta", i + 1);
+ N_residue.push_back(residue_size);
+
+ for (size_t j = 0; j < residue_size; ++j) {
+
+ suite_resids.push_back(gamma_atoms[i][j][0]->resid());
+ suite_resnames.push_back(gamma_atoms[i][j][0]->resname());
+
+ }
+
+ }
+
+ N_suite = suite_resids.size();
+
+ } // extractRnaBackboneAtoms()
+
+ vector RnaSuite::getSuiteDDGs() const {
+ return suite_ddgs;
+ } // getSuiteDDGs()
+
+ vector > RnaSuite::getSuiteDihedrals() const {
+ return suite_dihedrals;
+ } // getSuiteDihedrals()
+
+ vector RnaSuite::getSuiteNames() const {
+ return suite_names;
+ } // getSuiteNames()
+
+ vector RnaSuite::getSuiteResids() const {
+ return suite_resids;
+ } // getSuiteResids()
+
+ vector RnaSuite::getSuiteResnames() const {
+ return suite_resnames;
+ } // getSuiteResnames()
+
+ double RnaSuite::getSuitenessCutoff() const {
+ return suiteness_cutoff;
+ } // getSuitenessCutoff()
+
+ vector RnaSuite::getSuitenessScores() const {
+ return suiteness;
+ } // getSuitenessScores()
+
+ double RnaSuite::hyperellipsoidDist(const vector &dihedrals,
+ const vector &reference, const vector &width,
+ uint first_index, uint last_index) {
+
+ double unscaled_diff;
+ double sum_scaled_powers = 0.0;
+
+ for (uint i = first_index; i <= last_index; ++i) {
+
+ unscaled_diff = abs(dihedrals[i] - reference[i]);
+ // suitename program does not wrap unscaled coordinates
+ // if (unscaled_diff > 180.0) unscaled_diff = 360.0 - unscaled_diff;
+ sum_scaled_powers += pow(unscaled_diff / width[i], 3.0);
+
+ }
+
+ return sum_scaled_powers;
+
+ } // hyperellipsoidDist4()
+
+ bool RnaSuite::isBetweenDomSatPair(const vector &dihedrals,
+ const vector &dominant, const vector &satellite) {
+
+ double dom_to_sat;
+ double dom_dot_product = 0;
+ double sat_dot_product = 0;
+
+ // If the point is in between the dominant and satellite reference
+ // suites, then the dot product between the vectors (point - dominant)
+ // and (satellite - dominant) and the dot product between the vectors
+ // (point - satellite) and (dominant - satellite) should both be
+ // positive, i.e. the cosine of the angles is positive.
+ for (uint i = 1; i <= 4; ++i) {
+
+ dom_to_sat = satellite[i] - dominant[i];
+ dom_dot_product += (dihedrals[i] - dominant[i]) * dom_to_sat;
+ // sat_dot_product += (dihedrals[i] - satellite[i]) * sat_to_dom
+ // sat_to_dom = -dom_to_sat
+ sat_dot_product += (satellite[i] - dihedrals[i]) * dom_to_sat;
+
+ }
+
+ return dom_dot_product > 0 && sat_dot_product > 0;
+
+ } // isBetweenDomSatPair()
+
+ void RnaSuite::printBackboneAtoms() const {
+
+ size_t i_plus;
+ size_t j_plus;
+
+ cout << "\n ==== Printing backbone atoms ====\n" << endl;
+
+ if (alpha_atoms.empty()) {
+
+ cerr << "Warning: backbone atoms are empty" << endl;
+ return;
+
+ }
+
+ cout << boost::format("Number of continuous groups: %d\n")
+ % N_continuous_group;
+
+ for (size_t i = 0; i < N_continuous_group; ++i) {
+
+ i_plus = i + 1;
+ cout << boost::format("Continuous group %d has %d residues\n")
+ % i_plus % N_residue[i];
+
+ for (size_t j = 0; j < N_residue[i]; ++j) {
+
+ j_plus = j + 1;
+ cout << boost::format("Delta %d %d\n") % i_plus % j_plus;
+ cout << delta_atoms[i][j] << "\n";
+ cout << boost::format("Epsilon %d %d\n") % i_plus % j_plus;
+ cout << epsilon_atoms[i][j] << "\n";
+ cout << boost::format("Zeta %d %d\n") % i_plus % j_plus;
+ cout << zeta_atoms[i][j] << "\n";
+ cout << boost::format("Alpha %d %d\n") % i_plus % j_plus;
+ cout << alpha_atoms[i][j] << "\n";
+ cout << boost::format("Beta %d %d\n") % i_plus % j_plus;
+ cout << beta_atoms[i][j] << "\n";
+ cout << boost::format("Gamma %d %d\n") % i_plus % j_plus;
+ cout << gamma_atoms[i][j] << "\n";
+
+ }
+
+ cout << boost::format("Delta %d %d\n") % i_plus
+ % (N_residue[i] + 1);
+ cout << delta_atoms[i][N_residue[i]] << "\n";
+
+ }
+
+ } // printBackboneAtoms()
+
+ void RnaSuite::printBackboneDihedrals() const {
+
+ cout << "\n ==== Printing backbone dihedrals ====\n" << endl;
+
+ if (suite_dihedrals.empty()) {
+
+ cerr << "Warning: backbone dihedrals are empty" << endl;
+ return;
+
+ }
+
+ for (size_t i = 0; i < N_suite; ++i)
+ cout << boost::format(
+ "%5d %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n")
+ % suite_resids[i] % suite_resnames[i] % suite_dihedrals[i][0]
+ % suite_dihedrals[i][1] % suite_dihedrals[i][2]
+ % suite_dihedrals[i][3] % suite_dihedrals[i][4]
+ % suite_dihedrals[i][5] % suite_dihedrals[i][6];
+
+ } // printBackboneDihedrals()
+
+ void RnaSuite::printReferenceSuites() const {
+
+ cout << "\n ==== Printing reference suites ====\n" << endl;
+
+ if (reference_dihedrals.empty()) {
+
+ cerr << "Warning: reference suites are empty" << endl;
+ return;
+
+ }
+
+ for (size_t i = 0; i < N_reference_ddg; ++i)
+ for (size_t j = 0; j < N_reference_suite[i]; ++j)
+ cout << boost::format(
+ "%2s %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n")
+ % reference_names[i][j] % reference_ddgs[i]
+ % reference_dihedrals[i][j][0]
+ % reference_dihedrals[i][j][1]
+ % reference_dihedrals[i][j][2]
+ % reference_dihedrals[i][j][3]
+ % reference_dihedrals[i][j][4]
+ % reference_dihedrals[i][j][5]
+ % reference_dihedrals[i][j][6];
+
+ } // printReferenceSuites()
+
+ void RnaSuite::printSuites() const {
+
+ cout << "\n ==== Printing suites ====\n" << endl;
+
+ if (suite_names.empty()) {
+
+ cerr << "Warning: suites are empty" << endl;
+ return;
+
+ }
+
+ for (size_t i = 0; i < N_suite; ++i)
+ cout << boost::format("%5d %3s %2s %3s %8.6f %7.3f %7.3f %7.3f "
+ "%7.3f %7.3f %7.3f %7.3f\n") % suite_resids[i]
+ % suite_resnames[i] % suite_names[i] % suite_ddgs[i]
+ % suiteness[i] % suite_dihedrals[i][0] % suite_dihedrals[i][1]
+ % suite_dihedrals[i][2] % suite_dihedrals[i][3]
+ % suite_dihedrals[i][4] % suite_dihedrals[i][5]
+ % suite_dihedrals[i][6];
+
+ } // printSuites()
+
+ void RnaSuite::setSuitenessCutoff(const double suiteness_cutoff_) {
+
+ suiteness_cutoff = suiteness_cutoff_;
+
+ } // setSuitenessCutoff()
+
+}
+
diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp
new file mode 100644
index 000000000..3da6f2762
--- /dev/null
+++ b/src/RnaSuite.hpp
@@ -0,0 +1,230 @@
+/*
+ This file is part of LOOS.
+
+ LOOS (Lightweight Object-Oriented Structure library)
+ Copyright (c) 2008, Tod D. Romo, Alan Grossfield
+ Department of Biochemistry and Biophysics
+ School of Medicine & Dentistry, University of Rochester
+
+ This package (LOOS) is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation under version 3 of the License.
+
+ This package is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+*/
+
+#if !defined(LOOS_RNASUITE_HPP)
+#define LOOS_RNASUITE_HPP
+
+#include
+#include
+#include
+
+using namespace std;
+
+namespace loos {
+
+ //! Class for assigning backbone suites to an RNA
+ /**
+ * This class acts on an AtomicGroup and assigns backbone suites to any RNA
+ * residues present. It also calculates the "suiteness" score that
+ * describes how well the residue fits into its assigned suite. The
+ * constructor requires that the user specifies a path to a file defining
+ * reference suites. The suites from Richardson et al. (2008) RNA 14,
+ * 465-481 are included in $LOOS/share/suitename_definitions.dat
+ */
+ class RnaSuite {
+
+ public:
+
+ RnaSuite(const AtomicGroup &group, const string suite_defintion,
+ const double suiteness_cutoff_);
+
+ RnaSuite(const AtomicGroup &group, const string suite_definition);
+
+ RnaSuite();
+
+ //! Method to assign residues to backbone suites from Richardson et al.
+ /**
+ * This method assigns residues to one of the reference suites defined
+ * in the constructor. The suite of a residue is defined from delta of
+ * the previous residue to delta of the current residue.
+ */
+ void assignSuitenameSuites();
+
+ //! Method to calculate backbone dihedrals for each RNA residue
+ /**
+ * This method calculates the six RNA backbone dihedrals (i.e. alpha,
+ * beta, gamma, delta, epsilon, and zeta) for each residue.
+ */
+ void calculateBackboneDihedrals();
+
+ //! Method to define suites used for assignment
+ /**
+ * This method defines reference suites. The argument must be a path to
+ * a file containing records consisting of fields with a width of eight
+ * characters. An example file for the suites defined in
+ * Richardson et al. (2008) RNA 14, 465-481 is included in
+ * $LOOS/share/suitename_definitions.dat. Records can be:
+ *
+ * suite name ddg delta(i-1) epsilon zeta alpha beta gamma delta(i)
+ * Define a reference suite with name given in field 2, ddg_index
+ * given in field 3, and dihedrals of the cluster center given in
+ * fields 4 through 10.
+ *
+ * width delta(i-1) epsilon zeta alpha beta gamma delta
+ * Define default widths for scaled hyperellipsoid distances.
+ *
+ * domsat sat_name dom_name dihedral_index sat_width dom_width
+ * Define dominant-satellite pair with name of satellite suite in
+ * field 2, name of dominant suite in field 3, index of dihedral
+ * dimension with altered width in field 4, width of that dimension
+ * for satellite suite in field 5, and width of that dimension for
+ * dominant suite in field 6. Additional dimensions and width can
+ * be specified in fields 7 through 9, fields 10 through 12, etc.
+ *
+ * dihedral min max
+ * Define allowed ranges for a dihedral. "dihedral" can be one of
+ * "delta", "epsilon", "zeta", "alpha", "beta", or "gamma". The
+ * minimum value is given in field 2 and maximum value in field 3.
+ */
+ void defineSuites(const string& suite_definition);
+
+ //! Method to extract RNA backbone atoms from an AtomicGroup
+ /**
+ * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3',
+ * and O3') and splits them into AtomicGroups by residue id.
+ */
+ void extractRnaBackboneAtoms(const AtomicGroup &group);
+
+ //! Method to return the current indices into delta delta gamma clusters
+ vector getSuiteDDGs() const;
+
+ //! Method to return the current backbone dihedrals
+ vector > getSuiteDihedrals() const;
+
+ //! Method to return the current assigned suite names
+ vector getSuiteNames() const;
+
+ //! Method to return the suite residue indices
+ vector getSuiteResids() const;
+
+ //! Method to return the suite residue names
+ vector getSuiteResnames() const;
+
+ //! Method to return the cutoff for the suiteness score of non-outliers
+ double getSuitenessCutoff() const;
+
+ //! Method to return the current suiteness scores
+ vector getSuitenessScores() const;
+
+ //! Method to print groups of backbone atoms for each dihedral
+ void printBackboneAtoms() const;
+
+ //! Method to print backbone dihedrals for each residue
+ void printBackboneDihedrals() const;
+
+ //! Method to print reference suite names and mean dihedrals
+ void printReferenceSuites() const;
+
+ //! Method to print suite names, suiteness scores, and dihedrals
+ void printSuites() const;
+
+ //! Method to set the cutoff for the suiteness score of non-outliers
+ void setSuitenessCutoff(const double suiteness_cutoff_);
+
+ private:
+
+ //! Method to assign residues to a delta(i-1), delta, gamma index
+ size_t assignDDGIndex(double dihedral, vector &min,
+ vector &max, uint increment, uint &ddg_index);
+
+ //! Calculate a dihedral in deg from 4 atoms in the range [0, 360]
+ double calculateDihedral(const AtomicGroup &group);
+
+ //! Method to check the size of a vector of continuous groups
+ void checkContinuousGroupSize(
+ const vector > &group_vector,
+ const size_t target_size, const string dihedral_name) const;
+
+ //! Method to check the size of a vector of residues
+ void checkResidueSize(const vector &residue_vector,
+ const size_t target_size, const string dihedral_name,
+ const size_t group_index) const;
+
+ //! Method to test whether a point is in between two reference points
+ bool isBetweenDomSatPair(const vector &dihedrals,
+ const vector &dominant, const vector &satellite);
+
+ //! Calculate a scaled hyperellipsoid distance between two points
+ double hyperellipsoidDist(const vector &dihedrals,
+ const vector &reference, const vector &width,
+ uint first_index, uint last_index);
+
+ // Reference suites used for assignment
+ vector >> reference_dihedrals;
+ vector > reference_names;
+ vector reference_ddgs;
+
+ // Widths used to scale each dihedral dimension
+ vector dihedral_width;
+
+ // Indices of dominant-satellite pairs
+ vector > dominant_suites;
+
+ // Index into dominant-satellite pair widths
+ vector > dom_sat_pair_index;
+
+ // Alternative widths used to scale dominant-satellite pairs
+ vector > dominant_width;
+ vector > satellite_width;
+
+ // Boundaries for allowed regions of delta(i-1), delta, and gamma
+ vector delta_min;
+ vector delta_max;
+ vector gamma_min;
+ vector gamma_max;
+
+ // Boundaries for allowed regions of epsilon, zeta, alpha, beta
+ vector ezab_min;
+ vector ezab_max;
+
+ // Vector of continuous groups, composed of vectors of AtomicGroups
+ // for each residue within a continuous group
+ vector > alpha_atoms;
+ vector > beta_atoms;
+ vector > gamma_atoms;
+ vector > delta_atoms;
+ vector > epsilon_atoms;
+ vector > zeta_atoms;
+
+ // Suite residue ids, residue names, and dihedrals
+ vector suite_resids;
+ vector suite_resnames;
+ vector > suite_dihedrals;
+
+ // Assigned suite names, ddg indices, and suiteness scores
+ vector suite_names;
+ vector suite_ddgs;
+ vector suiteness;
+
+ // Other internal variables
+ size_t N_reference_ddg;
+ vector N_reference_suite;
+ size_t N_continuous_group;
+ vector N_residue;
+ size_t N_suite;
+ double suiteness_cutoff;
+
+ }; // RnaSuite class
+
+}
+
+#endif
+
diff --git a/src/RnaSuite.i b/src/RnaSuite.i
new file mode 100644
index 000000000..444b78aed
--- /dev/null
+++ b/src/RnaSuite.i
@@ -0,0 +1,27 @@
+/*
+ This file is part of LOOS.
+
+ LOOS (Lightweight Object-Oriented Structure library)
+ Copyright (c) 2008, Alan Grossfield
+ Department of Biochemistry and Biophysics
+ School of Medicine & Dentistry, University of Rochester
+
+ This package (LOOS) is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation under version 3 of the License.
+
+ This package is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see .
+*/
+
+%header %{
+#include
+%}
+
+%include "RnaSuite.hpp"
+
diff --git a/src/SConscript b/src/SConscript
index a77c125f3..28698b92d 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -41,6 +41,7 @@ apps = apps + ' charmm.cpp AtomicNumberDeducer.cpp OptionsFramework.cpp revision
apps = apps + ' utils_random.cpp utils_structural.cpp LineReader.cpp xtcwriter.cpp alignment.cpp MultiTraj.cpp'
apps = apps + ' index_range_parser.cpp'
apps = apps + ' Weights.cpp'
+apps = apps + ' RnaSuite.cpp'
if (env['HAS_NETCDF']):
apps = apps + ' amber_netcdf.cpp'
@@ -71,6 +72,7 @@ hdr += ' xdr.hpp xtc.hpp gro.hpp trr.hpp exceptions.hpp MatrixOps.hpp sorting.hp
hdr += ' Simplex.hpp charmm.hpp AtomicNumberDeducer.hpp OptionsFramework.hpp'
hdr += ' utils_random.hpp utils_structural.hpp LineReader.hpp xtcwriter.hpp'
hdr += ' trajwriter.hpp MultiTraj.hpp index_range_parser.hpp'
+hdr += ' RnaSuite.hpp'
if (env['HAS_NETCDF']):
hdr += ' amber_netcdf.hpp'
diff --git a/src/loos.hpp b/src/loos.hpp
index df91f206e..8e00197a3 100644
--- a/src/loos.hpp
+++ b/src/loos.hpp
@@ -114,6 +114,7 @@
#include
#include
+#include
#endif
diff --git a/src/loos.i b/src/loos.i
index c251fb2b2..adbec630f 100644
--- a/src/loos.i
+++ b/src/loos.i
@@ -52,7 +52,7 @@ namespace loos {
%template(DoubleVectorMatrix) std::vector< std::vector >;
%template(IntVector) std::vector;
%template(UIntVector) std::vector;
-
+%template(StringVector) std::vector;
%include "exceptions.i"
@@ -81,3 +81,4 @@ namespace loos {
%include "gro.i"
%include "utils_structural.i"
%include "Weights.i"
+%include "RnaSuite.i"