diff --git a/.gitignore b/.gitignore index 6af7d83c3..6527c9ce7 100644 --- a/.gitignore +++ b/.gitignore @@ -160,6 +160,7 @@ Tools/xy_rdf Tools/model-select Tools/dihedrals Tools/esp_mesh +Tools/rna_suites Packages/Clustering/** !Packages/Clustering/*.cpp !Packages/Clustering/*.hpp @@ -272,6 +273,7 @@ Tools/coverlap Tools/subsetter Tools/xy_rdf Tools/model-select +Tools/dihedrals Packages/ElasticNetworks/heavy-ca Packages/ElasticNetworks/psf-masses Packages/ElasticNetworks/vsa diff --git a/SConstruct b/SConstruct index bfa0bc7f7..8dc54aab4 100644 --- a/SConstruct +++ b/SConstruct @@ -274,7 +274,9 @@ else: loos_tools = SConscript('Tools/SConscript') -loos_core = loos + loos_scripts +loos_share = SConscript('share/SConscript') + +loos_core = loos + loos_scripts + loos_share # Automatically setup build targets based on package_list diff --git a/Tools/SConscript b/Tools/SConscript index cdb1d562c..0556e9f77 100644 --- a/Tools/SConscript +++ b/Tools/SConscript @@ -34,7 +34,8 @@ apps = apps + ' drifter porcupine ramachandran renum-pdb exposure clipper rebond apps = apps + ' traj2pdb merge-traj center-molecule contact-time perturb-structure coverlap phase-pdb' apps = apps + ' big-svd kurskew periodic_box area_per_lipid residue-contact-map' apps = apps + ' cross-dist fcontacts serialize-selection transition_contacts fixdcd smooth-traj membrane_map packing_score' -apps = apps + ' mops dibmops xtcinfo model-meta-stats verap lipid_survival multi-rmsds rms-overlap esp_mesh dihedrals' +apps = apps + ' mops dibmops xtcinfo model-meta-stats verap lipid_survival multi-rmsds rms-overlap' +apps = apps + ' esp_mesh dihedrals rna_suites' list = [] diff --git a/Tools/dihedrals.cpp b/Tools/dihedrals.cpp index a412e277c..e52988b3f 100644 --- a/Tools/dihedrals.cpp +++ b/Tools/dihedrals.cpp @@ -477,4 +477,4 @@ int main(int argc, char *argv[]) { for (auto v_fps : vv_filePtrs) for (auto p_ofs : v_fps) p_ofs->close(); -} \ No newline at end of file +} diff --git a/Tools/rna_suites.cpp b/Tools/rna_suites.cpp new file mode 100644 index 000000000..efb7cd531 --- /dev/null +++ b/Tools/rna_suites.cpp @@ -0,0 +1,248 @@ +/* + rna_suites.cpp + + Assigns backbone suites to RNAs based on backbone dihedrals + + Chapin E. Cavender 2020-03 +*/ + +/* + + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008-2020 Tod D. Romo & Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +using namespace std; +using namespace loos; + +namespace opts = loos::OptionsFramework; +namespace po = loos::OptionsFramework::po; + +string fullHelpMessage(void) { + + string full_help_message = +"SYNOPSIS\n" +" Assign backbone suites to RNAs based on backbone dihedrals.\n" +"\n" +"DESCRIPTION\n" +" The goal of this tool is to assign continuous RNA dinucleotides to a\n" +" cluster called a \"suite\" based on the conformation of backbone dihedrals.\n" +" The idea comes from Richardson et al. (2008) RNA 14, 465-481. The\n" +" dinucleotide for a residue runs from delta (C5'-C4'-C3'-O3') of the previous\n" +" residue to delta of the current residue, encompassing seven continuous\n" +" dihedrals. A suite is a pre-defined cluster in this 7D space, named by a\n" +" two-character string. Examples are \"1a\" or \"5z\".\n" +"\n" +" The first step is to search the given selection for RNA backbone atoms, i.e.\n" +" atoms named \"P\", \"O5'\", \"C5'\", \"C4'\", \"C3'\", or \"O3'\". These atoms are\n" +" split by residue. Valid dinucleotides are sets of delta-to-delta backbone\n" +" atoms with sequential resids. Once the set of valid dinucleotides is\n" +" determined, the tool will loop over the trajectory and assign each\n" +" dinucleotide to a suite for each frame.\n" +"\n" +" Suite assignment occurs in two stages. The clusters are well-separated in\n" +" the 3D subspace of delta(i-1), delta, and gamma. So the first stage is to\n" +" assign each delta to one of two ranges of values consistent with either a\n" +" C3'-endo (3) or C2'-endo (2) sugar pucker and to assign gamma to one of\n" +" three ranges of values: gauche plus (p), gauche minus (m), or trans (t). The\n" +" result is a three-character string called a ddg index. Examples are \"33p\"\n" +" or \"23t\". Then, the dinucleotide is assigned to one of a possible set of\n" +" suites associated with its ddg index based on a scaled hyperellipsoid\n" +" distance in the dual 4D subspace of epsilon, zeta, alpha, and beta.\n" +"\n" +" Some suites have overlapping hyperellipsoids of different sizes. The wider\n" +" suite is called a dominant suite, and the narrower suite is called a\n" +" satellite suite. These cases are handled by rescaling the hyperellipsoid\n" +" distance along the dimensions in which the overlap occurs.\n" +"\n" +" If a dinucleotide doesn't fit into one of the allowed ranges for a dihedral,\n" +" it is assigned as an outlier and given a suite name \"!s\", where \"s\" is the\n" +" first character of the name of the deviant dihedral, e.g. \"!a\" for a bad\n" +" alpha. If the dinucleotide is not close to any of the reference suites, it \n" +" is also assigned as an outlier and given a suite name \"!!\".\n" +"\n" +" After assignment, each dinucleotide is given a goodness-of-fit score called \n" +" the suiteness based on the scaled 7D hyperellipsoid distance to its assigned\n" +" suite. A suiteness of one indicates that the dinucleotide is at the cluster\n" +" center. Lower suiteness indicates that the dinucleotide is farther from the\n" +" cluster center. An outlier has a suiteness of zero, and assigned\n" +" dinucleotides have a minimum suiteness score (set by the -c option) to\n" +" differentiate them from outliers.\n" +"\n" +" It is necessary to specify a path to a file containing definitions for the\n" +" reference suites on the command-line. The format is explained in the next\n" +" section. An example of the format that implements the suites as defined in\n" +" the software suitename (Richardson et al. (2008) RNA 14, 465-481) is\n" +" included as share/suitename_definitions.dat in the top-level directory of\n" +" the LOOS source tree. If installing within a conda environment, this file\n" +" can also be found in $CONDA_PREFIX/share/loos/suitename_definitions.dat;\n" +" otherwise, it can be found in $LOOS/share/suitename_definitions.dat. The\n" +" suitename_defintions.dat file should be sufficient for typical users, but\n" +" you must specify the path to it as the first positional argument.\n" +"\n" +"SUITE DEFINITON FILE FORMAT\n" +" Each line in the file is parsed as a record containing fields with a width\n" +" of eight characters. Blank lines and lines beginning with \"#\" are ignored.\n" +" The first field specifies the type of record and must be one of \"suite\",\n" +" \"width\", \"domsat\", \"delta\", \"epsilon\", \"zeta\", \"alpha\", \"beta\", or \"gamma\".\n" +" These records and their associated fields are described below.\n" +"\n" +" suite name ddg delta(i-1) epsilon zeta alpha beta gamma delta(i)\n" +" Define a reference suite with suite name given in field 2, ddg index\n" +" given in field 3, and dihedrals of the cluster center given in fields 4\n" +" through 10.\n" +"\n" +" width delta(i-1) epsilon zeta alpha beta gamma delta\n" +" Define default widths for scaled hyperellipsoid distances.\n" +"\n" +" domsat sat_name dom_name dihedral_index sat_width dom_width\n" +" Define dominant-satellite pair with name of satellite suite in field 2,\n" +" name of dominant suite in field 3, index of dihedral dimension with\n" +" altered width in field 4, width of that dimension for satellite suite\n" +" in field 5, and width of that dimension for dominant suite in field 6.\n" +" Additional dimensions and widths can be specified in fields 7 through 9,\n" +" fields 10 through 12, etc.\n" +"\n" +" dihedral min max\n" +" Define allowed ranges for a dihedral. \"dihedral\" can be one of \"delta\",\n" +" \"epsilon\", \"zeta\", \"alpha\", \"beta\", or \"gamma\". The minimum value\n" +" is given in field 2 and maximum value in field 3.\n" +"\n" +"EXAMPLES\n" +" rna_suites $CONDA_PREFIX/share/loos/suitename_defintions.dat foo.pdb foo.dcd\n" +" Assign backbone suites using the install prefix from a conda install.\n" +"\n" +" rna_suites -s 'resid <= 10' $CONDA_PREFIX/share/loos/suitename_defintions.dat \\\n" +" foo.pdb foo.dcd\n" +" Assign backbone suites only for the first 10 residues.\n" +"\n" +" rna_suites -c 0.001 $CONDA_PREFIX/share/loos/suitename_defintions.dat \\\n" +" foo.pdb foo.dcd\n" +" Assign backbone suites using a minimum suiteness of 0.001 for\n" +" non-outliers.\n"; + + return full_help_message; + +} + +class ToolOptions : public opts::OptionsPackage { + +public: + + ToolOptions() {} + + void addGeneric(po::options_description& o) { + + o.add_options() + ("suiteness_cutoff,c", + po::value(&suiteness_cutoff)->default_value(0.01), + "Cutoff for the suiteness score of non-outliers") + ; + + } + + string print() const { + + ostringstream oss; + oss << boost::format( + "suiteness_cutoff=%f" + ) % suiteness_cutoff; + return (oss.str()); + + } + + double suiteness_cutoff; + +}; // ToolOptions + +// Tool functions + +int main(int argc, char *argv[]) { + + // Get command-line input + string header = invocationHeader(argc, argv); + + // Set up tool options + opts::BasicOptions *bopts = new opts::BasicOptions(fullHelpMessage()); + opts::BasicSelection *sopts = new opts::BasicSelection("!hydrogen"); + opts::RequiredArguments *ropts = new opts::RequiredArguments; + ropts->addArgument("suite_def", "suite_definition_file"); + opts::TrajectoryWithFrameIndices *tropts = + new opts::TrajectoryWithFrameIndices; + ToolOptions *topts = new ToolOptions; + + opts::AggregateOptions options; + options.add(bopts).add(sopts).add(ropts).add(tropts).add(topts); + if (!options.parse(argc, argv)) + exit(-1); + + // Assign tool options to variables + const double suiteness_cutoff = topts->suiteness_cutoff; + + // Print command-line input + cout << "# " << header << endl; + + // Build LOOS system and generate atom selection + AtomicGroup model = tropts->model; + pTraj traj = tropts->trajectory; + vector indices = tropts->frameList(); + AtomicGroup rna_atoms = selectAtoms(model, sopts->selection); + + // Create RNASuite object from RNA atoms + string suite_definition = ropts->value("suite_def"); + RnaSuite rna_suite = RnaSuite(rna_atoms, suite_definition, suiteness_cutoff); + vector suite_resids = rna_suite.getSuiteResids(); + vector suite_resnames = rna_suite.getSuiteResnames(); + //rna_suite.printReferenceSuites(); + + // Print dihedrals + //rna_suite.printBackboneAtoms(); + + // Print column headers + cout << "# Frame Resid Resname Suite DDG_index Suiteness" << endl; + + // Loop over trajectory + vector suite_names; + vector suite_ddgs; + vector suiteness; + uint t = 0; + for (vector::iterator i = indices.begin(); i != indices.end(); ++i) { + + traj->readFrame(*i); + traj->updateGroupCoords(model); + + rna_suite.calculateBackboneDihedrals(); + rna_suite.assignSuitenameSuites(); + suite_names = rna_suite.getSuiteNames(); + suite_ddgs = rna_suite.getSuiteDDGs(); + suiteness = rna_suite.getSuitenessScores(); + + for (uint j = 0; j < suite_resids.size(); ++j) + cout << boost::format("%5d %5d %3s %2s %2s %8.6f") % t + % suite_resids[j] % suite_resnames[j] % suite_names[j] + % suite_ddgs[j] % suiteness[j] << endl; + + ++t; + + } + +} + diff --git a/share/SConscript b/share/SConscript new file mode 100644 index 000000000..0391a753a --- /dev/null +++ b/share/SConscript @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# This file is part of LOOS. +# +# LOOS (Lightweight Object-Oriented Structure library) +# Copyright (c) 2008, Tod D. Romo +# Department of Biochemistry and Biophysics +# School of Medicine & Dentistry, University of Rochester +# +# This package (LOOS) is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation under version 3 of the License. +# +# This package is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import os +import sys + +Import('env') +Import('loos') + +clone = env.Clone() +clone.Prepend(LIBS = [loos]) + +files = 'suitename_definitions.dat' +PREFIX = env['PREFIX'] + +if env.USING_CONDA: + share_path = os.path.join(PREFIX, "share", "loos") +else: + share_path = os.path.join(PREFIX, "share") + +shared_files = env.Install(share_path, Split(files)) + +shared_list = Split(files) + +Return('shared_list') diff --git a/share/suitename_definitions.dat b/share/suitename_definitions.dat new file mode 100644 index 000000000..1969a283f --- /dev/null +++ b/share/suitename_definitions.dat @@ -0,0 +1,82 @@ +# Reference suite dihedrals +# Name DDG Delta-1 Epsilon Zeta Alpha Beta Gamma Delta +suite 1a 33p 81.495 212.250 288.831 294.967 173.990 53.550 81.035 +suite 1m 33p 83.513 218.120 291.593 292.247 222.300 58.067 86.093 +suite 1L 33p 85.664 245.014 268.257 303.879 138.164 61.950 79.457 +suite &a 33p 82.112 190.682 264.945 295.967 181.839 51.455 81.512 +suite 7a 33p 83.414 217.400 222.006 302.856 160.719 49.097 82.444 +suite 3a 33p 85.072 216.324 173.276 289.320 164.132 45.876 84.956 +suite 9a 33p 83.179 210.347 121.474 288.568 157.268 49.347 81.047 +suite 1g 33p 80.888 218.636 290.735 167.447 159.565 51.326 85.213 +suite 7d 33p 83.856 238.750 256.875 69.562 170.200 52.800 85.287 +suite 3d 33p 85.295 244.085 203.815 65.880 181.130 54.680 86.035 +suite 5d 33p 79.671 202.471 63.064 68.164 143.450 49.664 82.757 +suite 3g 33p 84.000 195.000 146.000 170.000 170.000 52.000 84.000 +suite 1e 33t 80.514 200.545 280.510 249.314 82.662 167.890 85.507 +suite 1c 33t 80.223 196.591 291.299 153.060 194.379 179.061 83.648 +suite 1f 33t 81.395 203.030 294.445 172.195 138.540 175.565 84.470 +suite 5j 33t 87.417 223.558 80.175 66.667 109.150 176.475 83.833 +suite 5n 33t 86.055 246.502 100.392 73.595 213.752 183.395 85.483 +suite 33m +suite 1b 32p 84.215 215.014 288.672 300.420 177.476 58.307 144.841 +suite 1[ 32p 82.731 220.463 288.665 296.983 221.654 54.213 143.771 +suite 3b 32p 84.700 226.400 168.336 292.771 177.629 48.629 147.950 +suite 1z 32p 83.358 206.042 277.567 195.700 161.600 50.750 145.258 +suite 5z 32p 82.614 206.440 52.524 163.669 148.421 50.176 147.590 +suite 7p 32p 84.285 236.600 220.400 68.300 200.122 53.693 145.730 +suite 5p 32p 84.457 213.286 69.086 75.500 156.671 57.486 147.686 +suite 1t 32t 81.200 199.243 288.986 180.286 194.743 178.200 147.386 +suite 5q 32t 82.133 204.933 69.483 63.417 115.233 176.283 145.733 +suite 1o 32m 83.977 216.508 287.192 297.254 225.154 293.738 150.677 +suite 7r 32m 84.606 232.856 248.125 63.269 181.975 295.744 149.744 +suite 5r 32m 83.000 196.900 65.350 60.150 138.425 292.550 154.275 +suite 2a 23p 145.399 260.339 288.756 288.444 192.733 53.097 84.067 +suite 4a 23p 146.275 259.783 169.958 298.450 169.583 50.908 83.967 +suite 0a 23p 149.286 223.159 139.421 284.559 158.107 47.900 84.424 +suite #a 23p 148.006 191.944 146.231 289.288 150.781 42.419 84.956 +suite 4g 23p 148.028 256.922 165.194 204.961 165.194 49.383 82.983 +suite 6g 23p 145.337 262.869 79.588 203.863 189.688 58.000 84.900 +suite 8d 23p 148.992 270.596 240.892 62.225 176.271 53.600 87.262 +suite 4d 23p 149.822 249.956 187.678 80.433 198.133 61.000 89.378 +suite 6d 23p 146.922 241.222 88.894 59.344 160.683 52.333 83.417 +suite 2g 23p 141.900 258.383 286.517 178.267 165.217 48.350 84.783 +suite 2h 23t 147.782 260.712 290.424 296.200 177.282 175.594 86.565 +suite 4n 23t 143.722 227.256 203.789 73.856 216.733 194.444 80.911 +suite 0i 23t 148.717 274.683 100.283 80.600 248.133 181.817 82.600 +suite 6n 23t 150.311 268.383 84.972 63.811 191.483 176.644 85.600 +suite 6j 23t 141.633 244.100 66.056 71.667 122.167 182.200 83.622 +suite 0k 23m 149.070 249.780 111.520 278.370 207.780 287.820 86.650 +suite 2[ 22p 146.383 259.402 291.275 291.982 210.048 54.412 147.760 +suite 4b 22p 145.256 244.622 162.822 294.159 171.630 45.900 145.804 +suite 0b 22p 147.593 248.421 112.086 274.943 164.764 56.843 146.264 +suite 4p 22p 150.077 260.246 213.785 71.900 207.638 56.715 148.131 +suite 6p 22p 146.415 257.831 89.597 67.923 173.051 55.513 147.623 +suite 2z 22p 142.900 236.550 268.800 180.783 185.133 54.467 143.350 +suite 4s 22t 149.863 247.562 170.488 277.938 84.425 176.413 148.087 +suite 2u 22t 143.940 258.200 298.240 279.640 183.680 183.080 145.120 +suite 2o 22m 147.342 256.475 295.508 287.408 194.525 293.725 150.458 +# Default widths for hyperellipsoid distance +# Delta-1 Epsilon Zeta Alpha Beta Gamma Delta +width 28.000 60.000 55.000 50.000 70.000 35.000 28.000 +# Dominant-satellite pairs. Must come after suite and width +# Satname Domname DihedralSatwidthDomwidthDihedralSatwidthDomwidth +domsat 1m 1a 4 32.000 64.000 +domsat 1L 1a 1 18.000 70.000 4 18.000 70.000 +domsat &a 1a 1 20.000 60.000 2 20.000 60.000 +domsat 1f 1c 4 47.000 65.000 +domsat 1[ 1b 4 34.000 56.000 +domsat 4a 0a 1 40.000 50.000 2 40.000 50.000 +domsat #a 0a 1 26.000 36.000 2 26.000 36.000 +domsat 0i 6n 4 60.000 60.000 +domsat 6j 6n 4 60.000 60.000 +# Filter ranges +# Min Max +delta 60.000 105.000 +delta 125.000 165.000 +epsilon 155.000 310.000 +zeta 25.000 335.000 +alpha 25.000 335.000 +beta 50.000 290.000 +gamma 20.000 95.000 +gamma 140.000 215.000 +gamma 260.000 335.000 diff --git a/src/RnaSuite.cpp b/src/RnaSuite.cpp new file mode 100644 index 000000000..1b2259309 --- /dev/null +++ b/src/RnaSuite.cpp @@ -0,0 +1,1038 @@ +/* + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008, Tod D. Romo, Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +using namespace std; + +namespace loos { + + // |------------------------------------------------------------------------ + // | Constructors + // |------------------------------------------------------------------------ + + RnaSuite::RnaSuite(const AtomicGroup &group, const string suite_definition, + const double suiteness_cutoff_) { + + suiteness_cutoff = suiteness_cutoff_; + defineSuites(suite_definition); + extractRnaBackboneAtoms(group); + + } + + RnaSuite::RnaSuite(const AtomicGroup &group, + const string suite_definition) { + + suiteness_cutoff = 0.01; + defineSuites(suite_definition); + extractRnaBackboneAtoms(group); + + } + + RnaSuite::RnaSuite() { + + suiteness_cutoff = 0.01; + + } + + // |------------------------------------------------------------------------ + // | Methods + // |------------------------------------------------------------------------ + + size_t RnaSuite::assignDDGIndex(double dihedral, vector &min, + vector &max, uint increment, uint &ddg_index) { + + size_t i = 0; + while (i < min.size()) { + + if (dihedral >= min[i] && dihedral <= max[i]) { + + ddg_index += i * increment; + return i; + + } + + ++i; + + } + + return i; + + } // assignDDGIndex() + + void RnaSuite::assignSuitenameSuites() { + + size_t N_delta = delta_min.size(); + size_t N_gamma = gamma_min.size(); + size_t N_dg = N_delta * N_gamma; + vector suite(7); + + // Index into delta(i-1), delta, gamma clusters + uint ddg_index; + + // Scaled 4D hyperellipsoid distance in epsilon, zeta, alpha, beta + double dist_ezab; + + // Closest scaled 4D hyperellipsoid distance to a cluster and index of + // the associated cluster + double min_dist_ezab; + size_t min_index; + + // Closest scaled 4D hyperellipsoid distance to a dominant cluster + double dom_min_dist_ezab; + size_t dom_min_index; + + // Closest scaled 4D hyperellipsoid distance to a non-dominant cluster + double sat_min_dist_ezab; + size_t sat_min_index; + + // Index into vector of widths for pair of dominant-satellite clusters + size_t dom_sat_index; + + // Number of clusters this dinucleotide could belong to + uint candidates; + + // Scaled 7D hyperellipsoid distance + double dist_7; + + // Index of the assigned suite + size_t assigned_suite_index; + + // Goodness-of-fit for assigned suite + double suiteness_score; + + if (suite_dihedrals.empty()) { + + cerr << "Warning: backbone dihedrals are empty" << endl; + return; + + } + + // Initialize vectors of suite names and suiteness scores + suite_names.clear(); + suite_ddgs.clear(); + suiteness.clear(); + suite_names.reserve(N_suite); + suite_ddgs.reserve(N_suite); + suiteness.reserve(N_suite); + + for (size_t i = 0; i < N_suite; ++i) { + + // Assign delta(j-1), delta, gamma index. These 3 dihedrals have + // 12 clusters that are independent of the other 4 dihedrals. + ddg_index = 0; + + // Filter on 5' delta. Values outside of this range are + // indicative of incorrect stereochemistry in the ribose. + if (assignDDGIndex(suite_dihedrals[i][0], delta_min, delta_max, + N_dg, ddg_index) == N_delta) { + + suite_names.push_back("!d"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // Filter on 3' delta + if (assignDDGIndex(suite_dihedrals[i][6], delta_min, delta_max, + N_gamma, ddg_index) == N_delta) { + + suite_names.push_back("!d"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // Filter on gamma + if (assignDDGIndex(suite_dihedrals[i][5], gamma_min, gamma_max, 1, + ddg_index) == N_gamma) { + + suite_names.push_back("!g"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // Filter on epsilon. Values outside of this range are + // indicative of a misfit sugar pucker. + if (suite_dihedrals[i][1] < ezab_min[0] + || suite_dihedrals[i][1] > ezab_max[0]) { + + suite_names.push_back("!e"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // Filter on zeta + if (suite_dihedrals[i][2] < ezab_min[1] + || suite_dihedrals[i][2] > ezab_max[1]) { + + suite_names.push_back("!z"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // Filter on alpha + if (suite_dihedrals[i][3] < ezab_min[2] + || suite_dihedrals[i][3] > ezab_max[2]) { + + suite_names.push_back("!a"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // Filter on beta + if (suite_dihedrals[i][4] < ezab_min[3] + || suite_dihedrals[i][4] > ezab_max[3]) { + + suite_names.push_back("!b"); + suite_ddgs.push_back("!!!"); + suiteness.push_back(0.0); + continue; + + } + + // If there are no clusters associated with this ddg_index, then + // this is an outlier + if (N_reference_suite[ddg_index] == 0) { + + suite_names.push_back("!!"); + suite_ddgs.push_back(reference_ddgs[ddg_index]); + suiteness.push_back(0.0); + continue; + + } + + // Find closest cluster in epsilon, zeta, alpha, beta + // Largest distance in 7D is 688.66^3, so 10^9 should be safe + min_dist_ezab = 999999999.0; + dom_min_dist_ezab = 999999999.0; + sat_min_dist_ezab = 999999999.0; + min_index = N_reference_suite[ddg_index]; + dom_min_index = N_reference_suite[ddg_index]; + sat_min_index = N_reference_suite[ddg_index]; + candidates = 0; + + for (size_t j = 0; j < N_reference_suite[ddg_index]; ++j) { + + // Get 4D scaled hyperellipsoid distance + dist_ezab = hyperellipsoidDist(suite_dihedrals[i], + reference_dihedrals[ddg_index][j], dihedral_width, 1, 4); + + // Get closest cluster + if (dist_ezab < min_dist_ezab) { + + min_dist_ezab = dist_ezab; + min_index = j; + + } + + // Get closest non-dominant cluster + if (dominant_suites[ddg_index][j] != j + && dist_ezab < sat_min_dist_ezab) { + + sat_min_dist_ezab = dist_ezab; + sat_min_index = j; + + } + + // If 4D distance < 1, this reference suite is a candidate + if (dist_ezab < 1) { + + ++candidates; + + // Is this candidate a dominant cluster? + if (dominant_suites[ddg_index][j] == j) { + + dom_min_dist_ezab = dist_ezab; + dom_min_index = j; + + } + + } + + } // loop over reference suites + + // Assign membership to a reference suite + + // If there are multiple candidates, and the two canidates are + // a dominant-satellite pair, then reweight distances + if (candidates > 1 && dom_min_index != N_reference_suite[ddg_index] + && sat_min_index != N_reference_suite[ddg_index] + && dominant_suites[ddg_index][sat_min_index] == dom_min_index) { + + // Is the DNMP in between the dominant and satellite suites? + if (isBetweenDomSatPair(suite_dihedrals[i], + reference_dihedrals[ddg_index][dom_min_index], + reference_dihedrals[ddg_index][sat_min_index])) { + + // Rescale distances from point to dominant and satellite + // suites by ratio of distances from suite centers to + // boundary plane and assign to closest of the two + dom_sat_index = dom_sat_pair_index[ddg_index][sat_min_index]; + if (hyperellipsoidDist(suite_dihedrals[i], + reference_dihedrals[ddg_index][sat_min_index], + satellite_width[dom_sat_index], 1, 4) + <= hyperellipsoidDist(suite_dihedrals[i], + reference_dihedrals[ddg_index][dom_min_index], + dominant_width[dom_sat_index], 1, 4)) + + assigned_suite_index = sat_min_index; + + else assigned_suite_index = dom_min_index; + + + } + + else { + + // Assign to closer of dominant or satellite suite + if (sat_min_dist_ezab <= dom_min_dist_ezab) + assigned_suite_index = sat_min_index; + else assigned_suite_index = dom_min_index; + + } + + } + + // If there is zero or one candidate or multiple candidates but no + // dominant-satellite pair, then assign to the closest suite + else assigned_suite_index = min_index; + + // Make a final decision on whether this is an outlier using 7D + // hyperellipsoid distance + dist_7 = hyperellipsoidDist(suite_dihedrals[i], + reference_dihedrals[ddg_index][assigned_suite_index], + dihedral_width, 0, 6); + + if (dist_7 < 1) { + + suite_names.push_back( + reference_names[ddg_index][assigned_suite_index]); + suite_ddgs.push_back(reference_ddgs[ddg_index]); + suiteness_score = (1 + cos(M_PI * cbrt(dist_7))) / 2.0; + if (suiteness_score < suiteness_cutoff) + suiteness_score = suiteness_cutoff; + suiteness.push_back(suiteness_score); + + } else { + + suite_names.push_back("!!"); + suite_ddgs.push_back(reference_ddgs[ddg_index]); + suiteness.push_back(0.0); + + } + + } // loop over suites + + } // assignSuitenameSuites() + + void RnaSuite::calculateBackboneDihedrals() { + + double prev_delta; + vector suite(7); + + // Clear vector of doubles for suite backbone dihedrals + suite_dihedrals.clear(); + + for (size_t i = 0; i < N_continuous_group; ++i) { + + prev_delta = calculateDihedral(delta_atoms[i][0]); + + for (size_t j = 0; j < N_residue[i]; ++j) { + + suite[0] = prev_delta; + suite[1] = calculateDihedral(epsilon_atoms[i][j]); + suite[2] = calculateDihedral(zeta_atoms[i][j]); + suite[3] = calculateDihedral(alpha_atoms[i][j]); + suite[4] = calculateDihedral(beta_atoms[i][j]); + suite[5] = calculateDihedral(gamma_atoms[i][j]); + prev_delta = calculateDihedral(delta_atoms[i][j + 1]); + suite[6] = prev_delta; + suite_dihedrals.push_back(suite); + + } + + } + + } // calculateBackboneDihedrals() + + double RnaSuite::calculateDihedral(const AtomicGroup &group) { + + double dihedral = Math::torsion(group[0], group[1], group[2], group[3]); + if (dihedral < 0.0) dihedral += 360.0; + return dihedral; + + } // calculateDihedral() + + void RnaSuite::checkContinuousGroupSize( + const vector > &group_vector, + const size_t target_size, const string dihedral_name) const { + + if (group_vector.size() != target_size) { + + cerr << boost::format("Error: different number of continuous " + "groups for alpha (%d) and %s (%d)") % target_size + % dihedral_name % group_vector.size() << endl; + throw(LOOSError()); + + } + + } // checkContinuousGroupSize() + + void RnaSuite::checkResidueSize( + const vector &residue_vector, + const size_t target_size, const string dihedral_name, + const size_t group_index) const { + + if (residue_vector.size() != target_size) { + + cerr << boost::format("Error: different number of residues in " + "continuous group %d for alpha (%d) and %s (%d)") % group_index + % target_size % dihedral_name % residue_vector.size() << endl; + throw(LOOSError()); + + } + + } // checkResidueSize() + + void RnaSuite::defineSuites(const string& suite_definition) { + + // Clear vectors for reference suites + reference_dihedrals.clear(); + reference_names.clear(); + reference_ddgs.clear(); + dihedral_width.clear(); + dominant_suites.clear(); + dom_sat_pair_index.clear(); + dominant_width.clear(); + satellite_width.clear(); + delta_min.clear(); + delta_max.clear(); + gamma_min.clear(); + gamma_max.clear(); + ezab_min = vector(4); + ezab_max = vector(4); + N_reference_ddg = 0; + N_reference_suite.clear(); + + // Temporary variables for parsing lines from the definition file + size_t ddg_index; + size_t dom_index; + size_t sat_index; + size_t position; + string field; + string line; + string record; + vector dihedrals(7); + + // Store dominant-satellite pairs + vector domsat_ddg; + vector domsat_dom; + vector domsat_sat; + vector > domsat_dihedral; + vector > domsat_dom_width; + vector > domsat_sat_width; + + // Read file contents + ifstream ifs(suite_definition.c_str()); + if (!ifs) throw(FileOpenError(suite_definition)); + + while (getline(ifs, line)) { + + record = parseStringAs(line, 0, 8); + + if (record.empty() || record[0] == '#') continue; + + else if (record == "suite") { + + // Define a reference suite + + // Get delta delta gamma cluster + field = parseStringAs( + line, 16, min((size_t) 8, line.size() - 16)); + ddg_index = N_reference_ddg; + for (size_t i = 0; i < N_reference_ddg; ++i) + if (field == reference_ddgs[i]) { + ddg_index = i; + break; + } + + // This is a new DDG cluster + if (ddg_index == N_reference_ddg) { + reference_ddgs.push_back(field); + reference_dihedrals.push_back(vector >()); + reference_names.push_back(vector()); + ++N_reference_ddg; + N_reference_suite.push_back(0); + } + + // Get suite name + field = parseStringAs(line, 8, 8); + if (field.empty()) continue; + reference_names[ddg_index].push_back(field); + + // Get reference suite dihedrals + dihedrals[0] = parseStringAs(line, 24, 8); + dihedrals[1] = parseStringAs(line, 32, 8); + dihedrals[2] = parseStringAs(line, 40, 8); + dihedrals[3] = parseStringAs(line, 48, 8); + dihedrals[4] = parseStringAs(line, 56, 8); + dihedrals[5] = parseStringAs(line, 64, 8); + dihedrals[6] = parseStringAs(line, 72, 8); + reference_dihedrals[ddg_index].push_back(dihedrals); + + ++N_reference_suite[ddg_index]; + + } else if (record == "width") { + + // Get default widths for hyperellipsoid distance + dihedral_width.push_back(parseStringAs(line, 8, 8)); + dihedral_width.push_back(parseStringAs(line, 16, 8)); + dihedral_width.push_back(parseStringAs(line, 24, 8)); + dihedral_width.push_back(parseStringAs(line, 32, 8)); + dihedral_width.push_back(parseStringAs(line, 40, 8)); + dihedral_width.push_back(parseStringAs(line, 48, 8)); + dihedral_width.push_back(parseStringAs(line, 56, 8)); + + } else if (record == "domsat") { + + // Define a dominant-satellite pair + + // Get index of dominant suite + field = parseStringAs(line, 16, 8); + ddg_index = N_reference_ddg; + for (size_t i = 0; i < N_reference_ddg; ++i) { + for (size_t j = 0; j < N_reference_suite[i]; ++j) + if (field == reference_names[i][j]) { + ddg_index = i; + dom_index = j; + break; + } + if (ddg_index != N_reference_ddg) break; + } + + if (ddg_index == N_reference_ddg) { + cerr << boost::format( + "Warning: dominant suite %s was not defined in file %s") + % field % suite_definition << endl; + continue; + } + + // Get index of satellite suite + field = parseStringAs(line, 8, 8); + sat_index = N_reference_suite[ddg_index]; + for (size_t j = 0; j < N_reference_suite[ddg_index]; ++j) + if (field == reference_names[ddg_index][j]) { + sat_index = j; + break; + } + + if (sat_index == N_reference_suite[ddg_index]) { + cerr << boost::format( + "Warning: satellite suite %s was not defined in file %s") + % field % suite_definition << endl; + continue; + } + + domsat_ddg.push_back(ddg_index); + domsat_dom.push_back(dom_index); + domsat_sat.push_back(sat_index); + + // Loop over dihedrals with alternate widths + vector dihedral_indices; + vector dom_width; + vector sat_width; + position = 24; + while (position < line.size()) { + dihedral_indices.push_back( + parseStringAs(line, position, 8)); + sat_width.push_back( + parseStringAs(line, position + 8, 8)); + dom_width.push_back( + parseStringAs(line, position + 16, 8)); + position += 24; + } + domsat_dihedral.push_back(dihedral_indices); + domsat_dom_width.push_back(dom_width); + domsat_sat_width.push_back(sat_width); + + } else if (record == "delta") { + + delta_min.push_back(parseStringAs(line, 8, 8)); + delta_max.push_back(parseStringAs(line, 16, 8)); + + } else if (record == "epsilon") { + + ezab_min[0] = parseStringAs(line, 8, 8); + ezab_max[0] = parseStringAs(line, 16, 8); + + } else if (record == "zeta") { + + ezab_min[1] = parseStringAs(line, 8, 8); + ezab_max[1] = parseStringAs(line, 16, 8); + + } else if (record == "alpha") { + + ezab_min[2] = parseStringAs(line, 8, 8); + ezab_max[2] = parseStringAs(line, 16, 8); + + } else if (record == "beta") { + + ezab_min[3] = parseStringAs(line, 8, 8); + ezab_max[3] = parseStringAs(line, 16, 8); + + } else if (record == "gamma") { + + gamma_min.push_back(parseStringAs(line, 8, 8)); + gamma_max.push_back(parseStringAs(line, 16, 8)); + + } else cerr << boost::format( + "Warning: Unrecognized record %s in suite definition from %s") + % record % suite_definition << endl; + + } // Loop over lines in file + + // Construct vectors for dominant-satellite pairs + for (size_t i = 0; i < N_reference_ddg; ++i) { + dominant_suites.push_back( + vector(N_reference_suite[i], N_reference_suite[i])); + dom_sat_pair_index.push_back( + vector(N_reference_suite[i], domsat_dihedral.size())); + } + + for (size_t i = 0; i < domsat_dihedral.size(); ++i) { + dominant_suites[domsat_ddg[i]][domsat_dom[i]] = domsat_dom[i]; + dominant_suites[domsat_ddg[i]][domsat_sat[i]] = domsat_dom[i]; + dom_sat_pair_index[domsat_ddg[i]][domsat_sat[i]] = i; + dominant_width.push_back(dihedral_width); + satellite_width.push_back(dihedral_width); + for (size_t j = 0; j < domsat_dihedral[i].size(); ++j) { + dominant_width[i][domsat_dihedral[i][j]] = domsat_dom_width[i][j]; + satellite_width[i][domsat_dihedral[i][j]] = domsat_sat_width[i][j]; + } + } + + } // defineSuites() + + void RnaSuite::extractRnaBackboneAtoms(const AtomicGroup &group) { + + vector continuous_alpha_atoms; + vector continuous_beta_atoms; + vector continuous_gamma_atoms; + vector continuous_delta_atoms; + vector continuous_epsilon_atoms; + vector continuous_zeta_atoms; + AtomicGroup dihedral_atoms; + AtomicGroup residue_p; + AtomicGroup residue_o5p; + AtomicGroup residue_c5p; + AtomicGroup residue_c4p; + AtomicGroup residue_c3p; + AtomicGroup residue_o3p; + AtomicGroup prev_residue_c4p; + AtomicGroup prev_residue_c3p; + AtomicGroup prev_residue_o3p; + int current_resid = -2; + size_t residue_size; + + // True if this is the initial residue in a continuous group + bool first_res = true; + + // Clear vector of vectors of AtomicGroups for each backbone dihedral + alpha_atoms.clear(); + beta_atoms.clear(); + gamma_atoms.clear(); + delta_atoms.clear(); + epsilon_atoms.clear(); + zeta_atoms.clear(); + + // Extract all RNA backbone atoms (P, O5', C5', C4', C3', and O3') into + // one AtomicGroup + AtomicGroup backbone = selectAtoms(group, + "(name =~ \"^(P|C[345]'|O[35]')$\")"); + + // Split by resid and loop over residues + vector backbone_residues = backbone.splitByResidue(); + for (size_t i = 0; i < backbone_residues.size(); ++i) { + + // Select RNA backbone atoms from residue + residue_p = selectAtoms(backbone_residues[i], "(name == \"P\")"); + residue_o5p = selectAtoms(backbone_residues[i], "(name == \"O5'\")"); + residue_c5p = selectAtoms(backbone_residues[i], "(name == \"C5'\")"); + residue_c4p = selectAtoms(backbone_residues[i], "(name == \"C4'\")"); + residue_c3p = selectAtoms(backbone_residues[i], "(name == \"C3'\")"); + residue_o3p = selectAtoms(backbone_residues[i], "(name == \"O3'\")"); + + // If any atom besides P is missing, skip this residue and start a + // new continuous group + if (residue_o5p.size() != 1 || residue_c5p.size() != 1 || + residue_c4p.size() != 1 || residue_c3p.size() != 1 || + residue_o3p.size() != 1) { + + first_res = true; + continue; + + } + + // If the resid is not sequential, this is not a continuous group + if (residue_p.size() != 1 + || residue_p[0]->resid() != current_resid + 1) first_res = true; + + if (first_res) { + + first_res = false; + + // Record any previous continuous group + if (continuous_alpha_atoms.size() != 0) { + + alpha_atoms.push_back(continuous_alpha_atoms); + beta_atoms.push_back(continuous_beta_atoms); + gamma_atoms.push_back(continuous_gamma_atoms); + delta_atoms.push_back(continuous_delta_atoms); + epsilon_atoms.push_back(continuous_epsilon_atoms); + zeta_atoms.push_back(continuous_zeta_atoms); + + } + + // Clear vectors of AtomicGroups for this continuous groups + continuous_alpha_atoms.clear(); + continuous_beta_atoms.clear(); + continuous_gamma_atoms.clear(); + continuous_delta_atoms.clear(); + continuous_epsilon_atoms.clear(); + continuous_zeta_atoms.clear(); + + // Record delta for this initial residue + dihedral_atoms = residue_c5p; + dihedral_atoms.append(residue_c4p); + dihedral_atoms.append(residue_c3p); + dihedral_atoms.append(residue_o3p); + continuous_delta_atoms.push_back(dihedral_atoms); + + } else { + + // Record backbone dihedrals for the remainder of the suite, + // i.e. epsilon and zeta of the previous residue and alpha, + // beta, gamma, and delta of the current residue + dihedral_atoms = prev_residue_c4p; + dihedral_atoms.append(prev_residue_c3p); + dihedral_atoms.append(prev_residue_o3p); + dihedral_atoms.append(residue_p); + continuous_epsilon_atoms.push_back(dihedral_atoms); + dihedral_atoms = prev_residue_c3p; + dihedral_atoms.append(prev_residue_o3p); + dihedral_atoms.append(residue_p); + dihedral_atoms.append(residue_o5p); + continuous_zeta_atoms.push_back(dihedral_atoms); + dihedral_atoms = prev_residue_o3p; + dihedral_atoms.append(residue_p); + dihedral_atoms.append(residue_o5p); + dihedral_atoms.append(residue_c5p); + continuous_alpha_atoms.push_back(dihedral_atoms); + dihedral_atoms = residue_p; + dihedral_atoms.append(residue_o5p); + dihedral_atoms.append(residue_c5p); + dihedral_atoms.append(residue_c4p); + continuous_beta_atoms.push_back(dihedral_atoms); + dihedral_atoms = residue_o5p; + dihedral_atoms.append(residue_c5p); + dihedral_atoms.append(residue_c4p); + dihedral_atoms.append(residue_c3p); + continuous_gamma_atoms.push_back(dihedral_atoms); + dihedral_atoms = residue_c5p; + dihedral_atoms.append(residue_c4p); + dihedral_atoms.append(residue_c3p); + dihedral_atoms.append(residue_o3p); + continuous_delta_atoms.push_back(dihedral_atoms); + + } + + // Save C4', C3', and O3' for dihedrals in the next residue + prev_residue_c4p = residue_c4p; + prev_residue_c3p = residue_c3p; + prev_residue_o3p = residue_o3p; + + // Update resid + current_resid = residue_o5p[0]->resid(); + + } // loop over residues + + // Record any previous continuous group + if (continuous_alpha_atoms.size() != 0) { + + alpha_atoms.push_back(continuous_alpha_atoms); + beta_atoms.push_back(continuous_beta_atoms); + gamma_atoms.push_back(continuous_gamma_atoms); + delta_atoms.push_back(continuous_delta_atoms); + epsilon_atoms.push_back(continuous_epsilon_atoms); + zeta_atoms.push_back(continuous_zeta_atoms); + + } + + // Get number of continuous groups and check that all dihedral groups + // have same size + N_continuous_group = alpha_atoms.size(); + checkContinuousGroupSize(beta_atoms, N_continuous_group, "beta"); + checkContinuousGroupSize(gamma_atoms, N_continuous_group, "gamma"); + checkContinuousGroupSize(delta_atoms, N_continuous_group, "delta"); + checkContinuousGroupSize(epsilon_atoms, N_continuous_group, "epsilon"); + checkContinuousGroupSize(zeta_atoms, N_continuous_group, "zeta"); + + // Get number of residues in each continuous group and check that these + // are consistent across backbone dihedrals. Delta should have one + // additional residue per continuous group. + N_residue.clear(); + suite_resids.clear(); + suite_resnames.clear(); + + for (size_t i = 0; i < N_continuous_group; ++i) { + + residue_size = alpha_atoms[i].size(); + checkResidueSize(beta_atoms[i], residue_size, "beta", i + 1); + checkResidueSize(gamma_atoms[i], residue_size, "gamma", i + 1); + checkResidueSize(delta_atoms[i], residue_size + 1, "delta", i + 1); + checkResidueSize(epsilon_atoms[i], residue_size, "epsilon", i + 1); + checkResidueSize(zeta_atoms[i], residue_size, "zeta", i + 1); + N_residue.push_back(residue_size); + + for (size_t j = 0; j < residue_size; ++j) { + + suite_resids.push_back(gamma_atoms[i][j][0]->resid()); + suite_resnames.push_back(gamma_atoms[i][j][0]->resname()); + + } + + } + + N_suite = suite_resids.size(); + + } // extractRnaBackboneAtoms() + + vector RnaSuite::getSuiteDDGs() const { + return suite_ddgs; + } // getSuiteDDGs() + + vector > RnaSuite::getSuiteDihedrals() const { + return suite_dihedrals; + } // getSuiteDihedrals() + + vector RnaSuite::getSuiteNames() const { + return suite_names; + } // getSuiteNames() + + vector RnaSuite::getSuiteResids() const { + return suite_resids; + } // getSuiteResids() + + vector RnaSuite::getSuiteResnames() const { + return suite_resnames; + } // getSuiteResnames() + + double RnaSuite::getSuitenessCutoff() const { + return suiteness_cutoff; + } // getSuitenessCutoff() + + vector RnaSuite::getSuitenessScores() const { + return suiteness; + } // getSuitenessScores() + + double RnaSuite::hyperellipsoidDist(const vector &dihedrals, + const vector &reference, const vector &width, + uint first_index, uint last_index) { + + double unscaled_diff; + double sum_scaled_powers = 0.0; + + for (uint i = first_index; i <= last_index; ++i) { + + unscaled_diff = abs(dihedrals[i] - reference[i]); + // suitename program does not wrap unscaled coordinates + // if (unscaled_diff > 180.0) unscaled_diff = 360.0 - unscaled_diff; + sum_scaled_powers += pow(unscaled_diff / width[i], 3.0); + + } + + return sum_scaled_powers; + + } // hyperellipsoidDist4() + + bool RnaSuite::isBetweenDomSatPair(const vector &dihedrals, + const vector &dominant, const vector &satellite) { + + double dom_to_sat; + double dom_dot_product = 0; + double sat_dot_product = 0; + + // If the point is in between the dominant and satellite reference + // suites, then the dot product between the vectors (point - dominant) + // and (satellite - dominant) and the dot product between the vectors + // (point - satellite) and (dominant - satellite) should both be + // positive, i.e. the cosine of the angles is positive. + for (uint i = 1; i <= 4; ++i) { + + dom_to_sat = satellite[i] - dominant[i]; + dom_dot_product += (dihedrals[i] - dominant[i]) * dom_to_sat; + // sat_dot_product += (dihedrals[i] - satellite[i]) * sat_to_dom + // sat_to_dom = -dom_to_sat + sat_dot_product += (satellite[i] - dihedrals[i]) * dom_to_sat; + + } + + return dom_dot_product > 0 && sat_dot_product > 0; + + } // isBetweenDomSatPair() + + void RnaSuite::printBackboneAtoms() const { + + size_t i_plus; + size_t j_plus; + + cout << "\n ==== Printing backbone atoms ====\n" << endl; + + if (alpha_atoms.empty()) { + + cerr << "Warning: backbone atoms are empty" << endl; + return; + + } + + cout << boost::format("Number of continuous groups: %d\n") + % N_continuous_group; + + for (size_t i = 0; i < N_continuous_group; ++i) { + + i_plus = i + 1; + cout << boost::format("Continuous group %d has %d residues\n") + % i_plus % N_residue[i]; + + for (size_t j = 0; j < N_residue[i]; ++j) { + + j_plus = j + 1; + cout << boost::format("Delta %d %d\n") % i_plus % j_plus; + cout << delta_atoms[i][j] << "\n"; + cout << boost::format("Epsilon %d %d\n") % i_plus % j_plus; + cout << epsilon_atoms[i][j] << "\n"; + cout << boost::format("Zeta %d %d\n") % i_plus % j_plus; + cout << zeta_atoms[i][j] << "\n"; + cout << boost::format("Alpha %d %d\n") % i_plus % j_plus; + cout << alpha_atoms[i][j] << "\n"; + cout << boost::format("Beta %d %d\n") % i_plus % j_plus; + cout << beta_atoms[i][j] << "\n"; + cout << boost::format("Gamma %d %d\n") % i_plus % j_plus; + cout << gamma_atoms[i][j] << "\n"; + + } + + cout << boost::format("Delta %d %d\n") % i_plus + % (N_residue[i] + 1); + cout << delta_atoms[i][N_residue[i]] << "\n"; + + } + + } // printBackboneAtoms() + + void RnaSuite::printBackboneDihedrals() const { + + cout << "\n ==== Printing backbone dihedrals ====\n" << endl; + + if (suite_dihedrals.empty()) { + + cerr << "Warning: backbone dihedrals are empty" << endl; + return; + + } + + for (size_t i = 0; i < N_suite; ++i) + cout << boost::format( + "%5d %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") + % suite_resids[i] % suite_resnames[i] % suite_dihedrals[i][0] + % suite_dihedrals[i][1] % suite_dihedrals[i][2] + % suite_dihedrals[i][3] % suite_dihedrals[i][4] + % suite_dihedrals[i][5] % suite_dihedrals[i][6]; + + } // printBackboneDihedrals() + + void RnaSuite::printReferenceSuites() const { + + cout << "\n ==== Printing reference suites ====\n" << endl; + + if (reference_dihedrals.empty()) { + + cerr << "Warning: reference suites are empty" << endl; + return; + + } + + for (size_t i = 0; i < N_reference_ddg; ++i) + for (size_t j = 0; j < N_reference_suite[i]; ++j) + cout << boost::format( + "%2s %3s %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n") + % reference_names[i][j] % reference_ddgs[i] + % reference_dihedrals[i][j][0] + % reference_dihedrals[i][j][1] + % reference_dihedrals[i][j][2] + % reference_dihedrals[i][j][3] + % reference_dihedrals[i][j][4] + % reference_dihedrals[i][j][5] + % reference_dihedrals[i][j][6]; + + } // printReferenceSuites() + + void RnaSuite::printSuites() const { + + cout << "\n ==== Printing suites ====\n" << endl; + + if (suite_names.empty()) { + + cerr << "Warning: suites are empty" << endl; + return; + + } + + for (size_t i = 0; i < N_suite; ++i) + cout << boost::format("%5d %3s %2s %3s %8.6f %7.3f %7.3f %7.3f " + "%7.3f %7.3f %7.3f %7.3f\n") % suite_resids[i] + % suite_resnames[i] % suite_names[i] % suite_ddgs[i] + % suiteness[i] % suite_dihedrals[i][0] % suite_dihedrals[i][1] + % suite_dihedrals[i][2] % suite_dihedrals[i][3] + % suite_dihedrals[i][4] % suite_dihedrals[i][5] + % suite_dihedrals[i][6]; + + } // printSuites() + + void RnaSuite::setSuitenessCutoff(const double suiteness_cutoff_) { + + suiteness_cutoff = suiteness_cutoff_; + + } // setSuitenessCutoff() + +} + diff --git a/src/RnaSuite.hpp b/src/RnaSuite.hpp new file mode 100644 index 000000000..3da6f2762 --- /dev/null +++ b/src/RnaSuite.hpp @@ -0,0 +1,230 @@ +/* + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008, Tod D. Romo, Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#if !defined(LOOS_RNASUITE_HPP) +#define LOOS_RNASUITE_HPP + +#include +#include +#include + +using namespace std; + +namespace loos { + + //! Class for assigning backbone suites to an RNA + /** + * This class acts on an AtomicGroup and assigns backbone suites to any RNA + * residues present. It also calculates the "suiteness" score that + * describes how well the residue fits into its assigned suite. The + * constructor requires that the user specifies a path to a file defining + * reference suites. The suites from Richardson et al. (2008) RNA 14, + * 465-481 are included in $LOOS/share/suitename_definitions.dat + */ + class RnaSuite { + + public: + + RnaSuite(const AtomicGroup &group, const string suite_defintion, + const double suiteness_cutoff_); + + RnaSuite(const AtomicGroup &group, const string suite_definition); + + RnaSuite(); + + //! Method to assign residues to backbone suites from Richardson et al. + /** + * This method assigns residues to one of the reference suites defined + * in the constructor. The suite of a residue is defined from delta of + * the previous residue to delta of the current residue. + */ + void assignSuitenameSuites(); + + //! Method to calculate backbone dihedrals for each RNA residue + /** + * This method calculates the six RNA backbone dihedrals (i.e. alpha, + * beta, gamma, delta, epsilon, and zeta) for each residue. + */ + void calculateBackboneDihedrals(); + + //! Method to define suites used for assignment + /** + * This method defines reference suites. The argument must be a path to + * a file containing records consisting of fields with a width of eight + * characters. An example file for the suites defined in + * Richardson et al. (2008) RNA 14, 465-481 is included in + * $LOOS/share/suitename_definitions.dat. Records can be: + * + * suite name ddg delta(i-1) epsilon zeta alpha beta gamma delta(i) + * Define a reference suite with name given in field 2, ddg_index + * given in field 3, and dihedrals of the cluster center given in + * fields 4 through 10. + * + * width delta(i-1) epsilon zeta alpha beta gamma delta + * Define default widths for scaled hyperellipsoid distances. + * + * domsat sat_name dom_name dihedral_index sat_width dom_width + * Define dominant-satellite pair with name of satellite suite in + * field 2, name of dominant suite in field 3, index of dihedral + * dimension with altered width in field 4, width of that dimension + * for satellite suite in field 5, and width of that dimension for + * dominant suite in field 6. Additional dimensions and width can + * be specified in fields 7 through 9, fields 10 through 12, etc. + * + * dihedral min max + * Define allowed ranges for a dihedral. "dihedral" can be one of + * "delta", "epsilon", "zeta", "alpha", "beta", or "gamma". The + * minimum value is given in field 2 and maximum value in field 3. + */ + void defineSuites(const string& suite_definition); + + //! Method to extract RNA backbone atoms from an AtomicGroup + /** + * This method selects RNA backbone atoms (i.e. P, O5', C5', C4', C3', + * and O3') and splits them into AtomicGroups by residue id. + */ + void extractRnaBackboneAtoms(const AtomicGroup &group); + + //! Method to return the current indices into delta delta gamma clusters + vector getSuiteDDGs() const; + + //! Method to return the current backbone dihedrals + vector > getSuiteDihedrals() const; + + //! Method to return the current assigned suite names + vector getSuiteNames() const; + + //! Method to return the suite residue indices + vector getSuiteResids() const; + + //! Method to return the suite residue names + vector getSuiteResnames() const; + + //! Method to return the cutoff for the suiteness score of non-outliers + double getSuitenessCutoff() const; + + //! Method to return the current suiteness scores + vector getSuitenessScores() const; + + //! Method to print groups of backbone atoms for each dihedral + void printBackboneAtoms() const; + + //! Method to print backbone dihedrals for each residue + void printBackboneDihedrals() const; + + //! Method to print reference suite names and mean dihedrals + void printReferenceSuites() const; + + //! Method to print suite names, suiteness scores, and dihedrals + void printSuites() const; + + //! Method to set the cutoff for the suiteness score of non-outliers + void setSuitenessCutoff(const double suiteness_cutoff_); + + private: + + //! Method to assign residues to a delta(i-1), delta, gamma index + size_t assignDDGIndex(double dihedral, vector &min, + vector &max, uint increment, uint &ddg_index); + + //! Calculate a dihedral in deg from 4 atoms in the range [0, 360] + double calculateDihedral(const AtomicGroup &group); + + //! Method to check the size of a vector of continuous groups + void checkContinuousGroupSize( + const vector > &group_vector, + const size_t target_size, const string dihedral_name) const; + + //! Method to check the size of a vector of residues + void checkResidueSize(const vector &residue_vector, + const size_t target_size, const string dihedral_name, + const size_t group_index) const; + + //! Method to test whether a point is in between two reference points + bool isBetweenDomSatPair(const vector &dihedrals, + const vector &dominant, const vector &satellite); + + //! Calculate a scaled hyperellipsoid distance between two points + double hyperellipsoidDist(const vector &dihedrals, + const vector &reference, const vector &width, + uint first_index, uint last_index); + + // Reference suites used for assignment + vector >> reference_dihedrals; + vector > reference_names; + vector reference_ddgs; + + // Widths used to scale each dihedral dimension + vector dihedral_width; + + // Indices of dominant-satellite pairs + vector > dominant_suites; + + // Index into dominant-satellite pair widths + vector > dom_sat_pair_index; + + // Alternative widths used to scale dominant-satellite pairs + vector > dominant_width; + vector > satellite_width; + + // Boundaries for allowed regions of delta(i-1), delta, and gamma + vector delta_min; + vector delta_max; + vector gamma_min; + vector gamma_max; + + // Boundaries for allowed regions of epsilon, zeta, alpha, beta + vector ezab_min; + vector ezab_max; + + // Vector of continuous groups, composed of vectors of AtomicGroups + // for each residue within a continuous group + vector > alpha_atoms; + vector > beta_atoms; + vector > gamma_atoms; + vector > delta_atoms; + vector > epsilon_atoms; + vector > zeta_atoms; + + // Suite residue ids, residue names, and dihedrals + vector suite_resids; + vector suite_resnames; + vector > suite_dihedrals; + + // Assigned suite names, ddg indices, and suiteness scores + vector suite_names; + vector suite_ddgs; + vector suiteness; + + // Other internal variables + size_t N_reference_ddg; + vector N_reference_suite; + size_t N_continuous_group; + vector N_residue; + size_t N_suite; + double suiteness_cutoff; + + }; // RnaSuite class + +} + +#endif + diff --git a/src/RnaSuite.i b/src/RnaSuite.i new file mode 100644 index 000000000..444b78aed --- /dev/null +++ b/src/RnaSuite.i @@ -0,0 +1,27 @@ +/* + This file is part of LOOS. + + LOOS (Lightweight Object-Oriented Structure library) + Copyright (c) 2008, Alan Grossfield + Department of Biochemistry and Biophysics + School of Medicine & Dentistry, University of Rochester + + This package (LOOS) is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation under version 3 of the License. + + This package is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +%header %{ +#include +%} + +%include "RnaSuite.hpp" + diff --git a/src/SConscript b/src/SConscript index a77c125f3..28698b92d 100644 --- a/src/SConscript +++ b/src/SConscript @@ -41,6 +41,7 @@ apps = apps + ' charmm.cpp AtomicNumberDeducer.cpp OptionsFramework.cpp revision apps = apps + ' utils_random.cpp utils_structural.cpp LineReader.cpp xtcwriter.cpp alignment.cpp MultiTraj.cpp' apps = apps + ' index_range_parser.cpp' apps = apps + ' Weights.cpp' +apps = apps + ' RnaSuite.cpp' if (env['HAS_NETCDF']): apps = apps + ' amber_netcdf.cpp' @@ -71,6 +72,7 @@ hdr += ' xdr.hpp xtc.hpp gro.hpp trr.hpp exceptions.hpp MatrixOps.hpp sorting.hp hdr += ' Simplex.hpp charmm.hpp AtomicNumberDeducer.hpp OptionsFramework.hpp' hdr += ' utils_random.hpp utils_structural.hpp LineReader.hpp xtcwriter.hpp' hdr += ' trajwriter.hpp MultiTraj.hpp index_range_parser.hpp' +hdr += ' RnaSuite.hpp' if (env['HAS_NETCDF']): hdr += ' amber_netcdf.hpp' diff --git a/src/loos.hpp b/src/loos.hpp index df91f206e..8e00197a3 100644 --- a/src/loos.hpp +++ b/src/loos.hpp @@ -114,6 +114,7 @@ #include #include +#include #endif diff --git a/src/loos.i b/src/loos.i index c251fb2b2..adbec630f 100644 --- a/src/loos.i +++ b/src/loos.i @@ -52,7 +52,7 @@ namespace loos { %template(DoubleVectorMatrix) std::vector< std::vector >; %template(IntVector) std::vector; %template(UIntVector) std::vector; - +%template(StringVector) std::vector; %include "exceptions.i" @@ -81,3 +81,4 @@ namespace loos { %include "gro.i" %include "utils_structural.i" %include "Weights.i" +%include "RnaSuite.i"