diff --git a/src/isdb/EMMIVox.cpp b/src/isdb/EMMIVox.cpp new file mode 100644 index 0000000000..5cd6f7ce03 --- /dev/null +++ b/src/isdb/EMMIVox.cpp @@ -0,0 +1,1881 @@ +/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + Copyright (c) 2021 The plumed team + (see the PEOPLE file at the root of the distribution for a list of names) + + See http://www.plumed.org for more information. + + This file is part of plumed, version 2. + + plumed is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + plumed is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with plumed. If not, see . ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ + +#ifdef __PLUMED_HAS_LIBTORCH +#include "colvar/Colvar.h" +#include "core/ActionRegister.h" +#include "core/PlumedMain.h" +#include "tools/Matrix.h" +#include "core/GenericMolInfo.h" +#include "core/ActionSet.h" +#include "tools/File.h" +#include "tools/OpenMP.h" +#include +#include +#include +#include +#include +#include "tools/Random.h" + +#include +#include + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +using namespace std; + +namespace PLMD { +namespace isdb { + +//+PLUMEDOC ISDB_COLVAR EMMIVOX +/* +Bayesian single-structure and ensemble refinement with cryo-EM maps. + +This action implements the Bayesian approach for single-structure and ensemble refinement from cryo-EM maps introduced in Ref. XXX. +EMMIVox does not require fitting the cryo-EM map with a Gaussian Mixture Model, as done in \ref EMMI, but uses directly the voxels in the deposited map. + +When run in single-replica mode, this action allows atomistic, flexible refinement (and B-factors inference) of an individual structure into a density map. +A coarse-grained forward model can also be used in combination with the MARTINI force field. +Combined with a multi-replica framework (such as the -multi option in GROMACS), the user can model an ensemble of structures using +the Metainference approach \cite Bonomi:2016ip . The approach can be used to model continous dynamics of flexible regions as well as semi-ordered waters, lipids, and ions. + +\warning + To use EMMIVOX, PLUMED must be linked against the LibTorch library as described \ref ISDB "here" + +\par Examples + +Complete tutorials for single-structure and ensemble refinement can be found here: https://github.com/maxbonomi/EMMIVox + +*/ +//+ENDPLUMEDOC + +class EMMIVOX : public Colvar { + +private: + +// temperature in kbt + double kbt_; + double kbt0_; +// model - atom types + vector Model_type_; +// model - list of atom sigmas - one per atom type + vector Model_s_; +// model - list of atom weights - one per atom type + vector Model_w_; +// model - map between residue/chain IDs and list of atoms + map< pair, vector > Model_resmap_; +// model - list of residue/chain IDs per atom + vector< pair > Model_res_; +// model - list of neighboring voxels per atom + vector< vector > Model_nb_; +// model - map between residue/chain ID and bfactor + map< pair, double> Model_b_; +// model - global list of residue/chain IDs + vector< pair > Model_rlist_; +// model density + vector ovmd_; + +// data map - voxel position + vector Map_m_; +// data map - density + vector ovdd_; +// data map - error + vector exp_err_; + +// derivatives + vector ovmd_der_; + vector atom_der_; + vector score_der_; +// constants + double inv_sqrt2_, sqrt2_pi_, inv_pi2_; + vector pref_; + vector invs2_; + vector cfact_; + vector cut_; +// metainference + unsigned nrep_; + unsigned replica_; + vector ismin_; +// neighbor list + double nl_dist_cutoff_; + double nl_gauss_cutoff_; + unsigned nl_stride_; + bool first_time_; + vector< pair > nl_; + vector< pair > ns_; + vector refpos_; +// averaging + bool no_aver_; +// correlation; + bool do_corr_; +// Monte Carlo stuff + Random random_; + // Scale Monte Carlo + double scale_; + double scale_min_; + double scale_max_; + double dscale_; + double offset_; + double doffset_; + int MCSstride_; + double MCSaccept_; + double MCStrials_; +// Bfact Monte Carlo + double dbfact_; + double bfactmin_; + double bfactmax_; + double bfactsig_; + bool bfactnoc_; + bool bfactread_; + int MCBstride_; + double MCBaccept_; + double MCBtrials_; +// residue neighbor list + vector< vector > nl_res_; + unsigned bfactcount_; + unsigned bfactgroup_; + bool bfactemin_; +// Martini scattering factors + bool martini_; + // status stuff + unsigned int statusstride_; + string statusfilename_; + OFile statusfile_; + bool first_status_; + // total energy and virial + double ene_; + Tensor virial_; + double eps_; + // model density file + unsigned int mapstride_; + string mapfilename_; + // gpu/libtorch stuff + bool gpu_; + torch::Tensor ovmd_gpu_; + torch::Tensor ovmd_der_gpu_; + torch::Tensor ismin_gpu_; + torch::Tensor ovdd_gpu_; + torch::Tensor Map_m_gpu_; + torch::Tensor pref_gpu_; + torch::Tensor invs2_gpu_; + torch::Tensor nl_id_gpu_; + torch::Tensor nl_im_gpu_; + torch::Tensor pref_nl_gpu_; + torch::Tensor invs2_nl_gpu_; + torch::Tensor Map_m_nl_gpu_; + torch::DeviceType device_t_; + // annealing stuff + unsigned anneal_; + double anneal_kbt_; +// +// write file with model density + void write_model_density(long int step); +// get median of vector + double get_median(vector v); +// read and write status + void read_status(); + void print_status(long int step); +// accept or reject + bool doAccept(double oldE, double newE, double kbt); +// vector of close residues + void get_close_residues(); +// do MonteCarlo for Bfactor + void doMonteCarloBfact(); +// do MonteCarlo for scale + void doMonteCarloScale(); +// calculate model parameters + vector get_Model_param(vector &atoms); +// read data file + void get_exp_data(string datafile); +// auxiliary methods + void prepare_gpu(); + void initialize_Bfactor(double reso); + void get_auxiliary_vectors(); + void push_auxiliary_gpu(); +// calculate overlap between two Gaussians + double get_overlap_der(const Vector &d_m, const Vector &m_m, + const Vector5d &pref, const Vector5d &invs2, + Vector &ov_der); + double get_overlap(const Vector &d_m, const Vector &m_m, + const Vector5d &cfact, const Vector5d &m_s, double bfact); +// update the neighbor list + void update_neighbor_list(); +// update the gpu + void update_gpu(); +// update the neighbor sphere + void update_neighbor_sphere(); + bool do_neighbor_sphere(); +// calculate forward model and score on cpu/gpu + void calculate_fmod(); + void calculate_fmod_cpu(); + void calculate_fmod_gpu(); + void calculate_score(); + void calculate_score_cpu(); + void calculate_score_gpu(); +// calculate correlation + void calculate_corr(); +// Marginal noise + double calculate_Marginal(double scale, double offset, vector &score_der); + +public: + static void registerKeywords( Keywords& keys ); + explicit EMMIVOX(const ActionOptions&); +// active methods: + void prepare(); + virtual void calculate(); +}; + +PLUMED_REGISTER_ACTION(EMMIVOX,"EMMIVOX") + +void EMMIVOX::registerKeywords( Keywords& keys ) { + Colvar::registerKeywords( keys ); + keys.add("atoms","ATOMS","atoms used in the calculation of the density map, typically all heavy atoms"); + keys.add("compulsory","DATA_FILE","file with cryo-EM map"); + keys.add("compulsory","RESOLUTION", "cryo-EM map resolution"); + keys.add("compulsory","NORM_DENSITY","integral of experimental density"); + keys.add("compulsory","WRITE_STRIDE","stride for writing status file"); + keys.add("optional","NL_DIST_CUTOFF","neighbor list distance cutoff"); + keys.add("optional","NL_GAUSS_CUTOFF","neighbor list Gaussian sigma cutoff"); + keys.add("optional","NL_STRIDE","neighbor list update frequency"); + keys.add("optional","SIGMA_MIN","minimum density error"); + keys.add("optional","DBFACT","Bfactor MC step"); + keys.add("optional","BFACT_MAX","Bfactor maximum value"); + keys.add("optional","MCBFACT_STRIDE", "Bfactor MC stride"); + keys.add("optional","BFACT_SIGMA","Bfactor sigma prior"); + keys.add("optional","BFACT_GROUP","sample Bfactors in groups"); + keys.add("optional","STATUS_FILE","write a file with all the data useful for restart"); + keys.add("optional","SCALE_MIN","minimum scale"); + keys.add("optional","SCALE_MAX","maximum scale"); + keys.add("optional","DSCALE","maximum scale MC move"); + keys.add("optional","SCALE","scale factor"); + keys.add("optional","OFFSET","offset"); + keys.add("optional","DOFFSET","maximum offset MC move"); + keys.add("optional","MCSCALE_STRIDE", "scale factor MC stride"); + keys.add("optional","TEMP","temperature"); + keys.add("optional","WRITE_MAP","file with model density"); + keys.add("optional","WRITE_MAP_STRIDE","stride for writing model density to file"); + keys.add("optional","ANNEAL","initial annealing in number of steps"); + keys.add("optional","ANNEAL_TEMP","initial temperature for annealing"); + keys.addFlag("NO_AVER",false,"no ensemble averaging in multi-replica mode"); + keys.addFlag("CORRELATION",false,"calculate correlation coefficient"); + keys.addFlag("GPU",false,"calculate EMMIVOX on GPU with Libtorch"); + keys.addFlag("BFACT_NOCHAIN",false,"Do not use chain ID for Bfactor MC"); + keys.addFlag("BFACT_READ",false,"Read Bfactor on RESTART (automatic with DBFACT>0)"); + keys.addFlag("BFACT_MINIMIZE",false,"Accept only moves that decrease energy"); + keys.addFlag("MARTINI",false,"Use Martini scattering factors"); + componentsAreNotOptional(keys); + keys.addOutputComponent("scoreb","default","Bayesian score"); + keys.addOutputComponent("scale", "default","scale factor"); + keys.addOutputComponent("offset","default","offset"); + keys.addOutputComponent("accS", "default","scale MC acceptance"); + keys.addOutputComponent("accB", "default", "Bfactor MC acceptance"); + keys.addOutputComponent("kbt", "default", "temperature in energy unit"); + keys.addOutputComponent("corr", "CORRELATION", "correlation coefficient"); +} + +EMMIVOX::EMMIVOX(const ActionOptions&ao): + PLUMED_COLVAR_INIT(ao), + nl_dist_cutoff_(1.0), nl_gauss_cutoff_(3.0), nl_stride_(50), + first_time_(true), no_aver_(false), do_corr_(false), + scale_(1.), dscale_(0.), offset_(0.), doffset_(0.), + MCSstride_(1), MCSaccept_(0.), MCStrials_(0.), + dbfact_(0.0), bfactmin_(0.05), bfactmax_(5.0), + bfactsig_(0.1), bfactnoc_(false), bfactread_(false), + MCBstride_(1), MCBaccept_(0.), MCBtrials_(0.), + bfactcount_(0), bfactgroup_(1), bfactemin_(false), + martini_(false), statusstride_(0), first_status_(true), + eps_(0.0001), mapstride_(0), gpu_(false), + anneal_(0), anneal_kbt_(0.) +{ + // set constants + inv_sqrt2_ = 1.0/sqrt(2.0); + sqrt2_pi_ = sqrt(2.0 / M_PI); + inv_pi2_ = 0.5 / M_PI / M_PI; + + // list of atoms + vector atoms; + parseAtomList("ATOMS", atoms); + + // file with experimental cryo-EM map + string datafile; + parse("DATA_FILE", datafile); + + // neighbor list cutoffs + parse("NL_DIST_CUTOFF",nl_dist_cutoff_); + parse("NL_GAUSS_CUTOFF",nl_gauss_cutoff_); + // checks + if(nl_dist_cutoff_<=0. && nl_gauss_cutoff_<=0.) error("You must specify either NL_DIST_CUTOFF or NL_GAUSS_CUTOFF or both"); + if(nl_gauss_cutoff_<=0.) nl_gauss_cutoff_ = 1.0e+10; + if(nl_dist_cutoff_<=0.) nl_dist_cutoff_ = 1.0e+10; + // neighbor list update stride + parse("NL_STRIDE",nl_stride_); + if(nl_stride_<=0) error("NL_STRIDE must be explicitly specified and positive"); + + // minimum value for error + double sigma_min = 0.2; + parse("SIGMA_MIN", sigma_min); + if(sigma_min<0.) error("SIGMA_MIN must be greater or equal to zero"); + + // status file parameters + parse("WRITE_STRIDE", statusstride_); + if(statusstride_<=0) error("you must specify a positive WRITE_STRIDE"); + parse("STATUS_FILE", statusfilename_); + if(statusfilename_=="") statusfilename_ = "EMMIStatus"+getLabel(); + + // integral of the experimetal density + double norm_d; + parse("NORM_DENSITY", norm_d); + + // temperature + double temp=0.0; + parse("TEMP",temp); + // convert temp to kbt + if(temp>0.0) kbt0_=plumed.getAtoms().getKBoltzmann()*temp; + else kbt0_=plumed.getAtoms().getKbT(); + // set initial temperature + kbt_ = kbt0_; + + // scale MC + parse("SCALE", scale_); + parse("DSCALE",dscale_); + parse("OFFSET",offset_); + parse("DOFFSET",doffset_); + // other parameters + if(dscale_>0.) { + parse("MCSCALE_STRIDE",MCSstride_); + parse("SCALE_MIN",scale_min_); + parse("SCALE_MAX",scale_max_); + // checks + if(MCSstride_<=0) error("you must specify a positive MCSCALE_STRIDE"); + if(scale_min_<=0.) error("SCALE_MIN must be strictly positive"); + if(scale_max_<=scale_min_) error("SCALE_MAX must be greater than SCALE_MIN"); + } + + // B-factors MC + parse("DBFACT",dbfact_); + // read Bfactors + parseFlag("BFACT_READ",bfactread_); + // do not use chains + parseFlag("BFACT_NOCHAIN",bfactnoc_); + // other parameters + if(dbfact_>0.) { + parse("MCBFACT_STRIDE",MCBstride_); + parse("BFACT_MAX",bfactmax_); + parse("BFACT_SIGMA",bfactsig_); + parse("BFACT_GROUP",bfactgroup_); + parseFlag("BFACT_MINIMIZE",bfactemin_); + // checks + if(MCBstride_<=0) error("you must specify a positive MCBFACT_STRIDE"); + if(bfactmax_<=bfactmin_) error("you must specify a positive BFACT_MAX"); + if(MCBstride_%nl_stride_!=0) error("MCBFACT_STRIDE must be multiple of NL_STRIDE"); + if(bfactsig_<=0.) error("you must specify a positive BFACT_SIGMA"); + } + + // read map resolution + double reso; + parse("RESOLUTION", reso); + if(reso<=0.) error("RESOLUTION must be strictly positive"); + + // averaging or not + parseFlag("NO_AVER",no_aver_); + + // calculate correlation coefficient + parseFlag("CORRELATION",do_corr_); + + // write density file + parse("WRITE_MAP_STRIDE", mapstride_); + parse("WRITE_MAP", mapfilename_); + if(mapstride_>0 && mapfilename_=="") error("With WRITE_MAP_STRIDE you must specify WRITE_MAP"); + + // use GPU? + parseFlag("GPU",gpu_); + // set device + if (gpu_ && torch::cuda::is_available()) { + device_t_ = torch::kCUDA; + } else { + device_t_ = torch::kCPU; + gpu_ = false; + } + + // annealing stuff + parse("ANNEAL", anneal_); + if(anneal_>0) { + // read initial temperature + parse("ANNEAL_TEMP", anneal_kbt_); + // convert to energy unit + anneal_kbt_ *= plumed.getAtoms().getKBoltzmann(); + // compare with kbt0_ + if(anneal_kbt_>kbt0_) error("ANNEAL_TEMP must be lower than TEMP"); + } + +// Martini model + parseFlag("MARTINI",martini_); + + // check read + checkRead(); + + // set parallel stuff + unsigned mpisize=comm.Get_size(); + if(mpisize>1) error("EMMIVOX supports only OpenMP parallelization"); + + // get number of replicas + if(no_aver_) { + nrep_ = 1; + } else { + nrep_ = multi_sim_comm.Get_size(); + } + replica_ = multi_sim_comm.Get_rank(); + + if(nrep_>1 && dbfact_>0) error("Bfactor sampling not supported with ensemble averaging"); + + log.printf(" number of atoms involved : %u\n", atoms.size()); + log.printf(" experimental density map : %s\n", datafile.c_str()); + if(no_aver_) log.printf(" without ensemble averaging\n"); + if(gpu_) log.printf(" running on GPU \n"); + if(nl_dist_cutoff_ <1.0e+10) log.printf(" neighbor list distance cutoff : %lf\n", nl_dist_cutoff_); + if(nl_gauss_cutoff_<1.0e+10) log.printf(" neighbor list Gaussian sigma cutoff : %lf\n", nl_gauss_cutoff_); + log.printf(" neighbor list update stride : %u\n", nl_stride_); + log.printf(" minimum density error : %f\n", sigma_min); + log.printf(" scale factor : %lf\n", scale_); + log.printf(" offset : %lf\n", offset_); + log.printf(" reading/writing to status file : %s\n", statusfilename_.c_str()); + log.printf(" with stride : %u\n", statusstride_); + if(dscale_>0.0) { + log.printf(" minimum scale : %lf\n", scale_min_); + log.printf(" maximum scale : %lf\n", scale_max_); + log.printf(" maximum scale MC move : %lf\n", dscale_); + log.printf(" maximum offset MC move : %lf\n", doffset_); + log.printf(" stride MC move : %u\n", MCSstride_); + } + if(dbfact_>0) { + log.printf(" maximum Bfactor MC move : %f\n", dbfact_); + log.printf(" stride MC move : %u\n", MCBstride_); + log.printf(" using prior with sigma : %f\n", bfactsig_); + } + if(bfactread_) log.printf(" reading Bfactors from file : %s\n", statusfilename_.c_str()); + log.printf(" temperature of the system in energy unit : %f\n", kbt_); + if(anneal_>0) { + log.printf(" doing annealing for initial number of steps : %u\n", anneal_); + log.printf(" initial annealing temperature in energy unit : %f\n", anneal_kbt_); + } + if(nrep_>1) { + log.printf(" number of replicas for averaging: %u\n", nrep_); + log.printf(" replica ID : %u\n", replica_); + } + if(mapstride_>0) { + log.printf(" writing model density to file : %s\n", mapfilename_.c_str()); + log.printf(" with stride : %u\n", mapstride_); + } + if(martini_) log.printf(" using Martini scattering factors\n"); + + // calculate model constant parameters + vector Model_w = get_Model_param(atoms); + + // read experimental map and errors + get_exp_data(datafile); + log.printf(" number of voxels : %u\n", static_cast(ovdd_.size())); + + // normalize atom weight map + double norm_m = accumulate(Model_w.begin(), Model_w.end(), 0.0); + // renormalization and constant factor on atom types + for(unsigned i=0; i0) {addComponent("accB"); componentIsNotPeriodic("accB");} + if(dscale_>0.0) {addComponent("accS"); componentIsNotPeriodic("accS");} + if(do_corr_) {addComponent("corr"); componentIsNotPeriodic("corr");} + + // initialize random seed + unsigned iseed = time(NULL)+replica_; + random_.setSeed(-iseed); + + // request atoms + requestAtoms(atoms); + + // print bibliography + log<<" Bibliography "<1)log< Map_m_gpu(3*nd); + #pragma omp parallel for num_threads(OpenMP::getNumThreads()) + for(int i=0; i v) +{ +// dimension of vector + unsigned size = v.size(); +// in case of only one entry + if (size==1) { + return v[0]; + } else { + // reorder vector + sort(v.begin(), v.end()); + // odd or even? + if (size%2==0) { + return (v[size/2-1]+v[size/2])/2.0; + } else { + return v[size/2]; + } + } +} + +void EMMIVOX::read_status() +{ + double MDtime; +// open file + IFile *ifile = new IFile(); + ifile->link(*this); + if(ifile->FileExist(statusfilename_)) { + ifile->open(statusfilename_); + while(ifile->scanField("MD_time", MDtime)) { + // read scale and offset + ifile->scanField("scale", scale_); + ifile->scanField("offset", offset_); + // read bfactors if doing fitting of reading it at restart + if(dbfact_>0 || bfactread_) { + // cycle on residues + for(unsigned ir=0; ir key = Model_rlist_[ir]; + // convert ires to string + std::string num; Tools::convert(key.first,num); + // read entry + std::string ch = key.second; + if(ch==" ") ch=""; + ifile->scanField("bf-"+num+":"+ch, Model_b_[key]); + } + } + // new line + ifile->scanField(); + } + ifile->close(); + } else { + error("Cannot find status file "+statusfilename_+"\n"); + } + delete ifile; +} + +void EMMIVOX::print_status(long int step) +{ +// if first time open the file + if(first_status_) { + first_status_ = false; + statusfile_.link(*this); + statusfile_.open(statusfilename_); + statusfile_.setHeavyFlush(); + statusfile_.fmtField("%10.7e "); + } +// write fields + double MDtime = static_cast(step)*getTimeStep(); + statusfile_.printField("MD_time", MDtime); + // write scale and offset + statusfile_.printField("scale", scale_); + statusfile_.printField("offset", offset_); + // write bfactors only if doing fitting or reading bfactors + if(dbfact_>0 || bfactread_) { + // cycle on residues + for(unsigned ir=0; ir key = Model_rlist_[ir]; + // bfactor from map + double bf = Model_b_[key]; + // convert ires to string + std::string num; Tools::convert(key.first,num); + // print entry + statusfile_.printField("bf-"+num+":"+key.second, bf); + } + } + statusfile_.printField(); +} + +bool EMMIVOX::doAccept(double oldE, double newE, double kbt) +{ + bool accept = false; + // calculate delta energy + double delta = ( newE - oldE ) / kbt; + // if delta is negative always accept move + if( delta < 0.0 ) { + accept = true; + } else { + // otherwise extract random number + double s = random_.RandU01(); + if( s < exp(-delta) ) { accept = true; } + } + return accept; +} + +vector EMMIVOX::get_Model_param(vector &atoms) +{ + // check if MOLINFO line is present + auto* moldat=plumed.getActionSet().selectLatest(this); + if(!moldat) error("MOLINFO DATA not found\n"); + log<<" MOLINFO DATA found with label " <getLabel()<<", using proper atom names\n"; + + // list of weights - one per atom + vector Model_w; + // 5-Gaussians parameters + // map of atom types to A and B coefficients of scattering factor + // f(s) = A * exp(-B*s**2) + // B is in Angstrom squared + // Elastic atomic scattering factors of electrons for neutral atoms + // and s up to 6.0 A^-1: as implemented in PLUMED + // map between an atom type and an index + map type_map; + // atomistic types + type_map["C"]=0; type_map["O"]=1; type_map["N"]=2; + type_map["S"]=3; type_map["P"]=4; type_map["F"]=5; + type_map["NA"]=6; type_map["MG"]=7; type_map["CL"]=8; + type_map["CA"]=9; type_map["K"]=10; type_map["ZN"]=11; + // Martini types + type_map["ALA_BB"]=12; type_map["ALA_SC1"]=13; type_map["CYS_BB"]=14; + type_map["CYS_SC1"]=15; type_map["ASP_BB"]=16; type_map["ASP_SC1"]=17; + type_map["GLU_BB"]=18; type_map["GLU_SC1"]=19; type_map["PHE_BB"]=20; + type_map["PHE_SC1"]=21; type_map["PHE_SC2"]=22; type_map["PHE_SC3"]=23; + type_map["GLY_BB"]=24; type_map["HIS_BB"]=25; type_map["HIS_SC1"]=26; + type_map["HIS_SC2"]=27; type_map["HIS_SC3"]=28; type_map["ILE_BB"]=29; + type_map["ILE_SC1"]=30; type_map["LYS_BB"]=31; type_map["LYS_SC1"]=32; + type_map["LYS_SC2"]=33; type_map["LEU_BB"]=34; type_map["LEU_SC1"]=35; + type_map["MET_BB"]=36; type_map["MET_SC1"]=37; type_map["ASN_BB"]=38; + type_map["ASN_SC1"]=39; type_map["PRO_BB"]=40; type_map["PRO_SC1"]=41; + type_map["GLN_BB"]=42; type_map["GLN_SC1"]=43; type_map["ARG_BB"]=44; + type_map["ARG_SC1"]=45; type_map["ARG_SC2"]=46; type_map["SER_BB"]=47; + type_map["SER_SC1"]=48; type_map["THR_BB"]=49; type_map["THR_SC1"]=50; + type_map["VAL_BB"]=51; type_map["VAL_SC1"]=52; type_map["TRP_BB"]=53; + type_map["TRP_SC1"]=54; type_map["TRP_SC2"]=55; type_map["TRP_SC3"]=56; + type_map["TRP_SC4"]=57; type_map["TRP_SC5"]=58; type_map["TYR_BB"]=59; + type_map["TYR_SC1"]=60; type_map["TYR_SC2"]=61; type_map["TYR_SC3"]=62; + type_map["TYR_SC4"]=63; + // fill in sigma vector for atoms + Model_s_.push_back(0.01*Vector5d(0.114,1.0825,5.4281,17.8811,51.1341)); // C + Model_s_.push_back(0.01*Vector5d(0.0652,0.6184,2.9449,9.6298,28.2194)); // O + Model_s_.push_back(0.01*Vector5d(0.0541,0.5165,2.8207,10.6297,34.3764)); // N + Model_s_.push_back(0.01*Vector5d(0.0838,0.7788,4.3462,15.5846,44.63655)); // S + Model_s_.push_back(0.01*Vector5d(0.0977,0.9084,4.9654,18.5471,54.3648)); // P + Model_s_.push_back(0.01*Vector5d(0.0613,0.5753,2.6858,8.8214,25.6668)); // F + Model_s_.push_back(0.01*Vector5d(0.1684,1.7150,8.8386,50.8265,147.2073)); // NA + Model_s_.push_back(0.01*Vector5d(0.1356,1.3579,6.9255,32.3165,92.1138)); // MG + Model_s_.push_back(0.01*Vector5d(0.0694,0.6443,3.5351,12.5058,35.8633)); // CL + Model_s_.push_back(0.01*Vector5d(0.1742,1.8329,8.8407,47.4583,134.9613)); // CA + Model_s_.push_back(0.01*Vector5d(0.1660,1.6906,8.7447,46.7825,165.6923)); // K + Model_s_.push_back(0.01*Vector5d(0.0876,0.8650,3.8612,18.8726,64.7016)); // ZN + // fill in sigma vector for Martini beads + Model_s_.push_back(0.01*Vector5d(22.000000,22.000000,22.000000,22.000000,22.000000)); // ALA_BB + Model_s_.push_back(0.01*Vector5d(0.500000,0.500000,0.500000,0.500000,0.500000)); // ALA_SC1 + Model_s_.push_back(0.01*Vector5d(23.000000,23.000000,23.000000,23.000000,23.000000)); // CYS_BB + Model_s_.push_back(0.01*Vector5d(8.500000,8.500000,8.500000,8.500000,8.500000)); // CYS_SC1 + Model_s_.push_back(0.01*Vector5d(23.000000,23.000000,23.000000,23.000000,23.000000)); // ASP_BB + Model_s_.push_back(0.01*Vector5d(17.000000,17.000000,17.000000,17.000000,17.000000)); // ASP_SC1 + Model_s_.push_back(0.01*Vector5d(22.000000,22.000000,22.000000,22.000000,22.000000)); // GLU_BB + Model_s_.push_back(0.01*Vector5d(24.000000,24.000000,24.000000,24.000000,24.000000)); // GLU_SC1 + Model_s_.push_back(0.01*Vector5d(23.000000,23.000000,23.000000,23.000000,23.000000)); // PHE_BB + Model_s_.push_back(0.01*Vector5d(17.500000,17.500000,17.500000,17.500000,17.500000)); // PHE_SC1 + Model_s_.push_back(0.01*Vector5d(11.000000,11.000000,11.000000,11.000000,11.000000)); // PHE_SC2 + Model_s_.push_back(0.01*Vector5d(11.000000,11.000000,11.000000,11.000000,11.000000)); // PHE_SC3 + Model_s_.push_back(0.01*Vector5d(23.000000,23.000000,23.000000,23.000000,23.000000)); // GLY_BB + Model_s_.push_back(0.01*Vector5d(23.000000,23.000000,23.000000,23.000000,23.000000)); // HIS_BB + Model_s_.push_back(0.01*Vector5d(11.500000,11.500000,11.500000,11.500000,11.500000)); // HIS_SC1 + Model_s_.push_back(0.01*Vector5d(9.000000,9.000000,9.000000,9.000000,9.000000)); // HIS_SC2 + Model_s_.push_back(0.01*Vector5d(8.500000,8.500000,8.500000,8.500000,8.500000)); // HIS_SC3 + Model_s_.push_back(0.01*Vector5d(23.000000,23.000000,23.000000,23.000000,23.000000)); // ILE_BB + Model_s_.push_back(0.01*Vector5d(25.500000,25.500000,25.500000,25.500000,25.500000)); // ILE_SC1 + Model_s_.push_back(0.01*Vector5d(22.000000,22.000000,22.000000,22.000000,22.000000)); // LYS_BB + Model_s_.push_back(0.01*Vector5d(18.000000,18.000000,18.000000,18.000000,18.000000)); // LYS_SC1 + Model_s_.push_back(0.01*Vector5d(11.000000,11.000000,11.000000,11.000000,11.000000)); // LYS_SC2 + Model_s_.push_back(0.01*Vector5d(22.000000,22.000000,22.000000,22.000000,22.000000)); // LEU_BB + Model_s_.push_back(0.01*Vector5d(21.500000,21.500000,21.500000,21.500000,21.500000)); // LEU_SC1 + Model_s_.push_back(0.01*Vector5d(22.000000,22.000000,22.000000,22.000000,22.000000)); // MET_BB + Model_s_.push_back(0.01*Vector5d(22.500000,22.500000,22.500000,22.500000,22.500000)); // MET_SC1 + Model_s_.push_back(0.01*Vector5d(22.000000,22.000000,22.000000,22.000000,22.000000)); // ASN_BB + Model_s_.push_back(0.01*Vector5d(18.500000,18.500000,18.500000,18.500000,18.500000)); // ASN_SC1 + Model_s_.push_back(0.01*Vector5d(23.500000,23.500000,23.500000,23.500000,23.500000)); // PRO_BB + Model_s_.push_back(0.01*Vector5d(17.500000,17.500000,17.500000,17.500000,17.500000)); // PRO_SC1 + Model_s_.push_back(0.01*Vector5d(22.000000,22.000000,22.000000,22.000000,22.000000)); // GLN_BB + Model_s_.push_back(0.01*Vector5d(24.500000,24.500000,24.500000,24.500000,24.500000)); // GLN_SC1 + Model_s_.push_back(0.01*Vector5d(23.000000,23.000000,23.000000,23.000000,23.000000)); // ARG_BB + Model_s_.push_back(0.01*Vector5d(18.000000,18.000000,18.000000,18.000000,18.000000)); // ARG_SC1 + Model_s_.push_back(0.01*Vector5d(18.000000,18.000000,18.000000,18.000000,18.000000)); // ARG_SC2 + Model_s_.push_back(0.01*Vector5d(23.000000,23.000000,23.000000,23.000000,23.000000)); // SER_BB + Model_s_.push_back(0.01*Vector5d(9.000000,9.000000,9.000000,9.000000,9.000000)); // SER_SC1 + Model_s_.push_back(0.01*Vector5d(23.000000,23.000000,23.000000,23.000000,23.000000)); // THR_BB + Model_s_.push_back(0.01*Vector5d(17.000000,17.000000,17.000000,17.000000,17.000000)); // THR_SC1 + Model_s_.push_back(0.01*Vector5d(23.000000,23.000000,23.000000,23.000000,23.000000)); // VAL_BB + Model_s_.push_back(0.01*Vector5d(18.000000,18.000000,18.000000,18.000000,18.000000)); // VAL_SC1 + Model_s_.push_back(0.01*Vector5d(23.000000,23.000000,23.000000,23.000000,23.000000)); // TRP_BB + Model_s_.push_back(0.01*Vector5d(11.500000,11.500000,11.500000,11.500000,11.500000)); // TRP_SC1 + Model_s_.push_back(0.01*Vector5d(9.000000,9.000000,9.000000,9.000000,9.000000)); // TRP_SC2 + Model_s_.push_back(0.01*Vector5d(11.000000,11.000000,11.000000,11.000000,11.000000)); // TRP_SC3 + Model_s_.push_back(0.01*Vector5d(11.000000,11.000000,11.000000,11.000000,11.000000)); // TRP_SC4 + Model_s_.push_back(0.01*Vector5d(9.500000,9.500000,9.500000,9.500000,9.500000)); // TRP_SC5 + Model_s_.push_back(0.01*Vector5d(23.000000,23.000000,23.000000,23.000000,23.000000)); // TYR_BB + Model_s_.push_back(0.01*Vector5d(12.000000,12.000000,12.000000,12.000000,12.000000)); // TYR_SC1 + Model_s_.push_back(0.01*Vector5d(11.000000,11.000000,11.000000,11.000000,11.000000)); // TYR_SC2 + Model_s_.push_back(0.01*Vector5d(11.000000,11.000000,11.000000,11.000000,11.000000)); // TYR_SC3 + Model_s_.push_back(0.01*Vector5d(8.500000,8.500000,8.500000,8.500000,8.500000)); // TYR_SC4 + // fill in weight vector for atoms + Model_w_.push_back(Vector5d(0.0489,0.2091,0.7537,1.1420,0.3555)); // C + Model_w_.push_back(Vector5d(0.0365,0.1729,0.5805,0.8814,0.3121)); // O + Model_w_.push_back(Vector5d(0.0267,0.1328,0.5301,1.1020,0.4215)); // N + Model_w_.push_back(Vector5d(0.0915,0.4312,1.0847,2.4671,1.0852)); // S + Model_w_.push_back(Vector5d(0.1005,0.4615,1.0663,2.5854,1.2725)); // P + Model_w_.push_back(Vector5d(0.0382,0.1822,0.5972,0.7707,0.2130)); // F + Model_w_.push_back(Vector5d(0.1260,0.6442,0.8893,1.8197,1.2988)); // NA + Model_w_.push_back(Vector5d(0.1130,0.5575,0.9046,2.1580,1.4735)); // MG + Model_w_.push_back(Vector5d(0.0799,0.3891,1.0037,2.3332,1.0507)); // CL + Model_w_.push_back(Vector5d(0.2355,0.9916,2.3959,3.7252,2.5647)); // CA + Model_w_.push_back(Vector5d(0.2149,0.8703,2.4999,2.3591,3.0318)); // K + Model_w_.push_back(Vector5d(0.1780,0.8096,1.6744,1.9499,1.4495)); // ZN + // fill in weight vector for Martini beads + Model_w_.push_back(Vector5d(1.800000,1.800000,1.800000,1.800000,1.800000)); // ALA_BB + Model_w_.push_back(Vector5d(0.100000,0.100000,0.100000,0.100000,0.100000)); // ALA_SC1 + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // CYS_BB + Model_w_.push_back(Vector5d(1.100000,1.100000,1.100000,1.100000,1.100000)); // CYS_SC1 + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // ASP_BB + Model_w_.push_back(Vector5d(1.700000,1.700000,1.700000,1.700000,1.700000)); // ASP_SC1 + Model_w_.push_back(Vector5d(1.800000,1.800000,1.800000,1.800000,1.800000)); // GLU_BB + Model_w_.push_back(Vector5d(2.300000,2.300000,2.300000,2.300000,2.300000)); // GLU_SC1 + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // PHE_BB + Model_w_.push_back(Vector5d(1.400000,1.400000,1.400000,1.400000,1.400000)); // PHE_SC1 + Model_w_.push_back(Vector5d(0.900000,0.900000,0.900000,0.900000,0.900000)); // PHE_SC2 + Model_w_.push_back(Vector5d(0.900000,0.900000,0.900000,0.900000,0.900000)); // PHE_SC3 + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // GLY_BB + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // HIS_BB + Model_w_.push_back(Vector5d(0.900000,0.900000,0.900000,0.900000,0.900000)); // HIS_SC1 + Model_w_.push_back(Vector5d(0.800000,0.800000,0.800000,0.800000,0.800000)); // HIS_SC2 + Model_w_.push_back(Vector5d(0.800000,0.800000,0.800000,0.800000,0.800000)); // HIS_SC3 + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // ILE_BB + Model_w_.push_back(Vector5d(2.000000,2.000000,2.000000,2.000000,2.000000)); // ILE_SC1 + Model_w_.push_back(Vector5d(1.800000,1.800000,1.800000,1.800000,1.800000)); // LYS_BB + Model_w_.push_back(Vector5d(1.400000,1.400000,1.400000,1.400000,1.400000)); // LYS_SC1 + Model_w_.push_back(Vector5d(0.900000,0.900000,0.900000,0.900000,0.900000)); // LYS_SC2 + Model_w_.push_back(Vector5d(1.800000,1.800000,1.800000,1.800000,1.800000)); // LEU_BB + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // LEU_SC1 + Model_w_.push_back(Vector5d(1.800000,1.800000,1.800000,1.800000,1.800000)); // MET_BB + Model_w_.push_back(Vector5d(2.300000,2.300000,2.300000,2.300000,2.300000)); // MET_SC1 + Model_w_.push_back(Vector5d(1.800000,1.800000,1.800000,1.800000,1.800000)); // ASN_BB + Model_w_.push_back(Vector5d(1.800000,1.800000,1.800000,1.800000,1.800000)); // ASN_SC1 + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // PRO_BB + Model_w_.push_back(Vector5d(1.400000,1.400000,1.400000,1.400000,1.400000)); // PRO_SC1 + Model_w_.push_back(Vector5d(1.800000,1.800000,1.800000,1.800000,1.800000)); // GLN_BB + Model_w_.push_back(Vector5d(2.300000,2.300000,2.300000,2.300000,2.300000)); // GLN_SC1 + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // ARG_BB + Model_w_.push_back(Vector5d(1.400000,1.400000,1.400000,1.400000,1.400000)); // ARG_SC1 + Model_w_.push_back(Vector5d(1.800000,1.800000,1.800000,1.800000,1.800000)); // ARG_SC2 + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // SER_BB + Model_w_.push_back(Vector5d(0.800000,0.800000,0.800000,0.800000,0.800000)); // SER_SC1 + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // THR_BB + Model_w_.push_back(Vector5d(1.400000,1.400000,1.400000,1.400000,1.400000)); // THR_SC1 + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // VAL_BB + Model_w_.push_back(Vector5d(1.400000,1.400000,1.400000,1.400000,1.400000)); // VAL_SC1 + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // TRP_BB + Model_w_.push_back(Vector5d(0.900000,0.900000,0.900000,0.900000,0.900000)); // TRP_SC1 + Model_w_.push_back(Vector5d(0.800000,0.800000,0.800000,0.800000,0.800000)); // TRP_SC2 + Model_w_.push_back(Vector5d(0.900000,0.900000,0.900000,0.900000,0.900000)); // TRP_SC3 + Model_w_.push_back(Vector5d(0.900000,0.900000,0.900000,0.900000,0.900000)); // TRP_SC4 + Model_w_.push_back(Vector5d(0.800000,0.800000,0.800000,0.800000,0.800000)); // TRP_SC5 + Model_w_.push_back(Vector5d(1.900000,1.900000,1.900000,1.900000,1.900000)); // TYR_BB + Model_w_.push_back(Vector5d(0.900000,0.900000,0.900000,0.900000,0.900000)); // TYR_SC1 + Model_w_.push_back(Vector5d(0.900000,0.900000,0.900000,0.900000,0.900000)); // TYR_SC2 + Model_w_.push_back(Vector5d(0.900000,0.900000,0.900000,0.900000,0.900000)); // TYR_SC3 + Model_w_.push_back(Vector5d(0.800000,0.800000,0.800000,0.800000,0.800000)); // TYR_SC4 + // cycle on atoms + for(unsigned i=0; igetAtomName(atoms[i]); + // get residue name + string resname = moldat->getResidueName(atoms[i]); + // type of atoms/bead + std::string type_s; + // Martini model + if(martini_) { + type_s = resname+"_"+name; + // Atomistic model + } else { + char type; + // get atom type + char first = name.at(0); + // GOLDEN RULE: type is first letter, if not a number + if (!isdigit(first)) { + type = first; + // otherwise is the second + } else { + type = name.at(1); + } + // convert to string + type_s = std::string(1,type); + // special cases + if(name=="SOD" || name=="NA" || name =="Na") type_s = "NA"; + if(name=="MG" || name=="Mg") type_s = "MG"; + if(name=="CLA" || name=="CL" || name =="Cl") type_s = "CL"; + if((resname=="CAL" || resname=="CA") && (name=="CAL" || name=="CA" || name =="C0")) type_s = "CA"; + if(name=="POT" || name=="K") type_s = "K"; + if(name=="ZN" || name=="Zn") type_s = "ZN"; + } + // check if key in map + if(type_map.find(type_s) != type_map.end()) { + // save atom type + Model_type_.push_back(type_map[type_s]); + // this will be normalized in the final density + Vector5d w = Model_w_[type_map[type_s]]; + Model_w.push_back(w[0]+w[1]+w[2]+w[3]+w[4]); + // get residue id + unsigned ires = moldat->getResidueNumber(atoms[i]); + // and chain + string c ("*"); + if(!bfactnoc_) c = moldat->getChainID(atoms[i]); + // define pair residue/chain IDs + pair key = make_pair(ires,c); + // add to map between residue/chain and list of atoms + Model_resmap_[key].push_back(i); + // and global list of residue/chain per atom + Model_res_.push_back(key); + // initialize Bfactor map + Model_b_[key] = 0.0; + } else { + error("Wrong atom type "+type_s+" from atom name "+name+"\n"); + } + } + // create ordered vector of residue-chain IDs + for(unsigned i=0; i key = Model_res_[i]; + // search in Model_rlist_ + if(find(Model_rlist_.begin(), Model_rlist_.end(), key) == Model_rlist_.end()) + Model_rlist_.push_back(key); + } + // return weights + return Model_w; +} + +// read experimental data file in PLUMED format: +void EMMIVOX::get_exp_data(string datafile) +{ + Vector pos; + double dens, err; + int idcomp; + +// open file + IFile *ifile = new IFile(); + if(ifile->FileExist(datafile)) { + ifile->open(datafile); + while(ifile->scanField("Id",idcomp)) { + ifile->scanField("Pos_0",pos[0]); + ifile->scanField("Pos_1",pos[1]); + ifile->scanField("Pos_2",pos[2]); + ifile->scanField("Density",dens); + ifile->scanField("Error",err); + // voxel center + Map_m_.push_back(pos); + // experimental density + ovdd_.push_back(dens); + // error + exp_err_.push_back(err); + // new line + ifile->scanField(); + } + ifile->close(); + } else { + error("Cannot find DATA_FILE "+datafile+"\n"); + } + delete ifile; +} + +void EMMIVOX::initialize_Bfactor(double reso) +{ + double bfactini = 0.0; + // if doing Bfactor Monte Carlo + if(dbfact_>0) { + // initialize B factor based on empirical relation between resolution and average bfactor + // calculated on ~8000 cryo-EM data with resolution < 5 Ang + // Bfact = A*reso**2+B; with A=6.95408 B=-2.45697/100.0 nm^2 + bfactini = 6.95408*reso*reso - 0.01*2.45697; + // check for min and max + bfactini = min(bfactmax_, max(bfactmin_, bfactini)); + } + // set initial Bfactor + for(map< pair, double>::iterator it=Model_b_.begin(); it!=Model_b_.end(); ++it) { + it->second = bfactini; + } + log.printf(" experimental map resolution : %3.2f\n", reso); + // if doing Bfactor Monte Carlo + if(dbfact_>0) { + log.printf(" minimum Bfactor value : %3.2f\n", bfactmin_); + log.printf(" maximum Bfactor value : %3.2f\n", bfactmax_); + log.printf(" initial Bfactor value : %3.2f\n", bfactini); + } +} + +// prepare auxiliary vectors +void EMMIVOX::get_auxiliary_vectors() +{ +// number of atoms + unsigned natoms = Model_res_.size(); +// clear lists + pref_.clear(); invs2_.clear(); cut_.clear(); +// resize + pref_.resize(natoms); invs2_.resize(natoms); cut_.resize(natoms); +// cycle on all atoms + #pragma omp parallel for num_threads(OpenMP::getNumThreads()) + for(unsigned im=0; im key = Model_res_[im]; + // get bfactor + double bfact = Model_b_[key]; + // sigma for 5 gaussians + Vector5d m_s = Model_s_[atype]; + // calculate constant quantities + Vector5d pref, invs2; + // calculate cutoff + double n = 0.0; + double d = 0.0; + for(unsigned j=0; j<5; ++j) { + // total value of b + double m_b = m_s[j] + bfact/4.0; + // calculate invs2 + invs2[j] = 1.0/(inv_pi2_*m_b); + // prefactor + pref[j] = cfact_[atype][j] * pow(invs2[j],1.5); + // cutoff + n += pref[j] / invs2[j]; + d += pref[j]; + } + // put into global lists + pref_[im] = pref; + invs2_[im] = invs2; + cut_[im] = min(nl_dist_cutoff_, sqrt(n/d)*nl_gauss_cutoff_); + } + // push to GPU + if(gpu_) push_auxiliary_gpu(); +} + +void EMMIVOX::push_auxiliary_gpu() +{ + // 1) create vector of pref_ and invs2_ + int natoms = Model_type_.size(); + vector pref(5*natoms), invs2(5*natoms); + #pragma omp parallel for num_threads(OpenMP::getNumThreads()) + for(int i=0; i > nl_res_l(Model_rlist_.size()); + // cycle on residues/chains #1 + #pragma omp for + for(unsigned i=0; i key1 = Model_rlist_[i]; + + // cycle over residues/chains #2 + for(unsigned j=i+1; j key2 = Model_rlist_[j]; + + // set flag neighbor + bool neigh = false; + + // cycle over all the atoms belonging to key1 + for(unsigned im1=0; im1 key = Model_rlist_[ir]; + // old bfactor + double bfactold = Model_b_[key]; + + // propose move in bfactor + double bfactnew = bfactold + dbfact_ * ( 2.0 * random_.RandU01() - 1.0 ); + // check boundaries + if(bfactnew > bfactmax_) {bfactnew = 2.0*bfactmax_ - bfactnew;} + if(bfactnew < bfactmin_) {bfactnew = 2.0*bfactmin_ - bfactnew;} + + // useful quantities + map deltaov; + + #pragma omp parallel num_threads(OpenMP::getNumThreads()) + { + // private variables + map deltaov_l; + #pragma omp for + // cycle over all the atoms belonging to key (residue/chain) + for(unsigned ia=0; ia::iterator itov=deltaov_l.begin(); itov!=deltaov_l.end(); ++itov) + deltaov[itov->first] += itov->second; + } + } + + // now calculate new and old score + double old_ene = 0.0; + double new_ene = 0.0; + + // cycle on all affected voxels + for(map::iterator itov=deltaov.begin(); itov!=deltaov.end(); ++itov) { + // id of the component + unsigned id = itov->first; + // new value + double ovmdnew = ovmd_[id]+itov->second; + // deviations + double devold = scale_ * ovmd_[id] + offset_ - ovdd_[id]; + double devnew = scale_ * ovmdnew + offset_ - ovdd_[id]; + // inverse of sigma_min + double ismin = ismin_[id]; + // scores + if(devold==0.0) { + old_ene += -kbt_ * std::log( 0.5 * sqrt2_pi_ * ismin ); + } else { + old_ene += -kbt_ * std::log( 0.5 / devold * erf ( devold * inv_sqrt2_ * ismin )); + } + if(devnew==0.0) { + new_ene += -kbt_ * std::log( 0.5 * sqrt2_pi_ * ismin ); + } else { + new_ene += -kbt_ * std::log( 0.5 / devnew * erf ( devnew * inv_sqrt2_ * ismin )); + } + } + + // list of neighboring residues + vector close = nl_res_[ir]; + // add restraint to keep Bfactors of close residues close + for(unsigned i=0; i keyn = Model_rlist_[close[i]]; + // deviations + double devold = bfactold - Model_b_[keyn]; + double devnew = bfactnew - Model_b_[keyn]; + // inverse of sigma_min + double ismin = 1.0 / bfactsig_; + // scores + if(devold==0.0) { + old_ene += -kbt_ * std::log( 0.5 * sqrt2_pi_ * ismin ); + } else { + old_ene += -kbt_ * std::log( 0.5 / devold * erf ( devold * inv_sqrt2_ * ismin )); + } + if(devnew==0.0) { + new_ene += -kbt_ * std::log( 0.5 * sqrt2_pi_ * ismin ); + } else { + new_ene += -kbt_ * std::log( 0.5 / devnew * erf ( devnew * inv_sqrt2_ * ismin )); + } + } + + // increment number of trials + MCBtrials_ += 1.0; + + // accept or reject + bool accept = false; + if(bfactemin_) { + if(new_ene < old_ene) accept = true; + } else { + accept = doAccept(old_ene, new_ene, kbt_); + } + + // in case of acceptance + if(accept) { + // update acceptance rate + MCBaccept_ += 1.0; + // update bfactor + Model_b_[key] = bfactnew; + // change all the ovmd_ affected + for(map::iterator itov=deltaov.begin(); itov!=deltaov.end(); ++itov) + ovmd_[itov->first] += itov->second; + } + + } // end cycle on bfactors + +// update offset sampling + bfactcount_ += 1; +// reset if needed + if(bfactcount_==bfactgroup_) bfactcount_ = 0; +// update auxiliary lists (to update pref_gpu_, invs2_gpu_, and cut_ on CPU/GPU) + get_auxiliary_vectors(); +// update neighbor list (new cut_ + update pref_nl_gpu_ and invs2_nl_gpu_ on GPU) + update_neighbor_list(); +// recalculate fmod (to update derivatives) + calculate_fmod(); +} + +void EMMIVOX::doMonteCarloScale() +{ + // store old energy, scale, and offset + double old_ene = ene_; + double old_scale = scale_; + double old_offset = offset_; + + // propose move in scale + double ds = dscale_ * ( 2.0 * random_.RandU01() - 1.0 ); + double new_scale = scale_ + ds; + // check boundaries + if(new_scale > scale_max_) {new_scale = 2.0 * scale_max_ - new_scale;} + if(new_scale < scale_min_) {new_scale = 2.0 * scale_min_ - new_scale;} + // propose move in offset + double doff = doffset_ * ( 2.0 * random_.RandU01() - 1.0 ); + double new_off = offset_ + doff; + + // communicate new_scale and new_off to other replicas + if(!no_aver_ && nrep_>1) { + if(replica_!=0) {new_scale = 0.0; new_off = 0.0;} + multi_sim_comm.Sum(&new_scale, 1); + multi_sim_comm.Sum(&new_off, 1); + } + + // set new scale and offset + scale_ = new_scale; + offset_ = new_off; + // calculate score + calculate_score(); + + // accept or reject + bool accept = doAccept(old_ene, ene_, kbt_); + + // increment number of trials + MCStrials_ += 1.0; + + // communicate decision + int do_update = 0; + if(accept) do_update = 1; + if(!no_aver_ && nrep_>1) { + if(replica_!=0) do_update = 0; + multi_sim_comm.Sum(&do_update, 1); + } + + // in case of acceptance + if(do_update==1) { + MCSaccept_ += 1.0; + } else { + // go back to old stuff + scale_ = old_scale; + offset_ = old_offset; + // recalculate energy and derivatives + calculate_score(); + } +} + +// get overlap and derivatives +double EMMIVOX::get_overlap_der(const Vector &d_m, const Vector &m_m, + const Vector5d &pref, const Vector5d &invs2, + Vector &ov_der) +{ + // initialize stuff + double ov_tot = 0.0; + // derivative accumulator + double ov_der_p = 0.0; + // calculate vector difference + Vector md = delta(m_m, d_m); + // norm squared + double md2 = md[0]*md[0]+md[1]*md[1]+md[2]*md[2]; + // cycle on 5 Gaussians + for(unsigned j=0; j<5; ++j) { + // calculate exponent + double ov = pref[j] * std::exp(-0.5 * md2 * invs2[j]); + // update derivative prefix + ov_der_p += ov * invs2[j]; + // increase total overlap + ov_tot += ov; + } + // set derivative + ov_der = ov_der_p * md; + // return total overlap + return ov_tot; +} + +// get overlap +double EMMIVOX::get_overlap(const Vector &d_m, const Vector &m_m, + const Vector5d &cfact, const Vector5d &m_s, double bfact) +{ + // calculate vector difference + Vector md = delta(m_m, d_m); + // norm squared + double md2 = md[0]*md[0]+md[1]*md[1]+md[2]*md[2]; + // cycle on 5 Gaussians + double ov_tot = 0.0; + for(unsigned j=0; j<5; ++j) { + // total value of b + double m_b = m_s[j]+bfact/4.0; + // calculate invs2 + double invs2 = 1.0/(inv_pi2_*m_b); + // final calculation + ov_tot += cfact[j] * pow(invs2, 1.5) * std::exp(-0.5 * md2 * invs2); + } + return ov_tot; +} + +void EMMIVOX::update_neighbor_sphere() +{ + // number of atoms + unsigned natoms = Model_type_.size(); + // clear neighbor sphere + ns_.clear(); + // store reference positions + refpos_ = getPositions(); + + // cycle on voxels - in parallel + #pragma omp parallel num_threads(OpenMP::getNumThreads()) + { + // private variables + vector< pair > ns_l; + #pragma omp for + for(unsigned id=0; id dist(getPositions().size()); + bool update = false; + +// calculate displacement + #pragma omp parallel for num_threads(OpenMP::getNumThreads()) + for(unsigned im=0; im=1.0) update=true; + +// return if update or not + return update; +} + +void EMMIVOX::update_neighbor_list() +{ + // number of atoms + unsigned natoms = Model_type_.size(); + // clear neighbor list + nl_.clear(); + + // cycle on neighbour sphere - in parallel + #pragma omp parallel num_threads(OpenMP::getNumThreads()) + { + // private variables + vector< pair > nl_l; + #pragma omp for + for(unsigned long long i=0; i0 && getStep()%MCBstride_==0) { + // clear vectors + Model_nb_.clear(); Model_nb_.resize(natoms); + // cycle over the neighbor list to creat a list of voxels per atom + #pragma omp parallel num_threads(OpenMP::getNumThreads()) + { + // private variables + vector< vector > Model_nb_l(natoms); + #pragma omp for + for(unsigned long long i=0; i nl_id(nl_size), nl_im(nl_size); + #pragma omp parallel for num_threads(OpenMP::getNumThreads()) + for(unsigned long long i=0; i(nl_[i].first); + nl_im[i] = static_cast(nl_[i].second); + } + // create tensors on device + nl_id_gpu_ = torch::from_blob(nl_id.data(), {nl_size}, torch::kInt32).clone().to(device_t_); + nl_im_gpu_ = torch::from_blob(nl_im.data(), {nl_size}, torch::kInt32).clone().to(device_t_); + // now we need to create pref_nl_gpu_ [5,nl_size] + pref_nl_gpu_ = torch::index_select(pref_gpu_,1,nl_im_gpu_); + // and invs2_nl_gpu_ [5,nl_size] + invs2_nl_gpu_ = torch::index_select(invs2_gpu_,1,nl_im_gpu_); + // and Map_m_nl_gpu_ [3,nl_size] + Map_m_nl_gpu_ = torch::index_select(Map_m_gpu_,1,nl_id_gpu_); +} + +void EMMIVOX::prepare() +{ + if(getExchangeStep()) first_time_=true; +} + +// calculate forward model +void EMMIVOX::calculate_fmod() +{ + if(gpu_) calculate_fmod_gpu(); + else calculate_fmod_cpu(); +} + +// calculate forward model on cpu +void EMMIVOX::calculate_fmod_cpu() +{ + // number of data points + unsigned nd = ovdd_.size(); + + // clear density vector + for(unsigned i=0; i ovmd_l(nd, 0.0); + #pragma omp for + for(unsigned long long i=0; i1) { + // sum across replicas + multi_sim_comm.Sum(&ovmd_[0], nd); + // and divide by number of replicas + double escale = 1.0 / static_cast(nrep_); + for(unsigned i=0; i posg(3*natoms); + #pragma omp parallel for num_threads(OpenMP::getNumThreads()) + for (int i=0; i1) { + // communicate ovmd_gpu_ to CPU [1, nd] + torch::Tensor ovmd_cpu = ovmd_gpu_.detach().to(torch::kCPU).to(torch::kFloat64); + // and put them in ovmd_ + ovmd_ = vector(ovmd_cpu.data_ptr(), ovmd_cpu.data_ptr() + ovmd_cpu.numel()); + // sum across replicas + multi_sim_comm.Sum(&ovmd_[0], nd); + // and divide by number of replicas + double escale = 1.0 / static_cast(nrep_); + for(int i=0; i0 && step%mapstride_==0) do_comm = true; + if(dscale_>0 && step%MCSstride_==0) do_comm = true; + if(dbfact_>0 && step%MCBstride_==0) do_comm = true; + if(do_corr_) do_comm = true; + // in case of metainference: already communicated + if(!no_aver_ && nrep_>1) do_comm = false; + if(do_comm) { + // communicate ovmd_gpu_ to CPU [1, nd] + torch::Tensor ovmd_cpu = ovmd_gpu_.detach().to(torch::kCPU).to(torch::kFloat64); + // and put them in ovmd_ + ovmd_ = vector(ovmd_cpu.data_ptr(), ovmd_cpu.data_ptr() + ovmd_cpu.numel()); + } +} + +// calculate score +void EMMIVOX::calculate_score() +{ + if(gpu_) calculate_score_gpu(); + else calculate_score_cpu(); +} + +// calculate score on cpu +void EMMIVOX::calculate_score_cpu() +{ + // number of atoms + unsigned natoms = Model_type_.size(); + + // calculate total energy and get derivatives + ene_ = calculate_Marginal(scale_, offset_, score_der_); + + // with marginal, simply multiply by number of replicas! + if(!no_aver_ && nrep_>1) ene_ *= static_cast(nrep_); + + // clear atom derivatives + for(unsigned i=0; i atom_der_l(natoms, Vector(0,0,0)); + #pragma omp for reduction (sumTensor : virial_) + for(unsigned long long i=0; i(); + // with marginal, simply multiply by number of replicas! + if(!no_aver_ && nrep_>1) ene_ *= static_cast(nrep_); + // + // 2) communicate derivatives to CPU + torch::Tensor atom_der_cpu = atoms_der_gpu.detach().to(torch::kCPU).to(torch::kFloat64); + // convert to vector + vector atom_der = vector(atom_der_cpu.data_ptr(), atom_der_cpu.data_ptr() + atom_der_cpu.numel()); + // and put in atom_der_ + #pragma omp parallel for num_threads(OpenMP::getNumThreads()) + for(int i=0; i0 && step(anneal_) * static_cast(step); + } else { + // reset to input TEMP + kbt_ = kbt0_; + } + // set value + getPntrToComponent("kbt")->set(kbt_); + + // neighbor list update + if(first_time_ || getExchangeStep() || step%nl_stride_==0) { + // check if time to update neighbor sphere + bool update = false; + if(first_time_ || getExchangeStep()) update = true; + else update = do_neighbor_sphere(); + // update neighbor sphere + if(update) update_neighbor_sphere(); + // update neighbor list + update_neighbor_list(); + // set flag + first_time_=false; + } + + // calculate forward model + calculate_fmod(); + + // Monte Carlo on bfactors + if(dbfact_>0) { + double acc = 0.0; + // do Monte Carlo + if(step%MCBstride_==0 && !getExchangeStep() && step>0) doMonteCarloBfact(); + // calculate acceptance ratio + if(MCBtrials_>0) acc = MCBaccept_ / MCBtrials_; + // set value + getPntrToComponent("accB")->set(acc); + } + + // calculate score + calculate_score(); + + // Monte Carlo on scale + if(dscale_>0) { + double acc = 0.0; + // do Monte Carlo + if(step%MCSstride_==0 && !getExchangeStep() && step>0) doMonteCarloScale(); + // calculate acceptance ratio + if(MCStrials_>0) acc = MCSaccept_ / MCStrials_; + // set acceptance value + getPntrToComponent("accS")->set(acc); + } + + // set score, virial, and derivatives + Value* score = getPntrToComponent("scoreb"); + score->set(ene_); + setBoxDerivatives(score, virial_); + #pragma omp parallel for + for(unsigned i=0; iset(scale_); + getPntrToComponent("offset")->set(offset_); + // calculate correlation coefficient + if(do_corr_) calculate_corr(); + // PRINT other quantities to files + // - status file + if(step%statusstride_==0) print_status(step); + // - density file + if(mapstride_>0 && step%mapstride_==0) write_model_density(step); +} + +void EMMIVOX::calculate_corr() +{ +// number of data points + double nd = static_cast(ovdd_.size()); +// average ovmd_ and ovdd_ + double ave_md = std::accumulate(ovmd_.begin(), ovmd_.end(), 0.) / nd; + double ave_dd = std::accumulate(ovdd_.begin(), ovdd_.end(), 0.) / nd; +// calculate correlation + double num = 0.; + double den1 = 0.; + double den2 = 0.; + #pragma omp parallel for num_threads(OpenMP::getNumThreads()) reduction( + : num, den1, den2) + for(unsigned i=0; iset(cc); +} + +double EMMIVOX::calculate_Marginal(double scale, double offset, vector &score_der) +{ + double ene = 0.0; + // cycle on all the voxels + #pragma omp parallel for num_threads(OpenMP::getNumThreads()) reduction( + : ene) + for(unsigned id=0; id