diff --git a/src/KOKKOS/pair_pod_kokkos.cpp b/src/KOKKOS/pair_pod_kokkos.cpp new file mode 100644 index 00000000000..31962f6a2ff --- /dev/null +++ b/src/KOKKOS/pair_pod_kokkos.cpp @@ -0,0 +1,1785 @@ +// clang-format off +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "eapod.h" +#include "pair_pod_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "math_const.h" +#include "memory_kokkos.h" +#include "neighbor_kokkos.h" +#include "neigh_request.h" + +#include +#include + +using namespace LAMMPS_NS; +using namespace MathConst; +using MathSpecial::powint; + +enum{FS,FS_SHIFTEDSCALED}; + +/* ---------------------------------------------------------------------- */ + +template +PairPODKokkos::PairPODKokkos(LAMMPS *lmp) : PairPOD(lmp) +{ + respa_enable = 0; + + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; + + ni = 0; + nimax = 0; + nij = 0; + nijmax = 0; + atomBlockSize = 2048; + nAtomBlocks = 0; + timing = 1; + for (int i=0; i<100; i++) comptime[i] = 0; + + host_flag = 
(execution_space == Host); +} + +/* ---------------------------------------------------------------------- + check if allocated, since class can be destructed when incomplete +------------------------------------------------------------------------- */ + +template +PairPODKokkos::~PairPODKokkos() +{ + if (timing == 1) { + printf("\n begin timing \n"); + for (int i=0; i<10; i++) printf("%g ", comptime[i]); + printf("\n"); + for (int i=10; i<20; i++) printf("%g ", comptime[i]); + printf("\n"); + for (int i=20; i<30; i++) printf("%g ", comptime[i]); + printf("\n end timing \n"); + } + + if (copymode) return; + + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairPODKokkos::init_style() +{ + if (host_flag) { + if (lmp->kokkos->nthreads > 1) + error->all(FLERR,"Pair style pod/kk can currently only run on a single " + "CPU thread"); + + PairPOD::init_style(); + return; + } + + if (atom->tag_enable == 0) error->all(FLERR, "Pair style POD requires atom IDs"); + if (force->newton_pair == 0) error->all(FLERR, "Pair style POD requires newton pair on"); + + // neighbor list request for KOKKOS + + neighflag = lmp->kokkos->neighflag; + + auto request = neighbor->add_request(this, NeighConst::REQ_FULL); + request->set_kokkos_host(std::is_same_v && + !std::is_same_v); + request->set_kokkos_device(std::is_same_v); + if (neighflag == FULL) + error->all(FLERR,"Must use half neighbor list style with pair pace/kk"); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template +double PairPODKokkos::init_one(int i, int j) +{ + double cutone = PairPOD::init_one(i,j); + + 
k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone; + k_cutsq.template modify(); + + return cutone; +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +template +void PairPODKokkos::coeff(int narg, char **arg) +{ + if (narg < 7) utils::missing_cmd_args(FLERR, "pair_coeff", error); + + PairPOD::coeff(narg,arg); // create a PairPOD object + + copy_from_pod_class(PairPOD::fastpodptr); // copy parameters and arrays from pod class + + int n = atom->ntypes + 1; + MemKK::realloc_kokkos(d_map, "pair_pod:map", n); + + MemKK::realloc_kokkos(k_cutsq, "pair_pod:cutsq", n, n); + d_cutsq = k_cutsq.template view(); + + MemKK::realloc_kokkos(k_scale, "pair_pod:scale", n, n); + d_scale = k_scale.template view(); + + // Set up element lists + + auto h_map = Kokkos::create_mirror_view(d_map); + + for (int i = 1; i <= atom->ntypes; i++) + h_map(i) = map[i]; + + Kokkos::deep_copy(d_map,h_map); +} + +/* ---------------------------------------------------------------------- */ + +template +void PairPODKokkos::allocate() +{ + PairPOD::allocate(); +} + +template +struct FindMaxNumNeighs { + typedef DeviceType device_type; + NeighListKokkos k_list; + + FindMaxNumNeighs(NeighListKokkos* nl): k_list(*nl) {} + ~FindMaxNumNeighs() {k_list.copymode = 1;} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& ii, int& max_neighs) const { + const int i = k_list.d_ilist[ii]; + const int num_neighs = k_list.d_numneigh[i]; + if (max_neighs +void PairPODKokkos::compute(int eflag_in, int vflag_in) +{ +// if (host_flag) { +// atomKK->sync(Host,X_MASK|TYPE_MASK); +// PairPOD::compute(eflag_in,vflag_in); +// atomKK->modified(Host,F_MASK); +// return; +// } + + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + ev_init(eflag,vflag,0); + + // reallocate per-atom arrays if necessary + if (eflag_atom) { + 
memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + copymode = 1; + int newton_pair = force->newton_pair; + if (newton_pair == false) + error->all(FLERR,"PairPODKokkos requires 'newton on'"); + + atomKK->sync(execution_space,X_MASK|F_MASK|TYPE_MASK); + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + //k_cutsq.template sync(); + + maxneigh = 0; + if (host_flag) { + inum = list->inum; + d_numneigh = typename ArrayTypes::t_int_1d("pair_pod:numneigh",inum); + for (int i=0; inumneigh[i]; + d_ilist = typename ArrayTypes::t_int_1d("pair_pod:ilist",inum); + for (int i=0; iilist[i]; + + int maxn = 0; + for (int i=0; inumneigh[i]) maxn = list->numneigh[i]; + MemoryKokkos::realloc_kokkos(d_neighbors,"neighlist:neighbors",inum,maxn); + for (int i=0; iilist[i]; + int m = list->numneigh[gi]; + if (maxneighfirstneigh[gi][l]; + } + } + } + else { + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + inum = list->inum; + int maxneighs; + Kokkos::parallel_reduce("PairPODKokkos::find_max_neighs",inum, FindMaxNumNeighs(k_list), Kokkos::Max(maxneighs)); + maxneigh = maxneighs; + } + + auto begin = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); + + // determine the number of atom blocks and divide atoms into blocks + nAtomBlocks = calculateNumberOfIntervals(inum, atomBlockSize); + if (nAtomBlocks > 100) nAtomBlocks = 100; + divideInterval(atomBlocks, inum, nAtomBlocks); + + int nmax = 0; + for (int block=0; block(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + // obtain the neighbors within rcut + NeighborList(rij, numij, 
typeai, idxi, ai, aj, ti, tj, rcutsq, gi1, ni); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[2] += std::chrono::duration_cast(end-begin).count()/1e6; + + // compute atomic energy and force for the current atom block + begin = std::chrono::high_resolution_clock::now(); + blockatomenergyforce(ni, nij); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[0] += std::chrono::duration_cast(end-begin).count()/1e6; + + // tally atomic energy to global energy + tallyenergy(gi1, ni); + + // tally atomic force to global force + tallyforce(nij); + + // tally atomic stress + if (vflag) { + tallystress(nij); + } + //savedatafordebugging(); + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + atomKK->modified(execution_space,F_MASK); + + copymode = 0; +} + +template +void PairPODKokkos::copy_from_pod_class(EAPOD *podptr) +{ + nelements = podptr->nelements; // number of elements + onebody = podptr->onebody; // one-body descriptors + besseldegree = podptr->besseldegree; // degree of Bessel functions + inversedegree = podptr->inversedegree; // degree of inverse functions + nbesselpars = podptr->nbesselpars; // number of Bessel parameters + nCoeffPerElement = podptr->nCoeffPerElement; // number of coefficients per element = (nl1 + Mdesc*nClusters) + ns = podptr->ns; // number of snapshots for radial basis functions + nl1 = podptr->nl1; // number of one-body descriptors + nl2 = podptr->nl2; // number of two-body descriptors + nl3 = podptr->nl3; // number of three-body descriptors + nl4 = podptr->nl4; // number of four-body descriptors + nl23 = podptr->nl23; // number of two-body x three-body descriptors + nl33 = podptr->nl33; // number of three-body x three-body descriptors + nl34 = podptr->nl34; // number of three-body x four-body descriptors + nl44 
= podptr->nl44; // number of four-body x four-body descriptors + n23 = podptr->n23; + n32 = podptr->n32; + nl = podptr->nl; // number of local descriptors + nrbf2 = podptr->nrbf2; + nrbf3 = podptr->nrbf3; + nrbf4 = podptr->nrbf4; + nrbfmax = podptr->nrbfmax; // number of radial basis functions + nabf3 = podptr->nabf3; // number of three-body angular basis functions + nabf4 = podptr->nabf4; // number of four-body angular basis functions + K3 = podptr->K3; // number of three-body monomials + K4 = podptr->K4; // number of four-body monomials + Q4 = podptr->Q4; // number of four-body monomial coefficients + nClusters = podptr->nClusters; // number of environment clusters + nComponents = podptr->nComponents; // number of principal components + Mdesc = podptr->Mdesc; // number of base descriptors + + rin = podptr->rin; + rcut = podptr->rcut; + rmax = rcut - rin; + + MemKK::realloc_kokkos(besselparams, "pair_pod:besselparams", 3); + auto h_besselparams = Kokkos::create_mirror_view(besselparams); + h_besselparams[0] = podptr->besselparams[0]; + h_besselparams[1] = podptr->besselparams[1]; + h_besselparams[2] = podptr->besselparams[2]; + Kokkos::deep_copy(besselparams, h_besselparams); + + MemKK::realloc_kokkos(elemindex, "pair_pod:elemindex", nelements*nelements); + auto h_elemindex = Kokkos::create_mirror_view(elemindex); + for (int i=0; ielemindex[i]; + Kokkos::deep_copy(elemindex, h_elemindex); + + MemKK::realloc_kokkos(Phi, "pair_pod:Phi", ns*ns); + auto h_Phi = Kokkos::create_mirror_view(Phi); + for (int i=0; iPhi[i]; + Kokkos::deep_copy(Phi, h_Phi); + + MemKK::realloc_kokkos(coefficients, "pair_pod:coefficients", nCoeffPerElement * nelements); + auto h_coefficients = Kokkos::create_mirror_view(coefficients); + for (int i=0; icoeff[i]; + Kokkos::deep_copy(coefficients, h_coefficients); + + if (nClusters > 1) { + MemKK::realloc_kokkos(Proj, "pair_pod:Proj", Mdesc * nComponents * nelements); + auto h_Proj = Kokkos::create_mirror_view(Proj); + for (int i=0; iProj[i]; + 
Kokkos::deep_copy(Proj, h_Proj); + + MemKK::realloc_kokkos(Centroids, "pair_pod:Centroids", nClusters * nComponents * nelements); + auto h_Centroids = Kokkos::create_mirror_view(Centroids); + for (int i=0; iCentroids[i]; + Kokkos::deep_copy(Centroids, h_Centroids); + } + + MemKK::realloc_kokkos(pn3, "pair_pod:pn3", nabf3+1); // array stores the number of monomials for each degree + MemKK::realloc_kokkos(pq3, "pair_pod:pq3", K3*2); // array needed for the recursive computation of the angular basis functions + MemKK::realloc_kokkos(pc3, "pair_pod:pc3", K3); // array needed for the computation of the three-body descriptors + MemKK::realloc_kokkos(pa4, "pair_pod:pa4", nabf4+1); // this array is a subset of the array {0, 1, 4, 10, 19, 29, 47, 74, 89, 119, 155, 209, 230, 275, 335, 425, 533, 561, 624, 714, 849, 949, 1129, 1345} + MemKK::realloc_kokkos(pb4, "pair_pod:pb4", Q4*3); // array stores the indices of the monomials needed for the computation of the angular basis functions + MemKK::realloc_kokkos(pc4, "pair_pod:pc4", Q4); // array of monomial coefficients needed for the computation of the four-body descriptors + + auto h_pn3 = Kokkos::create_mirror_view(pn3); + for (int i=0; ipn3[i]; + Kokkos::deep_copy(pn3, h_pn3); + + auto h_pq3 = Kokkos::create_mirror_view(pq3); + for (int i = 0; i < K3*2; i++) h_pq3[i] = podptr->pq3[i]; + Kokkos::deep_copy(pq3, h_pq3); + + auto h_pc3 = Kokkos::create_mirror_view(pc3); + for (int i = 0; i < K3; i++) h_pc3[i] = podptr->pc3[i]; + Kokkos::deep_copy(pc3, h_pc3); + + auto h_pa4 = Kokkos::create_mirror_view(pa4); + for (int i = 0; i < nabf4+1; i++) h_pa4[i] = podptr->pa4[i]; + Kokkos::deep_copy(pa4, h_pa4); + + auto h_pb4 = Kokkos::create_mirror_view(pb4); + for (int i = 0; i < Q4*3; i++) h_pb4[i] = podptr->pb4[i]; + Kokkos::deep_copy(pb4, h_pb4); + + auto h_pc4 = Kokkos::create_mirror_view(pc4); + for (int i = 0; i < Q4; i++) h_pc4[i] = podptr->pc4[i]; + Kokkos::deep_copy(pc4, h_pc4); + + MemKK::realloc_kokkos(ind23, 
"pair_pod:ind23", n23); + MemKK::realloc_kokkos(ind32, "pair_pod:ind32", n32); + MemKK::realloc_kokkos(ind33l, "pair_pod:ind33l", nl33); + MemKK::realloc_kokkos(ind33r, "pair_pod:ind33r", nl33); + MemKK::realloc_kokkos(ind34l, "pair_pod:ind34l", nl34); + MemKK::realloc_kokkos(ind34r, "pair_pod:ind34r", nl34); + MemKK::realloc_kokkos(ind44l, "pair_pod:ind44l", nl44); + MemKK::realloc_kokkos(ind44r, "pair_pod:ind44r", nl44); + + auto h_ind23 = Kokkos::create_mirror_view(ind23); + for (int i = 0; i < n23; i++) h_ind23[i] = podptr->ind23[i]; + Kokkos::deep_copy(ind23, h_ind23); + + auto h_ind32 = Kokkos::create_mirror_view(ind32); + for (int i = 0; i < n32; i++) h_ind32[i] = podptr->ind32[i]; + Kokkos::deep_copy(ind32, h_ind32); + + auto h_ind33l = Kokkos::create_mirror_view(ind33l); + for (int i = 0; i < nl33; i++) h_ind33l[i] = podptr->ind33l[i]; + Kokkos::deep_copy(ind33l, h_ind33l); + + auto h_ind33r = Kokkos::create_mirror_view(ind33r); + for (int i = 0; i < nl33; i++) h_ind33r[i] = podptr->ind33r[i]; + Kokkos::deep_copy(ind33r, h_ind33r); + + auto h_ind34l = Kokkos::create_mirror_view(ind34l); + for (int i = 0; i < nl34; i++) h_ind34l[i] = podptr->ind34l[i]; + Kokkos::deep_copy(ind34l, h_ind34l); + + auto h_ind34r = Kokkos::create_mirror_view(ind34r); + for (int i = 0; i < nl34; i++) h_ind34r[i] = podptr->ind34r[i]; + Kokkos::deep_copy(ind34r, h_ind34r); + + auto h_ind44l = Kokkos::create_mirror_view(ind44l); + for (int i = 0; i < nl44; i++) h_ind44l[i] = podptr->ind44l[i]; + Kokkos::deep_copy(ind44l, h_ind44l); + + auto h_ind44r = Kokkos::create_mirror_view(ind44r); + for (int i = 0; i < nl44; i++) h_ind44r[i] = podptr->ind44r[i]; + Kokkos::deep_copy(ind44r, h_ind44r); +} + +template +void PairPODKokkos::divideInterval(int *intervals, int N, int M) +{ + int intervalSize = N / M; // Basic size of each interval + int remainder = N % M; // Remainder to distribute + intervals[0] = 1; // Start of the first interval + for (int i = 1; i <= M; i++) { + intervals[i] = 
intervals[i - 1] + intervalSize + (remainder > 0 ? 1 : 0); + if (remainder > 0) { + remainder--; + } + } +} + +template +int PairPODKokkos::calculateNumberOfIntervals(int N, int intervalSize) +{ + if (intervalSize <= 0) { + printf("Interval size must be a positive integer.\n"); + return -1; + } + + int M = N / intervalSize; + if (N % intervalSize != 0) { + M++; // Add an additional interval to cover the remainder + } + + return M; +} + +template +void PairPODKokkos::grow_atoms(int Ni) +{ + if (Ni > nimax) { + nimax = Ni; + MemKK::realloc_kokkos(numij, "pair_pod:numij", nimax+1); + MemKK::realloc_kokkos(ei, "pair_pod:ei", nimax); + MemKK::realloc_kokkos(typeai, "pair_pod:typeai", nimax); + int n = nimax * nelements * K3 * nrbfmax; + if (nClusters>1) n = (n > nimax*Mdesc) ? n : nimax*Mdesc; + MemKK::realloc_kokkos(sumU, "pair_pod:sumU", n); + MemKK::realloc_kokkos(bd, "pair_pod:bd", nimax * Mdesc); + + Kokkos::deep_copy(numij, 0); + } +} + +template +void PairPODKokkos::grow_pairs(int Nij) +{ + if (Nij > nijmax) { + nijmax = Nij; + MemKK::realloc_kokkos(rij, "pair_pod:r_ij", 3 * nijmax); + MemKK::realloc_kokkos(fij, "pair_pod:f_ij", 3 * nijmax); + MemKK::realloc_kokkos(idxi, "pair_pod:idxi", nijmax); + MemKK::realloc_kokkos(ai, "pair_pod:ai", nijmax); + MemKK::realloc_kokkos(aj, "pair_pod:aj", nijmax); + MemKK::realloc_kokkos(ti, "pair_pod:ti", nijmax); + MemKK::realloc_kokkos(tj, "pair_pod:tj", nijmax); + MemKK::realloc_kokkos(rbf, "pair_pod:rbf", nijmax * nrbfmax); + MemKK::realloc_kokkos(rbfx, "pair_pod:rbfx", nijmax * nrbfmax); + MemKK::realloc_kokkos(rbfy, "pair_pod:rbfy", nijmax * nrbfmax); + MemKK::realloc_kokkos(rbfz, "pair_pod:rbfz", nijmax * nrbfmax); + int kmax = (K3 > ns) ? 
K3 : ns; + MemKK::realloc_kokkos(abf, "pair_pod:abf", nijmax * kmax); + MemKK::realloc_kokkos(abfx, "pair_pod:abfx", nijmax * kmax); + MemKK::realloc_kokkos(abfy, "pair_pod:abfy", nijmax * kmax); + MemKK::realloc_kokkos(abfz, "pair_pod:abfz", nijmax * kmax); + MemKK::realloc_kokkos(bdd, "pair_pod:bdd", 3 * nijmax * Mdesc); + } +} + +template +int PairPODKokkos::NeighborCount(t_pod_1i l_numij, double l_rcutsq, int gi1, int Ni) +{ + // create local shadow views for KOKKOS_LAMBDA to pass them into parallel_for + auto l_ilist = d_ilist; + auto l_x = x; + auto l_numneigh = d_numneigh; + auto l_neighbors = d_neighbors; + + // compute number of pairs for each atom i + Kokkos::parallel_for("NeighborCount", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { + int i = team.league_rank(); + int gi = l_ilist(gi1 + i); + double xi0 = l_x(gi, 0); + double xi1 = l_x(gi, 1); + double xi2 = l_x(gi, 2); + int jnum = l_numneigh(gi); + int ncount = 0; + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,jnum), + [&] (const int jj, int& count) { + int j = l_neighbors(gi,jj); + j &= NEIGHMASK; + double delx = xi0 - l_x(j,0); + double dely = xi1 - l_x(j,1); + double delz = xi2 - l_x(j,2); + double rsq = delx*delx + dely*dely + delz*delz; + if (rsq < l_rcutsq) count++; + },ncount); + + l_numij(i+1) = ncount; + }); + + // cumulative sum over entries 1..Ni: l_numij(i) becomes the running pair total, l_numij(Ni) the total that is returned + Kokkos::parallel_scan("InclusivePrefixSum", Ni + 1, KOKKOS_LAMBDA(int i, int& update, const bool final) { + if (i > 0) { + update += l_numij(i); + if (final) { + l_numij(i) = update; + } + } + }); + + int total_neighbors = 0; + Kokkos::deep_copy(Kokkos::View(&total_neighbors), Kokkos::subview(l_numij, Ni)); + + return total_neighbors; +} + +template +void PairPODKokkos::NeighborList(t_pod_1d l_rij, t_pod_1i l_numij, t_pod_1i l_typeai, + t_pod_1i l_idxi, t_pod_1i l_ai, t_pod_1i l_aj, t_pod_1i l_ti, t_pod_1i l_tj, double l_rcutsq, int gi1, int Ni) +{ + // create local shadow views for KOKKOS_LAMBDA to 
pass them into parallel_for + auto l_ilist = d_ilist; + auto l_x = x; + auto l_numneigh = d_numneigh; + auto l_neighbors = d_neighbors; + auto l_map = d_map; + auto l_type = type; + + Kokkos::parallel_for("NeighborList", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { + int i = team.league_rank(); + int gi = l_ilist(gi1 + i); + double xi0 = l_x(gi, 0); + double xi1 = l_x(gi, 1); + double xi2 = l_x(gi, 2); + int itype = l_map(l_type(gi)) + 1; //map[atomtypes[gi]] + 1; + l_typeai(i) = itype; + int jnum = l_numneigh(gi); + int nij0 = l_numij(i); + Kokkos::parallel_scan(Kokkos::TeamThreadRange(team,jnum), + [&] (const int jj, int& offset, bool final) { + int gj = l_neighbors(gi,jj); + gj &= NEIGHMASK; + double delx = l_x(gj,0) - xi0; + double dely = l_x(gj,1) - xi1; + double delz = l_x(gj,2) - xi2; + double rsq = delx*delx + dely*dely + delz*delz; + if (rsq >= l_rcutsq) return; + if (final) { + int nij1 = nij0 + offset; + l_rij(nij1 * 3 + 0) = delx; + l_rij(nij1 * 3 + 1) = dely; + l_rij(nij1 * 3 + 2) = delz; + l_idxi(nij1) = i; + l_ai(nij1) = gi; + l_aj(nij1) = gj; + l_ti(nij1) = itype; + l_tj(nij1) = l_map(l_type(gj)) + 1; //map[atomtypes[gj)) + 1; + } + offset++; + }); + }); +} + +template +void PairPODKokkos::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod_1d rbfty, t_pod_1d rbftz, + t_pod_1d l_rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree, + int l_inversedegree, int l_nbesselpars, int l_ns, int Nij) +{ + Kokkos::parallel_for("ComputeRadialBasis", Nij, KOKKOS_LAMBDA(int n) { + double xij1 = l_rij(0+3*n); + double xij2 = l_rij(1+3*n); + double xij3 = l_rij(2+3*n); + + double dij = sqrt(xij1*xij1 + xij2*xij2 + xij3*xij3); + double dr1 = xij1/dij; + double dr2 = xij2/dij; + double dr3 = xij3/dij; + + double r = dij - l_rin; + double y = r/l_rmax; + double y2 = y*y; + + double y3 = 1.0 - y2*y; + double y4 = y3*y3 + 1e-6; + double y5 = sqrt(y4); + double y6 = exp(-1.0/y5); + double y7 
= y4*sqrt(y4); + + // Calculate the final cutoff function as y6/exp(-1) + double fcut = y6/exp(-1.0); + + // Calculate the derivative of the final cutoff function + double dfcut = ((3.0/(l_rmax*exp(-1.0)))*(y2)*y6*(y*y2 - 1.0))/y7; + + // Calculate fcut/r, fcut/r^2, and dfcut/r + double f1 = fcut/r; + double f2 = f1/r; + double df1 = dfcut/r; + + double alpha = l_besselparams(0); + double t1 = (1.0-exp(-alpha)); + double t2 = exp(-alpha*r/l_rmax); + double x0 = (1.0 - t2)/t1; + double dx0 = (alpha/l_rmax)*t2/t1; + + alpha = l_besselparams(1); + t1 = (1.0-exp(-alpha)); + t2 = exp(-alpha*r/l_rmax); + double x1 = (1.0 - t2)/t1; + double dx1 = (alpha/l_rmax)*t2/t1; + + alpha = l_besselparams(2); + t1 = (1.0-exp(-alpha)); + t2 = exp(-alpha*r/l_rmax); + double x2 = (1.0 - t2)/t1; + double dx2 = (alpha/l_rmax)*t2/t1; + + for (int i=0; i +void PairPODKokkos::matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2) +{ + Kokkos::parallel_for("MatrixMultiply", r1 * c2, KOKKOS_LAMBDA(int idx) { + int j = idx / r1; // Calculate column index + int i = idx % r1; // Calculate row index + double sum = 0.0; + for (int k = 0; k < c1; ++k) { + sum += a(i + r1*k) * b(k + c1*j); // Manually calculate the 1D index + } + c(i + r1*j) = sum; // Manually calculate the 1D index for c + }); +} + +template +void PairPODKokkos::angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, + t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N) +{ + Kokkos::parallel_for("AngularBasis", N, KOKKOS_LAMBDA(int j) { + double x = l_rij(j*3 + 0); + double y = l_rij(j*3 + 1); + double z = l_rij(j*3 + 2); + + double xx = x*x; + double yy = y*y; + double zz = z*z; + double xy = x*y; + double xz = x*z; + double yz = y*z; + + double dij = sqrt(xx + yy + zz); + const double u = x / dij; + const double v = y / dij; + const double w = z / dij; + + double dij3 = dij * dij * dij; + const double dudx = (yy + zz) / dij3; + const double dudy = -xy / dij3; + const double dudz = -xz / 
dij3; + + const double dvdx = -xy / dij3; + const double dvdy = (xx + zz) / dij3; + const double dvdz = -yz / dij3; + + const double dwdx = -xz / dij3; + const double dwdy = -yz / dij3; + const double dwdz = (xx + yy) / dij3; + + int idxa = j; + l_abf(idxa) = 1.0; + l_abfx(idxa) = 0.0; + l_abfy(idxa) = 0.0; + l_abfz(idxa) = 0.0; + + // Loop over all angular basis functions + for (int n=1; n +void PairPODKokkos::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf, t_pod_1d l_abf, t_pod_1i l_tj, + t_pod_1i l_numij, const int l_nelements, const int l_nrbf3, const int l_K3, const int Ni, const int Nij) +{ + int totalIterations = l_nrbf3 * l_K3 * Ni; + if (l_nelements==1) { + Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) { + int k = idx % l_K3; + int temp = idx / l_K3; + int m = temp % l_nrbf3; + int i = temp / l_nrbf3; + int kmi = k + l_K3*m + l_K3*l_nrbf3*i; + + int start = l_numij(i); + int nj = l_numij(i+1)-start; + double sum=0.0; + for (int j=0; j +void PairPODKokkos::twobodydescderiv(t_pod_1d d2, t_pod_1d dd2, t_pod_1d l_rbf, t_pod_1d l_rbfx, + t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbfmax, int l_nrbf2, const int Ni, const int Nij) +{ + int totalIterations = l_nrbf2 * Nij; + Kokkos::parallel_for("TwoBodyDescDeriv", totalIterations, KOKKOS_LAMBDA(int idx) { + int n = idx / l_nrbf2; // pair index + int m = idx % l_nrbf2; // rbd index + int i2 = n + Nij * m; // Index of the radial basis function for atom n and RBF m + int i1 = 3*(n + Nij * m + Nij * l_nrbf2 * (l_tj(n) - 1)); // Index of the descriptor for atom n, RBF m, and atom type tj(n) + Kokkos::atomic_add(&d2(l_idxi(n) + Ni * (m + l_nrbf2 * (l_tj(n) - 1))), l_rbf(i2)); // Add the radial basis function to the corresponding descriptor + dd2(0 + i1) = l_rbfx(i2); // Add the derivative with respect to x to the corresponding descriptor derivative + dd2(1 + i1) = l_rbfy(i2); // Add the derivative with respect to y to the corresponding descriptor 
derivative + dd2(2 + i1) = l_rbfz(i2); // Add the derivative with respect to z to the corresponding descriptor derivative + }); +} + +template +void PairPODKokkos::threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_pod_1i l_pc3, t_pod_1i l_pn3, + int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, const int Ni) +{ + int totalIterations = l_nrbf3 * Ni; + Kokkos::parallel_for("ThreeBodyDesc", totalIterations, KOKKOS_LAMBDA(int idx) { + int m = idx % l_nrbf3; + int i = idx / l_nrbf3; + int nmi = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3*i; + for (int p = 0; p < l_nabf3; p++) { + int n1 = l_pn3(p); + int n2 = l_pn3(p + 1); + int nn = n2 - n1; + int ipm = i + Ni * (p + l_nabf3 * m); + int k = 0; + for (int i1 = 0; i1 < l_nelements; i1++) { + for (int i2 = i1; i2 < l_nelements; i2++) { + double tmp=0; + for (int q = 0; q < nn; q++) { + tmp += l_pc3(n1 + q) * l_sumU(i1 + l_nelements * (n1 + q) + nmi) * l_sumU(i2 + l_nelements * (n1 + q) + nmi); + } + d3(ipm + totalIterations * l_nabf3 * k) = tmp; + k += 1; + } + } + } + }); +} + +template +void PairPODKokkos::threebodydescderiv(t_pod_1d dd3, t_pod_1d l_rbf, t_pod_1d l_rbfx, + t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, + t_pod_1d l_sumU, t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pc3, t_pod_1i l_pn3, t_pod_1i l_elemindex, int l_nelements, + int l_nrbfmax, int l_nrbf3, int l_nabf3, int l_K3, int Ni, int Nij) +{ + int totalIterations = l_nrbf3 * Nij; + if (l_nelements==1) { + Kokkos::parallel_for("ThreeBodyDescDeriv1", totalIterations, KOKKOS_LAMBDA(int idx) { + int j = idx / l_nrbf3; // Calculate j using integer division + int m = idx % l_nrbf3; // Calculate m using modulo operation + int idxR = j + Nij * m; // Pre-compute the index for rbf + double rbfBase = l_rbf(idxR); + double rbfxBase = l_rbfx(idxR); + double rbfyBase = l_rbfy(idxR); + double rbfzBase = l_rbfz(idxR); + + for (int p = 0; p < l_nabf3; p++) { + int n1 = l_pn3(p); + int nn = l_pn3(p + 1) - 
n1; + int baseIdx = 3 * j + 3 * Nij * (p + l_nabf3 * m); // Pre-compute the base index for dd3 + int idxU = l_K3 * m + l_K3*l_nrbf3*l_idxi(j); + double tmp1 = 0; + double tmp2 = 0; + double tmp3 = 0; + for (int q = 0; q < nn; q++) { + int idxNQ = n1 + q; // Combine n1 and q into a single index for pc3 and sumU + double f = 2.0 * l_pc3(idxNQ) * l_sumU(idxNQ + idxU); + int idxA = j + Nij*idxNQ; // Pre-compute the index for abf + double abfA = l_abf(idxA); + + // Use the pre-computed indices to update dd3 + tmp1 += f * (l_abfx(idxA) * rbfBase + rbfxBase * abfA); + tmp2 += f * (l_abfy(idxA) * rbfBase + rbfyBase * abfA); + tmp3 += f * (l_abfz(idxA) * rbfBase + rbfzBase * abfA); + } + dd3(baseIdx) = tmp1; + dd3(baseIdx + 1) = tmp2; + dd3(baseIdx + 2) = tmp3; + } + }); + } + else { + int N3 = 3 * Nij * l_nabf3 * l_nrbf3; + Kokkos::parallel_for("ThreeBodyDescDeriv2", totalIterations, KOKKOS_LAMBDA(int idx) { + int j = idx / l_nrbf3; // Derive the original j value + int m = idx % l_nrbf3; // Derive the original m value + int i2 = l_tj(j) - 1; + int idxK = l_nelements * l_K3 * m + l_nelements*l_K3*l_nrbf3*l_idxi(j); + int idxR = j + Nij * m; // Pre-compute the index for rbf + double rbfBase = l_rbf(idxR); + double rbfxBase = l_rbfx(idxR); + double rbfyBase = l_rbfy(idxR); + double rbfzBase = l_rbfz(idxR); + for (int p = 0; p < l_nabf3; p++) { + int n1 = l_pn3(p); + int nn = l_pn3(p + 1) - n1; + int jmp = 3 * j + 3 * Nij * (p + l_nabf3 * m); + for (int i1 = 0; i1 < l_nelements; i1++) { + int c3 = (i1 == i2) ? 
2 : 1; + double tmp1 = 0; + double tmp2 = 0; + double tmp3 = 0; + for (int q = 0; q < nn; q++) { + int idxNQ = n1 + q; // Combine n1 and q into a single index + int idxA = j + Nij*idxNQ; // Pre-compute the index for abf + double abfA = l_abf(idxA); + double f = c3*l_pc3(idxNQ) * l_sumU(i1 + l_nelements * idxNQ + idxK); + tmp1 += f * (l_abfx(idxA) * rbfBase + rbfxBase * abfA); + tmp2 += f * (l_abfy(idxA) * rbfBase + rbfyBase * abfA); + tmp3 += f * (l_abfz(idxA) * rbfBase + rbfzBase * abfA); + } + int ii = jmp + N3 * l_elemindex(i2 + l_nelements * i1); + dd3(0 + ii) = tmp1; + dd3(1 + ii) = tmp2; + dd3(2 + ii) = tmp3; + } + } + }); + } +} + +template +void PairPODKokkos::fourbodydesc(t_pod_1d d4, t_pod_1d l_sumU, t_pod_1i l_pa4, t_pod_1i l_pb4, + t_pod_1i l_pc4, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni) +{ + int totalIterations = l_nrbf4 * Ni; + Kokkos::parallel_for("fourbodydesc", totalIterations, KOKKOS_LAMBDA(int idx) { + int m = idx % l_nrbf4; + int i = idx / l_nrbf4; + int idxU = l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * i; + for (int p = 0; p < l_nabf4; p++) { + int n1 = l_pa4(p); + int n2 = l_pa4(p + 1); + int nn = n2 - n1; + int k = 0; + for (int i1 = 0; i1 < l_nelements; i1++) { + for (int i2 = i1; i2 < l_nelements; i2++) { + for (int i3 = i2; i3 < l_nelements; i3++) { + double tmp = 0.0; + for (int q = 0; q < nn; q++) { + int c = l_pc4(n1 + q); + int j1 = l_pb4(n1 + q); + int j2 = l_pb4(n1 + q + l_Q4); + int j3 = l_pb4(n1 + q + 2 * l_Q4); + tmp += c * l_sumU(idxU + i1 + l_nelements * j1) * l_sumU(idxU + i2 + l_nelements * j2) * l_sumU(idxU + i3 + l_nelements * j3); + } + int kk = p + l_nabf4 * m + l_nabf4 * l_nrbf4 * k; + d4(i + Ni * kk) = tmp; + k += 1; + } + } + } + } + }); +} + +template +void PairPODKokkos::fourbodydescderiv(t_pod_1d dd4, t_pod_1d l_rbf, t_pod_1d l_rbfx, + t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, + t_pod_1d l_sumU, t_pod_1i 
l_idxi, t_pod_1i l_tj, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, t_pod_1i l_elemindex, + int l_nelements, int l_nrbfmax, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni, int Nij) +{ + int totalIterations = l_nrbf4 * Nij; + if (l_nelements==1) { + Kokkos::parallel_for("fourbodydescderiv1", totalIterations, KOKKOS_LAMBDA(int idx) { + int j = idx / l_nrbf4; // Derive the original j value + int m = idx % l_nrbf4; // Derive the original m value + int idxU = l_K3 * m + l_K3*l_nrbf3*l_idxi(j); + int baseIdxJ = j + Nij * m; // Pre-compute the index for rbf + double rbfBase = l_rbf(baseIdxJ); + double rbfxBase = l_rbfx(baseIdxJ); + double rbfyBase = l_rbfy(baseIdxJ); + double rbfzBase = l_rbfz(baseIdxJ); + + for (int p = 0; p < l_nabf4; p++) { + int n1 = l_pa4(p); + int n2 = l_pa4(p + 1); + int nn = n2 - n1; + int kk = p + l_nabf4 * m; + int ii = 3 * Nij * kk; + int baseIdx = 3 * j + ii; + double tmp1 = 0; + double tmp2 = 0; + double tmp3 = 0; + for (int q = 0; q < nn; q++) { + int idxNQ = n1 + q; // Combine n1 and q into a single index + int c = l_pc4(idxNQ); + int j1 = l_pb4(idxNQ); + int j2 = l_pb4(idxNQ + l_Q4); + int j3 = l_pb4(idxNQ + 2 * l_Q4); + double c1 = l_sumU(idxU + j1); + double c2 = l_sumU(idxU + j2); + double c3 = l_sumU(idxU + j3); + double t12 = c * c1 * c2; + double t13 = c * c1 * c3; + double t23 = c * c2 * c3; + + // Pre-calculate commonly used indices + int baseIdxJ3 = j + Nij * j3; // Common index for j3 terms + int baseIdxJ2 = j + Nij * j2; // Common index for j2 terms + int baseIdxJ1 = j + Nij * j1; // Common index for j1 terms + + // Temporary variables to store repeated calculations + double abfBaseJ1 = l_abf(baseIdxJ1); + double abfBaseJ2 = l_abf(baseIdxJ2); + double abfBaseJ3 = l_abf(baseIdxJ3); + // Update dd4 using pre-computed indices + tmp1 += t12 * (l_abfx(baseIdxJ3) * rbfBase + rbfxBase * abfBaseJ3) + + t13 * (l_abfx(baseIdxJ2) * rbfBase + rbfxBase * abfBaseJ2) + + t23 * (l_abfx(baseIdxJ1) * rbfBase + rbfxBase * 
abfBaseJ1); + tmp2 += t12 * (l_abfy(baseIdxJ3) * rbfBase + rbfyBase * abfBaseJ3) + + t13 * (l_abfy(baseIdxJ2) * rbfBase + rbfyBase * abfBaseJ2) + + t23 * (l_abfy(baseIdxJ1) * rbfBase + rbfyBase * abfBaseJ1); + tmp3 += t12 * (l_abfz(baseIdxJ3) * rbfBase + rbfzBase * abfBaseJ3) + + t13 * (l_abfz(baseIdxJ2) * rbfBase + rbfzBase * abfBaseJ2) + + t23 * (l_abfz(baseIdxJ1) * rbfBase + rbfzBase * abfBaseJ1); + } + dd4(baseIdx) = tmp1; + dd4(baseIdx + 1) = tmp2; + dd4(baseIdx + 2) = tmp3; + } + }); + } + else { + int N3 = 3*Nij * l_nabf4 * l_nrbf4; + Kokkos::parallel_for("fourbodydescderiv2", totalIterations, KOKKOS_LAMBDA(int idx) { + int j = idx / l_nrbf4; // Derive the original j value + int m = idx % l_nrbf4; // Derive the original m value + int idxM = j + Nij * m; + double rbfM = l_rbf(idxM); + double rbfxM = l_rbfx(idxM); + double rbfyM = l_rbfy(idxM); + double rbfzM = l_rbfz(idxM); + int typej = l_tj(j) - 1; + for (int p = 0; p < l_nabf4; p++) { + int n1 = l_pa4(p); + int n2 = l_pa4(p + 1); + int nn = n2 - n1; + int jpm = 3 * j + 3 * Nij * (p + l_nabf4 * m); + int k = 0; + for (int i1 = 0; i1 < l_nelements; i1++) { + for (int i2 = i1; i2 < l_nelements; i2++) { + for (int i3 = i2; i3 < l_nelements; i3++) { + double tmp1 = 0; + double tmp2 = 0; + double tmp3 = 0; + for (int q = 0; q < nn; q++) { + int c = l_pc4(n1 + q); + int j1 = l_pb4(n1 + q); + int j2 = l_pb4(n1 + q + l_Q4); + int j3 = l_pb4(n1 + q + 2 * l_Q4); + + int idx1 = i1 + l_nelements * j1 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * l_idxi(j); + int idx2 = i2 + l_nelements * j2 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * l_idxi(j); + int idx3 = i3 + l_nelements * j3 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * l_idxi(j); + double c1 = l_sumU(idx1); + double c2 = l_sumU(idx2 ); + double c3 = l_sumU(idx3); + double t12 = c*(c1 * c2); + double t13 = c*(c1 * c3); + double t23 = c*(c2 * c3); + + int idxJ3 = j + Nij * j3; + int idxJ2 = j + Nij * j2; + int idxJ1 = j + Nij * 
j1; + double abfJ1 = l_abf(idxJ1); + double abfJ2 = l_abf(idxJ2); + double abfJ3 = l_abf(idxJ3); + double abfxJ1 = l_abfx(idxJ1); + double abfxJ2 = l_abfx(idxJ2); + double abfxJ3 = l_abfx(idxJ3); + double abfyJ1 = l_abfy(idxJ1); + double abfyJ2 = l_abfy(idxJ2); + double abfyJ3 = l_abfy(idxJ3); + double abfzJ1 = l_abfz(idxJ1); + double abfzJ2 = l_abfz(idxJ2); + double abfzJ3 = l_abfz(idxJ3); + + // Compute contributions for each condition + if (typej == i3) { + tmp1 += t12 * (abfxJ3 * rbfM + rbfxM * abfJ3); + tmp2 += t12 * (abfyJ3 * rbfM + rbfyM * abfJ3); + tmp3 += t12 * (abfzJ3 * rbfM + rbfzM * abfJ3); + } + if (typej == i2) { + tmp1 += t13 * (abfxJ2 * rbfM + rbfxM * abfJ2); + tmp2 += t13 * (abfyJ2 * rbfM + rbfyM * abfJ2); + tmp3 += t13 * (abfzJ2 * rbfM + rbfzM * abfJ2); + } + if (typej == i1) { + tmp1 += t23 * (abfxJ1 * rbfM + rbfxM * abfJ1); + tmp2 += t23 * (abfyJ1 * rbfM + rbfyM * abfJ1); + tmp3 += t23 * (abfzJ1 * rbfM + rbfzM * abfJ1); + } + } + int baseIdx = jpm + N3 * k; + dd4(0 + baseIdx) = tmp1; + dd4(1 + baseIdx) = tmp2; + dd4(2 + baseIdx) = tmp3; + k += 1; + } + } + } + } + }); + } +} + +template +void PairPODKokkos::fourbodydesc23(t_pod_1d d23, t_pod_1d d2, t_pod_1d d3, t_pod_1i l_ind23, + t_pod_1i l_ind32, int l_n23, int l_n32, int Ni) +{ + int totalIterations = l_n32 * l_n23 * Ni; + Kokkos::parallel_for("fourbodydesc23", totalIterations, KOKKOS_LAMBDA(int idx) { + int n = idx % Ni; + int temp = idx / Ni; + int i = temp % l_n23; + int j = temp / l_n23; + + int indexDst = n + Ni * i + Ni * l_n23 * j; + int indexSrc2 = n + Ni * l_ind23(i); + int indexSrc3 = n + Ni * l_ind32(j); + d23(indexDst) = d2(indexSrc2) * d3(indexSrc3); + }); +} + +template +void PairPODKokkos::fourbodydescderiv23(t_pod_1d dd23, t_pod_1d d2, t_pod_1d d3, t_pod_1d dd2, + t_pod_1d dd3, t_pod_1i l_idxi, t_pod_1i l_ind23, t_pod_1i l_ind32, int l_n23, int l_n32, int Ni, int N) +{ + int totalIterations = l_n32 * l_n23 * Ni; + Kokkos::parallel_for("fourbodydescderiv23", totalIterations, 
KOKKOS_LAMBDA(int idx) { + int n = idx % N; + int temp = idx / N; + int i = temp % l_n23; + int j = temp / l_n23; + + int k = 3 * (n + N * i + N * l_n23 * j); + int k1 = 3 * n + 3 * N * l_ind23(i); + int k2 = 3 * n + 3 * N * l_ind32(i); + int m1 = l_idxi(n) + Ni * l_ind23(i); + int m2 = l_idxi(n) + Ni * l_ind32(i); + dd23(0 + k) = d2(m1) * dd3(0 + k2) + dd2(0 + k1) * d3(m2); + dd23(1 + k) = d2(m1) * dd3(1 + k2) + dd2(1 + k1) * d3(m2); + dd23(2 + k) = d2(m1) * dd3(2 + k2) + dd2(2 + k1) * d3(m2); + }); +} + +template +void PairPODKokkos::crossdesc(t_pod_1d d12, t_pod_1d d1, t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni) +{ + int totalIterations = n12 * Ni; + Kokkos::parallel_for("crossdesc", totalIterations, KOKKOS_LAMBDA(int idx) { + int n = idx % Ni; + int i = idx / Ni; + + d12(n + Ni * i) = d1(n + Ni * ind1(i)) * d2(n + Ni * ind2(i)); + }); +} + +template +void PairPODKokkos::crossdescderiv(t_pod_1d dd12, t_pod_1d d1, t_pod_1d d2, t_pod_1d dd1, t_pod_1d dd2, + t_pod_1i l_idxi, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni, int Nij) +{ + int totalIterations = 3*n12*Nij; + Kokkos::parallel_for("crossdescderiv", totalIterations, KOKKOS_LAMBDA(int idx) { + int d = idx % 3; + int tmp = idx / 3; + int n = tmp % Nij; + int i = tmp / Nij; + int k1 = d + 3 * n + 3 * Nij * ind1(i); + int k2 = d + 3 * n + 3 * Nij * ind2(i); + dd12(idx) = d1(l_idxi(n) + Ni * ind1(i)) * dd2(k2) + dd1(k1) * d2(l_idxi(n) + Ni * ind2(i)); + }); +} + +template +void PairPODKokkos::set_array_to_zero(t_pod_1d a, int N) +{ + Kokkos::parallel_for("initialize_array", N, KOKKOS_LAMBDA(int i) { + a(i) = 0.0; + }); +} + +template +void PairPODKokkos::blockatom_base_descriptors(t_pod_1d bd, t_pod_1d bdd, int Ni, int Nij) +{ + auto begin = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); + + auto d2 = Kokkos::subview(bd, std::make_pair(0, Ni * nl2)); + auto d3 = Kokkos::subview(bd, std::make_pair(Ni * nl2, Ni * (nl2 + nl3))); + auto d4 = 
Kokkos::subview(bd, std::make_pair(Ni * (nl2 + nl3), Ni * (nl2 + nl3 + nl4))); + auto d23 = Kokkos::subview(bd, std::make_pair(Ni * (nl2 + nl3 + nl4), Ni * (nl2 + nl3 + nl4 + nl23))); + auto d33 = Kokkos::subview(bd, std::make_pair(Ni * (nl2 + nl3 + nl4 + nl23), Ni * (nl2 + nl3 + nl4 + nl23 + nl33))); + auto d34 = Kokkos::subview(bd, std::make_pair(Ni * (nl2 + nl3 + nl4 + nl23 + nl33), Ni * (nl2 + nl3 + nl4 + nl23 + nl33 + nl34))); + auto d44 = Kokkos::subview(bd, std::make_pair(Ni * (nl2 + nl3 + nl4 + nl23 + nl33 + nl34), Ni * (nl2 + nl3 + nl4 + nl23 + nl33 + nl34 + nl44))); + auto dd2 = Kokkos::subview(bdd, std::make_pair(0, 3 * Nij * nl2)); + auto dd3 = Kokkos::subview(bdd, std::make_pair(3 * Nij * nl2, 3 * Nij * (nl2 + nl3))); + auto dd4 = Kokkos::subview(bdd, std::make_pair(3 * Nij * (nl2 + nl3), 3 * Nij * (nl2 + nl3 + nl4))); + auto dd23 = Kokkos::subview(bdd, std::make_pair(3 * Nij * (nl2 + nl3 + nl4), 3 * Nij * (nl2 + nl3 + nl4 + nl23))); + auto dd33 = Kokkos::subview(bdd, std::make_pair(3 * Nij * (nl2 + nl3 + nl4 + nl23), 3 * Nij * (nl2 + nl3 + nl4 + nl23 + nl33))); + auto dd34 = Kokkos::subview(bdd, std::make_pair(3 * Nij * (nl2 + nl3 + nl4 + nl23 + nl33), 3 * Nij * (nl2 + nl3 + nl4 + nl23 + nl33 + nl34))); + auto dd44 = Kokkos::subview(bdd, std::make_pair(3 * Nij * (nl2 + nl3 + nl4 + nl23 + nl33 + nl34), 3 * Nij * (nl2 + nl3 + nl4 + nl23 + nl33 + nl34 + nl44))); + + begin = std::chrono::high_resolution_clock::now(); + radialbasis(abf, abfx, abfy, abfz, rij, besselparams, rin, rmax, + besseldegree, inversedegree, nbesselpars, ns, Nij); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[10] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + matrixMultiply(abf, Phi, rbf, Nij, ns, nrbfmax); + matrixMultiply(abfx, Phi, rbfx, Nij, ns, nrbfmax); + matrixMultiply(abfy, Phi, rbfy, Nij, ns, nrbfmax); + matrixMultiply(abfz, Phi, rbfz, Nij, ns, nrbfmax); + Kokkos::fence(); + 
end = std::chrono::high_resolution_clock::now(); + comptime[11] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + set_array_to_zero(d2, Ni*nl2); + set_array_to_zero(dd2, 3*Nij*nl2); + twobodydescderiv(d2, dd2, rbf, rbfx, rbfy, rbfz, idxi, tj, nrbfmax, nrbf2, Ni, Nij); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[12] += std::chrono::duration_cast(end-begin).count()/1e6; + + if ((nl3 > 0) && (Nij>1)) { + begin = std::chrono::high_resolution_clock::now(); + angularbasis(abf, abfx, abfy, abfz, rij, pq3, K3, Nij); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[13] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + set_array_to_zero(sumU, nelements * nrbf3 * K3 * Ni); + radialangularsum(sumU, rbf, abf, tj, numij, nelements, nrbf3, K3, Ni, Nij); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[14] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + //set_array_to_zero(d3, Ni*nl3); + threebodydesc(d3, sumU, pc3, pn3, nelements, nrbf3, nabf3, K3, Ni); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[15] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + set_array_to_zero(dd3, 3*Nij*nl3); + threebodydescderiv(dd3, rbf, rbfx, rbfy, rbfz, abf, abfx, abfy, abfz, sumU, + idxi, tj, pc3, pn3, elemindex, nelements, nrbfmax, nrbf3, nabf3, K3, Ni, Nij); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[16] += std::chrono::duration_cast(end-begin).count()/1e6; + } + + if ((nl4 > 0) && (Nij>2)) { + begin = std::chrono::high_resolution_clock::now(); + //set_array_to_zero(d4, Ni*nl4); + fourbodydesc(d4, sumU, pa4, pb4, pc4, nelements, nrbf3, nrbf4, nabf4, K3, Q4, Ni); + Kokkos::fence(); + 
end = std::chrono::high_resolution_clock::now(); + comptime[17] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + //set_array_to_zero(dd4, 3*Nij*nl4); + fourbodydescderiv(dd4, rbf, rbfx, rbfy, rbfz, abf, abfx, abfy, abfz, sumU, idxi, tj, + pa4, pb4, pc4, elemindex, nelements, nrbfmax, nrbf3, nrbf4, nabf4, K3, Q4, Ni, Nij); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[18] += std::chrono::duration_cast(end-begin).count()/1e6; + } + + if ((nl23>0) && (Nij>2)) { + fourbodydesc23(d23, d2, d3, ind23, ind32, n23, n32, Ni); + fourbodydescderiv23(dd23, d2, d3, dd2, dd3, idxi, ind23, ind32, n23, n32, Ni, Nij); + } + + if ((nl33>0) && (Nij>3)) { + begin = std::chrono::high_resolution_clock::now(); + crossdesc(d33, d3, d3, ind33l, ind33r, nl33, Ni); + crossdescderiv(dd33, d3, d3, dd3, dd3, idxi, ind33l, ind33r, nl33, Ni, Nij); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[19] += std::chrono::duration_cast(end-begin).count()/1e6; + } + + if ((nl34>0) && (Nij>4)) { + begin = std::chrono::high_resolution_clock::now(); + crossdesc(d34, d3, d4, ind34l, ind34r, nl34, Ni); + crossdescderiv(dd34, d3, d4, dd3, dd4, idxi, ind34l, ind34r, nl34, Ni, Nij); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[20] += std::chrono::duration_cast(end-begin).count()/1e6; + } + + if ((nl44>0) && (Nij>5)) { + begin = std::chrono::high_resolution_clock::now(); + crossdesc(d44, d4, d4, ind44l, ind44r, nl44, Ni); + crossdescderiv(dd44, d4, d4, dd4, dd4, idxi, ind44l, ind44r, nl44, Ni, Nij); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[21] += std::chrono::duration_cast(end-begin).count()/1e6; + } +} + +template +void PairPODKokkos::blockatomenv_descriptors(t_pod_1d ei, t_pod_1d cb, t_pod_1d B, int Ni) +{ + auto P = abf; + auto cp = abfx; + auto pca = abfy; // Ni*nComponents + auto D = abfz; // Ni*nClusters + 
auto sumD = rbf; // Ni + + auto proj = Proj; + auto cent = Centroids; + auto cefs = coefficients; + auto tyai = typeai; + + int nCom = nComponents; + int nCls = nClusters; + int nDes = Mdesc; + int nCoeff = nCoeffPerElement; + + int totalIterations = Ni*nCom; + Kokkos::parallel_for("pca", totalIterations, KOKKOS_LAMBDA(int idx) { + int i = idx % Ni; + int k = idx / Ni; + double sum = 0.0; + int typei = tyai[i]-1; + for (int m = 0; m < nDes; m++) { + sum += proj[k + nCom*m + nCom*nDes*typei] * B[i + Ni*m]; + } + pca[i + Ni*k] = sum; + }); + + totalIterations = Ni*nCls; + Kokkos::parallel_for("inverse_square_distances", totalIterations, KOKKOS_LAMBDA(int idx) { + int i = idx % Ni; + int j = idx / Ni; + int typei = tyai[i]-1; + double sum = 1e-20; + for (int k = 0; k < nCom; k++) { + double c = cent[k + j * nCom + nCls*nCom*typei]; + double p = pca[i + Ni*k]; + sum += (p - c) * (p - c); + } + D[i + Ni*j] = 1.0 / sum; + }); + + Kokkos::parallel_for("Probabilities", Ni, KOKKOS_LAMBDA(int i) { + double sum = 0; + for (int j = 0; j < nCls; j++) sum += D[i + Ni*j]; + sumD[i] = sum; + for (int j = 0; j < nCls; j++) P[i + Ni*j] = D[i + Ni*j]/sum; + }); + + Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int n) { + int nc = nCoeff*(tyai[n]-1); + ei[n] = cefs[0 + nc]; + for (int k = 0; k +void PairPODKokkos::blockatomenergyforce(int Ni, int Nij) +{ + auto begin = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); + + // calculate base descriptors and their derivatives with respect to atom coordinates + begin = std::chrono::high_resolution_clock::now(); + blockatom_base_descriptors(bd, bdd, Ni, Nij); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[22] += std::chrono::duration_cast(end-begin).count()/1e6; + + // local shadow copies of member variables + auto l_coefficients = coefficients; + auto l_typeai = typeai; + auto l_ei = ei; + auto l_fij = fij; + auto l_bd = bd; + auto l_bdd = bdd; 
+ auto l_nCoeffPerElement = nCoeffPerElement; + auto l_Mdesc = Mdesc; + auto l_ti = ti; + auto l_idxi = idxi; + + if (nClusters > 1) { + begin = std::chrono::high_resolution_clock::now(); + auto cb = sumU; + blockatomenv_descriptors(ei, cb, bd, Ni); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[25] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + int N3 = 3*Nij; + Kokkos::parallel_for("compute_fij", N3, KOKKOS_LAMBDA(const int idx) { + int n = idx / 3; + int i = l_idxi[n]; + double f0 = 0.0; + for (int m = 0; m < l_Mdesc; m++) { + f0 += cb(i + Ni*m) * l_bdd(idx + N3*m); + } + l_fij(idx) = f0; + }); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[26] += std::chrono::duration_cast(end-begin).count()/1e6; + + return; + } + + begin = std::chrono::high_resolution_clock::now(); + Kokkos::parallel_for("compute_ei", Ni, KOKKOS_LAMBDA(const int n) { + int nc = l_nCoeffPerElement * (l_typeai(n) - 1); + double sum = l_coefficients(0 + nc); + for (int m = 0; m < l_Mdesc; ++m) { + sum += l_coefficients(1 + m + nc) * l_bd(n + Ni * m); + } + l_ei(n) = sum; + }); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[23] += std::chrono::duration_cast(end-begin).count()/1e6; + + begin = std::chrono::high_resolution_clock::now(); + int N3 = 3*Nij; + Kokkos::parallel_for("compute_fij", N3, KOKKOS_LAMBDA(const int idx) { + int n = idx / 3; + int nc = l_nCoeffPerElement * (l_ti(n) - 1); // Assuming ti is a 1-D Kokkos::View + double f0 = 0.0; + for (int m = 0; m < l_Mdesc; m++) { + f0 += l_coefficients(1 + m + nc) * l_bdd(idx + N3*m); + } + l_fij(idx) = f0; + }); + Kokkos::fence(); + end = std::chrono::high_resolution_clock::now(); + comptime[24] += std::chrono::duration_cast(end-begin).count()/1e6; +} + +template +void PairPODKokkos::tallyforce(int Nij) { + auto l_f = f; + auto l_fij = fij; + auto l_ai = ai; + auto l_aj = aj; + 
Kokkos::parallel_for("TallyForce", Nij, KOKKOS_LAMBDA(int n) { + int im = l_ai(n); + int jm = l_aj(n); + int n3 = 3*n; + double fx = l_fij(n3 + 0); + double fy = l_fij(n3 + 1); + double fz = l_fij(n3 + 2); + Kokkos::atomic_add(&l_f(im, 0), fx); + Kokkos::atomic_add(&l_f(im, 1), fy); + Kokkos::atomic_add(&l_f(im, 2), fz); + Kokkos::atomic_sub(&l_f(jm, 0), fx); + Kokkos::atomic_sub(&l_f(jm, 1), fy); + Kokkos::atomic_sub(&l_f(jm, 2), fz); + }); +} + +template +void PairPODKokkos::tallyenergy(int istart, int Ni) +{ + auto l_ei = ei; + auto l_eatom = d_eatom; + + // For global energy tally + if (eflag_global) { + double local_eng_vdwl = 0.0; + Kokkos::parallel_reduce("GlobalEnergyTally", Ni, KOKKOS_LAMBDA(int k, E_FLOAT& update) { + update += l_ei(k); + }, local_eng_vdwl); + + // Update global energy on the host after the parallel region + eng_vdwl += local_eng_vdwl; + } + + // For per-atom energy tally + if (eflag_atom) { + Kokkos::parallel_for("PerAtomEnergyTally", Ni, KOKKOS_LAMBDA(int k) { + l_eatom(istart + k) += l_ei(k); + }); + } +} + +template +void PairPODKokkos::tallystress(int Nij) +{ + auto l_fij = fij; + auto l_rij = rij; + auto l_ai = ai; + auto l_aj = aj; + auto l_vatom = d_vatom; + + if (vflag_global) { + for (int j=0; j<3; j++) { + F_FLOAT sum = 0.0; + Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { + int k3 = 3*k; + update += l_rij(j + k3) * l_fij(j + k3); + }, sum); + virial[j] -= sum; + } + + F_FLOAT sum = 0.0; + Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { + int k3 = 3*k; + update += l_rij(k3) * l_fij(1 + k3); + }, sum); + virial[3] -= sum; + + sum = 0.0; + Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { + int k3 = 3*k; + update += l_rij(k3) * l_fij(2 + k3); + }, sum); + virial[4] -= sum; + + sum = 0.0; + Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int k, F_FLOAT& update) { + int k3 = 3*k; + update += 
l_rij(1+k3) * l_fij(2+k3); + }, sum); + virial[5] -= sum; + } + + if (vflag_atom) { + Kokkos::parallel_for("PerAtomStressTally", Nij, KOKKOS_LAMBDA(int k) { + int i = l_ai(k); + int j = l_aj(k); + int k3 = 3*k; + double v_local[6]; + v_local[0] = -l_rij(k3) * l_fij(k3 + 0); + v_local[1] = -l_rij(k3 + 1) * l_fij(k3 + 1); + v_local[2] = -l_rij(k3 + 2) * l_fij(k3 + 2); + v_local[3] = -l_rij(k3 + 0) * l_fij(k3 + 1); + v_local[4] = -l_rij(k3 + 0) * l_fij(k3 + 2); + v_local[5] = -l_rij(k3 + 1) * l_fij(k3 + 2); + + for (int d = 0; d < 6; ++d) { + Kokkos::atomic_add(&l_vatom(i, d), 0.5 * v_local[d]); + } + + for (int d = 0; d < 6; ++d) { + Kokkos::atomic_add(&l_vatom(j, d), 0.5 * v_local[d]); + } + + }); + } +} + +template +void PairPODKokkos::savematrix2binfile(std::string filename, t_pod_1d d_A, int nrows, int ncols) +{ + auto A = Kokkos::create_mirror_view(d_A); + Kokkos::deep_copy(A, d_A); + + FILE *fp = fopen(filename.c_str(), "wb"); + double sz[2]; + sz[0] = (double) nrows; + sz[1] = (double) ncols; + fwrite( reinterpret_cast( sz ), sizeof(double) * (2), 1, fp); + fwrite( reinterpret_cast( A.data() ), sizeof(double) * (nrows*ncols), 1, fp); + fclose(fp); +} + +template +void PairPODKokkos::saveintmatrix2binfile(std::string filename, t_pod_1i d_A, int nrows, int ncols) +{ + auto A = Kokkos::create_mirror_view(d_A); + Kokkos::deep_copy(A, d_A); + + FILE *fp = fopen(filename.c_str(), "wb"); + int sz[2]; + sz[0] = nrows; + sz[1] = ncols; + fwrite( reinterpret_cast( sz ), sizeof(int) * (2), 1, fp); + fwrite( reinterpret_cast( A.data() ), sizeof(int) * (nrows*ncols), 1, fp); + fclose(fp); +} + +template +void PairPODKokkos::savedatafordebugging() +{ + saveintmatrix2binfile("podkktypeai.bin", typeai, ni, 1); + saveintmatrix2binfile("podkknumij.bin", numij, ni+1, 1); + saveintmatrix2binfile("podkkai.bin", ai, nij, 1); + saveintmatrix2binfile("podkkaj.bin", aj, nij, 1); + saveintmatrix2binfile("podkkti.bin", ti, nij, 1); + saveintmatrix2binfile("podkktj.bin", tj, nij, 1); + 
saveintmatrix2binfile("podkkidxi.bin", idxi, nij, 1); + savematrix2binfile("podkkrbf.bin", rbf, nrbfmax, nij); + savematrix2binfile("podkkrbfx.bin", rbfx, nrbfmax, nij); + savematrix2binfile("podkkrbfy.bin", rbfy, nrbfmax, nij); + savematrix2binfile("podkkrbfz.bin", rbfz, nrbfmax, nij); + int kmax = (K3 > ns) ? K3 : ns; + savematrix2binfile("podkkabf.bin", abf, kmax, nij); + savematrix2binfile("podkkabfx.bin", abfx, kmax, nij); + savematrix2binfile("podkkabfy.bin", abfy, kmax, nij); + savematrix2binfile("podkkabfz.bin", abfz, kmax, nij); + savematrix2binfile("podkkbdd.bin", bdd, 3*nij, Mdesc); + savematrix2binfile("podkkbd.bin", bd, ni, Mdesc); + savematrix2binfile("podkksumU.bin", sumU, nelements * K3 * nrbfmax, ni); + savematrix2binfile("podkkrij.bin", rij, 3, nij); + savematrix2binfile("podkkfij.bin", fij, 3, nij); + savematrix2binfile("podkkei.bin", ei, ni, 1); + + error->all(FLERR, "Save data and stop the run for debugging"); +} + +/* ---------------------------------------------------------------------- + memory usage of arrays +------------------------------------------------------------------------- */ + +template +double PairPODKokkos::memory_usage() +{ + double bytes = 0; + + return bytes; +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class PairPODKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairPODKokkos; +#endif +} diff --git a/src/KOKKOS/pair_pod_kokkos.h b/src/KOKKOS/pair_pod_kokkos.h new file mode 100644 index 00000000000..944f6c2a9f6 --- /dev/null +++ b/src/KOKKOS/pair_pod_kokkos.h @@ -0,0 +1,231 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(pod/kk,PairPODKokkos); +PairStyle(pod/kk/device,PairPODKokkos); +PairStyle(pod/kk/host,PairPODKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_PAIR_POD_KOKKOS_H +#define LMP_PAIR_POD_KOKKOS_H + +#include "eapod.h" +#include "pair_pod.h" +#include "kokkos_type.h" +#include "pair_kokkos.h" + +namespace LAMMPS_NS { + +template +class PairPODKokkos : public PairPOD { + public: + typedef DeviceType device_type; + typedef ArrayTypes AT; + + PairPODKokkos(class LAMMPS *); + ~PairPODKokkos() override; + + void compute(int, int) override; + void coeff(int, char **) override; + void init_style() override; + double init_one(int, int) override; + + //protected: + int inum, maxneigh; + int host_flag; + + int eflag, vflag; + int neighflag; + + typename AT::t_neighbors_2d d_neighbors; + typename AT::t_int_1d d_ilist; + typename AT::t_int_1d d_numneigh; +// typename AT::t_int_1d_randomread d_ilist; +// typename AT::t_int_1d_randomread d_numneigh; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + + typedef Kokkos::DualView tdual_fparams; + tdual_fparams k_cutsq, k_scale; + typedef Kokkos::View t_fparams; + t_fparams d_cutsq, d_scale; + typename AT::t_int_1d d_map; + + friend void pair_virial_fdotr_compute(PairPODKokkos*); + + void grow(int, int); + void copy_from_pod_class(EAPOD *podptr); + void divideInterval(int *intervals, int N, int M); + int calculateNumberOfIntervals(int N, int intervalSize); + void grow_atoms(int Ni); + void grow_pairs(int 
Nij); + + void allocate() override; + double memory_usage() override; + + typedef Kokkos::View t_pod_1i; + typedef Kokkos::View t_pod_2i; + typedef Kokkos::View t_pod_1d; + typedef Kokkos::View t_pod_2d; + typedef Kokkos::View t_pod_3d3; + + + int atomBlockSize; // size of each atom block + int nAtomBlocks; // number of atoms blocks + int atomBlocks[101]; // atom blocks + double comptime[100]; + int timing; + + int ni; // number of atoms i in the current atom block + int nij; // number of pairs (i,j) in the current atom block + int nimax; // maximum number of atoms i + int nijmax; // maximum number of pairs (i,j) + + int nelements; // number of elements + int onebody; // one-body descriptors + int besseldegree; // degree of Bessel functions + int inversedegree; // degree of inverse functions + int nbesselpars; // number of Bessel parameters + int nCoeffPerElement; // number of coefficients per element = (nl1 + Mdesc*nClusters) + int ns; // number of snapshots for radial basis functions + int nl1, nl2, nl3, nl4, nl23, nl33, nl34, nl44, n23, n32, nl; // number of local descriptors + int nrbf2, nrbf3, nrbf4, nrbfmax; // number of radial basis functions + int nabf3, nabf4; // number of angular basis functions + int K3, K4, Q4; // number of monomials + + // environmental variables + int nClusters; // number of environment clusters + int nComponents; // number of principal components + int Mdesc; // number of base descriptors + + double rin; // inner cut-off radius + double rcut; // outer cut-off radius + double rmax; // rcut - rin + double rcutsq; + + t_pod_1d rij; // (xj - xi) for all pairs (I, J) + t_pod_1d fij; // force for all pairs (I, J) + t_pod_1d ei; // energy for each atom I + t_pod_1i typeai; // types of atoms I only + t_pod_1i numij; // number of pairs (I, J) for each atom I + t_pod_1i idxi; // storing linear indices of atom I for all pairs (I, J) + t_pod_1i ai; // IDs of atoms I for all pairs (I, J) + t_pod_1i aj; // IDs of atoms J for all pairs (I, J) + 
t_pod_1i ti; // types of atoms I for all pairs (I, J) + t_pod_1i tj; // types of atoms J for all pairs (I, J) + + t_pod_1d besselparams; + t_pod_1d Phi; // eigenvectors matrix ns x ns + t_pod_1d rbf; // radial basis functions nij x nrbfmax + t_pod_1d rbfx; // x-derivatives of radial basis functions nij x nrbfmax + t_pod_1d rbfy; // y-derivatives of radial basis functions nij x nrbfmax + t_pod_1d rbfz; // z-derivatives of radial basis functions nij x nrbfmax + t_pod_1d abf; // angular basis functions nij x K3 + t_pod_1d abfx; // x-derivatives of angular basis functions nij x K3 + t_pod_1d abfy; // y-derivatives of angular basis functions nij x K3 + t_pod_1d abfz; // z-derivatives of angular basis functions nij x K3 + t_pod_1d sumU; // sum of radial basis functions ni x K3 x nrbfmax x nelements + t_pod_1d Proj; // PCA Projection matrix + t_pod_1d Centroids; // centroids of the clusters + t_pod_1d bd; // base descriptors ni x Mdesc + t_pod_1d bdd; // base descriptors derivatives 3 x nij x Mdesc + t_pod_1d coefficients; // coefficients nCoeffPerElement x nelements + t_pod_1i pq3, pn3, pc3; // arrays to compute 3-body angular basis functions + t_pod_1i pa4, pb4, pc4; // arrays to compute 4-body angular basis functions + t_pod_1i ind23; // n23 + t_pod_1i ind32; // n32 + t_pod_1i ind33l, ind33r; // nl33 + t_pod_1i ind34l, ind34r; // nl34 + t_pod_1i ind44l, ind44r; // nl44 + t_pod_1i elemindex; + + void set_array_to_zero(t_pod_1d a, int N); + + int NeighborCount(t_pod_1i, double, int, int); + int NeighborCount(t_pod_1i, int); + + void NeighborList(t_pod_1d l_rij, t_pod_1i l_numij, t_pod_1i l_typeai, t_pod_1i l_idxi, + t_pod_1i l_ai, t_pod_1i l_aj, t_pod_1i l_ti, t_pod_1i l_tj, double l_rcutsq, int gi1, int Ni); + + void radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod_1d rbfty, t_pod_1d rbftz, + t_pod_1d rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree, + int l_inversedegree, int l_nbesselpars, int l_ns, int Nij); + + void 
matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2); + + void angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, + t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N); + + void radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf, t_pod_1d l_abf, t_pod_1i l_tj, + t_pod_1i l_numij, const int l_nelements, const int l_nrbf3, const int l_K3, const int Ni, const int Nij); + + void twobodydescderiv(t_pod_1d d2, t_pod_1d dd2, t_pod_1d l_rbf, t_pod_1d l_rbfx, t_pod_1d l_rbfy, + t_pod_1d l_rbfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbfmax, int l_nrbf2, const int Ni, const int Nij); + + void threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_pod_1i l_pc3, t_pod_1i l_pn3, + int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, const int Ni); + + void threebodydescderiv(t_pod_1d dd3, t_pod_1d l_rbf, t_pod_1d l_rbfx, + t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, + t_pod_1d l_sumU, t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pc3, t_pod_1i l_pn3, t_pod_1i l_elemindex, + int l_nelements, int l_nrbfmax, int l_nrbf3, int l_nabf3, int l_K3, int Ni, int Nij); + + void fourbodydesc(t_pod_1d d4, t_pod_1d l_sumU, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, + int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni); + + void fourbodydescderiv(t_pod_1d dd4, t_pod_1d l_rbf, t_pod_1d l_rbfx, t_pod_1d l_rbfy, t_pod_1d l_rbfz, + t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, t_pod_1d l_sumU, t_pod_1i l_idxi, + t_pod_1i l_tj, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, t_pod_1i l_elemindex, int l_nelements, + int l_nrbfmax, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni, int Nij); + + void fourbodydesc23(t_pod_1d d23, t_pod_1d d2, t_pod_1d d3, t_pod_1i l_ind23, + t_pod_1i l_ind32, int l_n23, int l_n32, int Ni); + + void fourbodydescderiv23(t_pod_1d dd23, t_pod_1d d2, t_pod_1d d3, t_pod_1d dd2, + t_pod_1d dd3, t_pod_1i l_idxi, 
t_pod_1i l_ind23, t_pod_1i l_ind32, int l_n23, int l_n32, int Ni, int N); + + void crossdesc(t_pod_1d d12, t_pod_1d d1, t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni); + + void crossdescderiv(t_pod_1d dd12, t_pod_1d d1, t_pod_1d d2, t_pod_1d dd1, t_pod_1d dd2, + t_pod_1i l_idxi, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni, int Nij); + + void blockatom_base_descriptors(t_pod_1d bd, t_pod_1d bdd, int Ni, int Nij); + void blockatomenv_descriptors(t_pod_1d ei, t_pod_1d cb, t_pod_1d B, int Ni); + + void blockatomenergyforce(int Ni, int Nij); + + void tallyforce(int Nij); + + void tallyenergy(int istart, int Ni); + + void tallystress(int Nij); + + void savematrix2binfile(std::string filename, t_pod_1d d_A, int nrows, int ncols); + void saveintmatrix2binfile(std::string filename, t_pod_1i d_A, int nrows, int ncols); + void savedatafordebugging(); +}; +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/ML-POD/compute_pod_atom.cpp b/src/ML-POD/compute_pod_atom.cpp new file mode 100644 index 00000000000..df298257210 --- /dev/null +++ b/src/ML-POD/compute_pod_atom.cpp @@ -0,0 +1,274 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "compute_pod_atom.h" + +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "modify.h" +#include "neigh_list.h" +#include "neighbor.h" +#include "pair.h" +#include "eapod.h" +#include "update.h" + +#include + +using namespace LAMMPS_NS; + +enum{SCALAR,VECTOR,ARRAY}; + +ComputePODAtom::ComputePODAtom(LAMMPS *lmp, int narg, char **arg) : + Compute(lmp, narg, arg), list(nullptr), map(nullptr), pod(nullptr), elements(nullptr) +{ + + int nargmin = 7; + + if (narg < nargmin) error->all(FLERR, "Illegal compute {} command", style); + if (comm->nprocs > 1) error->all(FLERR, "compute command does not support multi processors"); + + std::string pod_file = std::string(arg[3]); // pod input file + std::string coeff_file = ""; // coefficient input file + std::string proj_file = std::string(arg[4]); // coefficient input file + std::string centroid_file = std::string(arg[5]); // coefficient input file + podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); + + int ntypes = atom->ntypes; + memory->create(map, ntypes + 1, "compute_pod_global:map"); + map_element2type(narg - 6, arg + 6, podptr->nelements); + + //size_array_rows = 1 + 3*atom->natoms; + //size_array_cols = podptr->nCoeffAll; + + cutmax = podptr->rcut; + + nmax = 0; + nijmax = 0; + pod = nullptr; + elements = nullptr; + + size_peratom_cols = podptr->Mdesc * podptr->nClusters; + peratom_flag = 1; +} + +/* ---------------------------------------------------------------------- */ + +ComputePODAtom::~ComputePODAtom() +{ + memory->destroy(map); + memory->destroy(pod); + delete podptr; +} + +/* ---------------------------------------------------------------------- */ + +void ComputePODAtom::init() +{ + if (force->pair == nullptr) + error->all(FLERR,"Compute pod requires a pair style be defined"); + + if (cutmax > force->pair->cutforce) + error->all(FLERR,"Compute 
pod cutoff is longer than pairwise cutoff"); + + // need an occasional full neighbor list + + neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL); + + if (modify->get_compute_by_style("pod").size() > 1 && comm->me == 0) + error->warning(FLERR,"More than one compute pod"); +} + + +/* ---------------------------------------------------------------------- */ + +void ComputePODAtom::init_list(int /*id*/, NeighList *ptr) +{ + list = ptr; +} + +/* ---------------------------------------------------------------------- */ + +void ComputePODAtom::compute_peratom() +{ + invoked_peratom = update->ntimestep; + + // grow pod array if necessary + + if (atom->natoms > nmax) { + memory->destroy(pod); + nmax = atom->natoms; + int numdesc = podptr->Mdesc * podptr->nClusters; + memory->create(pod, nmax, numdesc,"sna/atom:sna"); + array_atom = pod; + } + + // invoke full neighbor list (will copy or build if necessary) + + neighbor->build_one(list); + + double **x = atom->x; + int **firstneigh = list->firstneigh; + int *numneigh = list->numneigh; + int *type = atom->type; + int *ilist = list->ilist; + int inum = list->inum; + int nClusters = podptr->nClusters; + int Mdesc = podptr->Mdesc; + double rcutsq = podptr->rcut*podptr->rcut; + + for (int ii = 0; ii < inum; ii++) { + int i = ilist[ii]; + int jnum = numneigh[i]; + + // allocate temporary memory + if (nijmax < jnum) { + nijmax = MAX(nijmax, jnum); + podptr->free_temp_memory(); + podptr->allocate_temp_memory(nijmax); + } + + rij = &podptr->tmpmem[0]; + tmpmem = &podptr->tmpmem[3*nijmax]; + ai = &podptr->tmpint[0]; + aj = &podptr->tmpint[nijmax]; + ti = &podptr->tmpint[2*nijmax]; + tj = &podptr->tmpint[3*nijmax]; + + // get neighbor list for atom i + lammpsNeighborList(x, firstneigh, atom->tag, type, numneigh, rcutsq, i); + + // peratom base descriptors + double *bd = &podptr->bd[0]; + double *bdd = &podptr->bdd[0]; + podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij); + + if (nClusters>1) 
{ + // peratom env descriptors + double *pd = &podptr->pd[0]; + double *pdd = &podptr->pdd[0]; + podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nij); + for (int k = 0; k < nClusters; k++) + for (int m = 0; m < Mdesc; m++) { + int mk = m + Mdesc*k; + pod[i][mk] = pd[k]*bd[m]; + // for (int n=0; n 1e-20) { + rij[nij * 3 + 0] = delx; + rij[nij * 3 + 1] = dely; + rij[nij * 3 + 2] = delz; + ai[nij] = atomid[gi]-1; + aj[nij] = atomid[gj]-1; + ti[nij] = itype; + tj[nij] = map[atomtypes[gj]] + 1; + nij++; + } + } +} + +void ComputePODAtom::map_element2type(int narg, char **arg, int nelements) +{ + int i,j; + const int ntypes = atom->ntypes; + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if "NULL" + // nelements = # of unique elements + // elements = list of element names + + if (narg != ntypes) + error->all(FLERR, "Number of element to type mappings does not match number of atom types"); + + if (elements) { + for (i = 0; i < nelements; i++) delete[] elements[i]; + delete[] elements; + } + elements = new char*[ntypes]; + for (i = 0; i < ntypes; i++) elements[i] = nullptr; + + nelements = 0; + map[0] = -1; + for (i = 1; i <= narg; i++) { + std::string entry = arg[i-1]; + if (entry == "NULL") { + map[i] = -1; + continue; + } + for (j = 0; j < nelements; j++) + if (entry == elements[j]) break; + map[i] = j; + if (j == nelements) { + elements[j] = utils::strdup(entry); + nelements++; + } + } +} diff --git a/src/ML-POD/compute_pod_atom.h b/src/ML-POD/compute_pod_atom.h new file mode 100644 index 00000000000..0dcd46cbba2 --- /dev/null +++ b/src/ML-POD/compute_pod_atom.h @@ -0,0 +1,61 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef COMPUTE_CLASS +// clang-format off +ComputeStyle(pod/atom,ComputePODAtom); +// clang-format on +#else + +#ifndef LMP_COMPUTE_POD_ATOM_H +#define LMP_COMPUTE_POD_ATOM_H + +#include "compute.h" + +namespace LAMMPS_NS { + +class ComputePODAtom : public Compute { + public: + ComputePODAtom(class LAMMPS *, int, char **); + ~ComputePODAtom() override; + void init() override; + void init_list(int, class NeighList *) override; + void compute_peratom() override; + double memory_usage() override; + void lammpsNeighborList(double **x, int **firstneigh, int *atomid, int *atomtype, int *numneigh, + double rcutsq, int i); + void map_element2type(int narg, char **arg, int nelements); + + private: + class NeighList *list; + class EAPOD *podptr; + double **pod; + double cutmax; + int nmax; + int nij; + int nijmax; + + double *tmpmem; // temporary memory + double *rij; // (xj - xi) for all pairs (I, J) + char **elements; + int *map; + int *ai; // IDs of atoms I for all pairs (I, J) + int *aj; // IDs of atoms J for all pairs (I, J) + int *ti; // types of atoms I for all pairs (I, J) + int *tj; // types of atoms J for all pairs (I, J) +}; + +} // namespace LAMMPS_NS + +#endif +#endif \ No newline at end of file diff --git a/src/ML-POD/compute_pod_global.cpp b/src/ML-POD/compute_pod_global.cpp new file mode 100644 index 00000000000..f8cff059b43 --- /dev/null +++ b/src/ML-POD/compute_pod_global.cpp @@ -0,0 +1,279 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS 
development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "compute_pod_global.h" + +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "modify.h" +#include "neigh_list.h" +#include "neighbor.h" +#include "pair.h" +#include "eapod.h" +#include "update.h" + +#include + +using namespace LAMMPS_NS; + +enum{SCALAR,VECTOR,ARRAY}; + +ComputePODGlobal::ComputePODGlobal(LAMMPS *lmp, int narg, char **arg) : + Compute(lmp, narg, arg), list(nullptr), map(nullptr), pod(nullptr), elements(nullptr) +{ + array_flag = 1; + extarray = 0; + + int nargmin = 7; + + if (narg < nargmin) error->all(FLERR, "Illegal compute {} command", style); + if (comm->nprocs > 1) error->all(FLERR, "compute command does not support multi processors"); + + std::string pod_file = std::string(arg[3]); // pod input file + std::string coeff_file = ""; // coefficient input file + std::string proj_file = std::string(arg[4]); // coefficient input file + std::string centroid_file = std::string(arg[5]); // coefficient input file + podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); + + int ntypes = atom->ntypes; + memory->create(map, ntypes + 1, "compute_pod_global:map"); + map_element2type(narg - 6, arg + 6, podptr->nelements); + + size_array_rows = 1 + 3*atom->natoms; + size_array_cols = podptr->nCoeffAll; + cutmax = podptr->rcut; + + nijmax = 0; + pod = nullptr; + elements = nullptr; +} + +/* ---------------------------------------------------------------------- */ + +ComputePODGlobal::~ComputePODGlobal() +{ + memory->destroy(map); + memory->destroy(pod); + delete 
podptr; +} + +/* ---------------------------------------------------------------------- */ + +void ComputePODGlobal::init() +{ + if (force->pair == nullptr) + error->all(FLERR,"Compute pod requires a pair style be defined"); + + if (cutmax > force->pair->cutforce) + error->all(FLERR,"Compute pod cutoff is longer than pairwise cutoff"); + + // need an occasional full neighbor list + + neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL); + + if (modify->get_compute_by_style("pod").size() > 1 && comm->me == 0) + error->warning(FLERR,"More than one compute pod"); + + // allocate memory for global array + memory->create(pod,size_array_rows,size_array_cols, + "compute_pod_global:pod"); + array = pod; +} + +/* ---------------------------------------------------------------------- */ + +void ComputePODGlobal::init_list(int /*id*/, NeighList *ptr) +{ + list = ptr; +} + +/* ---------------------------------------------------------------------- */ + +void ComputePODGlobal::compute_array() +{ + // int ntotal = atom->nlocal + atom->nghost; + invoked_peratom = update->ntimestep; + + // clear global array + + for (int irow = 0; irow < size_array_rows; irow++) + for (int icoeff = 0; icoeff < size_array_cols; icoeff++) + pod[irow][icoeff] = 0.0; + + // invoke full neighbor list (will copy or build if necessary) + + neighbor->build_one(list); + + double **x = atom->x; + int **firstneigh = list->firstneigh; + int *numneigh = list->numneigh; + int *type = atom->type; + int *ilist = list->ilist; + int inum = list->inum; + int nClusters = podptr->nClusters; + int Mdesc = podptr->Mdesc; + int nCoeffPerElement = podptr->nCoeffPerElement; + + double rcutsq = podptr->rcut*podptr->rcut; + + for (int ii = 0; ii < inum; ii++) { + int i = ilist[ii]; + int jnum = numneigh[i]; + + // allocate temporary memory + if (nijmax < jnum) { + nijmax = MAX(nijmax, jnum); + podptr->free_temp_memory(); + podptr->allocate_temp_memory(nijmax); + } + + rij = &podptr->tmpmem[0]; + 
tmpmem = &podptr->tmpmem[3*nijmax]; + ai = &podptr->tmpint[0]; + aj = &podptr->tmpint[nijmax]; + ti = &podptr->tmpint[2*nijmax]; + tj = &podptr->tmpint[3*nijmax]; + + // get neighbor list for atom i + lammpsNeighborList(x, firstneigh, atom->tag, type, numneigh, rcutsq, i); + + // peratom base descriptors + double *bd = &podptr->bd[0]; + double *bdd = &podptr->bdd[0]; + podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij); + + pod[0][nCoeffPerElement*(ti[0]-1)] += 1.0; // one-body descriptor + + if (nClusters>1) { + // peratom env descriptors + double *pd = &podptr->pd[0]; + double *pdd = &podptr->pdd[0]; + podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nij); + + for (int j = 0; j < nClusters; j++) { + for (int m=0; m 1e-20) { + rij[nij * 3 + 0] = delx; + rij[nij * 3 + 1] = dely; + rij[nij * 3 + 2] = delz; + ai[nij] = atomid[gi]-1; + aj[nij] = atomid[gj]-1; + ti[nij] = itype; + tj[nij] = map[atomtypes[gj]] + 1; + nij++; + } + } +} + +void ComputePODGlobal::map_element2type(int narg, char **arg, int nelements) +{ + int i,j; + const int ntypes = atom->ntypes; + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if "NULL" + // nelements = # of unique elements + // elements = list of element names + + if (narg != ntypes) + error->all(FLERR, "Number of element to type mappings does not match number of atom types"); + + if (elements) { + for (i = 0; i < nelements; i++) delete[] elements[i]; + delete[] elements; + } + elements = new char*[ntypes]; + for (i = 0; i < ntypes; i++) elements[i] = nullptr; + + nelements = 0; + map[0] = -1; + for (i = 1; i <= narg; i++) { + std::string entry = arg[i-1]; + if (entry == "NULL") { + map[i] = -1; + continue; + } + for (j = 0; j < nelements; j++) + if (entry == elements[j]) break; + map[i] = j; + if (j == nelements) { + elements[j] = utils::strdup(entry); + nelements++; + } + } +} diff --git a/src/ML-POD/compute_pod_global.h 
b/src/ML-POD/compute_pod_global.h new file mode 100644 index 00000000000..001c01eb5ec --- /dev/null +++ b/src/ML-POD/compute_pod_global.h @@ -0,0 +1,60 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef COMPUTE_CLASS +// clang-format off +ComputeStyle(pod/gdd,ComputePODGlobal); +// clang-format on +#else + +#ifndef LMP_COMPUTE_POD_GLOBAL_H +#define LMP_COMPUTE_POD_GLOBAL_H + +#include "compute.h" + +namespace LAMMPS_NS { + +class ComputePODGlobal : public Compute { + public: + ComputePODGlobal(class LAMMPS *, int, char **); + ~ComputePODGlobal() override; + void init() override; + void init_list(int, class NeighList *) override; + void compute_array() override; + double memory_usage() override; + void lammpsNeighborList(double **x, int **firstneigh, int *atomid, int *atomtype, int *numneigh, + double rcutsq, int i); + void map_element2type(int narg, char **arg, int nelements); + + private: + class NeighList *list; + class EAPOD *podptr; + double **pod; + double cutmax; + int nij; + int nijmax; + + double *tmpmem; // temporary memory + double *rij; // (xj - xi) for all pairs (I, J) + char **elements; + int *map; + int *ai; // IDs of atoms I for all pairs (I, J) + int *aj; // IDs of atoms J for all pairs (I, J) + int *ti; // types of atoms I for all pairs (I, J) + int *tj; // types of atoms J for all pairs (I, J) +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git 
a/src/ML-POD/compute_pod_local.cpp b/src/ML-POD/compute_pod_local.cpp new file mode 100644 index 00000000000..a563448d5e1 --- /dev/null +++ b/src/ML-POD/compute_pod_local.cpp @@ -0,0 +1,276 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "compute_pod_local.h" + +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "modify.h" +#include "neigh_list.h" +#include "neighbor.h" +#include "pair.h" +#include "eapod.h" +#include "update.h" + +#include + +using namespace LAMMPS_NS; + +enum{SCALAR,VECTOR,ARRAY}; + +ComputePODLocal::ComputePODLocal(LAMMPS *lmp, int narg, char **arg) : + Compute(lmp, narg, arg), list(nullptr), map(nullptr), pod(nullptr), elements(nullptr) +{ + array_flag = 1; + extarray = 0; + + int nargmin = 7; + + if (narg < nargmin) error->all(FLERR, "Illegal compute {} command", style); + if (comm->nprocs > 1) error->all(FLERR, "compute command does not support multi processors"); + + std::string pod_file = std::string(arg[3]); // pod input file + std::string coeff_file = ""; // coefficient input file + std::string proj_file = std::string(arg[4]); // coefficient input file + std::string centroid_file = std::string(arg[5]); // coefficient input file + podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); + + int ntypes = atom->ntypes; + memory->create(map, ntypes + 1, 
"compute_pod_local:map"); + + map_element2type(narg - 6, arg + 6, podptr->nelements); + + int numdesc = podptr->Mdesc * podptr->nClusters; + size_array_rows = 1 + 3*atom->natoms; + size_array_cols = atom->natoms*numdesc; + cutmax = podptr->rcut; + + nijmax = 0; + pod = nullptr; + elements = nullptr; +} + +/* ---------------------------------------------------------------------- */ + +ComputePODLocal::~ComputePODLocal() +{ + memory->destroy(map); + memory->destroy(pod); + delete podptr; +} + +/* ---------------------------------------------------------------------- */ + +void ComputePODLocal::init() +{ + if (force->pair == nullptr) + error->all(FLERR,"Compute pod requires a pair style be defined"); + + if (cutmax > force->pair->cutforce) + error->all(FLERR,"Compute pod cutoff is longer than pairwise cutoff"); + + // need an occasional full neighbor list + + neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL); + + if (modify->get_compute_by_style("pod").size() > 1 && comm->me == 0) + error->warning(FLERR,"More than one compute pod"); + + // allocate memory for global array + memory->create(pod,size_array_rows,size_array_cols, + "compute_pod_local:pod"); + array = pod; +} + +/* ---------------------------------------------------------------------- */ + +void ComputePODLocal::init_list(int /*id*/, NeighList *ptr) +{ + list = ptr; +} + +/* ---------------------------------------------------------------------- */ + +void ComputePODLocal::compute_array() +{ + // int ntotal = atom->nlocal + atom->nghost; + invoked_peratom = update->ntimestep; + + // clear global array + + for (int irow = 0; irow < size_array_rows; irow++) + for (int icoeff = 0; icoeff < size_array_cols; icoeff++) + pod[irow][icoeff] = 0.0; + + // invoke full neighbor list (will copy or build if necessary) + + neighbor->build_one(list); + + double **x = atom->x; + int **firstneigh = list->firstneigh; + int *numneigh = list->numneigh; + int *type = atom->type; + int *ilist = 
list->ilist; + int inum = list->inum; + int nClusters = podptr->nClusters; + int Mdesc = podptr->Mdesc; + + double rcutsq = podptr->rcut*podptr->rcut; + + for (int ii = 0; ii < inum; ii++) { + int i = ilist[ii]; + int jnum = numneigh[i]; + + // allocate temporary memory + if (nijmax < jnum) { + nijmax = MAX(nijmax, jnum); + podptr->free_temp_memory(); + podptr->allocate_temp_memory(nijmax); + } + + rij = &podptr->tmpmem[0]; + tmpmem = &podptr->tmpmem[3*nijmax]; + ai = &podptr->tmpint[0]; + aj = &podptr->tmpint[nijmax]; + ti = &podptr->tmpint[2*nijmax]; + tj = &podptr->tmpint[3*nijmax]; + + // get neighbor list for atom i + lammpsNeighborList(x, firstneigh, atom->tag, type, numneigh, rcutsq, i); + + // peratom base descriptors + double *bd = &podptr->bd[0]; + double *bdd = &podptr->bdd[0]; + podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij); + + if (nClusters>1) { + // peratom env descriptors + double *pd = &podptr->pd[0]; + double *pdd = &podptr->pdd[0]; + podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nij); + for (int k = 0; k < nClusters; k++) + for (int m = 0; m < Mdesc; m++) { + int imk = m + Mdesc*k + Mdesc*nClusters*i; + pod[0][imk] = pd[k]*bd[m]; + for (int n=0; n 1e-20) { + rij[nij * 3 + 0] = delx; + rij[nij * 3 + 1] = dely; + rij[nij * 3 + 2] = delz; + ai[nij] = atomid[gi]-1; + aj[nij] = atomid[gj]-1; + ti[nij] = itype; + tj[nij] = map[atomtypes[gj]] + 1; + nij++; + } + } +} + +void ComputePODLocal::map_element2type(int narg, char **arg, int nelements) +{ + int i,j; + const int ntypes = atom->ntypes; + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if "NULL" + // nelements = # of unique elements + // elements = list of element names + + if (narg != ntypes) + error->all(FLERR, "Number of element to type mappings does not match number of atom types"); + + if (elements) { + for (i = 0; i < nelements; i++) delete[] elements[i]; + delete[] 
elements; + } + elements = new char*[ntypes]; + for (i = 0; i < ntypes; i++) elements[i] = nullptr; + + nelements = 0; + map[0] = -1; + for (i = 1; i <= narg; i++) { + std::string entry = arg[i-1]; + if (entry == "NULL") { + map[i] = -1; + continue; + } + for (j = 0; j < nelements; j++) + if (entry == elements[j]) break; + map[i] = j; + if (j == nelements) { + elements[j] = utils::strdup(entry); + nelements++; + } + } +} diff --git a/src/ML-POD/compute_pod_local.h b/src/ML-POD/compute_pod_local.h new file mode 100644 index 00000000000..516d666e5ec --- /dev/null +++ b/src/ML-POD/compute_pod_local.h @@ -0,0 +1,60 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef COMPUTE_CLASS +// clang-format off +ComputeStyle(pod/ldd,ComputePODLocal); +// clang-format on +#else + +#ifndef LMP_COMPUTE_POD_LOCAL_H +#define LMP_COMPUTE_POD_LOCAL_H + +#include "compute.h" + +namespace LAMMPS_NS { + +class ComputePODLocal : public Compute { + public: + ComputePODLocal(class LAMMPS *, int, char **); + ~ComputePODLocal() override; + void init() override; + void init_list(int, class NeighList *) override; + void compute_array() override; + double memory_usage() override; + void lammpsNeighborList(double **x, int **firstneigh, int *atomid, int *atomtype, int *numneigh, + double rcutsq, int i); + void map_element2type(int narg, char **arg, int nelements); + + private: + class NeighList *list; + class EAPOD *podptr; + double **pod; + double cutmax; + int nij; + int nijmax; + + double *tmpmem; // temporary memory + double *rij; // (xj - xi) for all pairs (I, J) + char **elements; + int *map; + int *ai; // IDs of atoms I for all pairs (I, J) + int *aj; // IDs of atoms J for all pairs (I, J) + int *ti; // types of atoms I for all pairs (I, J) + int *tj; // types of atoms J for all pairs (I, J) +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/ML-POD/compute_podd_atom.cpp b/src/ML-POD/compute_podd_atom.cpp new file mode 100644 index 00000000000..9e3f8c45d63 --- /dev/null +++ b/src/ML-POD/compute_podd_atom.cpp @@ -0,0 +1,275 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "compute_podd_atom.h" + +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "modify.h" +#include "neigh_list.h" +#include "neighbor.h" +#include "pair.h" +#include "eapod.h" +#include "update.h" + +#include + +using namespace LAMMPS_NS; + +enum{SCALAR,VECTOR,ARRAY}; + +ComputePODDAtom::ComputePODDAtom(LAMMPS *lmp, int narg, char **arg) : + Compute(lmp, narg, arg), list(nullptr), map(nullptr), pod(nullptr), elements(nullptr) +{ + + int nargmin = 7; + + if (narg < nargmin) error->all(FLERR, "Illegal compute {} command", style); + if (comm->nprocs > 1) error->all(FLERR, "compute command does not support multi processors"); + + std::string pod_file = std::string(arg[3]); // pod input file + std::string coeff_file = ""; // coefficient input file + std::string proj_file = std::string(arg[4]); // coefficient input file + std::string centroid_file = std::string(arg[5]); // coefficient input file + podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); + + int ntypes = atom->ntypes; + memory->create(map, ntypes + 1, "compute_pod_global:map"); + map_element2type(narg - 6, arg + 6, podptr->nelements); + + cutmax = podptr->rcut; + + nmax = 0; + nijmax = 0; + pod = nullptr; + elements = nullptr; + + size_peratom_cols = podptr->Mdesc * podptr->nClusters*3*atom->natoms; + peratom_flag = 1; +} + +/* ---------------------------------------------------------------------- */ + +ComputePODDAtom::~ComputePODDAtom() +{ + memory->destroy(map); + memory->destroy(pod); + delete podptr; +} + +/* ---------------------------------------------------------------------- */ + +void ComputePODDAtom::init() +{ + if (force->pair == nullptr) + error->all(FLERR,"Compute pod requires a pair style be defined"); + + if (cutmax > force->pair->cutforce) + error->all(FLERR,"Compute pod 
cutoff is longer than pairwise cutoff"); + + // need an occasional full neighbor list + + neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL); + + if (modify->get_compute_by_style("pod").size() > 1 && comm->me == 0) + error->warning(FLERR,"More than one compute pod"); +} + + +/* ---------------------------------------------------------------------- */ + +void ComputePODDAtom::init_list(int /*id*/, NeighList *ptr) +{ + list = ptr; +} + +/* ---------------------------------------------------------------------- */ + +void ComputePODDAtom::compute_peratom() +{ + invoked_peratom = update->ntimestep; + + // grow pod array if necessary + + if (atom->natoms > nmax) { + memory->destroy(pod); + nmax = atom->natoms; + int numdesc = podptr->Mdesc * podptr->nClusters * 3 * atom->natoms; + memory->create(pod, nmax, numdesc,"sna/atom:sna"); + array_atom = pod; + } + + for (int i = 0; i < atom->natoms; i++) + for (int icoeff = 0; icoeff < size_peratom_cols; icoeff++) { + pod[i][icoeff] = 0.0; + } + + // invoke full neighbor list (will copy or build if necessary) + + neighbor->build_one(list); + + double **x = atom->x; + int **firstneigh = list->firstneigh; + int *numneigh = list->numneigh; + int *type = atom->type; + int *ilist = list->ilist; + int inum = list->inum; + int nClusters = podptr->nClusters; + int Mdesc = podptr->Mdesc; + double rcutsq = podptr->rcut*podptr->rcut; + + for (int ii = 0; ii < inum; ii++) { + int i = ilist[ii]; + int jnum = numneigh[i]; + + // allocate temporary memory + if (nijmax < jnum) { + nijmax = MAX(nijmax, jnum); + podptr->free_temp_memory(); + podptr->allocate_temp_memory(nijmax); + } + + rij = &podptr->tmpmem[0]; + tmpmem = &podptr->tmpmem[3*nijmax]; + ai = &podptr->tmpint[0]; + aj = &podptr->tmpint[nijmax]; + ti = &podptr->tmpint[2*nijmax]; + tj = &podptr->tmpint[3*nijmax]; + + // get neighbor list for atom i + lammpsNeighborList(x, firstneigh, atom->tag, type, numneigh, rcutsq, i); + + // peratom base descriptors + 
double *bd = &podptr->bd[0]; + double *bdd = &podptr->bdd[0]; + podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij); + + if (nClusters>1) { + // peratom env descriptors + double *pd = &podptr->pd[0]; + double *pdd = &podptr->pdd[0]; + podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nij); + for (int n=0; n 1e-20) { + rij[nij * 3 + 0] = delx; + rij[nij * 3 + 1] = dely; + rij[nij * 3 + 2] = delz; + ai[nij] = atomid[gi]-1; + aj[nij] = atomid[gj]-1; + ti[nij] = itype; + tj[nij] = map[atomtypes[gj]] + 1; + nij++; + } + } +} + +void ComputePODDAtom::map_element2type(int narg, char **arg, int nelements) +{ + int i,j; + const int ntypes = atom->ntypes; + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if "NULL" + // nelements = # of unique elements + // elements = list of element names + + if (narg != ntypes) + error->all(FLERR, "Number of element to type mappings does not match number of atom types"); + + if (elements) { + for (i = 0; i < nelements; i++) delete[] elements[i]; + delete[] elements; + } + elements = new char*[ntypes]; + for (i = 0; i < ntypes; i++) elements[i] = nullptr; + + nelements = 0; + map[0] = -1; + for (i = 1; i <= narg; i++) { + std::string entry = arg[i-1]; + if (entry == "NULL") { + map[i] = -1; + continue; + } + for (j = 0; j < nelements; j++) + if (entry == elements[j]) break; + map[i] = j; + if (j == nelements) { + elements[j] = utils::strdup(entry); + nelements++; + } + } +} diff --git a/src/ML-POD/compute_podd_atom.h b/src/ML-POD/compute_podd_atom.h new file mode 100644 index 00000000000..1339ed9d269 --- /dev/null +++ b/src/ML-POD/compute_podd_atom.h @@ -0,0 +1,61 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) 
Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef COMPUTE_CLASS +// clang-format off +ComputeStyle(podd/atom,ComputePODDAtom); +// clang-format on +#else + +#ifndef LMP_COMPUTE_PODD_ATOM_H +#define LMP_COMPUTE_PODD_ATOM_H + +#include "compute.h" + +namespace LAMMPS_NS { + +class ComputePODDAtom : public Compute { + public: + ComputePODDAtom(class LAMMPS *, int, char **); + ~ComputePODDAtom() override; + void init() override; + void init_list(int, class NeighList *) override; + void compute_peratom() override; + double memory_usage() override; + void lammpsNeighborList(double **x, int **firstneigh, int *atomid, int *atomtype, int *numneigh, + double rcutsq, int i); + void map_element2type(int narg, char **arg, int nelements); + + private: + class NeighList *list; + class EAPOD *podptr; + double **pod; + double cutmax; + int nmax; + int nij; + int nijmax; + + double *tmpmem; // temporary memory + double *rij; // (xj - xi) for all pairs (I, J) + char **elements; + int *map; + int *ai; // IDs of atoms I for all pairs (I, J) + int *aj; // IDs of atoms J for all pairs (I, J) + int *ti; // types of atoms I for all pairs (I, J) + int *tj; // types of atoms J for all pairs (I, J) +}; + +} // namespace LAMMPS_NS + +#endif +#endif \ No newline at end of file diff --git a/src/ML-POD/eapod.cpp b/src/ML-POD/eapod.cpp new file mode 100644 index 00000000000..414b9616d31 --- /dev/null +++ b/src/ML-POD/eapod.cpp @@ -0,0 +1,4201 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/ Sandia National Laboratories + LAMMPS development team: 
developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Ngoc Cuong Nguyen (MIT) +------------------------------------------------------------------------- */ + +// POD header file +#include "eapod.h" + +// LAMMPS header files + +#include "comm.h" +#include "error.h" +#include "math_const.h" +#include "math_special.h" +#include "memory.h" +#include "tokenizer.h" + +#include + +using namespace LAMMPS_NS; +using MathConst::MY_PI; +using MathSpecial::cube; +using MathSpecial::powint; + +#define MAXLINE 1024 + +// constructor +EAPOD::EAPOD(LAMMPS *_lmp, const std::string &pod_file, const std::string &coeff_file, const std::string &proj_file, const std::string ¢roids_file) : + Pointers(_lmp), elemindex(nullptr), Phi(nullptr), Lambda(nullptr), Proj(nullptr), + Centroids(nullptr), bd(nullptr), bdd(nullptr), pd(nullptr), pdd(nullptr), coeff(nullptr), tmpmem(nullptr), tmpint(nullptr), + pn3(nullptr), pq3(nullptr), pc3(nullptr), pq4(nullptr), pa4(nullptr), pb4(nullptr), pc4(nullptr), + ind23(nullptr), ind32(nullptr), ind33(nullptr), ind34(nullptr), ind43(nullptr), ind44(nullptr) +{ + ind33l = nullptr; + ind33r = nullptr; + ind34l = nullptr; + ind34r = nullptr; + ind44l = nullptr; + ind44r = nullptr; + + rin = 0.5; + rcut = 5.0; + nClusters = 1; + nComponents = 1; + nelements = 1; + onebody = 1; + besseldegree = 4; + inversedegree = 8; + nbesselpars = 3; + true4BodyDesc = 1; + ns = nbesselpars*besseldegree + inversedegree; + Njmax = 100; + nrbf2 = 6; + nrbf3 = 5; + nrbf4 = 4; + nabf3 = 5; + nabf4 = 4; + nrbf23 = 0; + nabf23 = 0; + 
nrbf33 = 0; + nabf33 = 0; + nrbf34 = 0; + nabf34 = 0; + nabf43 = 0; + nrbf44 = 0; + nabf44 = 0; + P3 = 4; + P4 = 3; + P23 = 0; + P33 = 0; + P34 = 0; + P44 = 0; + pdegree[0] = besseldegree; + pdegree[1] = inversedegree; + pbc[0] = 1; + pbc[1] = 1; + pbc[2] = 1; + besselparams[0] = 1e-3; + besselparams[1] = 2.0; + besselparams[2] = 4.0; + + // read pod input file to podstruct + read_pod_file(pod_file); + + // read pod coefficient file to podstruct + if (coeff_file != "") { + ncoeff = read_coeff_file(coeff_file); + if (ncoeff != nCoeffAll) + error->all(FLERR,"number of coefficients in the coefficient file is not correct"); + } + if (nClusters > 1) { + // read projection matrix file to podstruct + if (proj_file != "") { + nproj = read_projection_matrix(proj_file); + if (nproj != nComponents*Mdesc*nelements) + error->all(FLERR,"number of coefficients in the projection file is not correct"); + } + + // read centroids file to podstruct + if (centroids_file != "") { + ncentroids = read_centroids(centroids_file); + if (ncentroids != nComponents*nClusters*nelements) + error->all(FLERR,"number of coefficients in the projection file is not correct"); + } + } +} + +// destructor +EAPOD::~EAPOD() +{ + memory->destroy(elemindex); + memory->destroy(Phi); + memory->destroy(Lambda); + memory->destroy(Proj); + memory->destroy(Centroids); + memory->destroy(bd); + memory->destroy(bdd); + memory->destroy(pd); + memory->destroy(pdd); + memory->destroy(coeff); + memory->destroy(tmpmem); + memory->destroy(tmpint); + memory->destroy(pn3); + memory->destroy(pq3); + memory->destroy(pc3); + memory->destroy(pa4); + memory->destroy(pb4); + memory->destroy(pc4); + memory->destroy(pq4); + memory->destroy(ind23); + memory->destroy(ind32); + memory->destroy(ind33); + memory->destroy(ind34); + memory->destroy(ind43); + memory->destroy(ind44); + memory->destroy(ind33l); + memory->destroy(ind34l); + memory->destroy(ind44l); + memory->destroy(ind33r); + memory->destroy(ind34r); + 
memory->destroy(ind44r); +} + +void EAPOD::read_pod_file(std::string pod_file) +{ + std::string podfilename = pod_file; + FILE *fppod; + if (comm->me == 0) { + + fppod = utils::open_potential(podfilename,lmp,nullptr); + if (fppod == nullptr) + error->one(FLERR,"Cannot open POD coefficient file {}: ", + podfilename, utils::getsyserror()); + } + + // loop through lines of POD file and parse keywords + + char line[MAXLINE],*ptr; + int eof = 0; + + while (true) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fppod); + if (ptr == nullptr) { + eof = 1; + fclose(fppod); + } + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + + // words = ptrs to all words in line + // strip single and double quotes from words + + std::vector words; + try { + words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector(); + } catch (TokenizerException &) { + // ignore + } + + if (words.size() == 0) continue; + + auto keywd = words[0]; + + if (keywd == "species") { + nelements = words.size()-1; + for (int ielem = 1; ielem <= nelements; ielem++) { + species.push_back(words[ielem]); + } + } + + if (keywd == "pbc") { + if (words.size() != 4) + error->one(FLERR,"Improper POD file.", utils::getsyserror()); + pbc[0] = utils::inumeric(FLERR,words[1],false,lmp); + pbc[1] = utils::inumeric(FLERR,words[2],false,lmp); + pbc[2] = utils::inumeric(FLERR,words[3],false,lmp); + } + + if ((keywd != "#") && (keywd != "species") && (keywd != "pbc")) { + + if (words.size() != 2) + error->one(FLERR,"Improper POD file.", utils::getsyserror()); + + if (keywd == "rin") rin = utils::numeric(FLERR,words[1],false,lmp); + if (keywd == "rcut") rcut = utils::numeric(FLERR,words[1],false,lmp); + if (keywd == "number_of_enviroment_clusters") + nClusters = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "number_of_principal_components") + nComponents = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "bessel_polynomial_degree") + 
besseldegree = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "inverse_polynomial_degree") + inversedegree = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "onebody") onebody = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "twobody_number_radial_basis_functions") + nrbf2 = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "threebody_number_radial_basis_functions") + nrbf3 = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "threebody_angular_degree") + P3 = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "fourbody_number_radial_basis_functions") + nrbf4 = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "fourbody_angular_degree") + P4 = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "true4BodyDesc") + true4BodyDesc = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "fivebody_number_radial_basis_functions") + nrbf33 = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "fivebody_angular_degree") + P33 = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "sixbody_number_radial_basis_functions") + nrbf34 = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "sixbody_angular_degree") + P34 = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "sevenbody_number_radial_basis_functions") + nrbf44 = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "sevenbody_angular_degree") + P44 = utils::inumeric(FLERR,words[1],false,lmp); + } + } + // if (nrbf2 < nrbf3) error->all(FLERR,"number of three-body radial basis functions must be equal or less than number of two-body radial basis functions"); + if (nrbf3 < nrbf4) error->all(FLERR,"number of four-body radial basis functions must be equal or less than number of three-body radial basis functions"); + if (nrbf4 < nrbf33) error->all(FLERR,"number of five-body radial basis functions must be equal or less than number of four-body radial basis functions"); + if (nrbf4 < nrbf34) error->all(FLERR,"number of six-body 
radial basis functions must be equal or less than number of four-body radial basis functions"); + if (nrbf4 < nrbf44) error->all(FLERR,"number of seven-body radial basis functions must be equal or less than number of four-body radial basis functions"); + nrbfmax = (nrbf2 < nrbf3) ? nrbf3 : nrbf2; + nrbfmax = (nrbfmax < nrbf4) ? nrbf4 : nrbfmax; + nrbfmax = (nrbfmax < nrbf33) ? nrbf33 : nrbfmax; + nrbfmax = (nrbfmax < nrbf34) ? nrbf34 : nrbfmax; + nrbfmax = (nrbfmax < nrbf44) ? nrbf44 : nrbfmax; + + if (P3 < P4) error->all(FLERR,"four-body angular degree must be equal or less than three-body angular degree"); + if (P4 < P33) error->all(FLERR,"five-body angular degree must be equal or less than four-body angular degree"); + if (P4 < P34) error->all(FLERR,"six-body angular degree must be equal or less than four-body angular degree"); + if (P4 < P44) error->all(FLERR,"seven-body angular degree must be equal or less than four-body angular degree"); + + if (P3 > 12) error->all(FLERR,"three-body angular degree must be equal or less than 12"); + //if (P34 > 6) error->all(FLERR,"six-body angular degree must be equal or less than 6"); + //if (P44 > 6) error->all(FLERR,"seven-body angular degree must be equal or less than 6"); + + // four-body potential + if ((nrbf4 > 0) && (nrbf33 == 0)) { + if (P4 > 6) { + nrbf23 = nrbf4; + P23 = P4; + nrbf4 = 0; + P4 = 0; + } + else { + if (true4BodyDesc == 1) { + nrbf23 = 0; + P23 = 0; + } + else { + nrbf23 = nrbf4; + P23 = P4; + nrbf4 = 0; + P4 = 0; + } + } + } + + // five-body potential + if ((nrbf33 > 0) && (nrbf34 == 0)) { + if (true4BodyDesc == 1) { + nrbf23 = 0; + P23 = 0; + } + else { + nrbf23 = nrbf4; + P23 = P4; + nrbf4 = 0; + P4 = 0; + } + } + + // six-body potential or seven-body potential + if (nrbf34 > 0) { + if (true4BodyDesc == 1) { + nrbf23 = 0; + P23 = 0; + } + else { + nrbf23 = nrbf4; + P23 = P4; + nrbf4 = nrbf34; + P4 = P34; + } + } + + int Ne = nelements; + + memory->create(elemindex, Ne*Ne, "elemindex"); + int k = 0; 
+ for (int i1 = 0; i1create(ind23, n23, "ind23"); + memory->create(ind32, n32, "ind32"); + memory->create(ind33, n33, "ind33"); + memory->create(ind34, n34, "ind34"); + memory->create(ind43, n43, "ind43"); + memory->create(ind44, n44, "ind44"); + + indexmap3(ind23, 1, nrbf23, Ne, 1, nrbf2); + indexmap3(ind32, nabf23, nrbf23, Ne*(Ne+1)/2, nabf3, nrbf3); + indexmap3(ind33, nabf33, nrbf33, Ne*(Ne+1)/2, nabf3, nrbf3); + indexmap3(ind34, nabf34, nrbf34, Ne*(Ne+1)/2, nabf3, nrbf3); + indexmap3(ind43, nabf43, nrbf34, Ne*(Ne+1)*(Ne+2)/6, nabf4, nrbf4); + indexmap3(ind44, nabf44, nrbf44, Ne*(Ne+1)*(Ne+2)/6, nabf4, nrbf4); + + nld33 = 0; + nld34 = 0; + nld44 = 0; + int nebf3 = Ne*(Ne+1)/2; + int nebf4 = Ne*(Ne+1)*(Ne+2)/6; + int dabf3[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int dabf4[] = {0, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6}; + if (nrbf33>0) { + nld33 = crossindices(dabf3, nabf3, nrbf3, nebf3, dabf3, nabf3, nrbf3, nebf3, P33, nrbf33); + memory->create(ind33l, nld33, "ind33l"); + memory->create(ind33r, nld33, "ind33r"); + crossindices(ind33l, ind33r, dabf3, nabf3, nrbf3, nebf3, dabf3, nabf3, nrbf3, nebf3, P33, nrbf33); + } + if (nrbf34>0) { + nld34 = crossindices(dabf3, nabf3, nrbf3, nebf3, dabf4, nabf4, nrbf4, nebf4, P34, nrbf34); + memory->create(ind34l, nld34, "ind34l"); + memory->create(ind34r, nld34, "ind34r"); + crossindices(ind34l, ind34r, dabf3, nabf3, nrbf3, nebf3, dabf4, nabf4, nrbf4, nebf4, P34, nrbf34); + } + if (nrbf44>0) { + nld44 = crossindices(dabf4, nabf4, nrbf4, nebf4, dabf4, nabf4, nrbf4, nebf4, P44, nrbf44); + memory->create(ind44l, nld44, "ind44l"); + memory->create(ind44r, nld44, "ind44r"); + crossindices(ind44l, ind44r, dabf4, nabf4, nrbf4, nebf4, dabf4, nabf4, nrbf4, nebf4, P44, nrbf44); + } + ngd33 = nld33*Ne; + ngd34 = nld34*Ne; + ngd44 = nld44*Ne; + nl33 = nld33; + nl34 = nld34; + nl44 = nld44; + nd33 = ngd33; + nd34 = ngd34; + nd44 = ngd44; + + Mdesc = nl2 + nl3 + nl4 + nl23 + nl33 + nl34 + nl44; + nl = nl1 
+ nl2 + nl3 + nl4 + nl23 + nl33 + nl34 + nl44; + nd = nd1 + nd2 + nd3 + nd4 + nd23 + nd33 + nd34 + nd44; + nCoeffPerElement = nl1 + Mdesc*nClusters; + nCoeffAll = nCoeffPerElement*nelements; + + allocate_temp_memory(Njmax); + + if (comm->me == 0) { + utils::logmesg(lmp, "**************** Begin of POD Potentials ****************\n"); + utils::logmesg(lmp, "species: "); + for (int i=0; ime == 0) { + + fpcoeff = utils::open_potential(coefffilename,lmp,nullptr); + if (fpcoeff == nullptr) + error->one(FLERR,"Cannot open POD coefficient file {}: ", + coefffilename, utils::getsyserror()); + } + + // check format for first line of file + + char line[MAXLINE],*ptr; + int eof = 0; + int nwords = 0; + while (nwords == 0) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fpcoeff); + if (ptr == nullptr) { + eof = 1; + fclose(fpcoeff); + } + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + nwords = utils::count_words(utils::trim_comment(line)); + } + + if (nwords != 2) + error->all(FLERR,"Incorrect format in POD coefficient file"); + + // strip single and double quotes from words + + int ncoeffall; + std::string tmp_str; + try { + ValueTokenizer words(utils::trim_comment(line),"\"' \t\n\r\f"); + tmp_str = words.next_string(); + ncoeffall = words.next_int(); + } catch (TokenizerException &e) { + error->all(FLERR,"Incorrect format in POD coefficient file: {}", e.what()); + } + + // loop over single block of coefficients and insert values in coeff + + memory->create(coeff, ncoeffall, "pod:pod_coeff"); + + for (int icoeff = 0; icoeff < ncoeffall; icoeff++) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fpcoeff); + if (ptr == nullptr) { + eof = 1; + fclose(fpcoeff); + } + } + + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) + error->all(FLERR,"Incorrect format in POD coefficient file"); + MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + + try { + ValueTokenizer 
cff(utils::trim_comment(line)); + if (cff.count() != 1) + error->all(FLERR,"Incorrect format in POD coefficient file"); + + coeff[icoeff] = cff.next_double(); + } catch (TokenizerException &e) { + error->all(FLERR,"Incorrect format in POD coefficient file: {}", e.what()); + } + } + if (comm->me == 0) { + if (!eof) fclose(fpcoeff); + } + + if (comm->me == 0) { + utils::logmesg(lmp, "**************** Begin of POD Coefficients ****************\n"); + utils::logmesg(lmp, "total number of coefficients for POD potential: {}\n", ncoeffall); + utils::logmesg(lmp, "**************** End of POD Potentials ****************\n\n"); + } + + return ncoeffall; +} + +//funcion to read the projection matrix from file. +int EAPOD::read_projection_matrix(std::string proj_file) +{ + std::string projfilename = proj_file; + FILE *fpproj; + if (comm->me == 0) { + + fpproj = utils::open_potential(projfilename,lmp,nullptr); + if (fpproj == nullptr) + error->one(FLERR,"Cannot open PCA projection matrix file {}: ", + projfilename, utils::getsyserror()); + } + + // check format for first line of file + + char line[MAXLINE],*ptr; + int eof = 0; + int nwords = 0; + while (nwords == 0) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fpproj); + if (ptr == nullptr) { + eof = 1; + fclose(fpproj); + } + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + nwords = utils::count_words(utils::trim_comment(line)); + } + + if (nwords != 2) + error->all(FLERR,"Incorrect format in PCA projection matrix file"); + + // strip single and double quotes from words + + int nprojall; + std::string tmp_str; + try { + ValueTokenizer words(utils::trim_comment(line),"\"' \t\n\r\f"); + tmp_str = words.next_string(); + nprojall = words.next_int(); + } catch (TokenizerException &e) { + error->all(FLERR,"Incorrect format in PCA projection matrix file: {}", e.what()); + } + + // loop over single block of coefficients and insert 
values in coeff + + memory->create(Proj, nprojall, "pod:pca_proj"); + + for (int iproj = 0; iproj < nprojall; iproj++) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fpproj); + if (ptr == nullptr) { + eof = 1; + fclose(fpproj); + } + } + + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) + error->all(FLERR,"Incorrect format in PCA projection matrix file"); + MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + + try { + ValueTokenizer cff(utils::trim_comment(line)); + if (cff.count() != 1) + error->all(FLERR,"Incorrect format in PCA projection matrix file"); + + Proj[iproj] = cff.next_double(); + } catch (TokenizerException &e) { + error->all(FLERR,"Incorrect format in PCA projection matrix file: {}", e.what()); + } + } + if (comm->me == 0) { + if (!eof) fclose(fpproj); + } + + if (comm->me == 0) { + utils::logmesg(lmp, "**************** Begin of PCA projection matrix ****************\n"); + utils::logmesg(lmp, "total number of elements for PCA projection matrix: {}\n", nprojall); + utils::logmesg(lmp, "**************** End of PCA projection matrix ****************\n\n"); + } + + return nprojall; +} + +// read Centroids from file +int EAPOD::read_centroids(std::string centroids_file) +{ + std::string centfilename = centroids_file; + FILE *fpcent; + if (comm->me == 0) { + + fpcent = utils::open_potential(centfilename,lmp,nullptr); + if (fpcent == nullptr) + error->one(FLERR,"Cannot open PCA centroids file {}: ", + centfilename, utils::getsyserror()); + } + + // check format for first line of file + + char line[MAXLINE],*ptr; + int eof = 0; + int nwords = 0; + while (nwords == 0) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fpcent); + if (ptr == nullptr) { + eof = 1; + fclose(fpcent); + } + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + nwords = utils::count_words(utils::trim_comment(line)); + } + + if (nwords != 2) + error->all(FLERR,"Incorrect format in PCA centroids 
file"); + + // strip single and double quotes from words + + int ncentall; + std::string tmp_str; + try { + ValueTokenizer words(utils::trim_comment(line),"\"' \t\n\r\f"); + tmp_str = words.next_string(); + ncentall = words.next_int(); + } catch (TokenizerException &e) { + error->all(FLERR,"Incorrect format in PCA centroids file: {}", e.what()); + } + + // loop over single block of coefficients and insert values in coeff + + memory->create(Centroids, ncentall, "pod:pca_cent"); + + for (int icent = 0; icent < ncentall; icent++) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fpcent); + if (ptr == nullptr) { + eof = 1; + fclose(fpcent); + } + } + + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) + error->all(FLERR,"Incorrect format in PCA centroids file"); + MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + + try { + ValueTokenizer cff(utils::trim_comment(line)); + if (cff.count() != 1) + error->all(FLERR,"Incorrect format in PCA centroids file"); + + Centroids[icent] = cff.next_double(); + } catch (TokenizerException &e) { + error->all(FLERR,"Incorrect format in PCA centroids file: {}", e.what()); + } + } + if (comm->me == 0) { + if (!eof) fclose(fpcent); + } + + if (comm->me == 0) { + utils::logmesg(lmp, "**************** Begin of PCA centroids ****************\n"); + utils::logmesg(lmp, "total number of elements for PCA centroids: {}\n", ncentall); + utils::logmesg(lmp, "**************** End of PCA centroids ****************\n\n"); + } + + return ncentall; +} + + +void EAPOD::peratombase_descriptors(double *bd1, double *bdd1, double *rij, double *temp, + int *ti, int *tj, int Nj) +{ + for (int i=0; i0) && (Nj>0)) { + twobodydescderiv(d2, dd2, rbf, rbfx, rbfy, rbfz, tj, Nj); + } + + if ((nl3 > 0) && (Nj>1)) { + double *abf = &temp[4*n1 + n5 + 4*n2]; // Nj*K3 + double *abfx = &temp[4*n1 + n5 + 4*n2 + n4]; // Nj*K3 + double *abfy = &temp[4*n1 + n5 + 4*n2 + 2*n4]; // Nj*K3 + double *abfz = &temp[4*n1 + n5 + 4*n2 + 3*n4]; // Nj*K3 + double *tm = &temp[4*n1 + n5 + 4*n2 + 
4*n4]; // 4*K3 + + angularbasis(abf, abfx, abfy, abfz, rij, tm, pq3, Nj, K3); + + radialangularbasis(sumU, U, Ux, Uy, Uz, rbf, rbfx, rbfy, rbfz, + abf, abfx, abfy, abfz, tm, tj, Nj, K3, nrbf3, nelements); + + threebodydesc(d3, sumU, Nj); + threebodydescderiv(dd3, sumU, Ux, Uy, Uz, tj, Nj); + + if ((nl23>0) && (Nj>2)) { + fourbodydesc23(d23, d2, d3); + fourbodydescderiv23(dd23, d2, d3, dd2, dd3, 3*Nj); + } + + if ((nl33>0) && (Nj>3)) { + crossdesc(d33, d3, d3, ind33l, ind33r, nl33); + crossdescderiv(dd33, d3, d3, dd3, dd3, ind33l, ind33r, nl33, 3*Nj); + } + + if ((nl4 > 0) && (Nj>2)) { + if (K4 < K3) { + for (int m=0; m0) && (Nj>4)) { + crossdesc(d34, d3, d4, ind34l, ind34r, nl34); + crossdescderiv(dd34, d3, d4, dd3, dd4, ind34l, ind34r, nl34, 3*Nj); + } + + if ((nl44>0) && (Nj>5)) { + crossdesc(d44, d4, d4, ind44l, ind44r, nl44); + crossdescderiv(dd44, d4, d4, dd4, dd4, ind44l, ind44r, nl44, 3*Nj); + } + } + } +} + +double EAPOD::peratomenergyforce(double *fij, double *rij, double *temp, + int *ti, int *tj, int Nj) +{ + int N = 3*Nj; + for (int n=0; n 1) { // multi-environment descriptors + // calculate multi-environment descriptors and their derivatives with respect to atom coordinates + peratomenvironment_descriptors(pd, pdd, bd, bdd, temp, ti[0] - 1, Nj); + + for (int j = 0; jNjmax) { + Njmax = Nj; + free_temp_memory(); + allocate_temp_memory(Njmax); + } + + double *rij = &tmpmem[0]; // 3*Nj + double *fij = &tmpmem[3*Nj]; // 3*Nj + int *ai = &tmpint[0]; // Nj + int *aj = &tmpint[Nj]; // Nj + int *ti = &tmpint[2*Nj]; // Nj + int *tj = &tmpint[3*Nj]; // Nj + + myneighbors(rij, x, ai, aj, ti, tj, jlist, pairnumsum, atomtype, alist, i); + + etot += peratomenergyforce(fij, rij, &tmpmem[6*Nj], ti, tj, Nj); + + tallyforce(force, fij, ai, aj, Nj); + } + } + + return etot; +} + +void EAPOD::base_descriptors(double *basedesc, double *x, + int *atomtype, int *alist, int *jlist, int *pairnumsum, int natom) +{ + for (int i=0; i0) { + // reallocate temporary memory + if 
(Nj>Njmax) { + Njmax = Nj; + free_temp_memory(); + allocate_temp_memory(Njmax); + printf("reallocate temporary memory with Njmax = %d ...\n", Njmax); + } + + double *rij = &tmpmem[0]; // 3*Nj + int *ai = &tmpint[0]; // Nj + int *aj = &tmpint[Nj]; // Nj + int *ti = &tmpint[2*Nj]; // Nj + int *tj = &tmpint[3*Nj]; // Nj + + myneighbors(rij, x, ai, aj, ti, tj, jlist, pairnumsum, atomtype, alist, i); + + // many-body descriptors + peratombase_descriptors(bd, bdd, rij, &tmpmem[3*Nj], ti, tj, Nj); + + for (int m=0; m0) { + gd[nCoeffPerElement*(atomtype[i]-1)] += 1.0; + } + + if (Nj>0) { + // reallocate temporary memory + if (Nj>Njmax) { + Njmax = Nj; + free_temp_memory(); + allocate_temp_memory(Njmax); + printf("reallocate temporary memory with Njmax = %d ...\n", Njmax); + } + + double *rij = &tmpmem[0]; // 3*Nj + int *ai = &tmpint[0]; // Nj + int *aj = &tmpint[Nj]; // Nj + int *ti = &tmpint[2*Nj]; // Nj + int *tj = &tmpint[3*Nj]; // Nj + + myneighbors(rij, x, ai, aj, ti, tj, jlist, pairnumsum, atomtype, alist, i); + + // many-body descriptors + peratombase_descriptors(bd, bdd, rij, &tmpmem[3*Nj], ti, tj, Nj); + + for (int m=0; m0) { + gd[nCoeffPerElement*(atomtype[i]-1)] += 1.0; + } + + if (Nj>0) { + // reallocate temporary memory + if (Nj>Njmax) { + Njmax = Nj; + free_temp_memory(); + allocate_temp_memory(Njmax); + printf("reallocate temporary memory with Njmax = %d ...\n", Njmax); + } + + double *rij = &tmpmem[0]; // 3*Nj + int *ai = &tmpint[0]; // Nj + int *aj = &tmpint[Nj]; // Nj + int *ti = &tmpint[2*Nj]; // Nj + int *tj = &tmpint[3*Nj]; // Nj + + myneighbors(rij, x, ai, aj, ti, tj, jlist, pairnumsum, atomtype, alist, i); + + // many-body descriptors + peratombase_descriptors(bd, bdd, rij, &tmpmem[3*Nj], ti, tj, Nj); + + // calculate multi-environment descriptors and their derivatives with respect to atom coordinates + peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, Nj); + + for (int j = 0; j < nClusters; j++) { + probdesc[i + natom*(j)] = pd[j]; 
+ for (int m=0; mcreate(coeff, nc, "coeff"); + + // Copy the coefficients + for (int n=0; n= max(1,3*N-1) + int info = 1; // = 0: successful exit + double work[ns*ns]; + char chv = 'V'; + char chu = 'U'; + DSYEV(&chv, &chu, &ns, A, &ns, b, work, &lwork, &info); + + // Order eigenvalues and eigenvectors from largest to smallest + for (int j=0; jcreate(Phi, ns * ns, "Phi"); + memory->create(Lambda, ns, "Lambda"); + + // Perform eigenvalue decomposition of the snapshots matrix S and store the eigenvectors and eigenvalues + eigenvaluedecomposition(Phi, Lambda, 2000); +} + +/** + * @brief Initialize arrays for the three-body descriptors. + * + * @param Pa3 The degree of the angular basis functions of the three-body descriptors. + */ +void EAPOD::init3body(int Pa3) +{ + // Define the number of monomials for each degree + int npa[] = {0, 1, 4, 10, 20, 35, 56, 84, 120, 165, 220, 286, 364, 455}; + + // Set the number of coefficients, the number of basis functions, and the degree of the Bessel function + nabf3 = Pa3+1; // Number of angular basis functions + K3 = npa[nabf3]; // number of monimials + P3 = nabf3-1; // the degree of angular basis functions of the three-body descriptors + + // Allocate memory for the coefficients, the basis functions, and the cutoff function + memory->create(pn3, nabf3+1, "pn3"); // array stores the number of monomials for each degree + memory->create(pq3, K3*2, "pq3"); // array needed for the recursive computation of the angular basis functions + memory->create(pc3, K3, "pc3"); // array needed for the computation of the three-body descriptors + + // Initialize the arrays + init3bodyarray(pn3, pq3, pc3, nabf3-1); +} + +/** + * @brief Initialize arrays for the four-body descriptors. + * + * @param Pa4 The degree of the angular basis functions of the four-body descriptors. 
+ */ +void EAPOD::init4body(int Pa4) +{ + // Define the number of monomials for each degree + int npa[] = {0, 1, 4, 10, 20, 35, 56, 84, 120, 165, 220, 286, 364, 455}; + + // Define the number of angular basis functions for each degree + int nb[] = {1, 2, 4, 7, 11, 16, 23}; + + // Define the number of terms needed to compute angular basis functions + int ns[] = {0, 1, 4, 10, 19, 29, 47, 74, 89, 119, 155, 209, 230, 275, 335, 425, 533, 561, 624, 714, 849, 949, 1129, 1345}; + + // Set the degree of the angular basis functions of the four-body descriptors + P4 = Pa4; + + // Set the number of monomials for the angular basis functions of the four-body descriptors + K4 = npa[Pa4+1]; + + // Allocate memory for the output arrays + int *pn4, *tm4; + memory->create(pn4, Pa4+2, "pn4"); // array stores the number of monomials for each degree + memory->create(pq4, K4*2, "pq4"); // array needed for the recursive computation of the angular basis functions + memory->create(tm4, K4, "tm4"); + + // Initialize the arrays + init3bodyarray(pn4, pq4, tm4, Pa4); + + // Set the number of angular basis functions for the four-body descriptors + nabf4 = nb[Pa4]; + + // the size of the array pc4 + Q4 = ns[nabf4]; + + // Allocate memory for the coefficients, the basis functions, and the cutoff function + memory->create(pa4, nabf4+1, "pa4"); // this array is a subset of the array ns + memory->create(pb4, Q4*3, "pb4"); // array stores the indices of the monomials needed for the computation of the angular basis functions + memory->create(pc4, Q4, "pc4"); // array of monomial coefficients needed for the computation of the four-body descriptors + + // Initialize the arrays + init4bodyarray(pa4, pb4, pc4, Pa4); + + // Deallocate memory + memory->destroy(pn4); + memory->destroy(tm4); +} + + +/** + * @brief Estimate the amount of memory needed for the computation. + * + * @param Nj Number of neighboring atoms. + * @return int The estimated amount of memory needed. 
+ */ +int EAPOD::estimate_temp_memory(int Nj) +{ + // Determine the maximum number of radial basis functions and angular basis functions + int Kmax = (K3 > K4) ? K3 : K4; + int nrbf34 = (nrbf3 > nrbf4) ? nrbf3 : nrbf4; + int nrbfmax = (nrbf2 > nrbf34) ? nrbf2 : nrbf34; + int Knrbf34 = (K3*nrbf3 > K4*nrbf4) ? K3*nrbf3 : K4*nrbf4; + + // Determine the maximum number of local descriptors + int nld = (nl23 > nl33) ? nl23 : nl33; + nld = (nld > nl34) ? nld : nl34; + nld = (nld > nl44) ? nld : nl44; + + // rij, fij, and d2, dd2, d3, dd3, d4, dd4 + int nmax1 = 6*Nj + nl2 + 3*Nj*nl2 + nl3 + 3*Nj*nl3 + nl4 + 3*Nj*nl4 + nld + 3*Nj*nld; + + // U, Ux, Uy, Uz + int nmax2 = 4*Nj*Knrbf34; + + // sumU and cU + int nmax3 = 2*nelements*Knrbf34; + + // rbf, rbfx, rbfy, rbfz + int nmax4 = 4*Nj*nrbfmax; + + // rbft, rbfxt, rbfyt, rbfzt + int nmax5 = 4*Nj*ns; + + // abf, abfx, abfy, abfz + int nmax6 = 4*(Nj+1)*Kmax; + + // Determine the maximum amount of memory needed for U, Ux, Uy, Uz, sumU, cU, rbf, rbfx, rbfy, rbfz, abf, abfx, abfy, abfz + int nmax7 = (nmax5 > nmax6) ? 
nmax5 : nmax6; + int nmax8 = nmax2 + nmax3 + nmax4 + nmax7; + + // Determine the total amount of memory needed for all double memory + ndblmem = (nmax1 + nmax8); + + int nmax9 = 6*Nj + nComponents + nClusters + nClusters*nComponents + 2*nClusters*Mdesc + nClusters*nClusters; + if (ndblmem < nmax9) ndblmem = nmax9; + + // Determine the total amount of memory needed for all integer memory + nintmem = 4*Nj; + + // Return the estimated amount of memory needed + return ndblmem; +} + +void EAPOD::allocate_temp_memory(int Nj) +{ + estimate_temp_memory(Nj); + memory->create(tmpmem, ndblmem, "tmpmem"); + memory->create(tmpint, nintmem, "tmpint"); + memory->create(bd, Mdesc, "bdd"); + memory->create(bdd, 3*Nj*Mdesc, "bdd"); + memory->create(pd, nClusters, "bdd"); + memory->create(pdd, 3*Nj*nClusters, "bdd"); +} + +void EAPOD::free_temp_memory() +{ + memory->destroy(tmpmem); + memory->destroy(tmpint); + memory->destroy(bd); + memory->destroy(bdd); + memory->destroy(pd); + memory->destroy(pdd); +} + +/** + * @brief Map a 3D index to a 1D index. + * + * @param indx The 1D index array. + * @param n1 The size of the first dimension. + * @param n2 The size of the second dimension. + * @param n3 The size of the third dimension. + * @param N1 The stride of the first dimension. + * @param N2 The stride of the second dimension. + * @return int The total number of elements in the 1D index array. + */ +int EAPOD::indexmap3(int *indx, int n1, int n2, int n3, int N1, int N2) +{ + int k = 0; + for (int i3=0; i3= m1) && (i2 >= i1) && (a1 + a2 <= dabf12) && (j1+j2 < nrbf12)) { + n += 1; + } + } + } + + return n; +} + +/** + * @brief Calculate the number of cross descriptors between two sets of descriptors and store the indices in two arrays. + * + * @param ind1 Pointer to the array of indices of the first set of descriptors. + * @param ind2 Pointer to the array of indices of the second set of descriptors. 
+ * @param dabf1 Pointer to the array of degrees of angular basis functions of the first set of descriptors. + * @param nabf1 Number of angular basis functions in the first set of descriptors. + * @param nrbf1 Number of radial basis functions in the first set of descriptors. + * @param nebf1 Number of element interactions in the first set of descriptors. + * @param dabf2 Pointer to the array of degrees of angular basis functions of the second set of descriptors. + * @param nabf2 Number of angular basis functions in the second set of descriptors. + * @param nrbf2 Number of radial basis functions in the second set of descriptors. + * @param nebf2 Number of element interactions in the second set descriptors. + * @param dabf12 degree of angular basis functions for the cross descriptors + * @param nrbf12 number of radial basis functions for the cross descriptors + * @return int The number of cross descriptors between two sets of descriptors. + */ +int EAPOD::crossindices(int *ind1, int *ind2, int *dabf1, int nabf1, int nrbf1, int nebf1, + int *dabf2, int nabf2, int nrbf2, int nebf2, int dabf12, int nrbf12) +{ + int n = 0; + + // Loop over the first set of descriptors + for (int i1=0; i1= m1) && (i2 >= i1) && (a1 + a2 <= dabf12) && (j1+j2 < nrbf12)) { + ind1[n] = n1; + ind2[n] = n2; + n += 1; + } + } + } + + return n; +} + +void EAPOD::print_matrix(const char* desc, int m, int n, int* a, int lda ) +{ + int i, j; + printf( "\n %s\n", desc ); + + for( i = 0; i < m; i++ ) + { + for( j = 0; j < n; j++ ) printf( " %d", a[i+j*lda] ); + printf( "\n" ); + } +} + +void EAPOD::print_matrix(const char* desc, int m, int n, double* a, int lda ) +{ + int i, j; + printf( "\n %s\n", desc ); + + for( i = 0; i < m; i++ ) + { + for( j = 0; j < n; j++ ) printf( " %6.12f", a[i+j*lda] ); + printf( "\n" ); + } +} + +void EAPOD::scalarproduct(double *d, double c, int N) +{ + for (int n=0; n species; + + double rin; + double rcut; + int true4BodyDesc; + + + int nelements; // number of elements + 
int pbc[3]; + int *elemindex ; + + int onebody; // one-body descriptors + int besseldegree; + int inversedegree; + int pdegree[2]; + int nbesselpars; + int timing; + double comptime[20]; + double besselparams[3]; + double *Phi ; // eigenvectors + double *Lambda ; // eigenvalues + double *coeff; // coefficients + //double *newcoeff ; // coefficients + double *tmpmem; // scratch buffer of doubles; its length is tracked in ndblmem + + // environmental variables + int nClusters; // number of environment clusters + int nComponents; // number of principal components + //int nNeighbors; // number of neighbors + int Mdesc; // number of base descriptors + + double *Proj; // PCA Projection matrix + double *Centroids; // centroids of the clusters + double *bd; // base descriptors + double *bdd; // derivatives of the base descriptors with respect to the atomic positions + double *pd; // multi-environment descriptors + double *pdd; // derivative of the multi-environment descriptors with respect to the atomic positions + + int nproj; // number of elements in projection matrix (nComponents * Mdesc * nelements) + int ncentroids; // number of centroids (nComponents * nClusters * nelements) + + int Njmax; + int nCoeffPerElement; // number of coefficients per element = (nl1 + Mdesc*nClusters) + int nCoeffAll; // number of coefficients for all elements = (nl1 + Mdesc*nClusters)*nelements + int ncoeff; // number of coefficients in the input file + int ns; // number of snapshots for radial basis functions + int nd1, nd2, nd3, nd4, nd5, nd6, nd7, nd; // number of global descriptors + int nl1, nl2, nl3, nl4, nl5, nl6, nl7, nl; // number of local descriptors + int nrbf2, nrbf3, nrbf4, nrbfmax; // number of radial basis functions + int nabf3, nabf4; // number of angular basis functions + int P3, P4; // angular polynomial degrees + int K3, K4, Q4; // K3, K4: number of monomials; Q4: length of the pc4 coefficient array + int *pn3, *pq3, *pc3; // arrays to compute 3-body angular basis functions + int *pq4, *pa4, *pb4, *pc4; // arrays to compute 4-body angular basis functions + int *tmpint; // scratch buffer of ints; its length is tracked in nintmem + int nintmem;
// number of integers in tmpint array + int ndblmem; // number of doubles in tmpmem array + + // four-body descriptors + int *ind23, *ind32, nrbf23, nabf23, P23, n23, n32, nl23, nd23; + + // five-body descriptors + int *ind33, nrbf33, nabf33, P33, n33, nl33, nd33; + + // six-body descriptors + int *ind34, *ind43, nrbf34, nabf34, nabf43, P34, n34, n43, nl34, nd34; + + // seven-body descriptors + int *ind44, nrbf44, nabf44, P44, n44, nl44, nd44; + + int nld33, nld34, nld44, ngd33, ngd34, ngd44; + int *ind33l, *ind33r, *ind34l, *ind34r, *ind44l, *ind44r; + + EAPOD(LAMMPS *, const std::string &pod_file, const std::string &coeff_file, const std::string &proj_file, const std::string &centroids_file); + + EAPOD(LAMMPS *lmp) : Pointers(lmp){}; + ~EAPOD() override; + + void print_matrix(const char* desc, int m, int n, int* a, int lda ); + void print_matrix(const char* desc, int m, int n, double* a, int lda ); + + void read_pod_file(std::string pod_file); + int read_coeff_file(std::string coeff_file); + int read_projection_matrix(std::string proj_file); + int read_centroids(std::string centroids_file); + + int estimate_temp_memory(int Nj); + void free_temp_memory(); + void allocate_temp_memory(int Nj); + + //void mknewcoeff(); + + void mknewcoeff(double *c, int nc); + + void twobodydescderiv(double *d2, double *dd2, double *rbf, double *rbfx, + double *rbfy, double *rbfz, int *tj, int N); + + void threebodydesc(double *d3, double *sumU, int N); + + void threebodydescderiv(double *dd3, double *sumU, double *Ux, double *Uy, double *Uz, + int *atomtype, int N); + + void fourbodydescderiv(double *d4, double *dd4, double *sumU, double *Ux, double *Uy, double *Uz, + int *atomtype, int N); + + void descriptors(double *gd, double *gdd, double *basedesc, double *probdesc, double *x, int *atomtype, int *alist, + int *jlist, int *pairnumsum, int natom); + + void descriptors(double *gd, double *gdd, double *basedesc, double *x, int *atomtype, int *alist, + int *jlist, int *pairnumsum, int
natom); + + void peratombase_descriptors(double *bd, double *bdd, double *rij, double *temp, + int *ti, int *tj, int Nj); + + void peratomenvironment_descriptors(double *P, double *dP_dR, double *B, double *dB_dR, double *tmp, int elem, int nNeighbors); + + void base_descriptors(double *basedesc, double *x, int *atomtype, int *alist, + int *jlist, int *pairnumsum, int natom); + + void descriptors(double *basedesc, double *probdesc, double *x, int *atomtype, int *alist, + int *jlist, int *pairnumsum, int natom); + + double peratomenergyforce(double *fij, double *rij, double *temp, int *ti, int *tj, int Nj); + + double energyforce(double *force, double *x, int *atomtype, int *alist, + int *jlist, int *pairnumsum, int natom); + + void tallyforce(double *force, double *fij, int *ai, int *aj, int N); + + void fourbodydesc23(double* d23, double* d2, double *d3); + void fourbodydescderiv23(double* dd23, double* d2, double *d3, double* dd2, double *dd3, int N); + + void crossdesc(double *d12, double *d1, double *d2, int *ind1, int *ind2, int n12); + void crossdescderiv(double *dd12, double *d1, double *d2, double *dd1, double *dd2, + int *ind1, int *ind2, int n12, int N); + +}; + +} // namespace LAMMPS_NS + +#endif + diff --git a/src/ML-POD/fitpod_command.cpp b/src/ML-POD/fitpod_command.cpp index ef39962e0bf..b21bb8cb37a 100644 --- a/src/ML-POD/fitpod_command.cpp +++ b/src/ML-POD/fitpod_command.cpp @@ -15,9 +15,10 @@ Contributing authors: Ngoc Cuong Nguyen (MIT) and Andrew Rohskopf (SNL) ------------------------------------------------------------------------- */ + #include "fitpod_command.h" -#include "mlpod.h" +#include "eapod.h" #include "comm.h" #include "error.h" @@ -28,15 +29,18 @@ #include #include #include -#include +#include +#include +#include using namespace LAMMPS_NS; using MathSpecial::powint; -static constexpr int MAXLINE = 1024; +#define MAXLINE 1024 + static constexpr double SMALL = 1.0e-10; -FitPOD::FitPOD(LAMMPS *_lmp) : Command(_lmp), podptr(nullptr) 
+FitPOD::FitPOD(LAMMPS *_lmp) : Command(_lmp), fastpodptr(nullptr) { } @@ -46,48 +50,115 @@ void FitPOD::command(int narg, char **arg) std::string pod_file = std::string(arg[0]); // pod input file std::string data_file = std::string(arg[1]); // data input file - std::string coeff_file; // coefficient input file + std::string coeff_file, proj_file, cent_file; // coefficient input files if (narg > 2) coeff_file = std::string(arg[2]); // coefficient input file else coeff_file = ""; + if (narg > 3) + proj_file = std::string(arg[3]); // projection input file + else + proj_file = ""; + if (narg > 4) + cent_file = std::string(arg[4]); // centroid input file + else + cent_file = ""; + + fastpodptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, cent_file); + + desc.nCoeffAll = fastpodptr->nCoeffAll; + desc.nClusters = fastpodptr->nClusters; + read_data_files(data_file, fastpodptr->species); + + estimate_memory_neighborstruct(traindata, fastpodptr->pbc, fastpodptr->rcut, fastpodptr->nelements); + estimate_memory_neighborstruct(testdata, fastpodptr->pbc, fastpodptr->rcut, fastpodptr->nelements); + if (desc.nClusters > 1) estimate_memory_neighborstruct(envdata, fastpodptr->pbc, fastpodptr->rcut, fastpodptr->nelements); + allocate_memory_neighborstruct(); + estimate_memory_fastpod(traindata); + estimate_memory_fastpod(testdata); + allocate_memory_descriptorstruct(fastpodptr->nCoeffAll); + + if (coeff_file != "") podArrayCopy(desc.c, fastpodptr->coeff, fastpodptr->nCoeffAll); + + if (compute_descriptors==0) { + + if (((int) envdata.data_path.size() > 1) && (desc.nClusters > 1)) { + environment_cluster_calculation(envdata); + //error->all(FLERR, "stop after enviroment_cluster_calculation"); + memory->destroy(envdata.lattice); + memory->destroy(envdata.energy); + memory->destroy(envdata.stress); + memory->destroy(envdata.position); + memory->destroy(envdata.force); + memory->destroy(envdata.atomtype); + memory->destroy(envdata.we); + memory->destroy(envdata.wf); + } - podptr = 
new MLPOD(lmp, pod_file, coeff_file); - read_data_files(data_file, podptr->pod.species); - - if ((int) traindata.data_path.size() > 1) - allocate_memory(traindata); - else if ((int) testdata.data_path.size() > 1) - allocate_memory(testdata); + // compute POD coefficients using least-squares method - // get POD coefficients from an input file + if (coeff_file == "") least_squares_fit(traindata); + //error->all(FLERR, "stop after least_squares_fit"); - if (coeff_file != "") podArrayCopy(desc.c, podptr->pod.coeff, podptr->pod.nd); + // calculate errors for the training data set - // compute POD coefficients using least-squares method + if ((traindata.training_analysis) && ((int) traindata.data_path.size() > 1) ) + error_analysis(traindata, desc.c); - least_squares_fit(traindata); + //error->all(FLERR, "stop after error_analysis"); - // calculate errors for the training data set + // calculate energy and force for the training data set - if ((traindata.training_analysis) && ((int) traindata.data_path.size() > 1) ) - error_analysis(traindata, desc.c); + if ((traindata.training_calculation) && ((int) traindata.data_path.size() > 1) ) + energyforce_calculation(traindata, desc.c); - // calculate errors for the test data set + if (!((testdata.data_path == traindata.data_path) && (testdata.fraction == 1.0) && (traindata.fraction == 1.0))) + { + // calculate errors for the test data set - if ((testdata.test_analysis) && ((int) testdata.data_path.size() > 1) && (testdata.data_path != traindata.data_path)) - error_analysis(testdata, desc.c); + if ((testdata.test_analysis) && ((int) testdata.data_path.size() > 1) && (testdata.fraction > 0) ) { + error_analysis(testdata, desc.c); + } - // calculate energy and force for the training data set + // calculate energy and force for the test data set - if ((traindata.training_calculation) && ((int) traindata.data_path.size() > 1) ) - energyforce_calculation(traindata, desc.c); + if ((testdata.test_analysis) && 
(testdata.test_calculation) && ((int) testdata.data_path.size() > 1) && (testdata.fraction > 0) ) + energyforce_calculation(testdata, desc.c); - // calculate energy and force for the test data set + // deallocate testing data - if ((testdata.test_calculation) && ((int) testdata.data_path.size() > 1) && (testdata.data_path != traindata.data_path) ) - energyforce_calculation(testdata, desc.c); + if ((int) testdata.data_path.size() > 1 && (testdata.test_analysis) && (testdata.fraction > 0) ){ + memory->destroy(testdata.lattice); + memory->destroy(testdata.energy); + memory->destroy(testdata.stress); + memory->destroy(testdata.position); + memory->destroy(testdata.force); + memory->destroy(testdata.atomtype); + memory->destroy(testdata.we); + memory->destroy(testdata.wf); + } + } + } + else if (compute_descriptors>0) { + // compute and save POD descriptors + descriptors_calculation(traindata); + + if (!((testdata.data_path == traindata.data_path) && (testdata.fraction == 1.0))) + { + if ((int) testdata.data_path.size() > 1){ + descriptors_calculation(testdata); + memory->destroy(testdata.lattice); + memory->destroy(testdata.energy); + memory->destroy(testdata.stress); + memory->destroy(testdata.position); + memory->destroy(testdata.force); + memory->destroy(testdata.atomtype); + memory->destroy(testdata.we); + memory->destroy(testdata.wf); + } + } + } // deallocate training data @@ -98,44 +169,36 @@ void FitPOD::command(int narg, char **arg) memory->destroy(traindata.position); memory->destroy(traindata.force); memory->destroy(traindata.atomtype); - } - - // deallocate testing data - - if ((int) testdata.data_path.size() > 1 && (testdata.data_path != traindata.data_path)){ - memory->destroy(testdata.lattice); - memory->destroy(testdata.energy); - memory->destroy(testdata.stress); - memory->destroy(testdata.position); - memory->destroy(testdata.force); - memory->destroy(testdata.atomtype); + memory->destroy(traindata.we); + memory->destroy(traindata.wf); } // deallocate 
descriptors - memory->destroy(desc.gd); - memory->destroy(desc.gdd); memory->destroy(desc.A); memory->destroy(desc.b); memory->destroy(desc.c); - memory->destroy(desc.tmpint); - - // deallocate neighbor data + memory->destroy(desc.bd); + memory->destroy(desc.pd); + memory->destroy(desc.gd); + memory->destroy(desc.gdd); + // // deallocate neighbor data memory->destroy(nb.alist); memory->destroy(nb.pairnum); memory->destroy(nb.pairnum_cumsum); memory->destroy(nb.pairlist); memory->destroy(nb.y); - delete podptr; -} -/* ---------------------------------------------------------------------- */ + delete fastpodptr; +} int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, - std::string &file_extension, std::string &test_path, + std::string &file_extension, std::string &env_path, std::string &test_path, std::string &training_path, std::string &filenametag, - const std::string &data_file) + const std::string &data_file, std::string &group_weight_type, + std::unordered_map &we_map, + std::unordered_map &wf_map) { int precision = 8; @@ -150,8 +213,7 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, // loop through lines of training data file and parse keywords - char line[MAXLINE] = {'\0'}; - char *ptr; + char line[MAXLINE],*ptr; int eof = 0; while (true) { if (comm->me == 0) { @@ -177,7 +239,7 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, if (words.size() == 0) continue; - const auto &keywd = words[0]; + auto keywd = words[0]; if (words.size() != 2) error->one(FLERR,"Improper POD file.", utils::getsyserror()); @@ -197,6 +259,8 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, if (keywd == "randomize_test_data_set") fitting_weights[10] = utils::numeric(FLERR,words[1],false,lmp); if (keywd == "fitting_regularization_parameter") fitting_weights[11] = utils::numeric(FLERR,words[1],false,lmp); if (keywd == "precision_for_pod_coefficients") precision = 
utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "save_pod_descriptors") save_descriptors = utils::inumeric(FLERR,words[1],false,lmp); + if (keywd == "compute_pod_descriptors") compute_descriptors = utils::inumeric(FLERR,words[1],false,lmp); // other settings @@ -204,7 +268,64 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, if (keywd == "file_extension") file_extension = words[1]; if (keywd == "path_to_training_data_set") training_path = words[1]; if (keywd == "path_to_test_data_set") test_path = words[1]; + if (keywd == "path_to_enviroment_configuration_set") env_path = words[1]; if (keywd == "basename_for_output_files") filenametag = words[1]; + + // group weight table + if (keywd == "group_weights") group_weight_type = words[1]; + if (std::strcmp(group_weight_type.c_str(), "table") == 0){ + // Read the table as a hash map. + // Get next line. + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fpdata); + if (ptr == nullptr) { + eof = 1; + fclose(fpdata); + } + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + // Tokenize. + //std::vector words; + try { + words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector(); + } catch (TokenizerException &) { + // ignore + } + int numwords = words.size(); + + // Loop over group table entries. + while (numwords == 3){ + + // Insert in map. + double we = atof(words[1].c_str()); + we_map[words[0]] = atof(words[1].c_str()); + double wf = atof(words[2].c_str()); + wf_map[words[0]] = atof(words[2].c_str()); + + // Get next line. + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fpdata); + if (ptr == nullptr) { + eof = 1; + fclose(fpdata); + } + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); + // Tokenize. 
+ //std::vector words; + try { + words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector(); + } catch (TokenizerException &) { + // ignore + } + numwords = words.size(); + } + + } } if (comm->me == 0) { @@ -213,6 +334,8 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, utils::logmesg(lmp, "file extension: {}\n", file_extension); utils::logmesg(lmp, "path to training data set: {}\n", training_path); utils::logmesg(lmp, "path to test data set: {}\n", test_path); + utils::logmesg(lmp, "path to enviroment configuration set: {}\n", env_path); + utils::logmesg(lmp, "basename for output files: {}\n", filenametag); utils::logmesg(lmp, "training fraction: {}\n", fitting_weights[7]); utils::logmesg(lmp, "test fraction: {}\n", fitting_weights[8]); utils::logmesg(lmp, "randomize training data set: {}\n", fitting_weights[9]); @@ -224,14 +347,15 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format, utils::logmesg(lmp, "fitting weight for energy: {}\n", fitting_weights[0]); utils::logmesg(lmp, "fitting weight for force: {}\n", fitting_weights[1]); utils::logmesg(lmp, "fitting weight for stress: {}\n", fitting_weights[2]); - utils::logmesg(lmp, "fitting regularization parameter: {}\n", fitting_weights[11]); + utils::logmesg(lmp, "save pod descriptors: {}\n", save_descriptors); + utils::logmesg(lmp, "compute pod descriptors: {}\n", compute_descriptors); utils::logmesg(lmp, "**************** End of Data File ****************\n"); } return precision; } -void FitPOD::get_exyz_files(std::vector& files, const std::string &datapath, +void FitPOD::get_exyz_files(std::vector& files, std::vector &group_names, const std::string &datapath, const std::string &extension) { auto allfiles = platform::list_directory(datapath); @@ -239,6 +363,11 @@ void FitPOD::get_exyz_files(std::vector& files, const std::string & for (const auto &fname : allfiles) { if (utils::strmatch(fname, fmt::format(".*\\.{}$", extension))) 
files.push_back(datapath + platform::filepathsep + fname); + int start_pos_erase = fname.find(extension) - 1; + int ext_size = extension.size() + 1; + //std::string substr = fname.erase(start_pos_erase, ext_size); + std::string substr = fname.substr(0, start_pos_erase); + group_names.push_back(substr); } } @@ -252,8 +381,7 @@ int FitPOD::get_number_atom_exyz(std::vector& num_atom, int& num_atom_sum, error->one(FLERR,"Cannot open POD coefficient file {}: ", filename, utils::getsyserror()); } - char line[MAXLINE] = {'\0'}; - char *ptr; + char line[MAXLINE],*ptr; int eof = 0; int num_configs = 0; num_atom_sum = 0; @@ -313,8 +441,8 @@ int FitPOD::get_number_atoms(std::vector& num_atom, std::vector &num_a return num_atom_all; } -void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, double *pos, double *forces, - int *atomtype, std::string file, std::vector species) +void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, double *we, double *wf, double *pos, double *forces, + int *atomtype, std::string file, std::vector species, double we_group, double wf_group) { std::string filename = std::move(file); @@ -325,8 +453,7 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou error->one(FLERR,"Cannot open POD coefficient file {}: ", filename, utils::getsyserror()); } - char line[MAXLINE] = {'\0'}; - char *ptr; + char line[MAXLINE],*ptr; int eof = 0; int cfi = 0; int nat = 0; @@ -385,54 +512,62 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou } } - // find the word containing "energy" + if (compute_descriptors == 0) { - it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("nergy") != std::string::npos; }); + // find the word containing "energy" - // get index of element from iterator + it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("nergy") != std::string::npos; }); - index = 
std::distance(words.begin(), it); + // get index of element from iterator - if (words[index].find("=") != std::string::npos) { + index = std::distance(words.begin(), it); - // energy is after "=" inside this string + if (words[index].find("=") != std::string::npos) { - std::size_t found = words[index].find("="); - energy[cfi] = utils::numeric(FLERR,words[index].substr(found+1),false,lmp); - } else { + // energy is after "=" inside this string - // energy is at index + 2 + std::size_t found = words[index].find("="); + energy[cfi] = utils::numeric(FLERR,words[index].substr(found+1),false,lmp); + } else { - energy[cfi] = utils::numeric(FLERR,words[index+2],false,lmp); + // energy is at index + 2 - } + energy[cfi] = utils::numeric(FLERR,words[index+2],false,lmp); + } - // find the word containing "stress" + // find the word containing "stress" - it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("tress") != std::string::npos; }); + it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("tress") != std::string::npos; }); - // get index of element from iterator + // get index of element from iterator - index = std::distance(words.begin(), it); + index = std::distance(words.begin(), it); - if (words[index].find("=") != std::string::npos) { + if (index < std::distance(words.begin(), words.end())) { + if (words[index].find("=") != std::string::npos) { - // stress numbers start at index + 1 + // stress numbers start at index + 1 - for (int k = 0; k < 9; k++) { - stress[k + 9*cfi] = utils::numeric(FLERR,words[index+1+k],false,lmp); - } - } else { + for (int k = 0; k < 9; k++) { + stress[k + 9*cfi] = utils::numeric(FLERR,words[index+1+k],false,lmp); + } + } else { - // lattice numbers start at index + 2 + // lattice numbers start at index + 2 - for (int k = 0; k < 9; k++) { - stress[k + 9*cfi] = utils::numeric(FLERR,words[index+2+k],false,lmp); + for (int k = 0; k < 9; k++) { + stress[k + 9*cfi] = 
utils::numeric(FLERR,words[index+2+k],false,lmp); + } + } } } - cfi += 1; + // set fitting weights for this config + we[cfi] = we_group; + wf[cfi] = wf_group; + + cfi += 1; } // loop over atoms @@ -443,18 +578,25 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou if (species[ii] == words[0]) atomtype[nat] = ii+1; - for (int k = 0; k < 6; k++) { - if (k <= 2) pos[k + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp); - if (k > 2 ) forces[k-3 + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp); + if (compute_descriptors> 0) { + for (int k = 0; k < 3; k++) + pos[k + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp); } + else { + for (int k = 0; k < 6; k++) { + if (k <= 2) pos[k + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp); + if (k > 2 ) forces[k-3 + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp); + } + } + nat += 1; } } } -void FitPOD::get_data(datastruct &data, const std::vector& species) +void FitPOD::get_data(datastruct &data, const std::vector &species) { - get_exyz_files(data.data_files, data.data_path, data.file_extension); + get_exyz_files(data.data_files, data.group_names, data.data_path, data.file_extension); data.num_atom_sum = get_number_atoms(data.num_atom, data.num_atom_each_file, data.num_config, data.data_files); data.num_config_sum = data.num_atom.size(); size_t maxname = 9; @@ -486,18 +628,35 @@ void FitPOD::get_data(datastruct &data, const std::vector& species) memory->create(data.lattice, 9*n, "fitpod:lattice"); memory->create(data.stress, 9*n, "fitpod:stress"); memory->create(data.energy, n, "fitpod:energy"); + // Group weights have same size as energy. 
+ memory->create(data.we, n, "fitpod:we"); + memory->create(data.wf, n, "fitpod:wf"); + n = data.num_atom_sum; memory->create(data.position, 3*n, "fitpod:position"); memory->create(data.force, 3*n, "fitpod:force"); memory->create(data.atomtype, n, "fitpod:atomtype"); + double we_group, wf_group; // group weights int nfiles = data.data_files.size(); // number of files int nconfigs = 0; int natoms = 0; for (int i=0; i FitPOD::shuffle(int start_in, int end_in, int num_in) for (int i = 0; i shuffle_vec(num_in); for (int i = 0; icreate(newdata.lattice, 9*n, "fitpod:newdata_lattice"); memory->create(newdata.stress, 9*n, "fitpod:newdata_stress"); memory->create(newdata.energy, n, "fitpod:newdata_energy"); - n = data.num_atom_sum; + // Group weights have same size as energy. + memory->create(newdata.we, n, "fitpod:we"); + memory->create(newdata.wf, n, "fitpod:wf"); + + n = newdata.num_atom_sum; memory->create(newdata.position, 3*n, "fitpod:newdata_position"); memory->create(newdata.force, 3*n, "fitpod:newdata_force"); memory->create(newdata.atomtype, n, "fitpod:newdata_atomtype"); @@ -673,6 +837,8 @@ void FitPOD::select_data(datastruct &newdata, const datastruct &data) double *force = &data.force[dim*natom_cumsum]; newdata.energy[cn] = data.energy[ci]; + newdata.we[cn] = data.we[ci]; + newdata.wf[cn] = data.wf[ci]; for (int j=0; j<9; j++) { newdata.stress[j+9*cn] = data.stress[j+9*ci]; newdata.lattice[j+9*cn] = data.lattice[j+9*ci]; @@ -719,8 +885,8 @@ void FitPOD::read_data_files(const std::string& data_file, const std::vectordestroy(data.atomtype); } - if (((int) testdata.data_path.size() > 1) && (testdata.data_path != traindata.data_path)) { + testdata.fraction = traindata.fitting_weights[8]; + testdata.test_analysis = traindata.test_analysis; + testdata.filenametag = traindata.filenametag; + + if (((int) envdata.data_path.size() > 1) && (desc.nClusters > 1)) { + envdata.filenametag = traindata.filenametag; + envdata.file_format = traindata.file_format; + 
envdata.file_extension = traindata.file_extension; + int tmp = compute_descriptors; + compute_descriptors = 1; + if (comm->me == 0) + utils::logmesg(lmp, "**************** Begin of Enviroment Configuration Set ****************\n"); + get_data(envdata, species); + if (comm->me == 0) + utils::logmesg(lmp, "**************** End of Enviroment Configuration Set ****************\n"); + compute_descriptors = tmp; + } + + if ((testdata.data_path == traindata.data_path) && (testdata.fraction == 1.0) && (traindata.fraction == 1.0)) { + testdata.data_path = traindata.data_path; + } + else if (((int) testdata.data_path.size() > 1) && (testdata.fraction > 0) && (testdata.test_analysis)) { testdata.training = 0; testdata.file_format = traindata.file_format; testdata.file_extension = traindata.file_extension; testdata.training_analysis = traindata.training_analysis; - testdata.test_analysis = traindata.test_analysis; testdata.training_calculation = traindata.training_calculation; testdata.test_calculation = traindata.test_calculation; - testdata.fraction = traindata.fitting_weights[8]; testdata.randomize = (int) traindata.fitting_weights[10]; - if (comm->me == 0) - utils::logmesg(lmp, "**************** Begin of Test Data Set ****************\n"); - get_data(testdata, species); - if (comm->me == 0) - utils::logmesg(lmp, "**************** End of Test Data Set ****************\n"); + + if (testdata.fraction >= 1.0) { + if (comm->me == 0) + utils::logmesg(lmp, "**************** Begin of Test Data Set ****************\n"); + get_data(testdata, species); + if (comm->me == 0) + utils::logmesg(lmp, "**************** End of Test Data Set ****************\n"); + } + else { + datastruct datatm; + testdata.copydatainfo(datatm); + + if (comm->me == 0) + utils::logmesg(lmp, "**************** Begin of Test Data Set ****************\n"); + get_data(datatm, species); + if (comm->me == 0) + utils::logmesg(lmp, "**************** End of Test Data Set ****************\n"); + + if (comm->me == 0) + 
utils::logmesg(lmp, "**************** Begin of Select Test Data Set ****************\n"); + select_data(testdata, datatm); + if (comm->me == 0) + utils::logmesg(lmp, "**************** End of Select Test Data Set ****************\n"); + + /* datatm was filled by get_data(), which also allocates the per-configuration weight arrays we and wf; release them together with the other temporary arrays now that select_data() has copied what it needs. */ + memory->destroy(datatm.lattice); + memory->destroy(datatm.energy); + memory->destroy(datatm.stress); + memory->destroy(datatm.position); + memory->destroy(datatm.force); + memory->destroy(datatm.atomtype); + memory->destroy(datatm.we); + memory->destroy(datatm.wf); + } } else { testdata.data_path = traindata.data_path; - } + } } int FitPOD::latticecoords(double *y, int *alist, double *x, double *a1, double *a2, double *a3, double rcut, int *pbc, int nx) @@ -868,34 +1079,10 @@ int FitPOD::podfullneighborlist(double *y, int *alist, int *neighlist, int *numn return nn; } -void FitPOD::allocate_memory(const datastruct &data) +void FitPOD::estimate_memory_neighborstruct(const datastruct &data, int *pbc, double rcut, int nelements) { - int nd = podptr->pod.nd; - memory->create(desc.gd, nd, "fitpod:desc_gd"); - memory->create(desc.A, nd*nd, "fitpod:desc_A"); - memory->create(desc.b, nd, "fitpod:desc_b"); - memory->create(desc.c, nd, "fitpod:desc_c"); - podArraySetValue(desc.A, 0.0, nd*nd); - podArraySetValue(desc.b, 0.0, nd); - podArraySetValue(desc.c, 0.0, nd); - int dim = 3; int natom_max = data.num_atom_max; - int nd1 = podptr->pod.nd1; - int nd2 = podptr->pod.nd2; - int nd3 = podptr->pod.nd3; - int nd4 = podptr->pod.nd4; - int nelements = podptr->pod.nelements; - int nbesselpars = podptr->pod.nbesselpars; - int nrbf2 = podptr->pod.nbf2; - int nabf3 = podptr->pod.nabf3; - int nrbf3 = podptr->pod.nrbf3; - int *pdegree2 = podptr->pod.twobody; - int *pdegree3 = podptr->pod.threebody; - int *pbc = podptr->pod.pbc; - double rcut = podptr->pod.rcut; - - int Nj=0, Nij=0; int m=0, n=0, p=0, nl=0, ny=0, na=0, np=0; for (int ci=0; ci<(int) data.num_atom.size(); ci++) @@ -917,22 +1104,53 @@ void FitPOD::allocate_memory(const datastruct &data) np = MAX(np, natom*natom*nl); } - memory->create(nb.y, ny, "
"fitpod:nb_y"); - memory->create(nb.alist, na, "fitpod:nb_alist"); - memory->create(nb.pairnum, natom_max, "fitpod:nb_pairnum"); - memory->create(nb.pairnum_cumsum, natom_max+1, "fitpod:nb_pairnum_cumsum"); - memory->create(nb.pairlist, np, "fitpod:nb_pairlist"); - - nb.natom_max = natom_max; + nb.natom_max = MAX(nb.natom_max, natom_max); nb.sze = nelements*nelements; - nb.sza = na; - nb.szy = ny; - nb.szp = np; + nb.sza = MAX(nb.sza, na); + nb.szy = MAX(nb.szy, ny); + nb.szp = MAX(nb.szp, np); +} - if (comm->me == 0) - utils::logmesg(lmp,"**************** Begin of Memory Allocation ****************\n"); +void FitPOD::allocate_memory_neighborstruct() +{ + memory->create(nb.y, nb.szy, "fitpod:nb_y"); + memory->create(nb.alist, nb.sza, "fitpod:nb_alist"); + memory->create(nb.pairnum, nb.natom_max, "fitpod:nb_pairnum"); + memory->create(nb.pairnum_cumsum, nb.natom_max+1, "fitpod:nb_pairnum_cumsum"); + memory->create(nb.pairlist, nb.szp, "fitpod:nb_pairlist"); +} + +void FitPOD::allocate_memory_descriptorstruct(int nCoeffAll) +{ + memory->create(desc.bd, nb.natom_max*fastpodptr->Mdesc, "fitpod:desc_ld"); + memory->create(desc.pd, nb.natom_max*fastpodptr->nClusters, "fitpod:desc_ld"); + memory->create(desc.gd, nCoeffAll, "fitpod:desc_gd"); + memory->create(desc.A, nCoeffAll*nCoeffAll, "fitpod:desc_A"); + memory->create(desc.b, nCoeffAll, "fitpod:desc_b"); + memory->create(desc.c, nCoeffAll, "fitpod:desc_c"); + memory->create(desc.gdd, desc.szd, "fitpod:desc_gdd"); + podArraySetValue(desc.A, 0.0, nCoeffAll*nCoeffAll); + podArraySetValue(desc.b, 0.0, nCoeffAll); + podArraySetValue(desc.c, 0.0, nCoeffAll); + + if (comm->me == 0) { + utils::logmesg(lmp, "**************** Begin of Memory Allocation ****************\n"); + utils::logmesg(lmp, "maximum number of atoms in periodic domain: {}\n", nb.natom_max); + utils::logmesg(lmp, "maximum number of atoms in extended domain: {}\n", nb.sza); + utils::logmesg(lmp, "maximum number of neighbors in extended domain: {}\n", nb.szp); 
+ utils::logmesg(lmp, "size of double memory: {}\n", desc.szd); + utils::logmesg(lmp, "size of descriptor matrix: {} x {}\n", nCoeffAll, nCoeffAll); + utils::logmesg(lmp, "**************** End of Memory Allocation ****************\n"); + } +} - int szd = 0, szi=0, szsnap=0; +void FitPOD::estimate_memory_fastpod(const datastruct &data) +{ + int dim = 3; + int *pbc = fastpodptr->pbc; + double rcut = fastpodptr->rcut; + + int Nij=0, Nijmax=0; for (int ci=0; ci<(int) data.num_atom.size(); ci++) { int natom = data.num_atom[ci]; @@ -944,57 +1162,46 @@ void FitPOD::allocate_memory(const datastruct &data) double *a3 = &lattice[6]; Nij = podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, x, a1, a2, a3, rcut, pbc, natom); - - int ns2 = pdegree2[0]*nbesselpars + pdegree2[1]; - int ns3 = pdegree3[0]*nbesselpars + pdegree3[1]; - - int szd1 = 3*Nij+ (1+dim)*Nij*MAX(nrbf2+ns2,nrbf3+ns3) + (nabf3+1)*7; - int szi1 = 6*Nij + 2*natom+1 + (Nj-1)*Nj; - szd = MAX(szd, szd1); - szi = MAX(szi, szi1); - - if (podptr->sna.twojmax>0) { - szd1 = 0; - szd1 += Nij*dim; // rij - szd1 += MAX(2*podptr->sna.idxu_max*Nij, 2*podptr->sna.idxz_max*podptr->sna.ndoubles*natom); // (Ur, Ui) and (Zr, Zi) - szd1 += 2*podptr->sna.idxu_max*dim*Nij; // dUr, dUi - szd1 += MAX(podptr->sna.idxb_max*podptr->sna.ntriples*dim*Nij, 2*podptr->sna.idxu_max*podptr->sna.nelements*natom); // dblist and (Utotr, Utoti) - szsnap = MAX(szsnap, szd1); - } + Nijmax = MAX(Nijmax, Nij); } - szd = MAX(szsnap, szd); - szd = MAX(natom_max*(nd1+nd2+nd3+nd4) + szd, dim*natom_max*(nd-nd1-nd2-nd3-nd4)); - szd = dim*natom_max*(nd1+nd2+nd3+nd4) + szd; + desc.szd = MAX(desc.szd, 3*Nijmax*fastpodptr->nCoeffAll); +} - // gdd includes linear descriptors derivatives, quadratic descriptors derivatives and temporary memory +void FitPOD::local_descriptors_fastpod(const datastruct &data, int ci) +{ + int dim = 3; + int *pbc = fastpodptr->pbc; + double rcut = fastpodptr->rcut; + + int natom = data.num_atom[ci]; + int 
natom_cumsum = data.num_atom_cumsum[ci]; + int *atomtype = &data.atomtype[natom_cumsum]; + double *position = &data.position[dim*natom_cumsum]; + double *lattice = &data.lattice[9*ci]; + double *a1 = &lattice[0]; + double *a2 = &lattice[3]; + double *a3 = &lattice[6]; - memory->create(desc.gdd, szd, "fitpod:desc_gdd"); - memory->create(desc.tmpint, szi, "fitpod:desc_tmpint"); - desc.szd = szd; - desc.szi = szi; + // neighbor list + podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, + position, a1, a2, a3, rcut, pbc, natom); - if (comm->me == 0) { - utils::logmesg(lmp, "maximum number of atoms in periodic domain: {}\n", natom_max); - utils::logmesg(lmp, "maximum number of atoms in extended domain: {}\n", nb.sza); - utils::logmesg(lmp, "maximum number of neighbors in extended domain: {}\n", nb.szp); - utils::logmesg(lmp, "size of double memory: {}\n", szd); - utils::logmesg(lmp, "size of int memory: {}\n", szi); - utils::logmesg(lmp, "size of descriptor matrix: {} x {}\n", nd, nd); - utils::logmesg(lmp, "**************** End of Memory Allocation ****************\n"); + if (desc.nClusters > 1) { + fastpodptr->descriptors(desc.gd, desc.gdd, desc.bd, desc.pd, nb.y, atomtype, nb.alist, nb.pairlist, + nb.pairnum_cumsum, natom); + } + else { + fastpodptr->descriptors(desc.gd, desc.gdd, desc.bd, nb.y, atomtype, nb.alist, nb.pairlist, + nb.pairnum_cumsum, natom); } } -void FitPOD::linear_descriptors(const datastruct &data, int ci) +void FitPOD::base_descriptors_fastpod(const datastruct &data, int ci) { int dim = 3; - int nd1 = podptr->pod.nd1; - int nd2 = podptr->pod.nd2; - int nd3 = podptr->pod.nd3; - int nd4 = podptr->pod.nd4; - int nd1234 = nd1+nd2+nd3+nd4; - int *pbc = podptr->pod.pbc; - double rcut = podptr->pod.rcut; + int *pbc = fastpodptr->pbc; + double rcut = fastpodptr->rcut; int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; @@ -1006,162 +1213,227 @@ void FitPOD::linear_descriptors(const datastruct &data, int 
ci) double *a3 = &lattice[6]; // neighbor list - int Nij = podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, - position, a1, a2, a3, rcut, pbc, natom); - - int *tmpint = &desc.tmpint[0]; - double *tmpmem = &desc.gdd[dim*natom*nd1234+natom*nd1234]; - podptr->linear_descriptors(desc.gd, desc.gdd, nb.y, tmpmem, atomtype, nb.alist, - nb.pairlist, nb.pairnum, nb.pairnum_cumsum, tmpint, natom, Nij); + podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, + position, a1, a2, a3, rcut, pbc, natom); + fastpodptr->base_descriptors(desc.bd, nb.y, atomtype, nb.alist, nb.pairlist, + nb.pairnum_cumsum, natom); } -void FitPOD::quadratic_descriptors(const datastruct &data, int ci) +void FitPOD::descriptors_calculation(const datastruct &data) { - int dim = 3; - int natom = data.num_atom[ci]; - int nd1 = podptr->pod.nd1; - int nd2 = podptr->pod.nd2; - int nd3 = podptr->pod.nd3; - int nd4 = podptr->pod.nd4; - int nd22 = podptr->pod.nd22; - int nd23 = podptr->pod.nd23; - int nd24 = podptr->pod.nd24; - int nd33 = podptr->pod.nd33; - int nd34 = podptr->pod.nd34; - int nd44 = podptr->pod.nd44; - int nd123 = nd1+nd2+nd3; - int nd1234 = nd1+nd2+nd3+nd4; - - double *fatom2 = &desc.gdd[dim*natom*(nd1)]; - double *fatom3 = &desc.gdd[dim*natom*(nd1+nd2)]; - double *fatom4 = &desc.gdd[dim*natom*(nd123)]; - - // global descriptors for four-body quadratic22 potential - - if (nd22 > 0) { - int nq2 = podptr->pod.quadratic22[0]*podptr->pod.nc2; - podptr->quadratic_descriptors(&desc.gd[nd1234], &desc.gdd[dim*natom*nd1234], - &desc.gd[nd1], fatom2, nq2, dim*natom); - } + if (comm->me == 0) + utils::logmesg(lmp, "**************** Begin Calculating Descriptors ****************\n"); + + // loop over each configuration in the training data set - // global descriptors for four-body quadratic23 potential + double sz[2]; + for (int ci=0; ci < (int) data.num_atom.size(); ci++) { - if (nd23 > 0) { - int nq2 = podptr->pod.quadratic23[0]*podptr->pod.nc2; - 
int nq3 = podptr->pod.quadratic23[1]*podptr->pod.nc3; - podptr->quadratic_descriptors(&desc.gd[nd1234+nd22], &desc.gdd[dim*natom*(nd1234+nd22)], - &desc.gd[nd1], &desc.gd[nd1+nd2], fatom2, fatom3, nq2, nq3, dim*natom); - } + if ((ci % 100)==0) { + if (comm->me == 0) + utils::logmesg(lmp, "Configuration: # {}\n", ci+1); + } - // global descriptors for five-body quadratic24 potential + if ((ci % comm->nprocs) == comm->me) { - if (nd24 > 0) { - int nq2 = podptr->pod.quadratic24[0]*podptr->pod.nc2; - int nq4 = podptr->pod.quadratic24[1]*podptr->pod.nc4; - podptr->quadratic_descriptors(&desc.gd[nd1234+nd22+nd23], &desc.gdd[dim*natom*(nd1234+nd22+nd23)], - &desc.gd[nd1], &desc.gd[nd1+nd2+nd3], fatom2, fatom4, nq2, nq4, dim*natom); + // compute local POD descriptors + local_descriptors_fastpod(data, ci); + + std::string filename0 = data.data_path + "/basedescriptors_config" + std::to_string(ci+1) + ".bin"; + FILE *fp0 = fopen(filename0.c_str(), "wb"); + sz[0] = (double) data.num_atom[ci]; + sz[1] = (double) fastpodptr->Mdesc; + fwrite( reinterpret_cast( sz ), sizeof(double) * (2), 1, fp0); + fwrite( reinterpret_cast( desc.bd ), sizeof(double) * (data.num_atom[ci]*fastpodptr->Mdesc), 1, fp0); + fclose(fp0); + + if (desc.nClusters>1) { + std::string filename1 = data.data_path + "/environmentdescriptors_config" + std::to_string(ci+1) + ".bin"; + FILE *fp1 = fopen(filename1.c_str(), "wb"); + sz[0] = (double) data.num_atom[ci]; + sz[1] = (double) fastpodptr->nClusters; + fwrite( reinterpret_cast( sz ), sizeof(double) * (2), 1, fp1); + fwrite( reinterpret_cast( desc.pd ), sizeof(double) * (data.num_atom[ci]*fastpodptr->nClusters), 1, fp1); + fclose(fp1); + } + + std::string filename = data.data_path + "/globaldescriptors_config" + std::to_string(ci+1) + ".bin"; + FILE *fp = fopen(filename.c_str(), "wb"); + + sz[0] = (double) data.num_atom[ci]; + sz[1] = (double) desc.nCoeffAll; + fwrite( reinterpret_cast( sz ), sizeof(double) * (2), 1, fp); + fwrite( reinterpret_cast( desc.gd 
), sizeof(double) * (desc.nCoeffAll), 1, fp); + if (compute_descriptors==2) { + fwrite( reinterpret_cast( desc.gdd ), sizeof(double) * (3*data.num_atom[ci]*desc.nCoeffAll), 1, fp); + } + fclose(fp); + } } - // global descriptors for five-body quadratic33 potential + if (comm->me == 0) + utils::logmesg(lmp, "**************** End Calculating Descriptors ****************\n"); +} - if (nd33 > 0) { - int nq3 = podptr->pod.quadratic33[0]*podptr->pod.nc3; - podptr->quadratic_descriptors(&desc.gd[nd1234+nd22+nd23+nd24], &desc.gdd[dim*natom*(nd1234+nd22+nd23+nd24)], - &desc.gd[nd1+nd2], fatom3, nq3, dim*natom); +void FitPOD::environment_cluster_calculation(const datastruct &data) +{ + if (comm->me == 0) + utils::logmesg(lmp, "**************** Begin Calculating Enviroment Descriptor Matrix ****************\n"); + + //printf("number of configurations = %d\n", (int) data.num_atom.size()); + + int nComponents = fastpodptr->nComponents; + int Mdesc = fastpodptr->Mdesc; + int nClusters = fastpodptr->nClusters; + int nelements = fastpodptr->nelements; + memory->create(fastpodptr->Centroids, nClusters*nComponents*nelements, "fitpod:centroids"); + memory->create(fastpodptr->Proj, Mdesc*nComponents*nelements, "fitpod:P"); + + int nAtoms = 0; + int nTotalAtoms = 0; + for (int ci=0; ci < (int) data.num_atom.size(); ci++) { + if ((ci % comm->nprocs) == comm->me) nAtoms += data.num_atom[ci]; + nTotalAtoms += data.num_atom[ci]; } - // global descriptors for six-body quadratic34 potential + double *basedescmatrix = (double *) malloc(nAtoms*Mdesc*sizeof(double)); + double *pca = (double *) malloc(nAtoms*nComponents*sizeof(double)); + double *A = (double *) malloc(Mdesc*Mdesc*sizeof(double)); + double *b = (double *) malloc(Mdesc*sizeof(double)); + double *Lambda = (double *) malloc(Mdesc*nelements*sizeof(double)); + int *clusterSizes = (int *) malloc(nClusters*nelements*sizeof(int)); + int *assignments = (int *) malloc(nAtoms*sizeof(int)); + int *nElemAtoms = (int *) 
malloc(nelements*sizeof(int)); + int *nElemAtomsCumSum = (int *) malloc((1+nelements)*sizeof(int)); + int *nElemAtomsCount = (int *) malloc(nelements*sizeof(int)); + + char chn = 'N'; + char cht = 'T'; + char chv = 'V'; + char chu = 'U'; + double alpha = 1.0, beta = 0.0; - if (nd34 > 0) { - int nq3 = podptr->pod.quadratic34[0]*podptr->pod.nc3; - int nq4 = podptr->pod.quadratic34[1]*podptr->pod.nc4; - podptr->quadratic_descriptors(&desc.gd[nd1234+nd22+nd23+nd24+nd33], &desc.gdd[dim*natom*(nd1234+nd22+nd23+nd24+nd33)], - &desc.gd[nd1+nd2], &desc.gd[nd1+nd2+nd3], fatom3, fatom4, nq3, nq4, dim*natom); + for (int elem=0; elem < nelements; elem++) { + nElemAtoms[elem] = 0; // number of atoms for this element } + for (int ci=0; ci < (int) data.num_atom.size(); ci++) { + if ((ci % comm->nprocs) == comm->me) { + int natom = data.num_atom[ci]; + int natom_cumsum = data.num_atom_cumsum[ci]; + int *atomtype = &data.atomtype[natom_cumsum]; + for (int n=0; nme == 0) + utils::logmesg(lmp, "Configuration: # {}\n", ci+1); + } - // global descriptors for seven-body quadratic44 potential + if ((ci % comm->nprocs) == comm->me) { + base_descriptors_fastpod(data, ci); - if (nd44 > 0) { - int nq4 = podptr->pod.quadratic44[0]*podptr->pod.nc4; - podptr->quadratic_descriptors(&desc.gd[nd1234+nd22+nd23+nd24+nd33+nd34], &desc.gdd[dim*natom*(nd1234+nd22+nd23+nd24+nd33+nd34)], - &desc.gd[nd1+nd2+nd3], fatom4, nq4, dim*natom); + // basedescmatrix is a Mdesc x nAtoms matrix + int natom = data.num_atom[ci]; + int natom_cumsum = data.num_atom_cumsum[ci]; + int *atomtype = &data.atomtype[natom_cumsum]; + for (int n=0; nProj[nComponents*Mdesc*elem]; + double *centroids = &fastpodptr->Centroids[nComponents*nClusters*elem]; - for (int i=0; ipod.nd1; - int nd2 = podptr->pod.nd2; - int nd3 = podptr->pod.nd3; - int nd4 = podptr->pod.nd4; - int nd22 = podptr->pod.nd22; - int nd23 = podptr->pod.nd23; - int nd24 = podptr->pod.nd24; - int nd33 = podptr->pod.nd33; - int nd34 = podptr->pod.nd34; - int nd44 = 
podptr->pod.nd44; - int nd234 = podptr->pod.nd234; - int nd333 = podptr->pod.nd333; - int nd444 = podptr->pod.nd444; - int nd123 = nd1+nd2+nd3; - int nd1234 = nd1+nd2+nd3+nd4; - - // global descriptors for seven-body cubic234 potential - if (nd234 > 0) { - int nq2 = podptr->pod.cubic234[0]*podptr->pod.nc2; - int nq3 = podptr->pod.cubic234[1]*podptr->pod.nc3; - int nq4 = podptr->pod.cubic234[2]*podptr->pod.nc4; - int np3 = nd1234+nd22+nd23+nd24+nd33+nd34+nd44; - double *eatom2 = &desc.gd[nd1]; - double *eatom3 = &desc.gd[nd1+nd2]; - double *eatom4 = &desc.gd[nd123]; - double *fatom2 = &desc.gdd[dim*natom*(nd1)]; - double *fatom3 = &desc.gdd[dim*natom*(nd1+nd2)]; - double *fatom4 = &desc.gdd[dim*natom*(nd123)]; - podptr->cubic_descriptors(&desc.gd[np3], &desc.gdd[dim*natom*np3], - eatom2, eatom3, eatom4, fatom2, fatom3, fatom4, nq2, nq3, nq4, dim*natom); - } + MPI_Allreduce(MPI_IN_PLACE, A, Mdesc*Mdesc, MPI_DOUBLE, MPI_SUM, world); - // global descriptors for seven-body cubic333 potential + if (comm->me == 0) + savematrix2binfile(data.filenametag + "_covariance_matrix_elem" + std::to_string(elem+1) + ".bin", A, Mdesc, Mdesc); - if (nd333 > 0) { - int nq3 = podptr->pod.cubic333[0]*podptr->pod.nc3; - int np3 = nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234; - double *eatom3 = &desc.gd[nd1+nd2]; - double *fatom3 = &desc.gdd[dim*natom*(nd1+nd2)]; - podptr->cubic_descriptors(&desc.gd[np3], &desc.gdd[dim*natom*np3], - eatom3, fatom3, nq3, dim*natom); - } + // Calculate eigenvalues and eigenvectors of A + int lwork = Mdesc * Mdesc; // the length of the array work, lwork >= max(1,3*N-1) + int info = 1; // = 0: successful exit + double work[lwork]; + + DSYEV(&chv, &chu, &Mdesc, A, &Mdesc, b, work, &lwork, &info); - // global descriptors for ten-body cubic444 potential + // order eigenvalues and eigenvectors from largest to smallest + for (int i=0; i 0) { - int nq4 = podptr->pod.cubic444[0]*podptr->pod.nc4; - int np4 = nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234+nd333; - double 
*eatom4 = &desc.gd[nd123]; - double *fatom4 = &desc.gdd[dim*natom*(nd123)]; - podptr->cubic_descriptors(&desc.gd[np4], &desc.gdd[dim*natom*(np4)], - eatom4, fatom4, nq4, dim*natom); + // P is a nComponents x Mdesc matrix + for (int j=0; jme == 0) { + savematrix2binfile(data.filenametag + "_eigenvector_matrix_elem" + std::to_string(elem+1) + ".bin", A, Mdesc, Mdesc); + savematrix2binfile(data.filenametag + "_eigenvalues_elem" + std::to_string(elem+1) + ".bin", b, Mdesc, 1); + } + savematrix2binfile(data.filenametag + "_desc_matrix_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", descmatrix, Mdesc, nAtoms); + savematrix2binfile(data.filenametag + "_pca_matrix_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", pca, nComponents, nAtoms); + saveintmatrix2binfile(data.filenametag + "_cluster_assignments_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", assignments, nAtoms, 1); } - // normalize cubic descriptors - int nd = podptr->pod.nd; - for (int i=(nd1234+nd22+nd23+nd24+nd33+nd34+nd44); iProj, nComponents*Mdesc*nelements, 1, 1); + savedata2textfile(data.filenametag + "_centroids" + ".pod", "centroids: {} \n", fastpodptr->Centroids, nComponents*nClusters*nelements, 1, 1); + + free(basedescmatrix); + free(pca); + free(A); + free(b); + free(clusterSizes); + free(Lambda); + free(assignments); + free(nElemAtoms); + free(nElemAtomsCumSum); + free(nElemAtomsCount); - for (int i=dim*natom*(nd1234+nd22+nd23+nd24+nd33+nd34+nd44); ime == 0) + utils::logmesg(lmp, "**************** End Calculating Enviroment Descriptor Matrix ****************\n"); } void FitPOD::least_squares_matrix(const datastruct &data, int ci) @@ -1169,15 +1441,15 @@ void FitPOD::least_squares_matrix(const datastruct &data, int ci) int dim = 3; int natom = data.num_atom[ci]; int natom_cumsum = data.num_atom_cumsum[ci]; - int nd = podptr->pod.nd; + int nCoeffAll = desc.nCoeffAll; int nforce = dim*natom; // compute energy 
weight and force weight double normconst = 1.0; if (data.normalizeenergy==1) normconst = 1.0/natom; - double we = data.fitting_weights[0]; - double wf = data.fitting_weights[1]; + double we = data.we[ci]; + double wf = data.wf[ci]; double we2 = (we*we)*(normconst*normconst); double wf2 = (wf*wf); @@ -1188,7 +1460,7 @@ void FitPOD::least_squares_matrix(const datastruct &data, int ci) // least-square matrix for all descriptors: A = A + (we*we)*(gd^T * gd) - podKron(desc.A, desc.gd, desc.gd, we2, nd, nd); + podKron(desc.A, desc.gd, desc.gd, we2, nCoeffAll, nCoeffAll); // least-square matrix for all descriptors derivatives: A = A + (wf*wf) * (gdd^T * gdd) @@ -1196,18 +1468,17 @@ void FitPOD::least_squares_matrix(const datastruct &data, int ci) char chn = 'N'; double one = 1.0; int inc1 = 1; - DGEMM(&cht, &chn, &nd, &nd, &nforce, &wf2, desc.gdd, &nforce, desc.gdd, &nforce, &one, desc.A, &nd); + DGEMM(&cht, &chn, &nCoeffAll, &nCoeffAll, &nforce, &wf2, desc.gdd, &nforce, desc.gdd, &nforce, &one, desc.A, &nCoeffAll); // least-square vector for all descriptors: b = b + (we*we*energy)*gd double wee = we2*energy; - for (int i = 0; i< nd; i++) + for (int i = 0; i< nCoeffAll; i++) desc.b[i] += wee*desc.gd[i]; // least-square vector for all descriptors derivatives: b = b + (wf*wf) * (gdd^T * f) - DGEMV(&cht, &nforce, &nd, &wf2, desc.gdd, &nforce, force, &inc1, &one, desc.b, &inc1); - + DGEMV(&cht, &nforce, &nCoeffAll, &wf2, desc.gdd, &nforce, force, &inc1, &one, desc.b, &inc1); } void FitPOD::least_squares_fit(const datastruct &data) @@ -1227,16 +1498,17 @@ void FitPOD::least_squares_fit(const datastruct &data) if ((ci % comm->nprocs) == comm->me) { // compute linear POD descriptors + local_descriptors_fastpod(data, ci); - linear_descriptors(data, ci); - - // compute quadratic POD descriptors - - quadratic_descriptors(data, ci); - - // compute cubic POD descriptors - - cubic_descriptors(data, ci); + if (save_descriptors > 0) { + std::string filename = data.data_path + 
"/descriptors_config" + std::to_string(ci+1) + ".bin"; + FILE *fp = fopen(filename.c_str(), "wb"); + fwrite( reinterpret_cast( desc.gd ), sizeof(double) * (desc.nCoeffAll), 1, fp); + if (save_descriptors==2) { + fwrite( reinterpret_cast( desc.gdd ), sizeof(double) * (3*data.num_atom[ci]*desc.nCoeffAll), 1, fp); + } + fclose(fp); + } // assemble the least-squares linear system @@ -1244,59 +1516,62 @@ void FitPOD::least_squares_fit(const datastruct &data) } } - int nd = podptr->pod.nd; + int nCoeffAll = desc.nCoeffAll; - MPI_Allreduce(MPI_IN_PLACE, desc.b, nd, MPI_DOUBLE, MPI_SUM, world); - MPI_Allreduce(MPI_IN_PLACE, desc.A, nd*nd, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(MPI_IN_PLACE, desc.b, nCoeffAll, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(MPI_IN_PLACE, desc.A, nCoeffAll*nCoeffAll, MPI_DOUBLE, MPI_SUM, world); if (comm->me == 0) { // symmetrize A - for (int i = 0; i fabs(desc.b[i])) ? maxb : fabs(desc.b[i]); - - maxb = 1.0/maxb; - for (int i = 0; i fabs(desc.b[i])) ? maxb : fabs(desc.b[i]); +// +// maxb = 1.0/maxb; +// for (int i = 0; imknewcoeff(desc.c, nCoeffAll); if (comm->me == 0) { // save coefficients into a text file std::string filename = data.filenametag + "_coefficients" + ".pod"; FILE *fp = fopen(filename.c_str(), "w"); - fmt::print(fp, "POD_coefficients: {}\n", nd); - for (int count = 0; count < nd; count++) { + fmt::print(fp, "POD_coefficients: {}\n", nCoeffAll); + for (int count = 0; count < nCoeffAll; count++) { fmt::print(fp, "{:<10.{}f}\n", desc.c[count], data.precision); } fclose(fp); @@ -1304,12 +1579,24 @@ void FitPOD::least_squares_fit(const datastruct &data) } } -double FitPOD::energyforce_calculation(double *force, double *coeff, const datastruct &data, int ci) +double latticevolume(double *lattice) +{ + double *v1 = &lattice[0]; + double *v2 = &lattice[3]; + double *v3 = &lattice[6]; + + double b0 = v1[1] * v2[2] - v1[2] * v2[1]; + double b1 = v1[2] * v2[0] - v1[0] * v2[2]; + double b2 = v1[0] * v2[1] - v1[1] * v2[0]; + + return 
(b0*v3[0] + b1*v3[1] + b2*v3[2]); +} + +double FitPOD::energyforce_calculation_fastpod(double *force, const datastruct &data, int ci) { int dim = 3; - int *pbc = podptr->pod.pbc; - double rcut = podptr->pod.rcut; - int nd1234 = podptr->pod.nd1 + podptr->pod.nd2 + podptr->pod.nd3 + podptr->pod.nd4; + int *pbc = fastpodptr->pbc; + double rcut = fastpodptr->rcut; int natom = data.num_atom[ci]; int natom_cumsum2 = data.num_atom_cumsum[ci]; @@ -1320,26 +1607,11 @@ double FitPOD::energyforce_calculation(double *force, double *coeff, const datas double *a2 = &lattice[3]; double *a3 = &lattice[6]; - // neighbor list - - int Nij = podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, - position, a1, a2, a3, rcut, pbc, natom); - - double *tmpmem = &desc.gdd[0]; - int *tmpint = &desc.tmpint[0]; - double *rij = &tmpmem[0]; // 3*Nij - int *ai = &tmpint[0]; // Nij - int *aj = &tmpint[Nij]; // Nij - int *ti = &tmpint[2*Nij]; // Nij - int *tj = &tmpint[3*Nij]; // Nij - int *idxi = &tmpint[4*Nij]; // Nij - podptr->podNeighPairs(rij, nb.y, idxi, ai, aj, ti, tj, nb.pairnum_cumsum, atomtype, nb.pairlist, nb.alist, natom); + podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, + position, a1, a2, a3, rcut, pbc, natom); - double *effectivecoeff = &tmpmem[3*Nij]; // 3*Nij - podArraySetValue(effectivecoeff, 0.0, nd1234); - - double energy = podptr->energyforce_calculation(force, coeff, effectivecoeff, desc.gd, rij, - &tmpmem[3*Nij+nd1234], nb.pairnum_cumsum, atomtype, idxi, ai, aj, ti, tj, natom, Nij); + double energy = fastpodptr->energyforce(force, nb.y, atomtype, nb.alist, nb.pairlist, + nb.pairnum_cumsum, natom); return energy; } @@ -1376,7 +1648,7 @@ void FitPOD::print_analysis(const datastruct &data, double *outarray, double *er int ci=0, m=8, nc=0, nf=0; for (int file = 0; file < nfiles; file++) { fmt::print(fp_analysis, "# {}\n", data.filenames[file]); - fmt::print(fp_analysis, " config # atoms energy DFT energy energy error " + 
fmt::print(fp_analysis, " config # atoms volume energy DFT energy energy error " " force DFT force force error\n"); int nforceall = 0; @@ -1384,6 +1656,10 @@ void FitPOD::print_analysis(const datastruct &data, double *outarray, double *er nc += nconfigs; for (int ii=0; ii < nconfigs; ii++) { // loop over each configuration in a file fmt::print(fp_analysis, "{:6} {:8} ", outarray[m*ci], outarray[1 + m*ci]); + + double vol = latticevolume(&data.lattice[9*ci]); + fmt::print(fp_analysis, "{:<15.10} ", vol); + for(int count = 2; count < m; count ++) fmt::print(fp_analysis, "{:<15.10} ", outarray[count + m*ci]); fmt::print(fp_analysis, "\n"); @@ -1421,6 +1697,7 @@ void FitPOD::print_analysis(const datastruct &data, double *outarray, double *er void FitPOD::error_analysis(const datastruct &data, double *coeff) { int dim = 3; + int nCoeffAll = desc.nCoeffAll; double energy; std::vector force(dim*data.num_atom_max); @@ -1440,21 +1717,8 @@ void FitPOD::error_analysis(const datastruct &data, double *coeff) for (int i=0; i<4*(nfiles+1); i++) errors[i] = 0.0; - int nd1 = podptr->pod.nd1; - int nd2 = podptr->pod.nd2; - int nd3 = podptr->pod.nd3; - int nd4 = podptr->pod.nd4; - int nd22 = podptr->pod.nd22; - int nd23 = podptr->pod.nd23; - int nd24 = podptr->pod.nd24; - int nd33 = podptr->pod.nd33; - int nd34 = podptr->pod.nd34; - int nd44 = podptr->pod.nd44; - int nd1234 = nd1+nd2+nd3+nd4; - int nd = podptr->pod.nd; - - std::vector newcoeff(nd); - for (int j=0; j newcoeff(nCoeffAll); + for (int j=0; jme == 0) @@ -1475,13 +1739,7 @@ void FitPOD::error_analysis(const datastruct &data, double *coeff) int natom = data.num_atom[ci]; int nforce = dim*natom; - for (int j=nd1234; j<(nd1234+nd22+nd23+nd24+nd33+nd34+nd44); j++) - newcoeff[j] = coeff[j]/(natom); - - for (int j=(nd1234+nd22+nd23+nd24+nd33+nd34+nd44); jnprocs) == comm->me) { - energy = energyforce_calculation(force.data()+1, coeff, data, ci); + energy = energyforce_calculation_fastpod(force.data()+1, data, ci); // save energy 
and force into a binary file - force[0] = energy; std::string filename = "energyforce_config" + std::to_string(ci+1) + ".bin"; @@ -1817,3 +2074,117 @@ void FitPOD::triclinic_lattice_conversion(double *a, double *b, double *c, doubl b[0] = bx; b[1] = by; b[2] = 0.0; c[0] = cx; c[1] = cy; c[2] = cz; } + +// Function to calculate the squared Euclidean distance between two points in N-dimensional space +double FitPOD::squareDistance(const double *a, const double *b, int DIMENSIONS) { + double sum = 0.0; + for (int i = 0; i < DIMENSIONS; i++) { + sum += (a[i] - b[i]) * (a[i] - b[i]); + } + return sum; +} + +// Function to assign points to the nearest cluster +void FitPOD::assignPointsToClusters(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS) { + // Initialize clusterSizes to zero + for (int i = 0; i < NUM_CLUSTERS; i++) { + clusterSizes[i] = 0; + } + + for (int i = 0; i < NUM_POINTS; i++) { + double minDist = squareDistance(&points[i * DIMENSIONS], &centroids[0], DIMENSIONS); + int closestCluster = 0; + for (int j = 1; j < NUM_CLUSTERS; j++) { + double dist = squareDistance(&points[i * DIMENSIONS], &centroids[j * DIMENSIONS], DIMENSIONS); + if (dist < minDist) { + minDist = dist; + closestCluster = j; + } + } + assignments[i] = closestCluster; + clusterSizes[closestCluster]++; + } +} + +// Function to update centroids based on point assignments +void FitPOD::updateCentroids(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS) { + // Reset centroids for recalculation + for (int i = 0; i < NUM_CLUSTERS * DIMENSIONS; i++) { + centroids[i] = 0.0; + } + + // Accumulate sum of points in each cluster + for (int i = 0; i < NUM_POINTS; i++) { + int cluster = assignments[i]; + for (int j = 0; j < DIMENSIONS; j++) { + centroids[cluster * DIMENSIONS + j] += points[i * DIMENSIONS + j]; + } + } + + // Use MPI_Allreduce to sum up the local sums and cluster sizes
across all processes + MPI_Allreduce(MPI_IN_PLACE, centroids, NUM_CLUSTERS * DIMENSIONS, MPI_DOUBLE, MPI_SUM, world); + MPI_Allreduce(MPI_IN_PLACE, clusterSizes, NUM_CLUSTERS, MPI_INT, MPI_SUM, world); + + // Divide by number of points to get the mean (centroid) + for (int i = 0; i < NUM_CLUSTERS; i++) { + if (clusterSizes[i] != 0) { + for (int j = 0; j < DIMENSIONS; j++) { + centroids[i * DIMENSIONS + j] /= clusterSizes[i]; + } + } + } +} + +// Function for K-means clustering +void FitPOD::KmeansClustering(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS, int MAX_ITER) { + for (int iter = 0; iter < MAX_ITER; iter++) { + assignPointsToClusters(points, centroids, assignments, clusterSizes, NUM_POINTS, NUM_CLUSTERS, DIMENSIONS); + updateCentroids(points, centroids, assignments, clusterSizes, NUM_POINTS, NUM_CLUSTERS, DIMENSIONS); + } +} + +void FitPOD::savematrix2binfile(std::string filename, double *A, int nrows, int ncols) +{ + FILE *fp = fopen(filename.c_str(), "wb"); + double sz[2]; + sz[0] = (double) nrows; + sz[1] = (double) ncols; + fwrite( reinterpret_cast( sz ), sizeof(double) * (2), 1, fp); + fwrite( reinterpret_cast( A ), sizeof(double) * (nrows*ncols), 1, fp); + fclose(fp); +} + +void FitPOD::saveintmatrix2binfile(std::string filename, int *A, int nrows, int ncols) +{ + FILE *fp = fopen(filename.c_str(), "wb"); + int sz[2]; + sz[0] = nrows; + sz[1] = ncols; + fwrite( reinterpret_cast( sz ), sizeof(int) * (2), 1, fp); + fwrite( reinterpret_cast( A ), sizeof(int) * (nrows*ncols), 1, fp); + fclose(fp); +} + +void FitPOD::savedata2textfile(std::string filename, std::string text, double *A, int n, int m, int dim) +{ + if (comm->me == 0) { + int precision = 15; + FILE *fp = fopen(filename.c_str(), "w"); + if (dim==1) { + fmt::print(fp, text, n); + for (int i = 0; i < n; i++) + fmt::print(fp, "{:<10.{}f} \n", A[i], precision); + } + else if (dim==2) { + fmt::print(fp, text, n); + 
fmt::print(fp, "{} \n", m); + for (int j = 0; j < n; j++) { + for (int i = 0; i < m; i++) + fmt::print(fp, "{:<10.{}f} ", A[j + i*n], precision); + fmt::print(fp, " \n"); + } + } + fclose(fp); + } +} + diff --git a/src/ML-POD/fitpod_command.h b/src/ML-POD/fitpod_command.h index b3591302405..c6a2e167c50 100644 --- a/src/ML-POD/fitpod_command.h +++ b/src/ML-POD/fitpod_command.h @@ -11,6 +11,7 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ + #ifdef COMMAND_CLASS // clang-format off CommandStyle(fitpod,FitPOD); @@ -21,22 +22,25 @@ CommandStyle(fitpod,FitPOD); #define LMP_FITPOD_COMMAND_H #include "command.h" +#include namespace LAMMPS_NS { class FitPOD : public Command { - public: +public: FitPOD(LAMMPS *); void command(int, char **) override; - private: +private: struct datastruct { std::string file_format = "extxyz"; std::string file_extension = "xyz"; std::string data_path; - std::vector data_files; + std::vector data_files; // sorted file names + std::vector group_names; // sorted group names std::vector filenames; std::string filenametag = "pod"; + std::string group_weight_type = "global"; std::vector num_atom; std::vector num_atom_cumsum; @@ -48,12 +52,15 @@ class FitPOD : public Command { int num_atom_max; int num_config_sum; - double *lattice; - double *energy; - double *stress; - double *position; - double *force; - int *atomtype; + double *lattice=nullptr; + double *energy=nullptr; + double *stress=nullptr; + double *position=nullptr; + double *force=nullptr; + int *atomtype=nullptr; + // Group weights will have same size as energy. 
+ double *we=nullptr; + double *wf=nullptr; int training = 1; int normalizeenergy = 1; @@ -65,6 +72,9 @@ class FitPOD : public Command { int precision = 8; double fraction = 1.0; + std::unordered_map we_map; + std::unordered_map wf_map; + double fitting_weights[12] = {100.0, 1.0, 0.0, 1, 1, 0, 0, 1, 1, 1, 1, 1e-10}; void copydatainfo(datastruct &data) const @@ -84,42 +94,50 @@ class FitPOD : public Command { data.precision = precision; data.training = training; data.normalizeenergy = normalizeenergy; - for (int i = 0; i < 12; i++) data.fitting_weights[i] = fitting_weights[i]; + for (int i = 0; i < 12; i++) + data.fitting_weights[i] = fitting_weights[i]; + data.we_map = we_map; + data.wf_map = wf_map; } }; struct neighborstruct { - int *alist; - int *pairnum; - int *pairnum_cumsum; - int *pairlist; - double *y; - - int natom; - int nalist; - int natom_max; - int sze; - int sza; - int szy; - int szp; + int *alist=nullptr; + int *pairnum=nullptr; + int *pairnum_cumsum=nullptr; + int *pairlist=nullptr; + double *y=nullptr; + + //int natom; + //int nalist; + int natom_max = 0; + int sze = 0; + int sza = 0; + int szy = 0; + int szp = 0; }; struct descriptorstruct { - double *gd; // global descriptors - double *gdd; // derivatives of global descriptors and peratom descriptors - double *A; // least-square matrix for all descriptors - double *b; // least-square vector for all descriptors - double *c; // coefficents of descriptors - int *tmpint; - int szd; - int szi; + double *bd=nullptr; // base descriptors + double *pd=nullptr; // multi-environment descriptors (probabilities) + double *gd=nullptr; // global descriptors + double *gdd=nullptr; // derivatives of global descriptors and peratom descriptors + double *A=nullptr; // least-square matrix for all descriptors + double *b=nullptr; // least-square vector for all descriptors + double *c=nullptr; // coefficients of descriptors + int szd = 0; + int nCoeffAll = 0; // number of global descriptors + int nClusters = 0; //
number of environment clusters }; + int save_descriptors = 0; + int compute_descriptors = 0; datastruct traindata; datastruct testdata; + datastruct envdata; descriptorstruct desc; neighborstruct nb; - class MLPOD *podptr; + class EAPOD *fastpodptr; // functions for collecting/collating arrays @@ -144,18 +162,26 @@ class FitPOD : public Command { void matrix33_multiplication(double *xrot, double *Rmat, double *x, int natom); void matrix33_inverse(double *invA, double *A1, double *A2, double *A3); + double squareDistance(const double *a, const double *b, int DIMENSIONS); + void assignPointsToClusters(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSION); + void updateCentroids(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS); + void KmeansClustering(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS, int MAX_ITER); + + void savedata2textfile(std::string filename, std::string text, double *A, int n, int m, int dim); + void savematrix2binfile(std::string filename, double *A, int nrows, int ncols); + void saveintmatrix2binfile(std::string filename, int *A, int nrows, int ncols); + // functions for reading input files and fitting - int read_data_file(double *fitting_weights, std::string &file_format, std::string &file_extension, - std::string &test_path, std::string &training_path, std::string &filenametag, - const std::string &data_file); - void get_exyz_files(std::vector &, const std::string &, const std::string &); - int get_number_atom_exyz(std::vector &num_atom, int &num_atom_sum, std::string file); - int get_number_atoms(std::vector &num_atom, std::vector &num_atom_sum, - std::vector &num_config, std::vector training_files); - void read_exyz_file(double *lattice, double *stress, double *energy, double *pos, double *forces, - int *atomtype, std::string file, 
std::vector species); - void get_data(datastruct &data, const std::vector& species); + int read_data_file(double *fitting_weights, std::string &file_format, std::string &file_extension, std::string &env_path, + std::string &test_path, std::string &training_path, std::string &filenametag, const std::string &data_file, std::string &group_weight_type, + std::unordered_map &we_map, std::unordered_map &wf_map); + void get_exyz_files(std::vector &, std::vector &, const std::string &, const std::string &); + int get_number_atom_exyz(std::vector& num_atom, int& num_atom_sum, std::string file); + int get_number_atoms(std::vector& num_atom, std::vector &num_atom_sum, std::vector& num_config, std::vector training_files); + void read_exyz_file(double *lattice, double *stress, double *energy, double *we, double *wf, double *pos, double *forces, + int *atomtype, std::string file, std::vector species, double we_group, double wf_group); + void get_data(datastruct &data, const std::vector &species); std::vector linspace(int start_in, int end_in, int num_in); std::vector shuffle(int start_in, int end_in, int num_in); std::vector select(int n, double fraction, int randomize); @@ -166,17 +192,20 @@ class FitPOD : public Command { int podneighborlist(int *neighlist, int *numneigh, double *r, double rcutsq, int nx, int N, int dim); int podfullneighborlist(double *y, int *alist, int *neighlist, int *numneigh, int *numneighsum, - double *x, double *a1, double *a2, double *a3, double rcut, int *pbc, - int nx); - void allocate_memory(const datastruct &data); - void linear_descriptors(const datastruct &data, int ci); - void quadratic_descriptors(const datastruct &data, int ci); - void cubic_descriptors(const datastruct &data, int ci); + double *x, double *a1, double *a2, double *a3, double rcut, int *pbc, int nx); + void estimate_memory_neighborstruct(const datastruct &data, int *pbc, double rcut, int nelements); + void allocate_memory_neighborstruct(); + void 
allocate_memory_descriptorstruct(int nd); + void estimate_memory_fastpod(const datastruct &data); + void local_descriptors_fastpod(const datastruct &data, int ci); + void base_descriptors_fastpod(const datastruct &data, int ci); void least_squares_matrix(const datastruct &data, int ci); void least_squares_fit(const datastruct &data); + void descriptors_calculation(const datastruct &data); + void environment_cluster_calculation(const datastruct &data); void print_analysis(const datastruct &data, double *outarray, double *errors); - void error_analysis(const datastruct &data, double *coeff); - double energyforce_calculation(double *force, double *coeff, const datastruct &data, int ci); + void error_analysis(const datastruct &data, double *coeff); + double energyforce_calculation_fastpod(double *force, const datastruct &data, int ci); void energyforce_calculation(const datastruct &data, double *coeff); }; diff --git a/src/ML-POD/mlpod.cpp b/src/ML-POD/mlpod.cpp deleted file mode 100644 index 088b9abadc1..00000000000 --- a/src/ML-POD/mlpod.cpp +++ /dev/null @@ -1,3714 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/ Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Ngoc Cuong Nguyen (MIT) and Andrew Rohskopf (SNL) -------------------------------------------------------------------------- */ - -// POD header file - -#include "mlpod.h" - -// LAMMPS header files - -#include "comm.h" -#include "error.h" -#include "math_const.h" -#include "math_special.h" -#include "memory.h" -#include "tokenizer.h" - -#include - -using namespace LAMMPS_NS; -using MathConst::MY_PI; -using MathSpecial::cube; -using MathSpecial::powint; - -static constexpr int MAXLINE = 1024; - -MLPOD::podstruct::podstruct() : - twobody{4, 8, 6}, threebody{4, 8, 5, 4}, fourbody{0, 0, 0, 0}, pbc(nullptr), - elemindex(nullptr), quadratic22{0, 0}, quadratic23{0, 0}, quadratic24{0, 0}, quadratic33{0, 0}, - quadratic34{0, 0}, quadratic44{0, 0}, cubic234{0, 0, 0}, cubic333{0, 0, 0}, cubic444{0, 0, 0}, - besselparams(nullptr), coeff(nullptr), Phi2(nullptr), Phi3(nullptr), Phi4(nullptr), - Lambda2(nullptr), Lambda3(nullptr), Lambda4(nullptr), - snapelementradius{0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}, - snapelementweight{1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0} -{ - snaptwojmax = 0; - snapchemflag = 0; - snaprfac0 = 0.99363; -} - -MLPOD::podstruct::~podstruct() -{ - delete[] pbc; - delete[] elemindex; - delete[] besselparams; -} - -MLPOD::MLPOD(LAMMPS *_lmp, const std::string &pod_file, const std::string &coeff_file) : - Pointers(_lmp) -{ - // read pod input file to podstruct - - read_pod(pod_file); - - // read pod coefficient file to podstruct - - if (coeff_file != "") read_coeff_file(coeff_file); - - if (pod.snaptwojmax > 0) InitSnap(); -} - -MLPOD::~MLPOD() -{ - // deallocate pod arrays - - memory->destroy(pod.coeff); - if (pod.ns2 > 0) { - memory->destroy(pod.Phi2); - memory->destroy(pod.Lambda2); - } - if (pod.ns3 > 0) { - memory->destroy(pod.Phi3); - 
memory->destroy(pod.Lambda3); - } - if (pod.ns4 > 0) { - memory->destroy(pod.Phi4); - memory->destroy(pod.Lambda4); - } - - // deallocate snap arrays if used - - if (pod.snaptwojmax > 0) { - memory->destroy(sna.map); - memory->destroy(sna.idx_max); - memory->destroy(sna.idxz); - memory->destroy(sna.idxb); - memory->destroy(sna.idxb_block); - memory->destroy(sna.idxu_block); - memory->destroy(sna.idxz_block); - memory->destroy(sna.idxcg_block); - memory->destroy(sna.rootpqarray); - memory->destroy(sna.cglist); - memory->destroy(sna.fac); - memory->destroy(sna.bzero); - memory->destroy(sna.wjelem); - memory->destroy(sna.radelem); - memory->destroy(sna.rcutsq); - } -} - -// clang-format off - -void MLPOD::podMatMul(double *c, double *a, double *b, int r1, int c1, int c2) -{ - int i, j, k; - - for(j = 0; j < c2; j++) - for(i = 0; i < r1; i++) - c[i + r1*j] = 0.0; - - for(j = 0; j < c2; j++) - for(i = 0; i < r1; i++) - for(k = 0; k < c1; k++) - c[i + r1*j] += a[i + r1*k] * b[k + c1*j]; -} - -void MLPOD::podArrayFill(int* output, int start, int length) -{ - for (int j = 0; j < length; ++j) - output[j] = start + j; -} - -void MLPOD::podArraySetValue(double *y, double a, int n) -{ - for (int i=0; icreate(xij, N, "pod:xij"); - memory->create(S, N*ns, "pod:S"); - memory->create(Q, N*ns, "pod:Q"); - memory->create(A, ns*ns, "pod:A"); - memory->create(b, ns, "pod:ns"); - - for (int i=0; i= max(1,3*N-1) - int info = 1; // = 0: successful exit - std::vector work(lwork); - DSYEV(&chv, &chu, &ns, A, &ns, b, work.data(), &lwork, &info); - - // order eigenvalues and eigenvectors from largest to smallest - - for (int j=0; jdestroy(xij); - memory->destroy(S); - memory->destroy(A); - memory->destroy(b); - memory->destroy(Q); -} - -void MLPOD::read_pod(const std::string &pod_file) -{ - pod.nbesselpars = 3; - delete[] pod.besselparams; - pod.besselparams = new double[3]; - delete[] pod.pbc; - pod.pbc = new int[3]; - - pod.besselparams[0] = 0.0; - pod.besselparams[1] = 2.0; - 
pod.besselparams[2] = 4.0; - - pod.nelements = 0; - pod.onebody = 1; - pod.besseldegree = 3; - pod.inversedegree = 6; - pod.quadraticpod = 0; - pod.rin = 0.5; - pod.rcut = 4.6; - - pod.snaptwojmax = 0; - pod.snapchemflag = 0; - pod.snaprfac0 = 0.99363; - - sna.twojmax = 0; - sna.ntypes = 0; - - std::string podfilename = pod_file; - FILE *fppod; - if (comm->me == 0) { - - fppod = utils::open_potential(podfilename,lmp,nullptr); - if (fppod == nullptr) - error->one(FLERR,"Cannot open POD coefficient file {}: ", - podfilename, utils::getsyserror()); - } - - // loop through lines of POD file and parse keywords - - char line[MAXLINE] = {'\0'}; - char *ptr; - int eof = 0; - while (true) { - if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fppod); - if (ptr == nullptr) { - eof = 1; - fclose(fppod); - } - } - MPI_Bcast(&eof,1,MPI_INT,0,world); - if (eof) break; - MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); - - // words = ptrs to all words in line - // strip single and double quotes from words - - std::vector words; - try { - words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector(); - } catch (TokenizerException &) { - // ignore - } - - if (words.size() == 0) continue; - - const auto &keywd = words[0]; - - if (keywd == "species") { - pod.nelements = words.size()-1; - for (int ielem = 1; ielem <= pod.nelements; ielem++) { - pod.species.push_back(words[ielem]); - } - } - - if (keywd == "pbc") { - if (words.size() != 4) - error->one(FLERR,"Improper POD file.", utils::getsyserror()); - pod.pbc[0] = utils::inumeric(FLERR,words[1],false,lmp); - pod.pbc[1] = utils::inumeric(FLERR,words[2],false,lmp); - pod.pbc[2] = utils::inumeric(FLERR,words[3],false,lmp); - } - - if ((keywd != "#") && (keywd != "species") && (keywd != "pbc")) { - - if (words.size() != 2) - error->one(FLERR,"Improper POD file.", utils::getsyserror()); - - if (keywd == "rin") pod.rin = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "rcut") pod.rcut = utils::numeric(FLERR,words[1],false,lmp); - 
if (keywd == "bessel_scaling_parameter1") - pod.besselparams[0] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "bessel_scaling_parameter2") - pod.besselparams[1] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "bessel_scaling_parameter3") - pod.besselparams[2] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "bessel_polynomial_degree") - pod.besseldegree = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "inverse_polynomial_degree") - pod.inversedegree = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "onebody") pod.onebody = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "twobody_bessel_polynomial_degree") - pod.twobody[0] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "twobody_inverse_polynomial_degree") - pod.twobody[1] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "twobody_number_radial_basis_functions") - pod.twobody[2] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "threebody_bessel_polynomial_degree") - pod.threebody[0] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "threebody_inverse_polynomial_degree") - pod.threebody[1] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "threebody_number_radial_basis_functions") - pod.threebody[2] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "threebody_number_angular_basis_functions") - pod.threebody[3] = utils::inumeric(FLERR,words[1],false,lmp)-1; - if (keywd == "fourbody_bessel_polynomial_degree") - pod.fourbody[0] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "fourbody_inverse_polynomial_degree") - pod.fourbody[1] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "fourbody_number_radial_basis_functions") - pod.fourbody[2] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "fourbody_snap_twojmax") - pod.snaptwojmax = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "fourbody_snap_chemflag") - pod.snapchemflag = 
utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "fourbody_snap_rfac0") - pod.snaprfac0 = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "fourbody_snap_neighbor_weight1") - pod.snapelementweight[0] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "fourbody_snap_neighbor_weight2") - pod.snapelementweight[1] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "fourbody_snap_neighbor_weight3") - pod.snapelementweight[2] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "fourbody_snap_neighbor_weight4") - pod.snapelementweight[3] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "fourbody_snap_neighbor_weight5") - pod.snapelementweight[4] = utils::numeric(FLERR,words[1],false,lmp); - if (keywd == "quadratic_pod_potential") - pod.quadraticpod = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "quadratic22_number_twobody_basis_functions") - pod.quadratic22[0] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "quadratic22_number_twobody_basis_functions") - pod.quadratic22[1] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "quadratic23_number_twobody_basis_functions") - pod.quadratic23[0] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "quadratic23_number_threebody_basis_functions") - pod.quadratic23[1] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "quadratic24_number_twobody_basis_functions") - pod.quadratic24[0] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "quadratic24_number_fourbody_basis_functions") - pod.quadratic24[1] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "quadratic33_number_threebody_basis_functions") - pod.quadratic33[0] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "quadratic33_number_threebody_basis_functions") - pod.quadratic33[1] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "quadratic34_number_threebody_basis_functions") - pod.quadratic34[0] = utils::inumeric(FLERR,words[1],false,lmp); 
- if (keywd == "quadratic34_number_fourbody_basis_functions") - pod.quadratic34[1] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "quadratic44_number_fourbody_basis_functions") - pod.quadratic44[0] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "quadratic44_number_fourbody_basis_functions") - pod.quadratic44[1] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "cubic234_number_twobody_basis_functions") - pod.cubic234[0] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "cubic234_number_threebody_basis_functions") - pod.cubic234[1] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "cubic234_number_fourbody_basis_functions") - pod.cubic234[2] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "cubic333_number_threebody_basis_functions") - pod.cubic333[0] = utils::inumeric(FLERR,words[1],false,lmp); - if (keywd == "cubic444_number_fourbody_basis_functions") - pod.cubic444[0] = utils::inumeric(FLERR,words[1],false,lmp); - } - } - - pod.twobody[0] = pod.besseldegree; - pod.twobody[1] = pod.inversedegree; - pod.threebody[0] = pod.besseldegree; - pod.threebody[1] = pod.inversedegree; - - // number of snapshots - - pod.ns2 = pod.nbesselpars*pod.twobody[0] + pod.twobody[1]; - pod.ns3 = pod.nbesselpars*pod.threebody[0] + pod.threebody[1]; - pod.ns4 = pod.nbesselpars*pod.fourbody[0] + pod.fourbody[1]; - - for (int i = 0; i < pod.nbesselpars; i++) - if (fabs(pod.besselparams[i]) < 1e-3) pod.besselparams[i] = 1e-3; - - // allocate memory for eigenvectors and eigenvalues - - if (pod.ns2 > 0) { - memory->create(pod.Phi2, pod.ns2*pod.ns2, "pod:pod_Phi2"); - memory->create(pod.Lambda2, pod.ns2, "pod:pod_Lambda2"); - } - if (pod.ns3 > 0) { - memory->create(pod.Phi3, pod.ns3*pod.ns3, "pod:pod_Phi3"); - memory->create(pod.Lambda3, pod.ns3, "pod:pod_Lambda3"); - } - if (pod.ns4 > 0) { - memory->create(pod.Phi4, pod.ns4*pod.ns4, "pod:pod_Phi4"); - memory->create(pod.Lambda4, pod.ns4, "pod:pod_Lambda4"); - } - - if (pod.ns2 
> 0) { - podeigenvaluedecomposition(pod.Phi2, pod.Lambda2, pod.besselparams, pod.rin, pod.rcut, - pod.twobody[0], pod.twobody[1], pod.nbesselpars, 2000); - -// /* Print eigenvalues */ -// print_matrix( "Eigenvalues for two-body potential:", 1, pod.ns2, pod.Lambda2, 1 ); -// -// /* Print eigenvectors */ -// print_matrix( "Eigenvectors for two-body potential:", pod.ns2, pod.ns2, pod.Phi2, pod.ns2); - } - if (pod.ns3 > 0) { - podeigenvaluedecomposition(pod.Phi3, pod.Lambda3, pod.besselparams, pod.rin, pod.rcut, - pod.threebody[0], pod.threebody[1], pod.nbesselpars, 2000); - } - if (pod.ns4 > 0) { - podeigenvaluedecomposition(pod.Phi4, pod.Lambda4, pod.besselparams, pod.rin, pod.rcut, - pod.fourbody[0], pod.fourbody[1], pod.nbesselpars, 2000); - } - - // number of chemical combinations - - pod.nc2 = pod.nelements*(pod.nelements+1)/2; - pod.nc3 = pod.nelements*pod.nelements*(pod.nelements+1)/2; - pod.nc4 = pod.snapchemflag ? pod.nelements*pod.nelements*pod.nelements*pod.nelements : pod.nelements; - - // number of basis functions and descriptors for one-body potential - - if (pod.onebody==1) { - pod.nbf1 = 1; - pod.nd1 = pod.nelements; - } else { - pod.nbf1 = 0; - pod.nd1 = 0; - } - - // number of basis functions and descriptors for two-body potential - - pod.nbf2 = pod.twobody[2]; - pod.nd2 = pod.nbf2*pod.nc2; - - // number of basis functions and descriptors for three-body potential - - pod.nrbf3 = pod.threebody[2]; - pod.nabf3 = pod.threebody[3]; - pod.nbf3 = pod.nrbf3*(1 + pod.nabf3); - pod.nd3 = pod.nbf3*pod.nc3; - - // number of basis functions and descriptors for four-body potential - - int twojmax = pod.snaptwojmax; - int idxb_count = 0; - if (twojmax > 0) { - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) idxb_count++; - } - pod.nbf4 = idxb_count; - pod.nd4 = pod.nbf4*pod.nc4; - - if (pod.quadraticpod==1) { - pod.quadratic23[0] = pod.nbf2; - pod.quadratic23[1] = 
pod.nbf3; - } - - pod.quadratic22[0] = MIN(pod.quadratic22[0], pod.nbf2); - pod.quadratic22[1] = MIN(pod.quadratic22[1], pod.nbf2); - pod.quadratic23[0] = MIN(pod.quadratic23[0], pod.nbf2); - pod.quadratic23[1] = MIN(pod.quadratic23[1], pod.nbf3); - pod.quadratic24[0] = MIN(pod.quadratic24[0], pod.nbf2); - pod.quadratic24[1] = MIN(pod.quadratic24[1], pod.nbf4); - pod.quadratic33[0] = MIN(pod.quadratic33[0], pod.nbf3); - pod.quadratic33[1] = MIN(pod.quadratic33[1], pod.nbf3); - pod.quadratic34[0] = MIN(pod.quadratic34[0], pod.nbf3); - pod.quadratic34[1] = MIN(pod.quadratic34[1], pod.nbf4); - pod.quadratic44[0] = MIN(pod.quadratic44[0], pod.nbf4); - pod.quadratic44[1] = MIN(pod.quadratic44[1], pod.nbf4); - - pod.cubic234[0] = MIN(pod.cubic234[0], pod.nbf2); - pod.cubic234[1] = MIN(pod.cubic234[1], pod.nbf3); - pod.cubic234[2] = MIN(pod.cubic234[2], pod.nbf4); - pod.cubic333[0] = MIN(pod.cubic333[0], pod.nbf3); - pod.cubic333[1] = MIN(pod.cubic333[0], pod.nbf3); - pod.cubic333[2] = MIN(pod.cubic333[0], pod.nbf3); - pod.cubic444[0] = MIN(pod.cubic444[0], pod.nbf4); - pod.cubic444[1] = MIN(pod.cubic444[0], pod.nbf4); - pod.cubic444[2] = MIN(pod.cubic444[0], pod.nbf4); - - // number of descriptors for quadratic POD potentials - - pod.nd22 = pod.quadratic22[0]*pod.quadratic22[1]*pod.nc2*pod.nc2; - pod.nd23 = pod.quadratic23[0]*pod.quadratic23[1]*pod.nc2*pod.nc3; - pod.nd24 = pod.quadratic24[0]*pod.quadratic24[1]*pod.nc2*pod.nc4; - pod.nd33 = pod.quadratic33[0]*pod.quadratic33[1]*pod.nc3*pod.nc3; - pod.nd34 = pod.quadratic34[0]*pod.quadratic34[1]*pod.nc3*pod.nc4; - pod.nd44 = pod.quadratic44[0]*pod.quadratic44[1]*pod.nc4*pod.nc4; - - int nq; - nq = pod.quadratic22[0]*pod.nc2; pod.nd22 = nq*(nq+1)/2; - nq = pod.quadratic33[0]*pod.nc3; pod.nd33 = nq*(nq+1)/2; - nq = pod.quadratic44[0]*pod.nc4; pod.nd44 = nq*(nq+1)/2; - - // number of descriptors for cubic POD potentials - - pod.nd234 = pod.cubic234[0]*pod.cubic234[1]*pod.cubic234[2]*pod.nc2*pod.nc3*pod.nc4; - nq = 
pod.cubic333[0]*pod.nc3; pod.nd333 = nq*(nq+1)*(nq+2)/6; - nq = pod.cubic444[0]*pod.nc4; pod.nd444 = nq*(nq+1)*(nq+2)/6; - - // total number of descriptors for all POD potentials - - pod.nd = pod.nd1 + pod.nd2 + pod.nd3 + pod.nd4 + pod.nd22 + pod.nd23 + pod.nd24 + - pod.nd33 + pod.nd34 + pod.nd44 + pod.nd234 + pod.nd333 + pod.nd444; - pod.nd1234 = pod.nd1 + pod.nd2 + pod.nd3 + pod.nd4; - - int nelements = pod.nelements; - delete[] pod.elemindex; - pod.elemindex = new int[nelements*nelements]; - - int k = 1; - for (int i=0; i < nelements; i++) { - for (int j=i; j < nelements; j++) { - pod.elemindex[i + nelements*j] = k; - pod.elemindex[j + nelements*i] = k; - k += 1; - } - } - - if (comm->me == 0) { - utils::logmesg(lmp, "**************** Begin of POD Potentials ****************\n"); - utils::logmesg(lmp, "species: "); - for (int i=0; ime == 0) { - - fpcoeff = utils::open_potential(coefffilename,lmp,nullptr); - if (fpcoeff == nullptr) - error->one(FLERR,"Cannot open POD coefficient file {}: ", - coefffilename, utils::getsyserror()); - } - - // check format for first line of file - - char line[MAXLINE] = {'\0'}; - char *ptr; - int eof = 0; - int nwords = 0; - while (nwords == 0) { - if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fpcoeff); - if (ptr == nullptr) { - eof = 1; - fclose(fpcoeff); - } - } - MPI_Bcast(&eof,1,MPI_INT,0,world); - if (eof) break; - MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); - - // strip comment, skip line if blank - - nwords = utils::count_words(utils::trim_comment(line)); - } - - if (nwords != 2) - error->all(FLERR,"Incorrect format in POD coefficient file"); - - // strip single and double quotes from words - - int ncoeffall; - std::string tmp_str; - try { - ValueTokenizer words(utils::trim_comment(line),"\"' \t\n\r\f"); - tmp_str = words.next_string(); - ncoeffall = words.next_int(); - } catch (TokenizerException &e) { - error->all(FLERR,"Incorrect format in POD coefficient file: {}", e.what()); - } - - // loop over single block of 
coefficients and insert values in pod.coeff - - memory->create(pod.coeff, ncoeffall, "pod:pod_coeff"); - - for (int icoeff = 0; icoeff < ncoeffall; icoeff++) { - if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fpcoeff); - if (ptr == nullptr) { - eof = 1; - fclose(fpcoeff); - } - } - - MPI_Bcast(&eof,1,MPI_INT,0,world); - if (eof) - error->all(FLERR,"Incorrect format in POD coefficient file"); - MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world); - - try { - ValueTokenizer coeff(utils::trim_comment(line)); - if (coeff.count() != 1) - error->all(FLERR,"Incorrect format in POD coefficient file"); - - pod.coeff[icoeff] = coeff.next_double(); - } catch (TokenizerException &e) { - error->all(FLERR,"Incorrect format in POD coefficient file: {}", e.what()); - } - } - if (comm->me == 0) { - if (!eof) fclose(fpcoeff); - } -} - -/*********************************************************************************************************/ - -void MLPOD::linear_descriptors(double *gd, double *efatom, double *y, double *tmpmem, - int *atomtype, int *alist, int *pairlist, int * /*pairnum*/, - int *pairnumsum, int *tmpint, int natom, int Nij) -{ - int dim = 3; - int nelements = pod.nelements; - int nbesselpars = pod.nbesselpars; - int nrbf2 = pod.nbf2; - int nabf3 = pod.nabf3; - int nrbf3 = pod.nrbf3; - int nd1 = pod.nd1; - int nd2 = pod.nd2; - int nd3 = pod.nd3; - int nd4 = pod.nd4; - int nd1234 = nd1+nd2+nd3+nd4; - int *pdegree2 = pod.twobody; - int *elemindex = pod.elemindex; - double rin = pod.rin; - double rcut = pod.rcut; - double *Phi2 = pod.Phi2; - double *besselparams = pod.besselparams; - - double *fatom1 = &efatom[0]; - double *fatom2 = &efatom[dim*natom*(nd1)]; - double *fatom3 = &efatom[dim*natom*(nd1+nd2)]; - double *fatom4 = &efatom[dim*natom*(nd1+nd2+nd3)]; - double *eatom1 = &efatom[dim*natom*(nd1+nd2+nd3+nd4)]; - double *eatom2 = &efatom[dim*natom*(nd1+nd2+nd3+nd4)+natom*nd1]; - double *eatom3 = &efatom[dim*natom*(nd1+nd2+nd3+nd4)+natom*(nd1+nd2)]; - double *eatom4 = 
&efatom[dim*natom*(nd1+nd2+nd3+nd4)+natom*(nd1+nd2+nd3)]; - - podArraySetValue(fatom1, 0.0, (1+dim)*natom*(nd1+nd2+nd3+nd4)); - - double *rij = &tmpmem[0]; // 3*Nij - int *ai = &tmpint[0]; // Nij - int *aj = &tmpint[Nij]; // Nij - int *ti = &tmpint[2*Nij]; // Nij - int *tj = &tmpint[3*Nij]; // Nij - podNeighPairs(rij, y, ai, aj, ti, tj, pairlist, pairnumsum, atomtype, - alist, natom, dim); - - // peratom descriptors for one-body, two-body, and three-body linear potentials - - poddesc(eatom1, fatom1, eatom2, fatom2, eatom3, fatom3, rij, Phi2, besselparams, - &tmpmem[3*Nij], rin, rcut, pairnumsum, atomtype, ai, aj, ti, tj, elemindex, pdegree2, - nbesselpars, nrbf2, nrbf3, nabf3, nelements, Nij, natom); - - if (pod.snaptwojmax > 0) - snapdesc(eatom4, fatom4, rij, &tmpmem[3*Nij], atomtype, ai, aj, ti, tj, natom, Nij); - - // global descriptors for one-body, two-body, three-body, and four-bodt linear potentials - - podArraySetValue(tmpmem, 1.0, natom); - - char cht = 'T'; - double one = 1.0, zero = 0.0; - int inc1 = 1; - DGEMV(&cht, &natom, &nd1234, &one, eatom1, &natom, tmpmem, &inc1, &zero, gd, &inc1); -} - -void MLPOD::quadratic_descriptors(double* d23, double *dd23, double* d2, double *d3, double* dd2, double *dd3, - int M2, int M3, int N) -{ - for (int m3 = 0; m3 0) energy += quadratic_coefficients(c2, d2, coeff22, pod.quadratic22, nc2); - - // calculate energy for quadratic23 potential - - if (nd23 > 0) energy += quadratic_coefficients(c2, c3, d2, d3, coeff23, pod.quadratic23, nc2, nc3); - - // calculate energy for quadratic24 potential - - if (nd24 > 0) energy += quadratic_coefficients(c2, c4, d2, d4, coeff24, pod.quadratic24, nc2, nc4); - - // calculate energy for quadratic33 potential - - if (nd33 > 0) energy += quadratic_coefficients(c3, d3, coeff33, pod.quadratic33, nc3); - - // calculate energy for quadratic34 potential - - if (nd34 > 0) energy += quadratic_coefficients(c3, c4, d3, d4, coeff34, pod.quadratic34, nc3, nc4); - - // calculate energy for 
quadratic44 potential - - if (nd44 > 0) energy += quadratic_coefficients(c4, d4, coeff44, pod.quadratic44, nc4); - - // calculate energy for cubic234 potential - - if (nd234 > 0) energy += cubic_coefficients(c2, c3, c4, d2, d3, d4, coeff234, pod.cubic234, nc2, nc3, nc4); - - // calculate energy for cubic333 potential - - if (nd333 > 0) energy += cubic_coefficients(c3, d3, coeff333, pod.cubic333, nc3); - - // calculate energy for cubic444 potential - - if (nd444 > 0) energy += cubic_coefficients(c4, d4, coeff444, pod.cubic444, nc4); - - // calculate effective POD coefficients - - for (int i=0; i< nd1234; i++) c1[i] += coeff[i]; - - // calculate force = gdd * c1 - - char chn = 'N'; - double one = 1.0, zero = 0.0; - int inc1 = 1; - DGEMV(&chn, &nforce, &nd1234, &one, gdd, &nforce, c1, &inc1, &zero, force, &inc1); - - return energy; -} - -double MLPOD::energyforce_calculation(double *force, double *gd, double *gdd, double *coeff, double *y, - int *atomtype, int *alist, int *pairlist, int *pairnum, int *pairnumsum, int *tmpint, int natom, int Nij) -{ - int dim = 3; - int nd1234 = pod.nd1+pod.nd2+pod.nd3+pod.nd4; - double *tmpmem = &gdd[dim*natom*nd1234+natom*nd1234]; - - // calculate POD and SNAP descriptors and their derivatives - - linear_descriptors(gd, gdd, y, tmpmem, atomtype, alist, - pairlist, pairnum, pairnumsum, tmpint, natom, Nij); - - // calculate energy and force - - double energy = 0.0; - energy = calculate_energyforce(force, gd, gdd, coeff, &gdd[dim*natom*nd1234], natom); - - return energy; -} - -void MLPOD::podNeighPairs(double *xij, double *x, int *ai, int *aj, int *ti, int *tj, - int *pairlist, int *pairnumsum, int *atomtype, int *alist, int inum, int dim) -{ - for (int ii=0; ii j) - ik = lk + s; - k = aj[ik]; // atom k - typek = tj[ik] - 1; - xik1 = yij[0+dim*ik]; // xk - xi - xik2 = yij[1+dim*ik]; // xk - xi - xik3 = yij[2+dim*ik]; // xk - xi - riksq = xik1*xik1 + xik2*xik2 + xik3*xik3; - rik = sqrt(riksq); - - xdot = xij1*xik1 + xij2*xik2 + 
xij3*xik3; - costhe = xdot/(rij*rik); - costhe = costhe > 1.0 ? 1.0 : costhe; - costhe = costhe < -1.0 ? -1.0 : costhe; - xdot = costhe*(rij*rik); - - sinthe = sqrt(1.0 - costhe*costhe); - sinthe = sinthe > 1e-12 ? sinthe : 1e-12; - theta = acos(costhe); - dtheta = -1.0/sinthe; - - tm1 = 1.0/(rij*rijsq*rik); - tm2 = 1.0/(rij*riksq*rik); - dct1 = (xik1*rijsq - xij1*xdot)*tm1; - dct2 = (xik2*rijsq - xij2*xdot)*tm1; - dct3 = (xik3*rijsq - xij3*xdot)*tm1; - dct4 = (xij1*riksq - xik1*xdot)*tm2; - dct5 = (xij2*riksq - xik2*xdot)*tm2; - dct6 = (xij3*riksq - xik3*xdot)*tm2; - - for (int p=0; p = j1) idxb_count++; - - int idxb_max = idxb_count; - idx_max[2] = idxb_max; - - idxb_count = 0; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) { - idxb[idxb_count*3 + 0] = j1; - idxb[idxb_count*3 + 1] = j2; - idxb[idxb_count*3 + 2] = j; - idxb_count++; - } - - idxb_count = 0; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { - if (j >= j1) { - idxb_block[j + j2*jdim + j1*jdim*jdim] = idxb_count; - idxb_count++; - } - } - - // index list for zlist - - int idxz_count = 0; - - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) - for (int mb = 0; 2*mb <= j; mb++) - for (int ma = 0; ma <= j; ma++) - idxz_count++; - - int idxz_max = idxz_count; - idx_max[3] = idxz_max; - - idxz_count = 0; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { - idxz_block[j + j2*jdim + j1*jdim*jdim] = idxz_count; - - for (int mb = 0; 2*mb <= j; mb++) - for (int ma = 0; ma <= j; ma++) { - - idxz[idxz_count*10 + 0] = j1; - idxz[idxz_count*10 + 1] = j2; - idxz[idxz_count*10 + 2] = j; - idxz[idxz_count*10 + 3] = MAX(0, (2 * ma - j - j2 + j1) / 2); - idxz[idxz_count*10 + 4] = 
(2 * ma - j - (2 * idxz[idxz_count*10 + 3] - j1) + j2) / 2; - idxz[idxz_count*10 + 5] = MIN(j1, (2 * ma - j + j2 + j1) / 2) - idxz[idxz_count*10 + 3] + 1; - idxz[idxz_count*10 + 6] = MAX(0, (2 * mb - j - j2 + j1) / 2); - idxz[idxz_count*10 + 7] = (2 * mb - j - (2 * idxz[idxz_count*10 + 6] - j1) + j2) / 2; - idxz[idxz_count*10 + 8] = MIN(j1, (2 * mb - j + j2 + j1) / 2) - idxz[idxz_count*10 + 6] + 1; - - const int jju = idxu_block[j] + (j+1)*mb + ma; - idxz[idxz_count*10 + 9] = jju; - - idxz_count++; - } - } -}; - -void snapInitRootpqArray(double *rootpqarray, int twojmax) -{ - int jdim = twojmax + 1; - for (int p = 1; p <= twojmax; p++) - for (int q = 1; q <= twojmax; q++) - rootpqarray[p*jdim + q] = sqrt(((double) p)/q); -}; - -double snapDeltacg(double *factorial, int j1, int j2, int j) -{ - double sfaccg = factorial[(j1 + j2 + j) / 2 + 1]; - return sqrt(factorial[(j1 + j2 - j) / 2] * - factorial[(j1 - j2 + j) / 2] * - factorial[(-j1 + j2 + j) / 2] / sfaccg); -}; - -void snapInitClebschGordan(double *cglist, double *factorial, int twojmax) -{ - double sum,dcg,sfaccg; - int m, aa2, bb2, cc2; - int ifac; - - int idxcg_count = 0; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { - for (int m1 = 0; m1 <= j1; m1++) { - aa2 = 2 * m1 - j1; - - for (int m2 = 0; m2 <= j2; m2++) { - - bb2 = 2 * m2 - j2; - m = (aa2 + bb2 + j) / 2; - - if(m < 0 || m > j) { - cglist[idxcg_count] = 0.0; - idxcg_count++; - continue; - } - - sum = 0.0; - - for (int z = MAX(0, MAX(-(j - j2 + aa2) - / 2, -(j - j1 - bb2) / 2)); - z <= MIN((j1 + j2 - j) / 2, - MIN((j1 - aa2) / 2, (j2 + bb2) / 2)); - z++) { - ifac = z % 2 ? 
-1 : 1; - sum += ifac / - (factorial[z] * - factorial[(j1 + j2 - j) / 2 - z] * - factorial[(j1 - aa2) / 2 - z] * - factorial[(j2 + bb2) / 2 - z] * - factorial[(j - j2 + aa2) / 2 + z] * - factorial[(j - j1 - bb2) / 2 + z]); - } - - cc2 = 2 * m - j; - dcg = snapDeltacg(factorial, j1, j2, j); - sfaccg = sqrt(factorial[(j1 + aa2) / 2] * - factorial[(j1 - aa2) / 2] * - factorial[(j2 + bb2) / 2] * - factorial[(j2 - bb2) / 2] * - factorial[(j + cc2) / 2] * - factorial[(j - cc2) / 2] * - (j + 1)); - - cglist[idxcg_count] = sum * dcg * sfaccg; - idxcg_count++; - } - } - } -} - -void snapInitSna(double *rootpqarray, double *cglist, double *factorial, int *idx_max, int *idxz, - int *idxz_block, int *idxb, int *idxb_block, int *idxu_block, int *idxcg_block, int twojmax) -{ - snapBuildIndexList(idx_max, idxz, idxz_block, idxb, - idxb_block, idxu_block, idxcg_block, twojmax); - - snapInitRootpqArray(rootpqarray, twojmax); - snapInitClebschGordan(cglist, factorial, twojmax); -} - -void MLPOD::snapSetup(int twojmax, int ntypes) -{ - sna.twojmax = twojmax; - sna.ntypes = ntypes; - - int jdim = twojmax + 1; - int jdimpq = twojmax + 2; - - memory->create(sna.map, ntypes+1, "pod:sna_map"); - memory->create(sna.idxcg_block, jdim*jdim*jdim, "pod:sna_idxcg_block"); - memory->create(sna.idxz_block, jdim*jdim*jdim, "pod:sna_idxz_block"); - memory->create(sna.idxb_block, jdim*jdim*jdim, "pod:sna_idxb_block"); - memory->create(sna.idxu_block, jdim, "pod:sna_idxu_block"); - memory->create(sna.idx_max, 5, "pod:sna_idx_max"); - - int idxb_count = 0; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) idxb_count++; - - int idxz_count = 0; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) - for (int mb = 0; 2*mb <= j; mb++) - for (int ma = 0; ma <= j; ma++) - idxz_count++; - - int idxcg_count = 0; - for(int j1 = 0; j1 <= 
twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { - for (int m1 = 0; m1 <= j1; m1++) - for (int m2 = 0; m2 <= j2; m2++) - idxcg_count++; - } - - memory->create(sna.idxz, idxz_count*10, "pod:sna_idxz"); - memory->create(sna.idxb, idxb_count*3, "pod:sna_idxb"); - memory->create(sna.rcutsq, (ntypes+1)*(ntypes+1), "pod:sna_rcutsq"); - memory->create(sna.radelem, ntypes+1, "pod:sna_radelem"); - memory->create(sna.wjelem, ntypes+1, "pod:sna_wjelem"); - memory->create(sna.rootpqarray, jdimpq*jdimpq, "pod:sna_rootpqarray"); - memory->create(sna.cglist, idxcg_count, "pod:sna_cglist"); - memory->create(sna.bzero, jdim, "pod:sna_bzero"); - memory->create(sna.fac, 168, "pod:sna_fac"); - - for (int i=0; i rcutij) { - sfac = 0.0; - dsfac = 0.0; - } - else { - double rcutfac0 = MY_PI / (rcutij - rmin0); - sfac = 0.5 * (cos((r - rmin0) * rcutfac0) + 1.0); - dsfac = -0.5 * sin((r - rmin0) * rcutfac0) * rcutfac0; - } - } - sfac *= wjelem[tj[ij]]; - dsfac *= wjelem[tj[ij]]; - - double r0inv, dr0invdr; - double a_r, a_i, b_r, b_i; - double da_r[3], da_i[3], db_r[3], db_i[3]; - double dz0[3], dr0inv[3]; - double rootpq; - int jdim = twojmax + 1; - - r0inv = 1.0 / sqrt(r * r + z0 * z0); - a_r = r0inv * z0; - a_i = -r0inv * z; - b_r = r0inv * y; - b_i = -r0inv * x; - - dr0invdr = -cube(r0inv) * (r + z0 * dz0dr); - - dr0inv[0] = dr0invdr * ux; - dr0inv[1] = dr0invdr * uy; - dr0inv[2] = dr0invdr * uz; - - dz0[0] = dz0dr * ux; - dz0[1] = dz0dr * uy; - dz0[2] = dz0dr * uz; - - for (int k = 0; k < 3; k++) { - da_r[k] = dz0[k] * r0inv + z0 * dr0inv[k]; - da_i[k] = -z * dr0inv[k]; - } - da_i[2] += -r0inv; - - for (int k = 0; k < 3; k++) { - db_r[k] = y * dr0inv[k]; - db_i[k] = -x * dr0inv[k]; - } - db_i[0] += -r0inv; - db_r[1] += r0inv; - - Sr[ij+0*ijnum] = 1.0; - Si[ij+0*ijnum] = 0.0; - Srx[ij+0*ijnum] = 0.0; - Six[ij+0*ijnum] = 0.0; - Sry[ij+0*ijnum] = 0.0; - Siy[ij+0*ijnum] = 0.0; - Srz[ij+0*ijnum] = 0.0; - Siz[ij+0*ijnum] = 0.0; - 
for (int j = 1; j <= twojmax; j++) { - int jju = idxu_block[j]; - int jjup = idxu_block[j-1]; - - // fill in left side of matrix layer from previous layer - - for (int mb = 0; 2*mb <= j; mb++) { - Sr[ij+jju*ijnum] = 0.0; - Si[ij+jju*ijnum] = 0.0; - Srx[ij+jju*ijnum] = 0.0; - Six[ij+jju*ijnum] = 0.0; - Sry[ij+jju*ijnum] = 0.0; - Siy[ij+jju*ijnum] = 0.0; - Srz[ij+jju*ijnum] = 0.0; - Siz[ij+jju*ijnum] = 0.0; - for (int ma = 0; ma < j; ma++) { - rootpq = rootpqarray[(j - ma)*jdim + (j - mb)]; - int njju = ij+jju*ijnum; - int njju1 = ij+(jju+1)*ijnum; - int njjup = ij+jjup*ijnum; - double u_r = Sr[njjup]; - double u_i = Si[njjup]; - double ux_r = Srx[njjup]; - double ux_i = Six[njjup]; - double uy_r = Sry[njjup]; - double uy_i = Siy[njjup]; - double uz_r = Srz[njjup]; - double uz_i = Siz[njjup]; - - Sr[njju] += rootpq * (a_r * u_r + a_i * u_i); - Si[njju] += rootpq * (a_r * u_i - a_i * u_r); - Srx[njju] += rootpq * (da_r[0] * u_r + da_i[0] * u_i + a_r * ux_r + a_i * ux_i); - Six[njju] += rootpq * (da_r[0] * u_i - da_i[0] * u_r + a_r * ux_i - a_i * ux_r); - Sry[njju] += rootpq * (da_r[1] * u_r + da_i[1] * u_i + a_r * uy_r + a_i * uy_i); - Siy[njju] += rootpq * (da_r[1] * u_i - da_i[1] * u_r + a_r * uy_i - a_i * uy_r); - Srz[njju] += rootpq * (da_r[2] * u_r + da_i[2] * u_i + a_r * uz_r + a_i * uz_i); - Siz[njju] += rootpq * (da_r[2] * u_i - da_i[2] * u_r + a_r * uz_i - a_i * uz_r); - - rootpq = rootpqarray[(ma + 1)*jdim + (j - mb)]; - Sr[njju1] = -rootpq * (b_r * u_r + b_i * u_i); - Si[njju1] = -rootpq * (b_r * u_i - b_i * u_r); - Srx[njju1] = -rootpq * (db_r[0] * u_r + db_i[0] * u_i + b_r * ux_r + b_i * ux_i); - Six[njju1] = -rootpq * (db_r[0] * u_i - db_i[0] * u_r + b_r * ux_i - b_i * ux_r); - Sry[njju1] = -rootpq * (db_r[1] * u_r + db_i[1] * u_i + b_r * uy_r + b_i * uy_i); - Siy[njju1] = -rootpq * (db_r[1] * u_i - db_i[1] * u_r + b_r * uy_i - b_i * uy_r); - Srz[njju1] = -rootpq * (db_r[2] * u_r + db_i[2] * u_i + b_r * uz_r + b_i * uz_i); - Siz[njju1] = -rootpq * 
(db_r[2] * u_i - db_i[2] * u_r + b_r * uz_i - b_i * uz_r); - jju++; - jjup++; - } - jju++; - } - - jju = idxu_block[j]; - jjup = jju+(j+1)*(j+1)-1; - int mbpar = 1; - for (int mb = 0; 2*mb <= j; mb++) { - int mapar = mbpar; - for (int ma = 0; ma <= j; ma++) { - int njju = ij+jju*ijnum; - int njjup = ij+jjup*ijnum; - if (mapar == 1) { - Sr[njjup] = Sr[njju]; - Si[njjup] = -Si[njju]; - if (j%2==1 && mb==(j/2)) { - Srx[njjup] = Srx[njju]; - Six[njjup] = -Six[njju]; - Sry[njjup] = Sry[njju]; - Siy[njjup] = -Siy[njju]; - Srz[njjup] = Srz[njju]; - Siz[njjup] = -Siz[njju]; - } - } else { - Sr[njjup] = -Sr[njju]; - Si[njjup] = Si[njju]; - if (j%2==1 && mb==(j/2)) { - Srx[njjup] = -Srx[njju]; - Six[njjup] = Six[njju]; - Sry[njjup] = -Sry[njju]; - Siy[njjup] = Siy[njju]; - Srz[njjup] = -Srz[njju]; - Siz[njjup] = Siz[njju]; - } - } - mapar = -mapar; - jju++; - jjup--; - } - mbpar = -mbpar; - } - } - - for (int j = 0; j <= twojmax; j++) { - int jju = idxu_block[j]; - for (int mb = 0; 2*mb <= j; mb++) - for (int ma = 0; ma <= j; ma++) { - int ijk = ij+jju*ijnum; - Srx[ijk] = dsfac * Sr[ijk] * ux + sfac * Srx[ijk]; - Six[ijk] = dsfac * Si[ijk] * ux + sfac * Six[ijk]; - Sry[ijk] = dsfac * Sr[ijk] * uy + sfac * Sry[ijk]; - Siy[ijk] = dsfac * Si[ijk] * uy + sfac * Siy[ijk]; - Srz[ijk] = dsfac * Sr[ijk] * uz + sfac * Srz[ijk]; - Siz[ijk] = dsfac * Si[ijk] * uz + sfac * Siz[ijk]; - jju++; - } - } - - for (int k=0; k 1e-20) { - rij[ninside*3 + 0] = delx; - rij[ninside*3 + 1] = dely; - rij[ninside*3 + 2] = delz; - idxi[ninside] = ii; - ai[ninside] = gi; - aj[ninside] = gj; - ti[ninside] = itype; - tj[ninside] = atomtype[gj]; - ninside++; - pairnumsum[ii+1] += 1; - } - } - } - - pairnumsum[0] = 0; - for (int ii=0; ii j) - ik = lk + s; - typek = tj[ik] - 1; - xik1 = yij[0+dim*ik]; // xk - xi - xik2 = yij[1+dim*ik]; // xk - xi - xik3 = yij[2+dim*ik]; // xk - xi s - riksq = xik1*xik1 + xik2*xik2 + xik3*xik3; - rik = sqrt(riksq); - - xdot = xij1*xik1 + xij2*xik2 + xij3*xik3; - costhe = 
xdot/(rij*rik); - costhe = costhe > 1.0 ? 1.0 : costhe; - costhe = costhe < -1.0 ? -1.0 : costhe; - theta = acos(costhe); - - for (int p=0; p rcutij) { - sfac = 0.0; - } - else { - double rcutfac0 = MY_PI / (rcutij - rmin0); - sfac = 0.5 * (cos((r - rmin0) * rcutfac0) + 1.0); - } - } - sfac *= wjelem[tj[ij]]; - - double r0inv; - double a_r, a_i, b_r, b_i; - double rootpq; - int jdim = twojmax + 1; - - r0inv = 1.0 / sqrt(r * r + z0 * z0); - a_r = r0inv * z0; - a_i = -r0inv * z; - b_r = r0inv * y; - b_i = -r0inv * x; - - Sr[ij+0*ijnum] = 1.0; - Si[ij+0*ijnum] = 0.0; - for (int j = 1; j <= twojmax; j++) { - int jju = idxu_block[j]; - int jjup = idxu_block[j-1]; - - // fill in left side of matrix layer from previous layer - - for (int mb = 0; 2*mb <= j; mb++) { - Sr[ij+jju*ijnum] = 0.0; - Si[ij+jju*ijnum] = 0.0; - for (int ma = 0; ma < j; ma++) { - rootpq = rootpqarray[(j - ma)*jdim + (j - mb)]; - int njju = ij+jju*ijnum; - int njju1 = ij+(jju+1)*ijnum; - int njjup = ij+jjup*ijnum; - double u_r = Sr[njjup]; - double u_i = Si[njjup]; - - Sr[njju] += rootpq * (a_r * u_r + a_i * u_i); - Si[njju] += rootpq * (a_r * u_i - a_i * u_r); - - rootpq = rootpqarray[(ma + 1)*jdim + (j - mb)]; - Sr[njju1] = -rootpq * (b_r * u_r + b_i * u_i); - Si[njju1] = -rootpq * (b_r * u_i - b_i * u_r); - jju++; - jjup++; - } - jju++; - } - - jju = idxu_block[j]; - jjup = jju+(j+1)*(j+1)-1; - int mbpar = 1; - for (int mb = 0; 2*mb <= j; mb++) { - int mapar = mbpar; - for (int ma = 0; ma <= j; ma++) { - int njju = ij+jju*ijnum; - int njjup = ij+jjup*ijnum; - if (mapar == 1) { - Sr[njjup] = Sr[njju]; - Si[njjup] = -Si[njju]; - } else { - Sr[njjup] = -Sr[njju]; - Si[njjup] = Si[njju]; - } - mapar = -mapar; - jju++; - jjup--; - } - mbpar = -mbpar; - } - } - - for (int k=0; k 0) - snapdesc_ij(eatom4, rij, tmpmem, atomtype, idxi, ti, tj, natom, Nij); - - // global descriptors for one-body, two-body, three-body, and four-bodt linear potentials - - podArraySetValue(tmpmem, 1.0, natom); - - char cht = 
'T'; - double one = 1.0; - int inc1 = 1; - DGEMV(&cht, &natom, &nd1234, &one, eatom1, &natom, tmpmem, &inc1, &one, gd, &inc1); -} - -double MLPOD::calculate_energy(double *effectivecoeff, double *gd, double *coeff) -{ - int nd1 = pod.nd1; - int nd2 = pod.nd2; - int nd3 = pod.nd3; - int nd4 = pod.nd4; - int nd1234 = nd1+nd2+nd3+nd4; - int nd22 = pod.nd22; - int nd23 = pod.nd23; - int nd24 = pod.nd24; - int nd33 = pod.nd33; - int nd34 = pod.nd34; - int nd44 = pod.nd44; - int nd234 = pod.nd234; - int nd333 = pod.nd333; - int nd444 = pod.nd444; - int nc2 = pod.nc2; - int nc3 = pod.nc3; - int nc4 = pod.nc4; - - // two-body, three-body, and four-body descriptors - - double *d2 = &gd[nd1]; - double *d3 = &gd[nd1+nd2]; - double *d4 = &gd[nd1+nd2+nd3]; - - // quadratic and cubic POD coefficients - - double *coeff22 = &coeff[nd1234]; - double *coeff23 = &coeff[nd1234+nd22]; - double *coeff24 = &coeff[nd1234+nd22+nd23]; - double *coeff33 = &coeff[nd1234+nd22+nd23+nd24]; - double *coeff34 = &coeff[nd1234+nd22+nd23+nd24+nd33]; - double *coeff44 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34]; - double *coeff234 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44]; - double *coeff333 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234]; - double *coeff444 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234+nd333]; - - // calculate energy for linear potentials - - double energy = 0.0; - for (int i=0; i< nd1234; i++) { - effectivecoeff[i] = 0.0; - energy += coeff[i]*gd[i]; - } - - // effective POD coefficients for calculating force - - double *c2 = &effectivecoeff[nd1]; - double *c3 = &effectivecoeff[nd1+nd2]; - double *c4 = &effectivecoeff[nd1+nd2+nd3]; - - // calculate energy for quadratic22 potential - - if (nd22 > 0) energy += quadratic_coefficients(c2, d2, coeff22, pod.quadratic22, nc2); - - // calculate energy for quadratic23 potential - - if (nd23 > 0) energy += quadratic_coefficients(c2, c3, d2, d3, coeff23, pod.quadratic23, nc2, nc3); - - // calculate energy for quadratic24 potential 
- - if (nd24 > 0) energy += quadratic_coefficients(c2, c4, d2, d4, coeff24, pod.quadratic24, nc2, nc4); - - // calculate energy for quadratic33 potential - - if (nd33 > 0) energy += quadratic_coefficients(c3, d3, coeff33, pod.quadratic33, nc3); - - // calculate energy for quadratic34 potential - - if (nd34 > 0) energy += quadratic_coefficients(c3, c4, d3, d4, coeff34, pod.quadratic34, nc3, nc4); - - // calculate energy for quadratic44 potential - - if (nd44 > 0) energy += quadratic_coefficients(c4, d4, coeff44, pod.quadratic44, nc4); - - // calculate energy for cubic234 potential - - if (nd234 > 0) energy += cubic_coefficients(c2, c3, c4, d2, d3, d4, coeff234, pod.cubic234, nc2, nc3, nc4); - - // calculate energy for cubic333 potential - - if (nd333 > 0) energy += cubic_coefficients(c3, d3, coeff333, pod.cubic333, nc3); - - // calculate energy for cubic444 potential - - if (nd444 > 0) energy += cubic_coefficients(c4, d4, coeff444, pod.cubic444, nc4); - - // calculate effective POD coefficients - - for (int i=0; i< nd1234; i++) effectivecoeff[i] += coeff[i]; - - return energy; -} - -double MLPOD::calculate_energy(double *energycoeff, double *forcecoeff, double *gd, - double *gdall, double *coeff) -{ - int nd1 = pod.nd1; - int nd2 = pod.nd2; - int nd3 = pod.nd3; - int nd4 = pod.nd4; - int nd1234 = nd1+nd2+nd3+nd4; - int nd22 = pod.nd22; - int nd23 = pod.nd23; - int nd24 = pod.nd24; - int nd33 = pod.nd33; - int nd34 = pod.nd34; - int nd44 = pod.nd44; - int nd234 = pod.nd234; - int nd333 = pod.nd333; - int nd444 = pod.nd444; - int nc2 = pod.nc2; - int nc3 = pod.nc3; - int nc4 = pod.nc4; - - // quadratic and cubic POD coefficients - - double *coeff22 = &coeff[nd1234]; - double *coeff23 = &coeff[nd1234+nd22]; - double *coeff24 = &coeff[nd1234+nd22+nd23]; - double *coeff33 = &coeff[nd1234+nd22+nd23+nd24]; - double *coeff34 = &coeff[nd1234+nd22+nd23+nd24+nd33]; - double *coeff44 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34]; - double *coeff234 = 
&coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44]; - double *coeff333 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234]; - double *coeff444 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234+nd333]; - - // sum global descriptors over all MPI ranks - - MPI_Allreduce(gd, gdall, nd1234, MPI_DOUBLE, MPI_SUM, world); - - for (int i=0; i< nd1234; i++) { - energycoeff[i] = 0.0; - forcecoeff[i] = 0.0; - } - - // effective POD coefficients for calculating force - - double *c2 = &forcecoeff[nd1]; - double *c3 = &forcecoeff[nd1+nd2]; - double *c4 = &forcecoeff[nd1+nd2+nd3]; - - // effective POD coefficients for calculating energy - - double *ce2 = &energycoeff[nd1]; - double *ce3 = &energycoeff[nd1+nd2]; - double *ce4 = &energycoeff[nd1+nd2+nd3]; - - // two-body, three-body, and four-body descriptors - - double *d2 = &gdall[nd1]; - double *d3 = &gdall[nd1+nd2]; - double *d4 = &gdall[nd1+nd2+nd3]; - - // calculate energy for quadratic22 potential - - if (nd22 > 0) quadratic_coefficients(ce2, c2, d2, coeff22, pod.quadratic22, nc2); - - // calculate energy for quadratic23 potential - - if (nd23 > 0) quadratic_coefficients(ce2, ce3, c2, c3, d2, d3, coeff23, pod.quadratic23, nc2, nc3); - - // calculate energy for quadratic24 potential - - if (nd24 > 0) quadratic_coefficients(ce2, ce4, c2, c4, d2, d4, coeff24, pod.quadratic24, nc2, nc4); - - // calculate energy for quadratic33 potential - - if (nd33 > 0) quadratic_coefficients(ce3, c3, d3, coeff33, pod.quadratic33, nc3); - - // calculate energy for quadratic34 potential - - if (nd34 > 0) quadratic_coefficients(ce3, ce4, c3, c4, d3, d4, coeff34, pod.quadratic34, nc3, nc4); - - // calculate energy for quadratic44 potential - - if (nd44 > 0) quadratic_coefficients(ce4, c4, d4, coeff44, pod.quadratic44, nc4); - - // calculate energy for cubic234 potential - - if (nd234 > 0) cubic_coefficients(ce2, ce3, ce4, c2, c3, c4, d2, d3, d4, coeff234, pod.cubic234, nc2, nc3, nc4); - - // calculate energy for cubic333 potential - - if (nd333 > 0) 
cubic_coefficients(ce3, c3, d3, coeff333, pod.cubic333, nc3); - - // calculate energy for cubic444 potential - - if (nd444 > 0) cubic_coefficients(ce4, c4, d4, coeff444, pod.cubic444, nc4); - - // calculate effective POD coefficients - - for (int i=0; i< nd1234; i++) { - energycoeff[i] += coeff[i]; - forcecoeff[i] += coeff[i]; - } - - // calculate energy - - double energy = 0.0; - for (int i=0; i< nd1234; i++) - energy += energycoeff[i]*gd[i]; - - return energy; -} - -void MLPOD::pod2body_force(double *force, double *fij, double *coeff2, int *ai, int *aj, int *ti, - int *tj, int *elemindex, int nelements, int nbf, int /*natom*/, int N) -{ - int nelements2 = nelements*(nelements+1)/2; - for (int n=0; n j) - ik = lk + s; - k = aj[ik]; // atom k - typek = tj[ik] - 1; - xik1 = yij[0+dim*ik]; // xk - xi - xik2 = yij[1+dim*ik]; // xk - xi - xik3 = yij[2+dim*ik]; // xk - xi s - riksq = xik1*xik1 + xik2*xik2 + xik3*xik3; - rik = sqrt(riksq); - - xdot = xij1*xik1 + xij2*xik2 + xij3*xik3; - costhe = xdot/(rij*rik); - costhe = costhe > 1.0 ? 1.0 : costhe; - costhe = costhe < -1.0 ? -1.0 : costhe; - xdot = costhe*(rij*rik); - - sinthe = sqrt(1.0 - costhe*costhe); - sinthe = sinthe > 1e-12 ? 
sinthe : 1e-12; - theta = acos(costhe); - dtheta = -1.0/sinthe; - - tm1 = 1.0/(rij*rijsq*rik); - tm2 = 1.0/(rij*riksq*rik); - dct1 = (xik1*rijsq - xij1*xdot)*tm1; - dct2 = (xik2*rijsq - xij2*xdot)*tm1; - dct3 = (xik3*rijsq - xij3*xdot)*tm1; - dct4 = (xij1*riksq - xik1*xdot)*tm2; - dct5 = (xij2*riksq - xik2*xdot)*tm2; - dct6 = (xij3*riksq - xik3*xdot)*tm2; - - for (int p=0; p 0) - pod4body_force(force, rij, coeff4, tmpmem, atomtype, idxi, ai, aj, ti, tj, natom, Nij); -} - -double MLPOD::energyforce_calculation(double *force, double *podcoeff, double *effectivecoeff, double *gd, double *rij, - double *tmpmem, int *pairnumsum, int *atomtype, int *idxi, int *ai, int *aj, int *ti, int *tj, int natom, int Nij) -{ - int nd1234 = pod.nd1+pod.nd2+pod.nd3+pod.nd4; - double *eatom = &tmpmem[0]; - - podArraySetValue(gd, 0.0, nd1234); - linear_descriptors_ij(gd, eatom, rij, &tmpmem[natom*nd1234], pairnumsum, atomtype, idxi, ti, tj, natom, Nij); - - // Need to do MPI_Allreduce on gd for parallel - - double energy = calculate_energy(effectivecoeff, gd, podcoeff); - - podArraySetValue(force, 0.0, 3*natom); - - calculate_force(force, effectivecoeff, rij, tmpmem, pairnumsum, atomtype, idxi, ai, aj, ti, tj, natom, Nij); - - return energy; -} - - -void MLPOD::pod2body_force(double **force, double *fij, double *coeff2, int *ai, int *aj, int *ti, - int *tj, int *elemindex, int nelements, int nbf, int /*natom*/, int N) -{ - int nelements2 = nelements*(nelements+1)/2; - for (int n=0; n j) - ik = lk + s; - k = aj[ik]; // atom k - typek = tj[ik] - 1; - xik1 = yij[0+dim*ik]; // xk - xi - xik2 = yij[1+dim*ik]; // xk - xi - xik3 = yij[2+dim*ik]; // xk - xi s - riksq = xik1*xik1 + xik2*xik2 + xik3*xik3; - rik = sqrt(riksq); - - xdot = xij1*xik1 + xij2*xik2 + xij3*xik3; - costhe = xdot/(rij*rik); - costhe = costhe > 1.0 ? 1.0 : costhe; - costhe = costhe < -1.0 ? -1.0 : costhe; - xdot = costhe*(rij*rik); - - sinthe = pow(1.0 - costhe*costhe, 0.5); - sinthe = sinthe > 1e-12 ? 
sinthe : 1e-12; - theta = acos(costhe); - dtheta = -1.0/sinthe; - - tm1 = 1.0/(rij*rijsq*rik); - tm2 = 1.0/(rij*riksq*rik); - dct1 = (xik1*rijsq - xij1*xdot)*tm1; - dct2 = (xik2*rijsq - xij2*xdot)*tm1; - dct3 = (xik3*rijsq - xij3*xdot)*tm1; - dct4 = (xij1*riksq - xik1*xdot)*tm2; - dct5 = (xij2*riksq - xik2*xdot)*tm2; - dct6 = (xij3*riksq - xik3*xdot)*tm2; - - for (int p=0; p 0) - pod4body_force(force, rij, coeff4, tmpmem, atomtype, idxi, ai, aj, ti, tj, natom, Nij); -} diff --git a/src/ML-POD/mlpod.h b/src/ML-POD/mlpod.h deleted file mode 100644 index 54e75988bed..00000000000 --- a/src/ML-POD/mlpod.h +++ /dev/null @@ -1,308 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - https://www.lammps.org/ Sandia National Laboratories - LAMMPS development team: developers@lammps.org - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifndef LMP_MLPOD_H -#define LMP_MLPOD_H - -#include "pointers.h" - -#define DDOT ddot_ -#define DGEMV dgemv_ -#define DGEMM dgemm_ -#define DGETRF dgetrf_ -#define DGETRI dgetri_ -#define DSYEV dsyev_ -#define DPOSV dposv_ - -extern "C" { -double DDOT(int *, double *, int *, double *, int *); -void DGEMV(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, - int *); -void DGEMM(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *, - double *, double *, int *); -void DGETRF(int *, int *, double *, int *, int *, int *); -void DGETRI(int *, double *, int *, int *, double *, int *, int *); -void DSYEV(char *, char *, int *, double *, int *, double *, double *, int *, int *); -void DPOSV(char *, int *, int *, double *, int *, double *, int *, int *); -} - -namespace LAMMPS_NS { - -class MLPOD : protected Pointers { - - private: - // functions for reading input files - - void read_pod(const std::string &pod_file); - void read_coeff_file(const std::string &coeff_file); - - // functions for calculating/collating POD descriptors/coefficients for energies - - void podradialbasis(double *rbf, double *drbf, double *xij, double *besselparams, double rin, - double rmax, int besseldegree, int inversedegree, int nbesselpars, int N); - void pod1body(double *eatom, double *fatom, int *atomtype, int nelements, int natom); - void podtally2b(double *eatom, double *fatom, double *eij, double *fij, int *ai, int *aj, int *ti, - int *tj, int *elemindex, int nelements, int nbf, int natom, int N); - void pod3body(double *eatom, double *fatom, double *rij, double *e2ij, double *f2ij, - double *tmpmem, int *elemindex, int *pairnumsum, int *ai, int *aj, int *ti, int *tj, - int nrbf, int nabf, int nelements, int natom, int Nij); - void poddesc(double *eatom1, double *fatom1, double *eatom2, double *fatom2, double *eatom3, - double *fatom3, double *rij, 
double *Phi, double *besselparams, double *tmpmem, - double rin, double rcut, int *pairnumsum, int *atomtype, int *ai, int *aj, int *ti, - int *tj, int *elemindex, int *pdegree, int nbesselpars, int nrbf2, int nrbf3, - int nabf, int nelements, int Nij, int natom); - double quadratic_coefficients(double *c2, double *c3, double *d2, double *d3, double *coeff23, - int *quadratic, int nc2, int nc3); - double quadratic_coefficients(double *c3, double *d3, double *coeff33, int *quadratic, int nc3); - double cubic_coefficients(double *c2, double *c3, double *c4, double *d2, double *d3, double *d4, - double *coeff234, int *cubic, int nc2, int nc3, int nc4); - double cubic_coefficients(double *c3, double *d3, double *coeff333, int *cubic, int nc3); - double quadratic_coefficients(double *ce2, double *ce3, double *c2, double *c3, double *d2, - double *d3, double *coeff23, int *quadratic, int nc2, int nc3); - double quadratic_coefficients(double *ce3, double *c3, double *d3, double *coeff33, - int *quadratic, int nc3); - double cubic_coefficients(double *ce2, double *ce3, double *ce4, double *c2, double *c3, - double *c4, double *d2, double *d3, double *d4, double *coeff234, - int *cubic, int nc2, int nc3, int nc4); - double cubic_coefficients(double *ce3, double *c3, double *d3, double *coeff333, int *cubic, - int nc3); - - // functions for calculating/collating SNAP descriptors/coefficients for energies - - void snapSetup(int twojmax, int ntypes); - void InitSnap(); - void snapComputeUlist(double *Sr, double *Si, double *dSr, double *dSi, double *rootpqarray, - double *rij, double *wjelem, double *radelem, double rmin0, double rfac0, - double rcutfac, int *idxu_block, int *ti, int *tj, int twojmax, - int idxu_max, int ijnum, int switch_flag); - void snapZeroUarraytot2(double *Stotr, double *Stoti, double wself, int *idxu_block, int *type, - int *map, int *ai, int wselfall_flag, int chemflag, int idxu_max, - int nelements, int twojmax, int inum); - void 
snapAddUarraytot(double *Stotr, double *Stoti, double *Sr, double *Si, int *map, int *ai, - int *tj, int idxu_max, int inum, int ijnum, int chemflag); - void snapComputeZi2(double *zlist_r, double *zlist_i, double *Stotr, double *Stoti, - double *cglist, int *idxz, int *idxu_block, int *idxcg_block, int twojmax, - int idxu_max, int idxz_max, int nelements, int bnorm_flag, int inum); - void snapComputeBi1(double *blist, double *zlist_r, double *zlist_i, double *Stotr, double *Stoti, - int *idxb, int *idxu_block, int *idxz_block, int twojmax, int idxb_max, - int idxu_max, int idxz_max, int nelements, int inum); - void snapComputeDbidrj(double *dblist, double *zlist_r, double *zlist_i, double *dulist_r, - double *dulist_i, int *idxb, int *idxu_block, int *idxz_block, int *map, - int *ai, int *tj, int twojmax, int idxb_max, int idxu_max, int idxz_max, - int nelements, int bnorm_flag, int chemflag, int inum, int ijnum); - void snapdesc(double *blist, double *bd, double *rij, double *tmpmem, int *atomtype, int *ai, - int *aj, int *ti, int *tj, int natom, int Nij); - - // functions for calculating/collating POD descriptors/coefficients for forces - - void podradialbasis(double *rbf, double *xij, double *besselparams, double rin, double rmax, - int besseldegree, int inversedegree, int nbesselpars, int N); - void pod1body(double *eatom, int *atomtype, int nelements, int natom); - void podtally2b(double *eatom, double *eij, int *ai, int *ti, int *tj, int *elemindex, - int nelements, int nbf, int natom, int N); - void pod3body(double *eatom, double *yij, double *e2ij, double *tmpmem, int *elemindex, - int *pairnumsum, int *ai, int *ti, int *tj, int nrbf, int nabf, int nelements, - int natom, int Nij); - void poddesc_ij(double *eatom1, double *eatom2, double *eatom3, double *rij, double *Phi, - double *besselparams, double *tmpmem, double rin, double rcut, int *pairnumsum, - int *atomtype, int *ai, int *ti, int *tj, int *elemindex, int *pdegree, - int nbesselpars, int nrbf2, 
int nrbf3, int nabf, int nelements, int Nij, - int natom); - void snapComputeUij(double *Sr, double *Si, double *rootpqarray, double *rij, double *wjelem, - double *radelem, double rmin0, double rfac0, double rcutfac, int *idxu_block, - int *ti, int *tj, int twojmax, int idxu_max, int ijnum, int switch_flag); - void snapdesc_ij(double *blist, double *rij, double *tmpmem, int *atomtype, int *ai, int *ti, - int *tj, int natom, int Nij); - void pod2body_force(double *force, double *fij, double *coeff2, int *ai, int *aj, int *ti, - int *tj, int *elemindex, int nelements, int nbf, int natom, int Nij); - void pod3body_force(double *force, double *yij, double *e2ij, double *f2ij, double *coeff3, - double *tmpmem, int *elemindex, int *pairnumsum, int *ai, int *aj, int *ti, - int *tj, int nrbf, int nabf, int nelements, int natom, int Nij); - void snapTallyForce(double *force, double *dbdr, double *coeff4, int *ai, int *aj, int *ti, - int ijnum, int ncoeff, int ntype); - void pod4body_force(double *force, double *rij, double *coeff4, double *tmpmem, int *atomtype, - int *idxi, int *ai, int *aj, int *ti, int *tj, int natom, int Nij); - void pod2body_force(double **force, double *fij, double *coeff2, int *ai, int *aj, int *ti, - int *tj, int *elemindex, int nelements, int nbf, int natom, int Nij); - void pod3body_force(double **force, double *yij, double *e2ij, double *f2ij, double *coeff3, - double *tmpmem, int *elemindex, int *pairnumsum, int *ai, int *aj, int *ti, - int *tj, int nrbf, int nabf, int nelements, int natom, int Nij); - void snapTallyForce(double **force, double *dbdr, double *coeff4, int *ai, int *aj, int *ti, - int ijnum, int ncoeff, int ntype); - void pod4body_force(double **force, double *rij, double *coeff4, double *tmpmem, int *atomtype, - int *idxi, int *ai, int *aj, int *ti, int *tj, int natom, int Nij); - - // eigenproblem functions - - void podeigenvaluedecomposition(double *Phi, double *Lambda, double *besselparams, double rin, - double rcut, int 
besseldegree, int inversedegree, int nbesselpars, - int N); - - public: - MLPOD(LAMMPS *, const std::string &pod_file, const std::string &coeff_file); - - MLPOD(LAMMPS *lmp) : Pointers(lmp){}; - ~MLPOD() override; - - struct podstruct { - podstruct(); - virtual ~podstruct(); - - std::vector species; - int twobody[3]; - int threebody[4]; - int fourbody[4]; - int *pbc; - int *elemindex; - - int quadratic22[2]; - int quadratic23[2]; - int quadratic24[2]; - int quadratic33[2]; - int quadratic34[2]; - int quadratic44[2]; - int cubic234[3]; - int cubic333[3]; - int cubic444[3]; - int nelements; - int onebody; - int besseldegree; - int inversedegree; - - int quadraticpod; - - double rin; - double rcut; - double *besselparams; - double *coeff; - double *Phi2, *Phi3, *Phi4, *Lambda2, *Lambda3, *Lambda4; - - // variables declaring number of snapshots, descriptors, and combinations - - int nbesselpars = 3; - int ns2, ns3, - ns4; // number of snapshots for radial basis functions for linear POD potentials - int nc2, nc3, nc4; // number of chemical combinations for linear POD potentials - int nbf1, nbf2, nbf3, nbf4; // number of basis functions for linear POD potentials - int nd1, nd2, nd3, nd4; // number of descriptors for linear POD potentials - int nd22, nd23, nd24, nd33, nd34, nd44; // number of descriptors for quadratic POD potentials - int nd234, nd333, nd444; // number of descriptors for cubic POD potentials - int nrbf3, nabf3, nrbf4, nabf4; - int nd, nd1234; - - int snaptwojmax; // also used to tell if SNAP is used when allocating/deallocating - int snapchemflag; - double snaprfac0; - double snapelementradius[10]; - double snapelementweight[10]; - }; - - struct snastruct { - int twojmax; - int ncoeff; - int idxb_max; - int idxu_max; - int idxz_max; - int idxcg_max; - int ntypes; - int nelements; - int ndoubles; // number of multi-element pairs - int ntriples; // number of multi-element triplets - int bnormflag; - int chemflag; - int switchflag; - int bzeroflag; - int 
wselfallflag; - - double wself; - double rmin0; - double rfac0; - double rcutfac; - double rcutmax; - - int *map; // map types to [0,nelements) - int *idx_max; - int *idxz; - int *idxz_block; - int *idxb; - int *idxb_block; - int *idxu_block; - int *idxcg_block; - - double *rcutsq; - double *radelem; - double *wjelem; - double *bzero; - double *fac; - double *rootpqarray; - double *cglist; - }; - - podstruct pod; - snastruct sna; - - // functions for collecting/collating arrays - - void podMatMul(double *c, double *a, double *b, int r1, int c1, int c2); - void podArraySetValue(double *y, double a, int n); - void podArrayCopy(double *y, double *x, int n); - void podArrayFill(int *output, int start, int length); - - // functions for calculating energy and force descriptors - - void podNeighPairs(double *xij, double *x, int *ai, int *aj, int *ti, int *tj, int *pairlist, - int *pairnumsum, int *atomtype, int *alist, int inum, int dim); - void linear_descriptors(double *gd, double *efatom, double *y, double *tmpmem, int *atomtype, - int *alist, int *pairlist, int *pairnum, int *pairnumsum, int *tmpint, - int natom, int Nij); - void quadratic_descriptors(double *d23, double *dd23, double *d2, double *d3, double *dd2, - double *dd3, int M2, int M3, int N); - void quadratic_descriptors(double *d33, double *dd33, double *d3, double *dd3, int M3, int N); - void cubic_descriptors(double *d234, double *dd234, double *d2, double *d3, double *d4, - double *dd2, double *dd3, double *dd4, int M2, int M3, int M4, int N); - void cubic_descriptors(double *d333, double *Dd333, double *d3, double *Dd3, int M3, int N); - double calculate_energyforce(double *force, double *gd, double *gdd, double *coeff, double *tmp, - int natom); - double energyforce_calculation(double *f, double *gd, double *gdd, double *coeff, double *y, - int *atomtype, int *alist, int *pairlist, int *pairnum, - int *pairnumsum, int *tmpint, int natom, int Nij); - - // functions for calculating energies and forces - 
- void podNeighPairs(double *rij, double *x, int *idxi, int *ai, int *aj, int *ti, int *tj, - int *pairnumsum, int *atomtype, int *jlist, int *alist, int inum); - int lammpsNeighPairs(double *rij, double **x, double rcutsq, int *idxi, int *ai, int *aj, int *ti, - int *tj, int *pairnumsum, int *atomtype, int *numneigh, int *ilist, - int **jlist, int inum); - void linear_descriptors_ij(double *gd, double *eatom, double *rij, double *tmpmem, - int *pairnumsum, int *atomtype, int *ai, int *ti, int *tj, int natom, - int Nij); - double calculate_energy(double *effectivecoeff, double *gd, double *coeff); - double calculate_energy(double *energycoeff, double *forcecoeff, double *gd, double *gdall, - double *coeff); - void calculate_force(double *force, double *effectivecoeff, double *rij, double *tmpmem, - int *pairnumsum, int *atomtype, int *idxi, int *ai, int *aj, int *ti, - int *tj, int natom, int Nij); - void calculate_force(double **force, double *effectivecoeff, double *rij, double *tmpmem, - int *pairnumsum, int *atomtype, int *idxi, int *ai, int *aj, int *ti, - int *tj, int natom, int Nij); - double energyforce_calculation(double *force, double *podcoeff, double *effectivecoeff, - double *gd, double *rij, double *tmpmem, int *pairnumsum, - int *atomtype, int *idxi, int *ai, int *aj, int *ti, int *tj, - int natom, int Nij); - -}; - -} // namespace LAMMPS_NS - -#endif diff --git a/src/ML-POD/pair_pod.cpp b/src/ML-POD/pair_pod.cpp index d106b11a18e..b17aa5d08f7 100644 --- a/src/ML-POD/pair_pod.cpp +++ b/src/ML-POD/pair_pod.cpp @@ -17,54 +17,138 @@ #include "pair_pod.h" -#include "mlpod.h" +#include "eapod.h" #include "atom.h" #include "comm.h" #include "error.h" #include "force.h" +#include "math_const.h" +#include "math_special.h" #include "memory.h" #include "neigh_list.h" #include "neighbor.h" +#include "tokenizer.h" + +#include +#include +#include using namespace LAMMPS_NS; +using MathConst::MY_PI; +using MathSpecial::powint; + +#define MAXLINE 1024 /* 
---------------------------------------------------------------------- */ -PairPOD::PairPOD(LAMMPS *lmp) : - Pair(lmp), gd(nullptr), gdall(nullptr), podcoeff(nullptr), newpodcoeff(nullptr), - energycoeff(nullptr), forcecoeff(nullptr), podptr(nullptr), tmpmem(nullptr), typeai(nullptr), - numneighsum(nullptr), rij(nullptr), idxi(nullptr), ai(nullptr), aj(nullptr), ti(nullptr), - tj(nullptr) +PairPOD::PairPOD(LAMMPS *lmp) : Pair(lmp), fastpodptr(nullptr) { single_enable = 0; restartinfo = 0; one_coeff = 1; manybody_flag = 1; centroidstressflag = CENTROID_NOTAVAIL; - peratom_warn = true; + peratom_warn = false; - dim = 3; - nablockmax = 0; + ni = 0; + nimax = 0; nij = 0; - nijmax = 0; - szd = 0; + nijmax = 0; + atomBlockSize = 4096; + nAtomBlocks = 0; + + rij = nullptr; + fij = nullptr; + ei = nullptr; + typeai = nullptr; + numij = nullptr; + idxi = nullptr; + ai = nullptr; + aj = nullptr; + ti = nullptr; + tj = nullptr; + Phi = nullptr; + rbf = nullptr; + rbfx = nullptr; + rbfy = nullptr; + rbfz = nullptr; + abf = nullptr; + abfx = nullptr; + abfy = nullptr; + abfz = nullptr; + sumU = nullptr; + Centroids = nullptr; + Proj = nullptr; + bd = nullptr; + bdd = nullptr; + pd = nullptr; + pdd = nullptr; + coefficients = nullptr; + pn3 = nullptr; + pc3 = nullptr; + pa4 = nullptr; + pb4 = nullptr; + pc4 = nullptr; + ind23 = nullptr; + ind32 = nullptr; + ind33l = nullptr; + ind33r = nullptr; + ind34l = nullptr; + ind34r = nullptr; + ind44l = nullptr; + ind44r = nullptr; + elemindex = nullptr; } /* ---------------------------------------------------------------------- */ PairPOD::~PairPOD() { - free_tempmemory(); - memory->destroy(podcoeff); - memory->destroy(newpodcoeff); - memory->destroy(gd); - memory->destroy(gdall); - memory->destroy(energycoeff); - memory->destroy(forcecoeff); - - delete podptr; - + memory->destroy(rij); + memory->destroy(fij); + memory->destroy(ei); + memory->destroy(typeai); + memory->destroy(numij); + memory->destroy(idxi); + memory->destroy(ai); + 
memory->destroy(aj); + memory->destroy(ti); + memory->destroy(tj); + memory->destroy(Phi); + memory->destroy(rbf); + memory->destroy(rbfx); + memory->destroy(rbfy); + memory->destroy(rbfz); + memory->destroy(abf); + memory->destroy(abfx); + memory->destroy(abfy); + memory->destroy(abfz); + memory->destroy(sumU); + memory->destroy(Centroids); + memory->destroy(Proj); + memory->destroy(bd); + memory->destroy(bdd); + memory->destroy(pd); + memory->destroy(pdd); + memory->destroy(coefficients); + memory->destroy(pn3); + memory->destroy(pc3); + memory->destroy(pa4); + memory->destroy(pb4); + memory->destroy(pc4); + memory->destroy(ind23); + memory->destroy(ind32); + memory->destroy(ind33l); + memory->destroy(ind33r); + memory->destroy(ind34l); + memory->destroy(ind34r); + memory->destroy(ind44l); + memory->destroy(ind44r); + memory->destroy(elemindex); + + delete fastpodptr; + if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); @@ -91,67 +175,94 @@ void PairPOD::compute(int eflag, int vflag) int *type = atom->type; int *ilist = list->ilist; int inum = list->inum; + int nlocal = atom->nlocal; + int newton_pair = force->newton_pair; - // initialize global descriptors to zero - - int nd1234 = podptr->pod.nd1234; - podptr->podArraySetValue(gd, 0.0, nd1234); + double rcutsq = rcut*rcut; + double evdwl = 0.0; + int blockMode = 0; + if (blockMode==0) { for (int ii = 0; ii < inum; ii++) { int i = ilist[ii]; int jnum = numneigh[i]; // allocate temporary memory - if (nijmax < jnum) { nijmax = MAX(nijmax, jnum); - nablockmax = 1; - free_tempmemory(); - estimate_tempmemory(); - allocate_tempmemory(); + fastpodptr->free_temp_memory(); + fastpodptr->allocate_temp_memory(nijmax); } - // get neighbor pairs for atom i - - lammpsNeighPairs(x, firstneigh, type, map, numneigh, i); - - // compute global POD descriptors for atom i - - podptr->linear_descriptors_ij(gd, tmpmem, rij, &tmpmem[nd1234], numneighsum, typeai, idxi, ti, - tj, 1, nij); + double *rij1 = 
&fastpodptr->tmpmem[0]; + double *fij1 = &fastpodptr->tmpmem[3*nijmax]; + double *tmp = &fastpodptr->tmpmem[6*nijmax]; + int *ai1 = &fastpodptr->tmpint[0]; + int *aj1 = &fastpodptr->tmpint[nijmax]; + int *ti1 = &fastpodptr->tmpint[2*nijmax]; + int *tj1 = &fastpodptr->tmpint[3*nijmax]; + lammpsNeighborList(rij1, ai1, aj1, ti1, tj1, x, firstneigh, type, map, numneigh, rcutsq, i); + + evdwl = fastpodptr->peratomenergyforce(fij1, rij1, tmp, ti1, tj1, nij); + + // tally atomic energy to global energy + ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); + + // tally atomic force to global force + tallyforce(f, fij1, ai1, aj1, nij); + + // tally atomic stress + if (vflag) { + for (int jj = 0; jj < nij; jj++) { + int j = aj1[jj]; + ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0, + fij1[0 + 3*jj],fij1[1 + 3*jj],fij1[2 + 3*jj], + -rij1[0 + 3*jj], -rij1[1 + 3*jj], -rij1[2 + 3*jj]); + } + } + } } - - int nd22 = podptr->pod.nd22; - int nd23 = podptr->pod.nd23; - int nd24 = podptr->pod.nd24; - int nd33 = podptr->pod.nd33; - int nd34 = podptr->pod.nd34; - int nd44 = podptr->pod.nd44; - int nd = podptr->pod.nd; - bigint natom = atom->natoms; - - for (int j = nd1234; j < (nd1234 + nd22 + nd23 + nd24 + nd33 + nd34 + nd44); j++) - newpodcoeff[j] = podcoeff[j] / (natom); - - for (int j = (nd1234 + nd22 + nd23 + nd24 + nd33 + nd34 + nd44); j < nd; j++) - newpodcoeff[j] = podcoeff[j] / (natom * natom); - - // compute energy and effective coefficients - eng_vdwl = podptr->calculate_energy(energycoeff, forcecoeff, gd, gdall, newpodcoeff); - - for (int ii = 0; ii < inum; ii++) { - int i = ilist[ii]; - - // get neighbor pairs for atom i - - lammpsNeighPairs(x, firstneigh, type, map, numneigh, i); - - // compute atomic force for atom i - - podptr->calculate_force(f, forcecoeff, rij, tmpmem, numneighsum, typeai, idxi, ai, aj, ti, tj, - 1, nij); + else if (blockMode == 1) { + // determine the number of atom blocks and divide atoms into blocks + nAtomBlocks = calculateNumberOfIntervals(inum, 
atomBlockSize); + if (nAtomBlocks > 100) nAtomBlocks = 100; + divideInterval(atomBlocks, inum, nAtomBlocks); + + int nmax = 0; + for (int block =0; blockdestroy(podcoeff); - memory->destroy(newpodcoeff); - memory->destroy(energycoeff); - memory->destroy(forcecoeff); - memory->destroy(gd); - memory->destroy(gdall); - memory->create(podcoeff, podptr->pod.nd, "pair:podcoeff"); - memory->create(newpodcoeff, podptr->pod.nd, "pair:newpodcoeff"); - memory->create(energycoeff, podptr->pod.nd1234, "pair:energycoeff"); - memory->create(forcecoeff, podptr->pod.nd1234, "pair:forcecoeff"); - memory->create(gd, podptr->pod.nd1234, "pair:gd"); - memory->create(gdall, podptr->pod.nd1234, "pair:gdall"); - podptr->podArrayCopy(podcoeff, podptr->pod.coeff, podptr->pod.nd); - podptr->podArrayCopy(newpodcoeff, podptr->pod.coeff, podptr->pod.nd); + std::string proj_file = std::string(arg[4]); // projection matrix file + std::string centroid_file = std::string(arg[5]); // centroid matrix file + map_element2type(narg - 6, arg + 6); + + delete fastpodptr; + fastpodptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file); + + if (fastpodptr->nClusters > 1) { + if (proj_file == "") error->all(FLERR,"The projection file name can not be empty when the number of clusters is greater than 1."); + if (centroid_file == "") error->all(FLERR,"The centroids file name can not be empty when the number of clusters is greater than 1."); } + + copy_data_from_pod_class(); + rcut = fastpodptr->rcut; + + memory->destroy(fastpodptr->tmpmem); + memory->destroy(fastpodptr->tmpint); for (int ii = 0; ii < np1; ii++) - for (int jj = 0; jj < np1; jj++) cutsq[ii][jj] = podptr->pod.rcut * podptr->pod.rcut; + for (int jj = 0; jj < np1; jj++) cutsq[ii][jj] = fastpodptr->rcut * fastpodptr->rcut; } /* ---------------------------------------------------------------------- @@ -222,7 +329,7 @@ void PairPOD::init_style() neighbor->add_request(this, NeighConst::REQ_FULL); // reset flag to print warning about 
per-atom energies or stresses - peratom_warn = true; + peratom_warn = false; } /* ---------------------------------------------------------------------- @@ -232,7 +339,16 @@ void PairPOD::init_style() double PairPOD::init_one(int i, int j) { if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set"); - return podptr->pod.rcut; + + double rcut = 0.0; + rcut = fastpodptr->rcut; + + return rcut; +} + +void PairPOD::allocate() +{ + allocated = 1; } /* ---------------------------------------------------------------------- @@ -245,86 +361,1307 @@ double PairPOD::memory_usage() return bytes; } -void PairPOD::free_tempmemory() +void PairPOD::lammpsNeighborList(double *rij1, int *ai1, int *aj1, int *ti1, int *tj1, + double **x, int **firstneigh, int *atomtypes, int *map, + int *numneigh, double rcutsq, int gi) { - memory->destroy(rij); - memory->destroy(idxi); - memory->destroy(ai); - memory->destroy(aj); - memory->destroy(ti); - memory->destroy(tj); - memory->destroy(numneighsum); - memory->destroy(typeai); - memory->destroy(tmpmem); + nij = 0; + int itype = map[atomtypes[gi]] + 1; + int m = numneigh[gi]; + for (int l = 0; l < m; l++) { // loop over each atom around atom i + int gj = firstneigh[gi][l]; // atom j + double delx = x[gj][0] - x[gi][0]; // xj - xi + double dely = x[gj][1] - x[gi][1]; // xj - xi + double delz = x[gj][2] - x[gi][2]; // xj - xi + double rsq = delx * delx + dely * dely + delz * delz; + if (rsq < rcutsq && rsq > 1e-20) { + rij1[nij * 3 + 0] = delx; + rij1[nij * 3 + 1] = dely; + rij1[nij * 3 + 2] = delz; + ai1[nij] = gi; + aj1[nij] = gj; + ti1[nij] = itype; + tj1[nij] = map[atomtypes[gj]] + 1; + nij++; + } + } +} + +void PairPOD::NeighborCount(double **x, int **firstneigh, int *ilist, int *numneigh, double rcutsq, int gi1, int gi2) +{ + for (int i=0; i 1e-20) n++; + } + numij[1+i] = n; + } +} + +int PairPOD::numberOfNeighbors() +{ + int n = 0; + for (int i=1; i<=ni; i++) { + n += numij[i]; + numij[i] += numij[i-1]; + } + return n; +} + 
+void PairPOD::NeighborList(double **x, int **firstneigh, int *atomtypes, int *map, + int *ilist, int *numneigh, double rcutsq, int gi1, int gi2) +{ + for (int i=0; i 1e-20) { + int nij1 = nij0 + k; + rij[nij1 * 3 + 0] = delx; + rij[nij1 * 3 + 1] = dely; + rij[nij1 * 3 + 2] = delz; + idxi[nij1] = i; + ai[nij1] = gi; + aj[nij1] = gj; + ti[nij1] = itype; + tj[nij1] = map[atomtypes[gj]] + 1; + k++; + } + } + } +} + +void PairPOD::tallyforce(double **force, double *fij, int *ai, int *aj, int N) +{ + for (int n=0; ncreate(rij, dim * nijmax, "pair:rij"); - memory->create(idxi, nijmax, "pair:idxi"); - memory->create(ai, nijmax, "pair:ai"); - memory->create(aj, nijmax, "pair:aj"); - memory->create(ti, nijmax, "pair:ti"); - memory->create(tj, nijmax, "pair:tj"); - memory->create(numneighsum, nablockmax + 1, "pair:numneighsum"); - memory->create(typeai, nablockmax, "pair:typeai"); - memory->create(tmpmem, szd, "pair:tmpmem"); + nelements = fastpodptr->nelements; // number of elements + onebody = fastpodptr->onebody; // one-body descriptors + besseldegree = fastpodptr->besseldegree; // degree of Bessel functions + inversedegree = fastpodptr->inversedegree; // degree of inverse functions + nbesselpars = fastpodptr->nbesselpars; // number of Bessel parameters + nCoeffPerElement = fastpodptr->nCoeffPerElement; // number of coefficients per element = (nl1 + Mdesc*nClusters) + ns = fastpodptr->ns; // number of snapshots for radial basis functions + nl1 = fastpodptr->nl1; // number of one-body descriptors + nl2 = fastpodptr->nl2; // number of two-body descriptors + nl3 = fastpodptr->nl3; // number of three-body descriptors + nl4 = fastpodptr->nl4; // number of four-body descriptors + nl23 = fastpodptr->nl23; // number of two-body x three-body descriptors + nl33 = fastpodptr->nl33; // number of three-body x three-body descriptors + nl34 = fastpodptr->nl34; // number of three-body x four-body descriptors + nl44 = fastpodptr->nl44; // number of four-body x four-body descriptors + n23 
= fastpodptr->n23; + n32 = fastpodptr->n32; + nl = fastpodptr->nl; // number of local descriptors + nrbf2 = fastpodptr->nrbf2; + nrbf3 = fastpodptr->nrbf3; + nrbf4 = fastpodptr->nrbf4; + nrbfmax = fastpodptr->nrbfmax; // number of radial basis functions + nabf3 = fastpodptr->nabf3; // number of three-body angular basis functions + nabf4 = fastpodptr->nabf4; // number of four-body angular basis functions + K3 = fastpodptr->K3; // number of three-body monomials + K4 = fastpodptr->K4; // number of four-body monomials + Q4 = fastpodptr->Q4; // number of four-body monomial coefficients + nClusters = fastpodptr->nClusters; // number of environment clusters + nComponents = fastpodptr->nComponents; // number of principal components + Mdesc = fastpodptr->Mdesc; // number of base descriptors + + rin = fastpodptr->rin; + rcut = fastpodptr->rcut; + rmax = rcut - rin; + besselparams[0] = fastpodptr->besselparams[0]; + besselparams[1] = fastpodptr->besselparams[1]; + besselparams[2] = fastpodptr->besselparams[2]; + + memory->create(abftm, 4*K3, "abftm"); + memory->create(elemindex, nelements*nelements, "elemindex"); + for (int i=0; ielemindex[i]; + + memory->create(Phi, ns * ns, "pair_pod:Phi"); + for (int i=0; iPhi[i]; + + memory->create(coefficients, nCoeffPerElement * nelements, "pair_pod:coefficients"); + for (int i=0; icoeff[i]; + + if (nClusters > 1) { + memory->create(Proj, Mdesc * nComponents * nelements, "pair_pod:Proj"); + for (int i=0; iProj[i]; + + memory->create(Centroids, nClusters * nComponents * nelements, "pair_pod:Centroids"); + for (int i=0; iCentroids[i]; + } + + memory->create(pn3, nabf3+1, "pn3"); // array stores the number of monomials for each degree + memory->create(pq3, K3*2, "pq3"); // array needed for the recursive computation of the angular basis functions + memory->create(pc3, K3, "pc3"); // array needed for the computation of the three-body descriptors + memory->create(pa4, nabf4+1, "pa4"); // this array is a subset of the array {0, 1, 4, 10, 19, 
29, 47, 74, 89, 119, 155, 209, 230, 275, 335, 425, 533, 561, 624, 714, 849, 949, 1129, 1345} + memory->create(pb4, Q4*3, "pb4"); // array stores the indices of the monomials needed for the computation of the angular basis functions + memory->create(pc4, Q4, "pc4"); // array of monomial coefficients needed for the computation of the four-body descriptors + for (int i=0; ipn3[i]; + for (int i=0; ipc3[i]; + for (int i=0; ipq3[i]; + for (int i=0; ipa4[i]; + for (int i=0; ipb4[i]; + for (int i=0; ipc4[i]; + + memory->create(ind23, n23, "pair_pod:ind23"); + memory->create(ind32, n32, "pair_pod:ind32"); + memory->create(ind33l, nl33, "pair_pod:ind33l"); + memory->create(ind33r, nl33, "pair_pod:ind33r"); + memory->create(ind34l, nl34, "pair_pod:ind34l"); + memory->create(ind34r, nl34, "pair_pod:ind34r"); + memory->create(ind44l, nl44, "pair_pod:ind44l"); + memory->create(ind44r, nl44, "pair_pod:ind44r"); + for (int i=0; iind23[i]; + for (int i=0; iind32[i]; + for (int i=0; iind33l[i]; + for (int i=0; iind33r[i]; + for (int i=0; iind34l[i]; + for (int i=0; iind34r[i]; + for (int i=0; iind44l[i]; + for (int i=0; iind44r[i]; } -void PairPOD::estimate_tempmemory() +void PairPOD::grow_atoms(int Ni) { - int nrbf2 = podptr->pod.nbf2; - int nabf3 = podptr->pod.nabf3; - int nrbf3 = podptr->pod.nrbf3; - int ns2 = podptr->pod.ns2; - int ns3 = podptr->pod.ns3; + if (Ni > nimax) { + memory->destroy(ei); + memory->destroy(typeai); + memory->destroy(numij); + memory->destroy(sumU); + memory->destroy(bd); + memory->destroy(pd); + nimax = Ni; + memory->create(ei, nimax, "pair_pod:ei"); + memory->create(typeai, nimax, "pair_pod:typeai"); + memory->create(numij, nimax+1, "pair_pod:typeai"); + int n = nimax * nelements * K3 * nrbfmax; + if (nClusters>1) n = (n > nimax*Mdesc) ? 
n : nimax*Mdesc; + memory->create(sumU, n , "pair_pod:sumU"); + memory->create(bd, nimax * Mdesc, "pair_pod:bd"); + memory->create(pd, nimax * nClusters, "pair_pod:pd"); + + for (int i=0; i<=nimax; i++) numij[i] = 0; + } +} - szd = dim * nijmax + (1 + dim) * nijmax * MAX(nrbf2 + ns2, nrbf3 + ns3) + (nabf3 + 1) * 7; - int szsnap = 0; - if (podptr->sna.twojmax > 0) { - szsnap += nijmax * dim; - szsnap += MAX(2 * podptr->sna.idxu_max * nijmax, - 2 * podptr->sna.idxz_max * podptr->sna.ndoubles * - nablockmax); // (Ur, Ui) and (Zr, Zi) - szsnap += 2 * podptr->sna.idxu_max * dim * nijmax; // dUr, dUi - szsnap += MAX(podptr->sna.idxb_max * podptr->sna.ntriples * dim * nijmax, - 2 * podptr->sna.idxu_max * podptr->sna.nelements * - nablockmax); // dblist and (Utotr, Utoti) +void PairPOD::grow_pairs(int Nij) +{ + if (Nij > nijmax) { + memory->destroy(rij); + memory->destroy(fij); + memory->destroy(idxi); + memory->destroy(ai); + memory->destroy(aj); + memory->destroy(ti); + memory->destroy(tj); + memory->destroy(rbf); + memory->destroy(rbfx); + memory->destroy(rbfy); + memory->destroy(rbfz); + memory->destroy(abf); + memory->destroy(abfx); + memory->destroy(abfy); + memory->destroy(abfz); + memory->destroy(bdd); + memory->destroy(pdd); + nijmax = Nij; + memory->create(rij, 3 * nijmax, "pair_pod:r_ij"); + memory->create(fij, 3 * nijmax, "pair_pod:f_ij"); + memory->create(idxi, nijmax, "pair_pod:idxi"); + memory->create(ai, nijmax, "pair_pod:ai"); + memory->create(aj, nijmax, "pair_pod:aj"); + memory->create(ti, nijmax, "pair_pod:ti"); + memory->create(tj, nijmax, "pair_pod:tj"); + memory->create(rbf, nijmax * nrbfmax, "pair_pod:rbf"); + memory->create(rbfx, nijmax * nrbfmax, "pair_pod:rbfx"); + memory->create(rbfy, nijmax * nrbfmax, "pair_pod:rbfy"); + memory->create(rbfz, nijmax * nrbfmax, "pair_pod:rbfz"); + int kmax = (K3 > ns) ? 
K3 : ns; + memory->create(abf, nijmax * kmax, "pair_pod:abf"); + memory->create(abfx, nijmax * kmax, "pair_pod:abfx"); + memory->create(abfy, nijmax * kmax, "pair_pod:abfy"); + memory->create(abfz, nijmax * kmax, "pair_pod:abfz"); + memory->create(bdd, 3 * nijmax * Mdesc, "pair_pod:bdd"); + memory->create(pdd, 3 * nijmax * nClusters, "pair_pod:pdd"); } +} - szd = MAX(szsnap, szd); - szd = nablockmax * (podptr->pod.nd1234) + szd; +void PairPOD::divideInterval(int *intervals, int N, int M) +{ + int intervalSize = N / M; // Basic size of each interval + int remainder = N % M; // Remainder to distribute + intervals[0] = 1; // Start of the first interval + for (int i = 1; i <= M; i++) { + intervals[i] = intervals[i - 1] + intervalSize + (remainder > 0 ? 1 : 0); + if (remainder > 0) { + remainder--; + } + } } -void PairPOD::lammpsNeighPairs(double **x, int **firstneigh, int *atomtypes, int *map, - int *numneigh, int gi) +int PairPOD::calculateNumberOfIntervals(int N, int intervalSize) { + if (intervalSize <= 0) { + printf("Interval size must be a positive integer.\n"); + return -1; + } - double rcutsq = podptr->pod.rcut * podptr->pod.rcut; + int M = N / intervalSize; + if (N % intervalSize != 0) { + M++; // Add an additional interval to cover the remainder + } - nij = 0; - int itype = map[atomtypes[gi]] + 1; - int m = numneigh[gi]; - typeai[0] = itype; - for (int l = 0; l < m; l++) { // loop over each atom around atom i - int gj = firstneigh[gi][l]; // atom j - double delx = x[gj][0] - x[gi][0]; // xj - xi - double dely = x[gj][1] - x[gi][1]; // xj - xi - double delz = x[gj][2] - x[gi][2]; // xj - xi - double rsq = delx * delx + dely * dely + delz * delz; - if (rsq < rcutsq && rsq > 1e-20) { - rij[nij * 3 + 0] = delx; - rij[nij * 3 + 1] = dely; - rij[nij * 3 + 2] = delz; - idxi[nij] = 0; - ai[nij] = gi; - aj[nij] = gj; - ti[nij] = itype; - tj[nij] = map[atomtypes[gj]] + 1; - nij++; + return M; +} + +void PairPOD::radialbasis(double *rbft, double *rbftx, double *rbfty, 
double *rbftz, double *rij, int Nij) +{ + // Loop over all neighboring atoms + for (int n=0; n0) && (Nij>0)) { + twobodydescderiv(d2, dd2, Ni, Nij); + } + + if ((nl3 > 0) && (Nij>1)) { + angularbasis(abftm, &abftm[K3], &abftm[2*K3], &abftm[3*K3], Nij); + radialangularsum2(Ni, Nij); + + threebodydesc(d3, Ni); + threebodydescderiv(dd3, Ni, Nij); + + if ((nl23>0) && (Nij>2)) { + fourbodydesc23(d23, d2, d3, Ni); + fourbodydescderiv23(dd23, d2, d3, dd2, dd3, idxi, Ni, Nij); + } + + if ((nl33>0) && (Nij>3)) { + crossdesc(d33, d3, d3, ind33l, ind33r, nl33, Ni); + crossdescderiv(dd33, d3, d3, dd3, dd3, ind33l, ind33r, idxi, nl33, Ni, Nij); + } + + if ((nl4 > 0) && (Nij>2)) { + if (K4 < K3) { + fourbodydesc(d4, Ni); + fourbodydescderiv(dd4, Ni, Nij); + } + + if ((nl34>0) && (Nij>4)) { + crossdesc(d34, d3, d4, ind34l, ind34r, nl34, Ni); + crossdescderiv(dd34, d3, d4, dd3, dd4, ind34l, ind34r, idxi, nl34, Ni, Nij); + } + + if ((nl44>0) && (Nij>5)) { + crossdesc(d44, d4, d4, ind44l, ind44r, nl44, Ni); + crossdescderiv(dd44, d4, d4, dd4, dd4, ind44l, ind44r, idxi, nl44, Ni, Nij); + } } } +} - numneighsum[0] = 0; - numneighsum[1] = nij; +void PairPOD::environment_descriptors(double *ei, double *cb, double *B, int Ni) +{ + double *P = &abf[0]; + double *cp = &abfx[0]; + double *pca = &abfy[0]; // Ni*nComponents + double *D = &abfz[0]; // Ni*nClusters + double *sumD = &rbf[0]; // Ni + + double *proj = &Proj[0]; + double *cent = &Centroids[0]; + double *cefs = &coefficients[0]; + int *tyai = &typeai[0]; + + int nCom = nComponents; + int nCls = nClusters; + int nDes = Mdesc; + int nCoeff = nCoeffPerElement; + + for (int idx=0; idx 1) { + double *cb = &sumU[0]; + environment_descriptors(ei, cb, bd, Ni); + + int N3 = 3*Nij; + for (int n=0; n( sz ), sizeof(double) * (2), 1, fp); + fwrite( reinterpret_cast( A ), sizeof(double) * (nrows*ncols), 1, fp); + fclose(fp); +} + +void PairPOD::saveintmatrix2binfile(std::string filename, int *A, int nrows, int ncols) +{ + FILE *fp = 
fopen(filename.c_str(), "wb"); + int sz[2]; + sz[0] = nrows; + sz[1] = ncols; + fwrite( reinterpret_cast( sz ), sizeof(int) * (2), 1, fp); + fwrite( reinterpret_cast( A ), sizeof(int) * (nrows*ncols), 1, fp); + fclose(fp); +} + +void PairPOD::savedatafordebugging() +{ + saveintmatrix2binfile("podtypeai.bin", typeai, ni, 1); + saveintmatrix2binfile("podnumij.bin", numij, ni+1, 1); + saveintmatrix2binfile("podai.bin", ai, nij, 1); + saveintmatrix2binfile("podaj.bin", aj, nij, 1); + saveintmatrix2binfile("podti.bin", ti, nij, 1); + saveintmatrix2binfile("podtj.bin", tj, nij, 1); + saveintmatrix2binfile("podidxi.bin", idxi, nij, 1); + savematrix2binfile("podrbf.bin", rbf, nrbfmax, nij); + savematrix2binfile("podrbfx.bin", rbfx, nrbfmax, nij); + savematrix2binfile("podrbfy.bin", rbfy, nrbfmax, nij); + savematrix2binfile("podrbfz.bin", rbfz, nrbfmax, nij); + int kmax = (K3 > ns) ? K3 : ns; + savematrix2binfile("podabf.bin", abf, kmax, nij); + savematrix2binfile("podabfx.bin", abfx, kmax, nij); + savematrix2binfile("podabfy.bin", abfy, kmax, nij); + savematrix2binfile("podabfz.bin", abfz, kmax, nij); + savematrix2binfile("podbdd.bin", bdd, 3*nij, Mdesc); + savematrix2binfile("podbd.bin", bd, ni, Mdesc); + savematrix2binfile("podsumU.bin", sumU, nelements * K3 * nrbfmax, ni); + savematrix2binfile("podrij.bin", rij, 3, nij); + savematrix2binfile("podfij.bin", fij, 3, nij); + savematrix2binfile("podei.bin", ei, ni, 1); + error->all(FLERR, "Save data and stop the run for debugging"); +} + diff --git a/src/ML-POD/pair_pod.h b/src/ML-POD/pair_pod.h index 62b6e99f3bd..90600e7e136 100644 --- a/src/ML-POD/pair_pod.h +++ b/src/ML-POD/pair_pod.h @@ -25,7 +25,7 @@ PairStyle(pod,PairPOD); namespace LAMMPS_NS { class PairPOD : public Pair { - public: +public: PairPOD(class LAMMPS *); ~PairPOD() override; void compute(int, int) override; @@ -36,42 +36,122 @@ class PairPOD : public Pair { double init_one(int, int) override; double memory_usage() override; - int dim; // typically 3 - - 
double *gd; // global linear descriptors - double *gdall; // global linear descriptors summed over all MPI ranks - double *podcoeff; // POD coefficients - double *newpodcoeff; // normalized POD coefficients - double *energycoeff; // energy coefficients - double *forcecoeff; // force coefficients - - void estimate_tempmemory(); - void free_tempmemory(); - void allocate_tempmemory(); - - void lammpsNeighPairs(double **x, int **firstneigh, int *atomtype, int *map, int *numneigh, - int i); - - protected: - int nablockmax; // maximum number of atoms per computation block - int nij; // number of atom pairs - int nijmax; // maximum number of atom pairs - int szd; // size of tmpmem - - class MLPOD *podptr; - - // temporary arrays for computation blocks - - double *tmpmem; // temporary memory - int *typeai; // types of atoms I only - int *numneighsum; // cumulative sum for an array of numbers of neighbors + void lammpsNeighborList(double *rij1, int *ai1, int *aj1, int *ti1, int *tj1, double **x, int **firstneigh, int *atomtype, int *map, int *numneigh, + double rcutsq, int i); + void NeighborCount(double **x, int **firstneigh, int *ilist, int *numneigh, double rcutsq, int i1, int i2); + void NeighborList(double **x, int **firstneigh, int *atomtype, int *map, int *ilist, int *numneigh, + double rcutsq, int i1, int i2); + void tallyenergy(double *ei, int istart, int Ni); + void tallystress(double *fij, double *rij, int *ai, int *aj, int nlocal, int N); + void tallyforce(double **force, double *fij, int *ai, int *aj, int N); + void divideInterval(int *intervals, int N, int M); + int calculateNumberOfIntervals(int N, int intervalSize); + int numberOfNeighbors(); + + void copy_data_from_pod_class(); + void radialbasis(double *rbft, double *rbftx, double *rbfty, double *rbftz, double *rij, int Nij); + void orthogonalradialbasis(int Nij); + void angularbasis(double *tm, double *tmu, double *tmv, double *tmw, int N); + void radialangularsum(int Ni, int Nij); + void 
radialangularsum2(int Ni, int Nij); + void twobodydescderiv(double *d2, double *dd2, int Ni, int Nij); + void threebodydesc(double *d3, int Ni); + void threebodydescderiv(double *dd3, int Ni, int Nij); + void extractsumU(int Ni); + void fourbodydesc(double *d4, int Ni); + void fourbodydescderiv(double *dd4, int Ni, int Nij); + void fourbodydesc23(double *d23, double *d2, double *d3, int Ni); + void fourbodydescderiv23(double* dd23, double *d2, double *d3, double *dd2, double *dd3, int *idxi, int Ni, int N); + void crossdesc(double *d12, double *d1, double *d2, int *ind1, int *ind2, int n12, int Ni); + void crossdescderiv(double *dd12, double *d1, double *d2, double *dd1, double *dd2, + int *ind1, int *ind2, int *idxi, int n12, int Ni, int Nij); + void crossdesc(double *d12, double *d1, double *d2, int *ind1, int *ind2, + int n12, int nd1, int nd2, int Ni); + void crossdescderiv(double *dd12, double *d1, double *d2, double *dd1, double *dd2, + int *ind1, int *ind2, int *idxi, int n12, int nd1, int nd2, int Ni, int Nij); + void blockatombase_descriptors(double *bd1, double *bdd1, int Ni, int Nij); + void environment_descriptors(double *ei, double *cb, double *B, int Ni); + void blockatomenergyforce(double *ei, double *fij, int Ni, int Nij); + + void savematrix2binfile(std::string filename, double *A, int nrows, int ncols); + void saveintmatrix2binfile(std::string filename, int *A, int nrows, int ncols); + void savedatafordebugging(); + +protected: + class EAPOD *fastpodptr; + virtual void allocate(); + void grow_atoms(int Ni); + void grow_pairs(int Nij); + + int atomBlockSize; // size of each atom block + int nAtomBlocks; // number of atoms blocks + int atomBlocks[101]; // atom blocks + + int ni; // total number of atoms i + int nij; // total number of pairs (i,j) + int nimax; // maximum number of atoms i + int nijmax; // maximum number of pairs (i,j) + + int nelements; // number of elements + int onebody; // one-body descriptors + int besseldegree; // degree of 
Bessel functions + int inversedegree; // degree of inverse functions + int nbesselpars; // number of Bessel parameters + int nCoeffPerElement; // number of coefficients per element = (nl1 + Mdesc*nClusters) + int ns; // number of snapshots for radial basis functions + int nl1, nl2, nl3, nl4, nl23, nl33, nl34, nl44, n23, n32, nl; // number of local descriptors + int nrbf2, nrbf3, nrbf4, nrbfmax; // number of radial basis functions + int nabf3, nabf4; // number of angular basis functions + int K3, K4, Q4; // number of monomials + + // environmental variables + int nClusters; // number of environment clusters + int nComponents; // number of principal components + int Mdesc; // number of base descriptors + + double rin; // inner cut-off radius + double rcut; // outer cut-off radius + double rmax; // rcut - rin + double *rij; // (xj - xi) for all pairs (I, J) - int *idxi; // storing linear indices for all pairs (I, J) + double *fij; // force for all pairs (I, J) + double *ei; // energy for each atom I + int *typeai; // types of atoms I only + int *numij; // number of pairs (I, J) for each atom I + int *idxi; // storing linear indices of atom I for all pairs (I, J) int *ai; // IDs of atoms I for all pairs (I, J) int *aj; // IDs of atoms J for all pairs (I, J) int *ti; // types of atoms I for all pairs (I, J) - int *tj; // types of atoms J for all pairs (I, J) - + int *tj; // types of atoms J for all pairs (I, J) + + double besselparams[3]; + double *Phi ; // eigenvectors matrix ns x ns + double *rbf; // radial basis functions nij x nrbfmax + double *rbfx; // x-derivatives of radial basis functions nij x nrbfmax + double *rbfy; // y-derivatives of radial basis functions nij x nrbfmax + double *rbfz; // z-derivatives of radial basis functions nij x nrbfmax + double *abf; // angular basis functions nij x K3 + double *abfx; // x-derivatives of angular basis functions nij x K3 + double *abfy; // y-derivatives of angular basis functions nij x K3 + double *abfz; // 
z-derivatives of angular basis functions nij x K3 + double *abftm ; // angular basis functions 4 x K3 + double *sumU; // sum of radial basis functions ni x K3 x nrbfmax x nelements + double *Proj; // PCA Projection matrix + double *Centroids; // centroids of the clusters + double *bd; // base descriptors ni x Mdesc + double *bdd; // base descriptors derivatives 3 x nij x Mdesc + double *pd; // environment probability descriptors ni x nClusters + double *pdd; // environment probability descriptors derivatives 3 x nij x nClusters + double *coefficients; // coefficients nCoeffPerElement x nelements + int *pq3, *pn3, *pc3; // arrays to compute 3-body angular basis functions + int *pa4, *pb4, *pc4; // arrays to compute 4-body angular basis functions + int *ind23; // n23 + int *ind32; // n32 + int *ind33l, *ind33r; // nl33 + int *ind34l, *ind34r; // nl34 + int *ind44l, *ind44r; // nl44 + int *elemindex; + bool peratom_warn; // print warning about missing per-atom energies or stresses };