diff --git a/src/KOKKOS/pair_pod_kokkos.cpp b/src/KOKKOS/pair_pod_kokkos.cpp
new file mode 100644
index 00000000000..31962f6a2ff
--- /dev/null
+++ b/src/KOKKOS/pair_pod_kokkos.cpp
@@ -0,0 +1,1785 @@
+// clang-format off
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "eapod.h" 
+#include "pair_pod_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "kokkos.h"
+#include "math_const.h"
+#include "memory_kokkos.h"
+#include "neighbor_kokkos.h"
+#include "neigh_request.h"
+
+#include <cstring>
+#include <chrono>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+using MathSpecial::powint;
+
+enum{FS,FS_SHIFTEDSCALED};
+
+/* ---------------------------------------------------------------------- */
+
+// Construct the Kokkos POD pair style: set the Kokkos flags and reset block/pair bookkeeping.
+template<class DeviceType>
+PairPODKokkos<DeviceType>::PairPODKokkos(LAMMPS *lmp) : PairPOD(lmp)
+{
+  // Kokkos plumbing
+  kokkosable = 1;
+  respa_enable = 0;
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  host_flag = (execution_space == Host);
+  datamask_read = datamask_modify = EMPTY_MASK;
+
+  // nothing is allocated yet: current and high-water counts start at zero
+  ni = nimax = 0;
+  nij = nijmax = 0;
+  nAtomBlocks = 0;
+  atomBlockSize = 2048;   // block size handed to calculateNumberOfIntervals() in compute()
+
+  // timing accumulators (printed by the destructor when timing == 1)
+  timing = 1;
+  for (int slot = 0; slot < 100; ++slot) comptime[slot] = 0;
+}
+
+/* ----------------------------------------------------------------------
+   check if allocated, since class can be destructed when incomplete
+------------------------------------------------------------------------- */
+
+// Report the accumulated timers and release the per-atom energy/virial arrays.
+template<class DeviceType>
+PairPODKokkos<DeviceType>::~PairPODKokkos()
+{
+  // copymode is set while compute() runs its kernels; an object destroyed in that state is
+  // presumably a transient copy, so it must neither print the timing table nor free the arrays
+  if (copymode) return;
+  if (timing == 1) {
+    printf("\n begin timing \n");
+    for (int i = 0; i < 30; i++) {
+      printf("%g  ", comptime[i]);          // three rows of ten timers (milliseconds)
+      if (i == 9 || i == 19) printf("\n");
+    }
+    printf("\n end timing \n");
+  }
+  memoryKK->destroy_kokkos(k_eatom,eatom);
+  memoryKK->destroy_kokkos(k_vatom,vatom);
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+// Validate settings and request the neighbor list; host builds defer to PairPOD::init_style().
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::init_style()
+{
+  if (host_flag) {
+    if (lmp->kokkos->nthreads > 1)
+      error->all(FLERR,"Pair style pod/kk can currently only run on a single "
+                         "CPU thread");
+
+    PairPOD::init_style();
+    return;
+  }
+
+  if (atom->tag_enable == 0) error->all(FLERR, "Pair style POD requires atom IDs");
+  if (force->newton_pair == 0) error->all(FLERR, "Pair style POD requires newton pair on");
+
+  // neighbor list request for KOKKOS
+  neighflag = lmp->kokkos->neighflag;
+
+  auto request = neighbor->add_request(this, NeighConst::REQ_FULL);
+  request->set_kokkos_host(std::is_same_v<DeviceType,LMPHostType> &&
+                           !std::is_same_v<DeviceType,LMPDeviceType>);
+  request->set_kokkos_device(std::is_same_v<DeviceType,LMPDeviceType>);
+  if (neighflag == FULL)
+    error->all(FLERR,"Must use half neighbor list style with pair pod/kk");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairPODKokkos<DeviceType>::init_one(int i, int j)
+{
+  const double cutoff = PairPOD::init_one(i,j);   // the base class supplies the pair cutoff
+  const double cutoffSq = cutoff*cutoff;
+  k_cutsq.h_view(j,i) = cutoffSq;                 // store the squared cutoff symmetrically
+  k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i);
+  k_cutsq.template modify<LMPHostType>();         // mark the host side as modified
+  return cutoff;
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+// Handle pair_coeff: let PairPOD process the arguments, copy its model, and size the per-type tables.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::coeff(int narg, char **arg)
+{
+  if (narg < 7) utils::missing_cmd_args(FLERR, "pair_coeff", error);
+
+  // the base class processes the arguments; its fastpodptr model is then copied into Kokkos views
+  PairPOD::coeff(narg,arg);
+  copy_from_pod_class(PairPOD::fastpodptr);
+
+  // per-type tables get ntypes + 1 entries (the map below is filled for types 1..ntypes)
+  const int ntypes = atom->ntypes;
+  const int tableSize = ntypes + 1;
+
+  MemKK::realloc_kokkos(d_map, "pair_pod:map", tableSize);
+  MemKK::realloc_kokkos(k_cutsq, "pair_pod:cutsq", tableSize, tableSize);
+  d_cutsq = k_cutsq.template view<DeviceType>();
+  MemKK::realloc_kokkos(k_scale, "pair_pod:scale", tableSize, tableSize);
+  d_scale = k_scale.template view<DeviceType>();
+
+  // upload the type map through a host mirror
+  auto mapHost = Kokkos::create_mirror_view(d_map);
+  for (int itype = 1; itype <= ntypes; ++itype) {
+    mapHost(itype) = map[itype];
+  }
+  Kokkos::deep_copy(d_map, mapHost);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::allocate()
+{
+  this->PairPOD::allocate();   // nothing Kokkos-specific is allocated here; defer to the base class
+}
+
+// Reduction functor: max-reduces d_numneigh over the atoms listed in d_ilist.
+template<class DeviceType>
+struct FindMaxNumNeighs {
+  using device_type = DeviceType;
+  NeighListKokkos<DeviceType> k_list;   // by-value copy of the neighbor list object
+
+  FindMaxNumNeighs(NeighListKokkos<DeviceType>* nl): k_list(*nl) {}
+  ~FindMaxNumNeighs() {k_list.copymode = 1;}   // flag the copy before its own destructor runs
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const int& ii, int& max_neighs) const {
+    const int count = k_list.d_numneigh[k_list.d_ilist[ii]];
+    max_neighs = (max_neighs < count) ? count : max_neighs;
+  }
+};
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  // Evaluate the POD potential for the atoms in the current neighbor list.
+  // The atoms are split into blocks; for each block the neighbors within rcut are gathered
+  // into flat pair arrays, blockatomenergyforce() is run, and energy/force/stress are tallied.
+  // NOTE: a host fallback (sync X/TYPE to Host, call PairPOD::compute(), mark F modified on
+  // Host, return) is disabled here, so host_flag runs also take the Kokkos path below,
+  // working on a copy of the host neighbor list.
+  
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+  ev_init(eflag,vflag,0);
+  
+  // reallocate per-atom arrays if necessary
+  if (eflag_atom) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.view<DeviceType>();
+  }
+  if (vflag_atom) {
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
+    d_vatom = k_vatom.view<DeviceType>();
+  }
+
+  copymode = 1;  // reset to 0 at the end of this function
+  int newton_pair = force->newton_pair;
+  if (newton_pair == false)
+    error->all(FLERR,"PairPODKokkos requires 'newton on'");
+  // make positions, forces and types current on the execution space and grab their views
+  atomKK->sync(execution_space,X_MASK|F_MASK|TYPE_MASK);    
+  x = atomKK->k_x.view<DeviceType>();    
+  f = atomKK->k_f.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  //k_cutsq.template sync<DeviceType>();
+  // set up d_numneigh / d_ilist / d_neighbors and find the largest neighbor count (maxneigh)
+  maxneigh = 0;
+  if (host_flag) {        // host build: copy the plain neighbor list arrays into views
+    inum = list->inum;    
+    d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("pair_pod:numneigh",inum);
+    for (int i=0; i<inum; i++) d_numneigh(i) = list->numneigh[i];    
+    d_ilist = typename ArrayTypes<DeviceType>::t_int_1d("pair_pod:ilist",inum);
+    for (int i=0; i<inum; i++) d_ilist(i) = list->ilist[i];    
+    // NOTE(review): counts are copied by position i but the kernels read d_numneigh(gi); this assumes ilist[i] == i - confirm
+    int maxn = 0;
+    for (int i=0; i<inum; i++) 
+      if (maxn < list->numneigh[i]) maxn = list->numneigh[i];    
+    MemoryKokkos::realloc_kokkos(d_neighbors,"neighlist:neighbors",inum,maxn);
+    for (int i=0; i<inum; i++) {
+      int gi = list->ilist[i];
+      int m = list->numneigh[gi];   
+      if (maxneigh<m) maxneigh = m;
+      for (int l = 0; l < m; l++) {           // loop over each atom around atom i        
+        d_neighbors(gi, l) = list->firstneigh[gi][l];  // row index is gi, while the view has inum rows
+      }
+    }    
+  }
+  else {  // device build: reuse the views of the Kokkos neighbor list directly
+    NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);  
+    d_numneigh = k_list->d_numneigh; 
+    d_neighbors = k_list->d_neighbors;
+    d_ilist = k_list->d_ilist;
+    inum = list->inum;
+    int maxneighs;  // receives the result of the Max reduction below
+    Kokkos::parallel_reduce("PairPODKokkos::find_max_neighs",inum, FindMaxNumNeighs<DeviceType>(k_list), Kokkos::Max<int>(maxneighs));
+    maxneigh = maxneighs;
+  }
+  // time points reused for the per-stage timings accumulated into comptime[] (milliseconds)
+  auto begin = std::chrono::high_resolution_clock::now(); 
+  auto end = std::chrono::high_resolution_clock::now();
+              
+  // determine the number of atom blocks and divide atoms into blocks
+  nAtomBlocks = calculateNumberOfIntervals(inum, atomBlockSize);
+  if (nAtomBlocks > 100) nAtomBlocks = 100; // NOTE(review): presumably the capacity limit of atomBlocks - confirm
+  divideInterval(atomBlocks, inum, nAtomBlocks); // atomBlocks[b] is the 1-based first atom of block b
+  // size the work arrays for the largest block and for nmax*maxneigh pairs
+  int nmax = 0;
+  for (int block=0; block<nAtomBlocks; block++) {    
+    int n = atomBlocks[block+1] - atomBlocks[block]; 
+    if (nmax < n) nmax = n;
+  }        
+  grow_atoms(nmax); 
+  grow_pairs(nmax*maxneigh); 
+  
+  rcutsq = rcut*rcut;  
+  for (int block=0; block<nAtomBlocks; block++) {
+    int gi1 = atomBlocks[block]-1;    // 0-based position of the block's first atom in the list
+    int gi2 = atomBlocks[block+1]-1;  // 0-based position one past the block's last atom
+    ni = gi2 - gi1; // total number of atoms in the current atom block
+    
+    begin = std::chrono::high_resolution_clock::now();               
+    // calculate the total number of pairs (i,j) in the current atom block        
+    nij = NeighborCount(numij, rcutsq, gi1, ni);                           
+    Kokkos::fence();
+    end = std::chrono::high_resolution_clock::now();   
+    comptime[1] += std::chrono::duration_cast<std::chrono::nanoseconds>(end-begin).count()/1e6;        
+                           
+    begin = std::chrono::high_resolution_clock::now();     
+    // obtain the neighbors within rcut     
+    NeighborList(rij, numij, typeai, idxi, ai, aj, ti, tj, rcutsq, gi1, ni);              
+    Kokkos::fence();
+    end = std::chrono::high_resolution_clock::now();   
+    comptime[2] += std::chrono::duration_cast<std::chrono::nanoseconds>(end-begin).count()/1e6;        
+        
+    // compute atomic energy and force for the current atom block
+    begin = std::chrono::high_resolution_clock::now();       
+    blockatomenergyforce(ni, nij);
+    Kokkos::fence();
+    end = std::chrono::high_resolution_clock::now();   
+    comptime[0] += std::chrono::duration_cast<std::chrono::nanoseconds>(end-begin).count()/1e6;        
+
+    // tally atomic energy to global energy
+    tallyenergy(gi1, ni);
+        
+    // tally atomic force to global force
+    tallyforce(nij);
+    
+    // tally atomic stress
+    if (vflag) {
+      tallystress(nij);
+    }    
+    //savedatafordebugging();
+  }    
+    
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+
+  if (eflag_atom) {  // publish the per-atom energies to the host side
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {  // publish the per-atom virials to the host side
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  atomKK->modified(execution_space,F_MASK);
+
+  copymode = 0;
+}
+
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::copy_from_pod_class(EAPOD *podptr)
+{
+  const EAPOD &pod = *podptr;   // source model: scalars are copied by value, arrays through host mirrors
+  nelements = pod.nelements;               // number of elements
+  onebody = pod.onebody;                   // one-body descriptors
+  besseldegree = pod.besseldegree;         // degree of Bessel functions
+  inversedegree = pod.inversedegree;       // degree of inverse functions
+  nbesselpars = pod.nbesselpars;           // number of Bessel parameters
+  nCoeffPerElement = pod.nCoeffPerElement; // coefficients per element = (nl1 + Mdesc*nClusters)
+  ns = pod.ns;                             // number of snapshots for radial basis functions
+  nl1 = pod.nl1;                           // number of one-body descriptors
+  nl2 = pod.nl2;                           // number of two-body descriptors
+  nl3 = pod.nl3;                           // number of three-body descriptors
+  nl4 = pod.nl4;                           // number of four-body descriptors
+  nl23 = pod.nl23;                         // number of two-body x three-body descriptors
+  nl33 = pod.nl33;                         // number of three-body x three-body descriptors
+  nl34 = pod.nl34;                         // number of three-body x four-body descriptors
+  nl44 = pod.nl44;                         // number of four-body x four-body descriptors
+  n23 = pod.n23;                           // length of ind23
+  n32 = pod.n32;                           // length of ind32
+  nl = pod.nl;                             // number of local descriptors
+  nrbf2 = pod.nrbf2;
+  nrbf3 = pod.nrbf3;
+  nrbf4 = pod.nrbf4;
+  nrbfmax = pod.nrbfmax;                   // number of radial basis functions
+  nabf3 = pod.nabf3;                       // number of three-body angular basis functions
+  nabf4 = pod.nabf4;                       // number of four-body angular basis functions
+  K3 = pod.K3;                             // number of three-body monomials
+  K4 = pod.K4;                             // number of four-body monomials
+  Q4 = pod.Q4;                             // number of four-body monomial coefficients
+  nClusters = pod.nClusters;               // number of environment clusters
+  nComponents = pod.nComponents;           // number of principal components
+  Mdesc = pod.Mdesc;                       // number of base descriptors
+  // inner radius, cutoff radius and the width between them
+  rin = pod.rin;
+  rcut = pod.rcut;
+  rmax = rcut - rin;
+
+  // three Bessel parameters
+  MemKK::realloc_kokkos(besselparams, "pair_pod:besselparams", 3);
+  auto besselHost = Kokkos::create_mirror_view(besselparams);
+  for (int k = 0; k < 3; ++k) besselHost(k) = pod.besselparams[k];
+  Kokkos::deep_copy(besselparams, besselHost);
+
+  MemKK::realloc_kokkos(elemindex, "pair_pod:elemindex", nelements*nelements);
+  auto elemindexHost = Kokkos::create_mirror_view(elemindex);
+  for (int k = 0; k < nelements*nelements; ++k) elemindexHost(k) = pod.elemindex[k];
+  Kokkos::deep_copy(elemindex, elemindexHost);
+
+  MemKK::realloc_kokkos(Phi, "pair_pod:Phi", ns*ns);
+  auto phiHost = Kokkos::create_mirror_view(Phi);
+  for (int k = 0; k < ns*ns; ++k) phiHost(k) = pod.Phi[k];
+  Kokkos::deep_copy(Phi, phiHost);
+
+  MemKK::realloc_kokkos(coefficients, "pair_pod:coefficients", nCoeffPerElement * nelements);
+  auto coeffHost = Kokkos::create_mirror_view(coefficients);
+  for (int k = 0; k < nCoeffPerElement * nelements; ++k) coeffHost(k) = pod.coeff[k];
+  Kokkos::deep_copy(coefficients, coeffHost);
+
+  if (nClusters > 1) {   // projection matrix and centroids are only copied for multi-cluster models
+    MemKK::realloc_kokkos(Proj, "pair_pod:Proj",  Mdesc * nComponents * nelements);
+    auto projHost = Kokkos::create_mirror_view(Proj);
+    for (int k = 0; k < Mdesc * nComponents * nelements; ++k) projHost(k) = pod.Proj[k];
+    Kokkos::deep_copy(Proj, projHost);
+
+    MemKK::realloc_kokkos(Centroids, "pair_pod:Centroids",  nClusters * nComponents * nelements);
+    auto centroidsHost = Kokkos::create_mirror_view(Centroids);
+    for (int k = 0; k < nClusters * nComponents * nelements; ++k) centroidsHost(k) = pod.Centroids[k];
+    Kokkos::deep_copy(Centroids, centroidsHost);
+  }
+
+  MemKK::realloc_kokkos(pn3, "pair_pod:pn3", nabf3+1); // number of monomials for each degree
+  MemKK::realloc_kokkos(pq3, "pair_pod:pq3", K3*2);    // needed for the recursive computation of the angular basis functions
+  MemKK::realloc_kokkos(pc3, "pair_pod:pc3", K3);      // needed for the computation of the three-body descriptors
+  MemKK::realloc_kokkos(pa4, "pair_pod:pa4", nabf4+1); // a subset of the array {0, 1, 4, 10, 19, 29, 47, 74, 89, 119, 155, 209, 230, 275, 335, 425, 533, 561, 624, 714, 849, 949, 1129, 1345}
+  MemKK::realloc_kokkos(pb4, "pair_pod:pb4", Q4*3);    // indices of the monomials needed for the computation of the angular basis functions
+  MemKK::realloc_kokkos(pc4, "pair_pod:pc4", Q4);      // monomial coefficients needed for the computation of the four-body descriptors
+
+  auto pn3Host = Kokkos::create_mirror_view(pn3);
+  for (int k = 0; k < nabf3+1; ++k) pn3Host(k) = pod.pn3[k];
+  Kokkos::deep_copy(pn3, pn3Host);
+
+  auto pq3Host = Kokkos::create_mirror_view(pq3);
+  for (int k = 0; k < K3*2; ++k) pq3Host(k) = pod.pq3[k];
+  Kokkos::deep_copy(pq3, pq3Host);
+
+  auto pc3Host = Kokkos::create_mirror_view(pc3);
+  for (int k = 0; k < K3; ++k) pc3Host(k) = pod.pc3[k];
+  Kokkos::deep_copy(pc3, pc3Host);
+
+  auto pa4Host = Kokkos::create_mirror_view(pa4);
+  for (int k = 0; k < nabf4+1; ++k) pa4Host(k) = pod.pa4[k];
+  Kokkos::deep_copy(pa4, pa4Host);
+
+  auto pb4Host = Kokkos::create_mirror_view(pb4);
+  for (int k = 0; k < Q4*3; ++k) pb4Host(k) = pod.pb4[k];
+  Kokkos::deep_copy(pb4, pb4Host);
+
+  auto pc4Host = Kokkos::create_mirror_view(pc4);
+  for (int k = 0; k < Q4; ++k) pc4Host(k) = pod.pc4[k];
+  Kokkos::deep_copy(pc4, pc4Host);
+
+  MemKK::realloc_kokkos(ind23, "pair_pod:ind23", n23);
+  MemKK::realloc_kokkos(ind32, "pair_pod:ind32", n32);
+  MemKK::realloc_kokkos(ind33l, "pair_pod:ind33l", nl33);
+  MemKK::realloc_kokkos(ind33r, "pair_pod:ind33r", nl33);
+  MemKK::realloc_kokkos(ind34l, "pair_pod:ind34l", nl34);
+  MemKK::realloc_kokkos(ind34r, "pair_pod:ind34r", nl34);
+  MemKK::realloc_kokkos(ind44l, "pair_pod:ind44l", nl44);
+  MemKK::realloc_kokkos(ind44r, "pair_pod:ind44r", nl44);
+
+  auto ind23Host = Kokkos::create_mirror_view(ind23);
+  for (int k = 0; k < n23; ++k) ind23Host(k) = pod.ind23[k];
+  Kokkos::deep_copy(ind23, ind23Host);
+
+  auto ind32Host = Kokkos::create_mirror_view(ind32);
+  for (int k = 0; k < n32; ++k) ind32Host(k) = pod.ind32[k];
+  Kokkos::deep_copy(ind32, ind32Host);
+
+  auto ind33lHost = Kokkos::create_mirror_view(ind33l);
+  for (int k = 0; k < nl33; ++k) ind33lHost(k) = pod.ind33l[k];
+  Kokkos::deep_copy(ind33l, ind33lHost);
+
+  auto ind33rHost = Kokkos::create_mirror_view(ind33r);
+  for (int k = 0; k < nl33; ++k) ind33rHost(k) = pod.ind33r[k];
+  Kokkos::deep_copy(ind33r, ind33rHost);
+
+  auto ind34lHost = Kokkos::create_mirror_view(ind34l);
+  for (int k = 0; k < nl34; ++k) ind34lHost(k) = pod.ind34l[k];
+  Kokkos::deep_copy(ind34l, ind34lHost);
+
+  auto ind34rHost = Kokkos::create_mirror_view(ind34r);
+  for (int k = 0; k < nl34; ++k) ind34rHost(k) = pod.ind34r[k];
+  Kokkos::deep_copy(ind34r, ind34rHost);
+
+  auto ind44lHost = Kokkos::create_mirror_view(ind44l);
+  for (int k = 0; k < nl44; ++k) ind44lHost(k) = pod.ind44l[k];
+  Kokkos::deep_copy(ind44l, ind44lHost);
+
+  auto ind44rHost = Kokkos::create_mirror_view(ind44r);
+  for (int k = 0; k < nl44; ++k) ind44rHost(k) = pod.ind44r[k];
+  Kokkos::deep_copy(ind44r, ind44rHost);
+}
+
+// Split [1, N] into M contiguous 1-based blocks; block b spans intervals[b] .. intervals[b+1]-1.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::divideInterval(int *intervals, int N, int M)
+{
+  intervals[0] = 1;              // the first block always starts at index 1
+  if (M <= 0) return;            // no blocks requested (e.g. N == 0): N / M would divide by zero
+  const int baseSize = N / M;    // size shared by every block
+  int extra = N % M;             // leftover items, one each for the leading blocks
+  for (int b = 1; b <= M; b++) {
+    intervals[b] = intervals[b - 1] + baseSize + (extra > 0 ? 1 : 0);
+    if (extra > 0) extra--;
+  }
+}
+
+// Number of blocks of at most intervalSize items needed to cover N items (N / intervalSize,
+// plus one if there is a remainder); a non-positive intervalSize is reported and yields -1.
+template<class DeviceType>
+int PairPODKokkos<DeviceType>::calculateNumberOfIntervals(int N, int intervalSize)
+{
+  if (intervalSize > 0) {
+    const int fullBlocks = N / intervalSize;
+    const bool hasPartialBlock = (N % intervalSize) != 0;
+    return hasPartialBlock ? fullBlocks + 1 : fullBlocks;
+  }
+
+  // invalid block size: print the diagnostic and hand back the error code
+  printf("Interval size must be a positive integer.\n");
+  return -1;
+}
+
+// Enlarge the per-atom work arrays so they can hold a block of Ni atoms (never shrinks).
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::grow_atoms(int Ni)
+{
+  if (Ni <= nimax) return;   // current capacity already suffices
+  nimax = Ni;
+  // sumU is sized to also hold nimax*Mdesc values when there is more than one cluster
+  int sumUSize = nimax * nelements * K3 * nrbfmax;
+  if (nClusters>1 && sumUSize < nimax*Mdesc) sumUSize = nimax*Mdesc;
+  MemKK::realloc_kokkos(numij, "pair_pod:numij", nimax+1);   // pair offsets: one slot more than atoms
+  MemKK::realloc_kokkos(ei, "pair_pod:ei", nimax);
+  MemKK::realloc_kokkos(typeai, "pair_pod:typeai", nimax);
+  MemKK::realloc_kokkos(sumU, "pair_pod:sumU", sumUSize);
+  MemKK::realloc_kokkos(bd, "pair_pod:bd", nimax * Mdesc);
+  Kokkos::deep_copy(numij, 0);   // NeighborCount never writes numij(0), so it has to start at zero
+}
+
+// Enlarge the per-pair work arrays so they can hold Nij neighbor pairs (never shrinks).
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::grow_pairs(int Nij)
+{
+  if (Nij <= nijmax) return;   // current capacity already suffices
+  nijmax = Nij;
+  const int angularSize = nijmax * ((K3 > ns) ? K3 : ns);   // angular arrays hold max(K3, ns) values per pair
+  MemKK::realloc_kokkos(rij, "pair_pod:r_ij", 3 * nijmax);
+  MemKK::realloc_kokkos(fij, "pair_pod:f_ij", 3 * nijmax);
+  MemKK::realloc_kokkos(idxi, "pair_pod:idxi", nijmax);
+  MemKK::realloc_kokkos(ai, "pair_pod:ai", nijmax);
+  MemKK::realloc_kokkos(aj, "pair_pod:aj", nijmax);
+  MemKK::realloc_kokkos(ti, "pair_pod:ti", nijmax);
+  MemKK::realloc_kokkos(tj, "pair_pod:tj", nijmax);
+  MemKK::realloc_kokkos(rbf, "pair_pod:rbf", nijmax * nrbfmax);
+  MemKK::realloc_kokkos(rbfx, "pair_pod:rbfx", nijmax * nrbfmax);
+  MemKK::realloc_kokkos(rbfy, "pair_pod:rbfy", nijmax * nrbfmax);
+  MemKK::realloc_kokkos(rbfz, "pair_pod:rbfz", nijmax * nrbfmax);
+  MemKK::realloc_kokkos(abf, "pair_pod:abf", angularSize);
+  MemKK::realloc_kokkos(abfx, "pair_pod:abfx", angularSize);
+  MemKK::realloc_kokkos(abfy, "pair_pod:abfy", angularSize);
+  MemKK::realloc_kokkos(abfz, "pair_pod:abfz", angularSize);
+  MemKK::realloc_kokkos(bdd, "pair_pod:bdd", 3 * nijmax * Mdesc);
+}
+
+template<class DeviceType>
+int PairPODKokkos<DeviceType>::NeighborCount(t_pod_1i l_numij, double l_rcutsq, int gi1, int Ni)
+{
+  // Count the neighbors with rsq < l_rcutsq for the Ni atoms starting at list position gi1; returns the total.
+  auto l_ilist = d_ilist;  // local shadow views of the members, for capture by KOKKOS_LAMBDA
+  auto l_x = x;
+  auto l_numneigh = d_numneigh;
+  auto l_neighbors = d_neighbors;
+  // NOTE(review): TeamPolicy<> names no execution space, unlike the DeviceType-based views - confirm this is intended
+  // compute number of pairs for each atom i: one team per atom, threads reduce over its neighbors
+  Kokkos::parallel_for("NeighborCount", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) {
+    int i = team.league_rank();
+    int gi = l_ilist(gi1 + i);
+    double xi0 = l_x(gi, 0);    
+    double xi1 = l_x(gi, 1);    
+    double xi2 = l_x(gi, 2);        
+    int jnum = l_numneigh(gi);   
+    int ncount = 0;
+    Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,jnum),
+        [&] (const int jj, int& count) {
+      int j = l_neighbors(gi,jj);
+      j &= NEIGHMASK;
+      double delx = xi0 - l_x(j,0);
+      double dely = xi1 - l_x(j,1);
+      double delz = xi2 - l_x(j,2);
+      double rsq = delx*delx + dely*dely + delz*delz;
+      if (rsq < l_rcutsq) count++;    
+    },ncount);
+    // the count of atom i goes into slot i+1; slot 0 is not written here
+    l_numij(i+1) = ncount;    
+  });
+      
+  // cumulative sum over slots 1..Ni: afterwards l_numij(i) is the sum of the counts stored in slots 1..i
+  Kokkos::parallel_scan("InclusivePrefixSum", Ni + 1, KOKKOS_LAMBDA(int i, int& update, const bool final) {
+    if (i > 0) { 
+      update += l_numij(i);
+      if (final) {
+        l_numij(i) = update;
+      }
+    }
+  });      
+  // copy the last entry, l_numij(Ni), back to the host: it is the total pair count of the block
+  int total_neighbors = 0;
+  Kokkos::deep_copy(Kokkos::View<int,Kokkos::HostSpace>(&total_neighbors), Kokkos::subview(l_numij, Ni));  
+  
+  return total_neighbors;
+}
+
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::NeighborList(t_pod_1d l_rij, t_pod_1i l_numij,  t_pod_1i l_typeai, 
+  t_pod_1i l_idxi, t_pod_1i l_ai, t_pod_1i l_aj, t_pod_1i l_ti, t_pod_1i l_tj, double l_rcutsq, int gi1, int Ni)
+{  
+  // Fill the flat per-pair arrays for the Ni atoms starting at list position gi1, using the offsets in l_numij.
+  auto l_ilist = d_ilist;  // local shadow views of the members, for capture by KOKKOS_LAMBDA
+  auto l_x = x;
+  auto l_numneigh = d_numneigh;
+  auto l_neighbors = d_neighbors;
+  auto l_map = d_map; 
+  auto l_type = type;     
+  // one team per atom; a scan over its neighbors gives consecutive slots to those with rsq < l_rcutsq
+  Kokkos::parallel_for("NeighborList", Kokkos::TeamPolicy<>(Ni, Kokkos::AUTO), KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) {
+    int i = team.league_rank();
+    int gi = l_ilist(gi1 + i);
+    double xi0 = l_x(gi, 0);    
+    double xi1 = l_x(gi, 1);    
+    double xi2 = l_x(gi, 2);        
+    int itype = l_map(l_type(gi)) + 1; //map[atomtypes[gi]] + 1;
+    l_typeai(i) = itype;        
+    int jnum = l_numneigh(gi);   
+    int nij0 = l_numij(i);    // index of the first pair slot of atom i
+    Kokkos::parallel_scan(Kokkos::TeamThreadRange(team,jnum),
+        [&] (const int jj, int& offset, bool final) {
+      int gj = l_neighbors(gi,jj);
+      gj &= NEIGHMASK;
+      double delx = l_x(gj,0) - xi0;  // stored vector is x(gj) - x(gi)
+      double dely = l_x(gj,1) - xi1;
+      double delz = l_x(gj,2) - xi2;
+      double rsq = delx*delx + dely*dely + delz*delz;
+      if (rsq >= l_rcutsq) return;  // rejected neighbors do not advance the offset
+      if (final) {  // write only on the final pass, when offset is used as the slot within atom i
+        int nij1 = nij0 + offset;
+        l_rij(nij1 * 3 + 0) = delx;
+        l_rij(nij1 * 3 + 1) = dely;
+        l_rij(nij1 * 3 + 2) = delz;
+        l_idxi(nij1) = i;
+        l_ai(nij1) = gi;
+        l_aj(nij1) = gj;
+        l_ti(nij1) = itype;
+        l_tj(nij1) = l_map(l_type(gj)) + 1; //map[atomtypes[gj)) + 1;      
+      }
+      offset++;
+    });    
+  });    
+}
+
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod_1d rbfty, t_pod_1d rbftz, 
+    t_pod_1d l_rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree, 
+    int l_inversedegree, int l_nbesselpars, int l_ns,  int Nij) 
+{ // Fill rbft and its x/y/z derivatives for each of the Nij pairs; the value for pair n, function p is at n + Nij*p.
+  Kokkos::parallel_for("ComputeRadialBasis", Nij, KOKKOS_LAMBDA(int n) {
+    double xij1 = l_rij(0+3*n);
+    double xij2 = l_rij(1+3*n);
+    double xij3 = l_rij(2+3*n);
+    // pair distance and the components of the unit vector along the pair
+    double dij = sqrt(xij1*xij1 + xij2*xij2 + xij3*xij3);
+    double dr1 = xij1/dij;
+    double dr2 = xij2/dij;
+    double dr3 = xij3/dij;
+    // distance measured from the inner radius, and its value scaled by l_rmax
+    double r = dij - l_rin;
+    double y = r/l_rmax;
+    double y2 = y*y;
+    // intermediates of the cutoff function: y3 = 1 - y^3, y4 = y3^2 + 1e-6, y7 = y4^(3/2)
+    double y3 = 1.0 - y2*y;
+    double y4 = y3*y3 + 1e-6;
+    double y5 = sqrt(y4);
+    double y6 = exp(-1.0/y5);
+    double y7 = y4*sqrt(y4);
+
+    // Calculate the final cutoff function as y6/exp(-1)
+    double fcut = y6/exp(-1.0);
+
+    // Calculate the derivative of the final cutoff function
+    double dfcut = ((3.0/(l_rmax*exp(-1.0)))*(y2)*y6*(y*y2 - 1.0))/y7;
+
+    // Calculate fcut/r, fcut/r^2, and dfcut/r
+    double f1 = fcut/r;
+    double f2 = f1/r;
+    double df1 = dfcut/r;
+    // scaled coordinate x0 = (1 - exp(-alpha*r/rmax))/(1 - exp(-alpha)) and its derivative dx0, first parameter
+    double alpha = l_besselparams(0);
+    double t1 = (1.0-exp(-alpha));
+    double t2 = exp(-alpha*r/l_rmax);
+    double x0 =  (1.0 - t2)/t1;
+    double dx0 = (alpha/l_rmax)*t2/t1;
+    // same for the second parameter
+    alpha = l_besselparams(1);
+    t1 = (1.0-exp(-alpha));
+    t2 = exp(-alpha*r/l_rmax);
+    double x1 =  (1.0 - t2)/t1;
+    double dx1 = (alpha/l_rmax)*t2/t1;
+    // same for the third parameter
+    alpha = l_besselparams(2);
+    t1 = (1.0-exp(-alpha));
+    t2 = exp(-alpha*r/l_rmax);
+    double x2 =  (1.0 - t2)/t1;
+    double dx2 = (alpha/l_rmax)*t2/t1;
+    // NOTE(review): three parameter sets are hard-coded here; presumably l_nbesselpars == 3 - confirm
+    for (int i=0; i<l_besseldegree; i++) {
+      double a = (i+1)*MY_PI;
+      double b = (sqrt(2.0/(l_rmax))/(i+1));
+      double af1 = a*f1;
+      // first parameter: function index i
+      double sinax = sin(a*x0);
+      int idxni = n + Nij*i;
+      rbft(idxni) = b*f1*sinax;
+      double drbftdr = b*(df1*sinax - f2*sinax + af1*cos(a*x0)*dx0);
+      rbftx(idxni) = drbftdr*dr1;
+      rbfty(idxni) = drbftdr*dr2;
+      rbftz(idxni) = drbftdr*dr3;
+      // second parameter: function index i + l_besseldegree
+      sinax = sin(a*x1);
+      idxni = n + Nij*i + Nij*l_besseldegree*1;
+      rbft(idxni) = b*f1*sinax;
+      drbftdr = b*(df1*sinax - f2*sinax + af1*cos(a*x1)*dx1);
+      rbftx(idxni) = drbftdr*dr1;
+      rbfty(idxni) = drbftdr*dr2;
+      rbftz(idxni) = drbftdr*dr3;
+      // third parameter: function index i + 2*l_besseldegree
+      sinax = sin(a*x2);
+      idxni = n + Nij*i + Nij*l_besseldegree*2;
+      rbft(idxni) = b*f1*sinax;
+      drbftdr = b*(df1*sinax - f2*sinax + af1*cos(a*x2)*dx2);
+      rbftx(idxni) = drbftdr*dr1;
+      rbfty(idxni) = drbftdr*dr2;
+      rbftz(idxni) = drbftdr*dr3;
+    }
+    // inverse functions fcut/dij^(i+1), stored after the l_besseldegree*l_nbesselpars functions above
+    // Calculate fcut/dij
+    f1 = fcut/dij;
+    double a = 1.0;
+    for (int i=0; i<l_inversedegree; i++) {
+      int p = l_besseldegree*l_nbesselpars + i;
+      int idxni = n + Nij*p;      
+      a = a*dij;  // a = dij^(i+1)
+
+      rbft(idxni) = fcut/a;
+      // radial derivative of fcut/a: (dfcut - (i+1)*fcut/dij)/a
+      double drbftdr = (dfcut - (i+1.0)*f1)/a;
+      rbftx(idxni) = drbftdr*dr1;
+      rbfty(idxni) = drbftdr*dr2;
+      rbftz(idxni) = drbftdr*dr3;
+    }
+  });
+}
+
+// Dense product c = a * b on flat arrays with entry (row, col) stored at row + nrows*col:
+// a is r1 x c1, b is c1 x c2 and c is r1 x c2; one work item per entry of c.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2)
+{
+  Kokkos::parallel_for("MatrixMultiply", r1 * c2, KOKKOS_LAMBDA(int idx) {
+    const int row = idx % r1;
+    const int col = idx / r1;
+    double dot = 0.0;
+    for (int k = 0; k < c1; ++k) dot += a(row + r1*k) * b(k + c1*col);
+    c(row + r1*col) = dot;
+  });
+}
+
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz,
+        t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N) 
+{ // Fill the l_K3 angular basis values and their x/y/z derivatives for each of the N pairs; pair j, function n is at j + N*n.
+  Kokkos::parallel_for("AngularBasis", N, KOKKOS_LAMBDA(int j) {
+    double x = l_rij(j*3 + 0);
+    double y = l_rij(j*3 + 1);
+    double z = l_rij(j*3 + 2);
+
+    double xx = x*x;
+    double yy = y*y;
+    double zz = z*z;
+    double xy = x*y;
+    double xz = x*z;
+    double yz = y*z;
+    // (u, v, w) is the pair vector divided by its length
+    double dij = sqrt(xx + yy + zz);
+    const double u = x / dij;
+    const double v = y / dij;
+    const double w = z / dij;
+    // partial derivatives of (u, v, w) with respect to (x, y, z)
+    double dij3 = dij * dij * dij;
+    const double dudx = (yy + zz) / dij3;
+    const double dudy = -xy / dij3;
+    const double dudz = -xz / dij3;
+
+    const double dvdx = -xy / dij3;
+    const double dvdy = (xx + zz) / dij3;
+    const double dvdz = -yz / dij3;
+
+    const double dwdx = -xz / dij3;
+    const double dwdy = -yz / dij3;
+    const double dwdz = (xx + yy) / dij3;
+    // function 0 is the constant 1 with zero derivatives
+    int idxa = j;
+    l_abf(idxa) = 1.0;
+    l_abfx(idxa) = 0.0;
+    l_abfy(idxa) = 0.0;
+    l_abfz(idxa) = 0.0;    
+    // in this loop l_abfx/l_abfy/l_abfz hold derivatives with respect to u/v/w, not yet x/y/z
+    // Loop over all angular basis functions
+    for (int n=1; n<l_K3; n++) {
+      // Get indices for angular basis function
+      int d = l_pq3(n + l_K3);      // which of u (1), v (2), w (3) multiplies the parent function
+      int mj = j + N*(l_pq3(n)-1);  // l_pq3(n) is the 1-based index of the parent function
+      idxa = j + N*n;
+      // Calculate angular basis function and its derivatives using recursion relation
+      if (d==1) {
+        l_abf(idxa) = l_abf(mj)*u;
+        l_abfx(idxa) = l_abfx(mj)*u + l_abf(mj);
+        l_abfy(idxa) = l_abfy(mj)*u;
+        l_abfz(idxa) = l_abfz(mj)*u;
+      }
+      else if (d==2) {
+        l_abf(idxa) = l_abf(mj)*v;
+        l_abfx(idxa) = l_abfx(mj)*v;
+        l_abfy(idxa) = l_abfy(mj)*v + l_abf(mj);
+        l_abfz(idxa) = l_abfz(mj)*v;
+      }
+      else if (d==3) {
+        l_abf(idxa) = l_abf(mj)*w;
+        l_abfx(idxa) = l_abfx(mj)*w;
+        l_abfy(idxa) = l_abfy(mj)*w;
+        l_abfz(idxa) = l_abfz(mj)*w + l_abf(mj);
+      }            
+    }
+    for (int n=1; n<l_K3; n++) {      // chain rule: convert the u/v/w derivatives into x/y/z derivatives
+      idxa = j + N*n;
+      x = l_abfx(idxa);  // x, y, z are reused here as temporaries for the u/v/w derivatives
+      y = l_abfy(idxa);
+      z = l_abfz(idxa);
+      l_abfx(idxa) = x*dudx + y*dvdx + z*dwdx;
+      l_abfy(idxa) = x*dudy + y*dvdy + z*dwdy;
+      l_abfz(idxa) = x*dudz + y*dvdz + z*dwdz;      
+    }
+  });
+}
+
+// For each atom i, radial function m, angular function k: l_sumU = sum over i's pairs of rbf*abf, one sum per neighbor element.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf, t_pod_1d l_abf, t_pod_1i l_tj, 
+    t_pod_1i l_numij, const int l_nelements, const int l_nrbf3, const int l_K3, const int Ni, const int Nij) 
+{  
+  int totalIterations = l_nrbf3 * l_K3 * Ni;  // one work item per (k, m, i)
+  if (l_nelements==1) {
+    Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) {
+      int k = idx % l_K3;
+      int temp = idx / l_K3;
+      int m = temp % l_nrbf3;
+      int i = temp / l_nrbf3;
+      int kmi = k + l_K3*m + l_K3*l_nrbf3*i;
+      int start = l_numij(i);          // first pair of atom i
+      int nj = l_numij(i+1)-start;     // number of pairs of atom i
+      double sum=0.0;
+      for (int j=0; j<nj; j++) {
+        int n = start + j;
+        sum += l_rbf(n + Nij * m) * l_abf(n + Nij * k);    
+      }    
+      l_sumU(kmi) = sum;
+    });    
+  }
+  else {
+    if (l_nelements > 10) error->one(FLERR,"Pair style pod/kk supports at most 10 elements"); // tm[] below has 10 slots
+    Kokkos::parallel_for("RadialAngularSum", totalIterations, KOKKOS_LAMBDA(int idx) {
+      int k = idx % l_K3;
+      int temp = idx / l_K3;
+      int m = temp % l_nrbf3;
+      int i = temp / l_nrbf3;
+      int kmi = l_nelements*k + l_nelements*l_K3*m + l_nelements*l_K3*l_nrbf3*i;
+      int start = l_numij(i);
+      int nj = l_numij(i+1)-start;    
+      double tm[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};  // per-element partial sums
+      for (int j=0; j<nj; j++) {
+        int n = start + j;
+        int ia = n + Nij * k;
+        int ib = n + Nij * m;            
+        int tn = l_tj(n) - 1; // offset the atom type by 1, since atomtype is 1-based
+        tm[tn] += l_rbf(ib) * l_abf(ia);    
+      }        
+      for (int j=0; j<l_nelements; j++) l_sumU(j + kmi) = tm[j];
+    });
+  }
+}
+
+// Two-body descriptors and their derivatives.  One work item handles one
+// (pair, radial function) combination: the radial value is atomically added
+// to the descriptor of the pair's central atom (several pairs share an atom),
+// while the x/y/z derivatives are copied into the per-pair derivative array.
+// Both outputs are additionally indexed by the neighbor's element type.
+// l_nrbfmax is accepted for interface compatibility and not used here.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::twobodydescderiv(t_pod_1d d2, t_pod_1d dd2, t_pod_1d l_rbf, t_pod_1d l_rbfx,
+      t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbfmax, int l_nrbf2, const int Ni, const int Nij)
+{
+  const int nWork = l_nrbf2 * Nij;
+  Kokkos::parallel_for("TwoBodyDescDeriv", nWork, KOKKOS_LAMBDA(int idx) {
+    const int pairId = idx / l_nrbf2;      // which pair
+    const int radId = idx % l_nrbf2;       // which radial basis function
+    const int elem = l_tj(pairId) - 1;     // 0-based element type of the neighbor
+    const int src = pairId + Nij * radId;  // location in the rbf arrays
+    const int dst = 3 * (pairId + Nij * radId + Nij * l_nrbf2 * elem);
+
+    // descriptor slot of the central atom for this radial function / element
+    const int slot = l_idxi(pairId) + Ni * (radId + l_nrbf2 * elem);
+    Kokkos::atomic_add(&d2(slot), l_rbf(src));
+
+    dd2(dst + 0) = l_rbfx(src);
+    dd2(dst + 1) = l_rbfy(src);
+    dd2(dst + 2) = l_rbfz(src);
+  });
+}
+
+// Three-body descriptors.  One work item handles one (atom, radial function)
+// combination and, for every angular group p and every unordered element
+// pair (i1 <= i2), stores
+//   sum_t l_pc3(t) * sumU(i1, t) * sumU(i2, t),  t in [l_pn3(p), l_pn3(p+1))
+// into d3.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_pod_1i l_pc3, t_pod_1i l_pn3,
+        int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, const int Ni)
+{
+  const int nWork = l_nrbf3 * Ni;
+  Kokkos::parallel_for("ThreeBodyDesc", nWork, KOKKOS_LAMBDA(int idx) {
+    const int radId = idx % l_nrbf3;
+    const int atomId = idx / l_nrbf3;
+    // start of this (atom, radial function) slice of l_sumU
+    const int uOff = l_nelements * l_K3 * radId + l_nelements * l_K3 * l_nrbf3 * atomId;
+
+    for (int p = 0; p < l_nabf3; p++) {
+      const int lo = l_pn3(p);
+      const int hi = l_pn3(p + 1);
+      const int out = atomId + Ni * (p + l_nabf3 * radId);
+      int pairCount = 0;  // running index over element pairs (i1 <= i2)
+      for (int i1 = 0; i1 < l_nelements; i1++) {
+        for (int i2 = i1; i2 < l_nelements; i2++) {
+          double acc = 0;
+          for (int t = lo; t < hi; t++) {
+            acc += l_pc3(t) * l_sumU(i1 + l_nelements * t + uOff) * l_sumU(i2 + l_nelements * t + uOff);
+          }
+          d3(out + nWork * l_nabf3 * pairCount) = acc;
+          ++pairCount;
+        }
+      }
+    }
+  });
+}
+
+// Derivatives of the three-body descriptors with respect to the pair
+// displacement.  One work item handles one (pair, radial function)
+// combination; three values (x, y, z) are written per angular group (and, in
+// the multi-element case, per element pair slot taken from l_elemindex).
+// l_nrbfmax and Ni are accepted for interface compatibility and not used here.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::threebodydescderiv(t_pod_1d dd3, t_pod_1d l_rbf, t_pod_1d l_rbfx,
+    t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz,
+    t_pod_1d l_sumU, t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pc3, t_pod_1i l_pn3, t_pod_1i l_elemindex, int l_nelements,
+    int l_nrbfmax, int l_nrbf3, int l_nabf3, int l_K3, int Ni, int Nij)
+{
+  const int nWork = l_nrbf3 * Nij;
+
+  if (l_nelements != 1) {
+    // stride between consecutive element-pair slots of dd3
+    const int N3 = 3 * Nij *  l_nabf3 * l_nrbf3;
+    Kokkos::parallel_for("ThreeBodyDescDeriv2", nWork, KOKKOS_LAMBDA(int idx) {
+      const int pairId = idx / l_nrbf3;
+      const int radId = idx % l_nrbf3;
+      const int elemJ = l_tj(pairId) - 1;  // 0-based element of the neighbor
+      const int uOff = l_nelements * l_K3 * radId + l_nelements*l_K3*l_nrbf3*l_idxi(pairId);
+      const int rOff = pairId + Nij * radId;
+      const double r0 = l_rbf(rOff);
+      const double rx = l_rbfx(rOff);
+      const double ry = l_rbfy(rOff);
+      const double rz = l_rbfz(rOff);
+
+      for (int p = 0; p < l_nabf3; p++) {
+        const int lo = l_pn3(p);
+        const int hi = l_pn3(p + 1);
+        const int outBase = 3 * pairId + 3 * Nij * (p + l_nabf3 * radId);
+        for (int i1 = 0; i1 < l_nelements; i1++) {
+          // the product is symmetric, so a matching element counts twice
+          const int c3 = (i1 == elemJ) ? 2 : 1;
+          double gx = 0;
+          double gy = 0;
+          double gz = 0;
+          for (int t = lo; t < hi; t++) {
+            const int aOff = pairId + Nij*t;
+            const double a0 = l_abf(aOff);
+            const double f = c3*l_pc3(t) * l_sumU(i1 + l_nelements * t + uOff);
+            gx += f * (l_abfx(aOff) * r0 + rx * a0);
+            gy += f * (l_abfy(aOff) * r0 + ry * a0);
+            gz += f * (l_abfz(aOff) * r0 + rz * a0);
+          }
+          const int out = outBase + N3 * l_elemindex(elemJ + l_nelements * i1);
+          dd3(0 + out) = gx;
+          dd3(1 + out) = gy;
+          dd3(2 + out) = gz;
+        }
+      }
+    });
+    return;
+  }
+
+  // single-element system: no element bookkeeping, constant factor of two
+  Kokkos::parallel_for("ThreeBodyDescDeriv1", nWork, KOKKOS_LAMBDA(int idx) {
+    const int pairId = idx / l_nrbf3;
+    const int radId = idx % l_nrbf3;
+    const int rOff = pairId + Nij * radId;
+    const double r0 = l_rbf(rOff);
+    const double rx = l_rbfx(rOff);
+    const double ry = l_rbfy(rOff);
+    const double rz = l_rbfz(rOff);
+    const int uOff = l_K3 * radId + l_K3*l_nrbf3*l_idxi(pairId);
+
+    for (int p = 0; p < l_nabf3; p++) {
+      const int lo = l_pn3(p);
+      const int hi = l_pn3(p + 1);
+      double gx = 0;
+      double gy = 0;
+      double gz = 0;
+      for (int t = lo; t < hi; t++) {
+        const double f = 2.0 * l_pc3(t) * l_sumU(t + uOff);
+        const int aOff = pairId + Nij*t;
+        const double a0 = l_abf(aOff);
+        gx += f * (l_abfx(aOff) * r0 + rx * a0);
+        gy += f * (l_abfy(aOff) * r0 + ry * a0);
+        gz += f * (l_abfz(aOff) * r0 + rz * a0);
+      }
+      const int out = 3 * pairId + 3 * Nij * (p + l_nabf3 * radId);
+      dd3(out)     = gx;
+      dd3(out + 1) = gy;
+      dd3(out + 2) = gz;
+    }
+  });
+}
+
+// Four-body descriptors.  One work item handles one (atom, radial function)
+// combination and, for every angular group p and every unordered element
+// triple (i1 <= i2 <= i3), stores
+//   sum_t l_pc4(t) * sumU(i1, j1) * sumU(i2, j2) * sumU(i3, j3)
+// for t in [l_pa4(p), l_pa4(p+1)), where j1, j2, j3 are read from l_pb4 at
+// t, t + l_Q4 and t + 2*l_Q4.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::fourbodydesc(t_pod_1d d4,  t_pod_1d l_sumU, t_pod_1i l_pa4, t_pod_1i l_pb4,
+    t_pod_1i l_pc4, int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni)
+{
+  const int nWork = l_nrbf4 * Ni;
+  Kokkos::parallel_for("fourbodydesc", nWork, KOKKOS_LAMBDA(int idx) {
+    const int radId = idx % l_nrbf4;
+    const int atomId = idx / l_nrbf4;
+    // l_sumU is laid out with l_nrbf3 radial functions per atom
+    const int uOff = l_nelements * l_K3 * radId + l_nelements * l_K3 * l_nrbf3 * atomId;
+
+    for (int p = 0; p < l_nabf4; p++) {
+      const int lo = l_pa4(p);
+      const int hi = l_pa4(p + 1);
+      int tripleCount = 0;  // running index over element triples
+      for (int i1 = 0; i1 < l_nelements; i1++) {
+        for (int i2 = i1; i2 < l_nelements; i2++) {
+          for (int i3 = i2; i3 < l_nelements; i3++) {
+            double acc = 0.0;
+            for (int t = lo; t < hi; t++) {
+              const int c = l_pc4(t);
+              const int j1 = l_pb4(t);
+              const int j2 = l_pb4(t + l_Q4);
+              const int j3 = l_pb4(t + 2 * l_Q4);
+              acc += c * l_sumU(uOff + i1 + l_nelements * j1) * l_sumU(uOff + i2 + l_nelements * j2) * l_sumU(uOff + i3 + l_nelements * j3);
+            }
+            const int col = p + l_nabf4 * radId + l_nabf4 * l_nrbf4 * tripleCount;
+            d4(atomId + Ni * col) = acc;
+            ++tripleCount;
+          }
+        }
+      }
+    }
+  });
+}
+
+// Derivatives of the four-body descriptors with respect to the pair
+// displacement.  One work item handles one (pair j, radial function m)
+// combination and writes three values (x, y, z) into dd4 for every angular
+// group p (and, in the multi-element case, every element triple i1<=i2<=i3).
+//
+// For each term t in [l_pa4(p), l_pa4(p+1)) the descriptor is a product of
+// three sumU factors selected by j1, j2, j3 (read from l_pb4 at t, t+l_Q4,
+// t+2*l_Q4) and weighted by l_pc4(t); the derivative applies the product rule,
+// differentiating one factor at a time.  The derivative of a single factor
+// with respect to this pair is (abf' * rbf + rbf' * abf).
+//
+// l_elemindex, l_nrbfmax and Ni are accepted but not used in this function.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::fourbodydescderiv(t_pod_1d dd4, t_pod_1d l_rbf, t_pod_1d l_rbfx, 
+    t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, 
+    t_pod_1d l_sumU, t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, t_pod_1i l_elemindex, 
+    int l_nelements, int l_nrbfmax, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni, int Nij)
+{
+  int totalIterations = l_nrbf4 * Nij;
+  if (l_nelements==1) {
+    // single-element system: every factor contributes, no type bookkeeping
+    Kokkos::parallel_for("fourbodydescderiv1", totalIterations, KOKKOS_LAMBDA(int idx) {
+      int j = idx / l_nrbf4;  // Derive the original j value
+      int m = idx % l_nrbf4;  // Derive the original m value      
+      // offset of the (central atom, m) slice of sumU; sumU holds l_nrbf3 radial functions per atom
+      int idxU = l_K3 * m + l_K3*l_nrbf3*l_idxi(j);      
+      int baseIdxJ = j + Nij * m;  // Pre-compute the index for rbf
+      double rbfBase = l_rbf(baseIdxJ);
+      double rbfxBase = l_rbfx(baseIdxJ);
+      double rbfyBase = l_rbfy(baseIdxJ);
+      double rbfzBase = l_rbfz(baseIdxJ);
+
+      for (int p = 0; p < l_nabf4; p++) {
+        int n1 = l_pa4(p);
+        int n2 = l_pa4(p + 1);
+        int nn = n2 - n1;
+        int kk = p + l_nabf4 * m;
+        int ii = 3 * Nij * kk;
+        int baseIdx = 3 * j + ii;  // location of the x component in dd4
+        double tmp1 = 0;
+        double tmp2 = 0;
+        double tmp3 = 0;
+        for (int q = 0; q < nn; q++) {
+          int idxNQ = n1 + q;  // Combine n1 and q into a single index
+          int c = l_pc4(idxNQ);
+          int j1 = l_pb4(idxNQ);
+          int j2 = l_pb4(idxNQ + l_Q4);
+          int j3 = l_pb4(idxNQ + 2 * l_Q4);
+          double c1 = l_sumU(idxU + j1);
+          double c2 = l_sumU(idxU + j2);
+          double c3 = l_sumU(idxU + j3);                    
+          // weighted products of the two factors that are NOT differentiated
+          double t12 = c * c1 * c2;          
+          double t13 = c * c1 * c3;
+          double t23 = c * c2 * c3;
+          
+          // Pre-calculate commonly used indices          
+          int baseIdxJ3 = j + Nij * j3; // Common index for j3 terms
+          int baseIdxJ2 = j + Nij * j2; // Common index for j2 terms
+          int baseIdxJ1 = j + Nij * j1; // Common index for j1 terms
+                    
+          // Temporary variables to store repeated calculations
+          double abfBaseJ1 = l_abf(baseIdxJ1);
+          double abfBaseJ2 = l_abf(baseIdxJ2);
+          double abfBaseJ3 = l_abf(baseIdxJ3);
+          // Accumulate the product-rule terms; dd4 is written once after the loop
+          tmp1 += t12 * (l_abfx(baseIdxJ3) * rbfBase + rbfxBase * abfBaseJ3)
+                            + t13 * (l_abfx(baseIdxJ2) * rbfBase + rbfxBase * abfBaseJ2)
+                            + t23 * (l_abfx(baseIdxJ1) * rbfBase + rbfxBase * abfBaseJ1);
+          tmp2 += t12 * (l_abfy(baseIdxJ3) * rbfBase + rbfyBase * abfBaseJ3)
+                            + t13 * (l_abfy(baseIdxJ2) * rbfBase + rbfyBase * abfBaseJ2)
+                            + t23 * (l_abfy(baseIdxJ1) * rbfBase + rbfyBase * abfBaseJ1);
+          tmp3 += t12 * (l_abfz(baseIdxJ3) * rbfBase + rbfzBase * abfBaseJ3)
+                            + t13 * (l_abfz(baseIdxJ2) * rbfBase + rbfzBase * abfBaseJ2)
+                            + t23 * (l_abfz(baseIdxJ1) * rbfBase + rbfzBase * abfBaseJ1);
+        }
+        dd4(baseIdx)     = tmp1;
+        dd4(baseIdx + 1) = tmp2;
+        dd4(baseIdx + 2) = tmp3;                                  
+      }
+    });
+  }
+  else {        
+    // multi-element system; N3 is the stride between element-triple slots of dd4
+    int N3 = 3*Nij * l_nabf4 * l_nrbf4;
+    Kokkos::parallel_for("fourbodydescderiv2", totalIterations, KOKKOS_LAMBDA(int idx) {
+      int j = idx / l_nrbf4;  // Derive the original j value
+      int m = idx % l_nrbf4;  // Derive the original m value          
+      int idxM = j + Nij * m;
+      double rbfM = l_rbf(idxM);
+      double rbfxM = l_rbfx(idxM);
+      double rbfyM = l_rbfy(idxM);
+      double rbfzM = l_rbfz(idxM);
+      int typej = l_tj(j) - 1;  // 0-based element type of the neighbor in pair j
+      for (int p = 0; p < l_nabf4; p++)  {
+        int n1 = l_pa4(p);
+        int n2 = l_pa4(p + 1);
+        int nn = n2 - n1;
+        int jpm = 3 * j + 3 * Nij * (p + l_nabf4 * m);
+        int k = 0;  // running index over element triples (i1 <= i2 <= i3)
+        for (int i1 = 0; i1 < l_nelements; i1++) {                                 
+          for (int i2 = i1; i2 < l_nelements; i2++) {                          
+            for (int i3 = i2; i3 < l_nelements; i3++) {       
+              double tmp1 = 0;
+              double tmp2 = 0;
+              double tmp3 = 0;                                          
+              for (int q = 0; q < nn; q++) {  
+                int c = l_pc4(n1 + q);
+                int j1 = l_pb4(n1 + q);
+                int j2 = l_pb4(n1 + q + l_Q4);
+                int j3 = l_pb4(n1 + q + 2 * l_Q4);
+                
+                // sumU entries of the central atom for elements i1, i2, i3
+                int idx1 = i1 + l_nelements * j1 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * l_idxi(j);
+                int idx2 = i2 + l_nelements * j2 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * l_idxi(j);
+                int idx3 = i3 + l_nelements * j3 + l_nelements * l_K3 * m + l_nelements * l_K3 * l_nrbf3 * l_idxi(j);                
+                double c1 = l_sumU(idx1);
+                double c2 = l_sumU(idx2 );
+                double c3 = l_sumU(idx3);     
+                double t12 = c*(c1 * c2);                  
+                double t13 = c*(c1 * c3);
+                double t23 = c*(c2 * c3);                
+                                
+                int idxJ3 = j + Nij * j3;
+                int idxJ2 = j + Nij * j2;
+                int idxJ1 = j + Nij * j1;                          
+                double abfJ1 = l_abf(idxJ1);
+                double abfJ2 = l_abf(idxJ2);
+                double abfJ3 = l_abf(idxJ3);
+                double abfxJ1 = l_abfx(idxJ1);
+                double abfxJ2 = l_abfx(idxJ2);
+                double abfxJ3 = l_abfx(idxJ3);
+                double abfyJ1 = l_abfy(idxJ1);
+                double abfyJ2 = l_abfy(idxJ2);
+                double abfyJ3 = l_abfy(idxJ3);
+                double abfzJ1 = l_abfz(idxJ1);
+                double abfzJ2 = l_abfz(idxJ2);
+                double abfzJ3 = l_abfz(idxJ3);
+                
+                // A factor is differentiated only when its element matches the
+                // neighbor's element; the three checks are independent, so
+                // repeated elements in the triple contribute more than once.
+                if (typej == i3) {
+                    tmp1 += t12 * (abfxJ3 * rbfM + rbfxM * abfJ3);
+                    tmp2 += t12 * (abfyJ3 * rbfM + rbfyM * abfJ3);
+                    tmp3 += t12 * (abfzJ3 * rbfM + rbfzM * abfJ3);
+                }
+                if (typej == i2) {
+                    tmp1 += t13 * (abfxJ2 * rbfM + rbfxM * abfJ2);
+                    tmp2 += t13 * (abfyJ2 * rbfM + rbfyM * abfJ2);
+                    tmp3 += t13 * (abfzJ2 * rbfM + rbfzM * abfJ2);
+                }
+                if (typej == i1) {
+                    tmp1 += t23 * (abfxJ1 * rbfM + rbfxM * abfJ1);
+                    tmp2 += t23 * (abfyJ1 * rbfM + rbfyM * abfJ1);
+                    tmp3 += t23 * (abfzJ1 * rbfM + rbfzM * abfJ1);
+                }                
+              }
+              int baseIdx = jpm + N3 * k;
+              dd4(0 + baseIdx) = tmp1;
+              dd4(1 + baseIdx) = tmp2;
+              dd4(2 + baseIdx) = tmp3;              
+              k += 1;
+            }
+          }
+        }
+      }
+    });    
+  }
+}
+
+// Products of two-body and three-body descriptors:
+//   d23(n, i, j) = d2(n, l_ind23(i)) * d3(n, l_ind32(j))
+// for every atom n < Ni, i < l_n23 and j < l_n32, with the atom index running
+// fastest in all three arrays.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::fourbodydesc23(t_pod_1d d23, t_pod_1d d2, t_pod_1d d3, t_pod_1i l_ind23,
+    t_pod_1i l_ind32, int l_n23, int l_n32, int Ni)
+{
+  const int nWork = l_n32 * l_n23 * Ni;
+  Kokkos::parallel_for("fourbodydesc23", nWork, KOKKOS_LAMBDA(int idx) {
+    // unpack idx = atom + Ni*(i2 + l_n23*i3)
+    const int atom = idx % Ni;
+    const int rest = idx / Ni;
+    const int i2 = rest % l_n23;
+    const int i3 = rest / l_n23;
+
+    const double twoBody = d2(atom + Ni * l_ind23(i2));
+    const double threeBody = d3(atom + Ni * l_ind32(i3));
+    d23(atom + Ni * i2 + Ni * l_n23 * i3) = twoBody * threeBody;
+  });
+}
+
+// Derivatives of the two-body x three-body product descriptors (product rule):
+//   dd23(:, n, i, j) = d2(a, l_ind23(i)) * dd3(:, n, l_ind32(j))
+//                    + dd2(:, n, l_ind23(i)) * d3(a, l_ind32(j))
+// for every pair n < N, i < l_n23 and j < l_n32, where a = l_idxi(n) is the
+// central atom of pair n and ':' is the x/y/z component (fastest index).
+//
+//   dd23           output, 3*N*l_n23*l_n32 entries are written
+//   d2, d3         per-atom descriptors, indexed atom + Ni*descriptor
+//   dd2, dd3       per-pair derivatives, indexed 3*pair + 3*N*descriptor
+//   l_idxi         central-atom index of each pair
+//   l_ind23/32     descriptor indices selecting the two-/three-body factors
+//   Ni, N          number of atoms / number of pairs in the block
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::fourbodydescderiv23(t_pod_1d dd23, t_pod_1d d2, t_pod_1d d3, t_pod_1d dd2, 
+    t_pod_1d dd3,  t_pod_1i l_idxi, t_pod_1i l_ind23, t_pod_1i l_ind32, int l_n23, int l_n32, int Ni, int N)
+{
+  // The kernel decomposes idx over the N pairs (not the Ni atoms), so the
+  // launch size must be based on N for every (pair, i, j) entry to be written.
+  int totalIterations = l_n32 * l_n23 * N;
+  Kokkos::parallel_for("fourbodydescderiv23", totalIterations, KOKKOS_LAMBDA(int idx) {
+    int n = idx % N;
+    int temp = idx / N;
+    int i = temp % l_n23;
+    int j = temp / l_n23;
+
+    int k = 3 * (n + N * i + N * l_n23 * j);
+    int k1 = 3 * n + 3 * N * l_ind23(i);
+    // the three-body factor is selected by j, matching fourbodydesc23
+    int k2 = 3 * n + 3 * N * l_ind32(j);
+    int m1 = l_idxi(n) + Ni * l_ind23(i);
+    int m2 = l_idxi(n) + Ni * l_ind32(j);
+    dd23(0 + k) = d2(m1) * dd3(0 + k2) + dd2(0 + k1) * d3(m2);
+    dd23(1 + k) = d2(m1) * dd3(1 + k2) + dd2(1 + k1) * d3(m2);
+    dd23(2 + k) = d2(m1) * dd3(2 + k2) + dd2(2 + k1) * d3(m2);
+  });
+}
+
+// Element-wise products of two descriptor sets:
+//   d12(n, i) = d1(n, ind1(i)) * d2(n, ind2(i))   for n < Ni, i < n12
+// with the atom index n running fastest.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::crossdesc(t_pod_1d d12, t_pod_1d d1, t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni)
+{
+  const int nWork = n12 * Ni;
+  Kokkos::parallel_for("crossdesc", nWork, KOKKOS_LAMBDA(int idx) {
+    const int atom = idx % Ni;
+    const int col = idx / Ni;
+    const int left = atom + Ni * ind1(col);
+    const int right = atom + Ni * ind2(col);
+    d12(atom + Ni * col) = d1(left) * d2(right);
+  });
+}
+
+// Derivatives of the cross-product descriptors (product rule):
+//   dd12(c, n, i) = d1(a, ind1(i)) * dd2(c, n, ind2(i))
+//                 + dd1(c, n, ind1(i)) * d2(a, ind2(i))
+// for component c < 3, pair n < Nij and i < n12, where a = l_idxi(n).
+// The flat work index equals the flat output index.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::crossdescderiv(t_pod_1d dd12, t_pod_1d d1, t_pod_1d d2, t_pod_1d dd1, t_pod_1d dd2,
+        t_pod_1i l_idxi, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni, int Nij)
+{
+  const int nWork = 3*n12*Nij;
+  Kokkos::parallel_for("crossdescderiv", nWork, KOKKOS_LAMBDA(int idx) {
+    // unpack idx = comp + 3*(pairId + Nij*col)
+    const int comp = idx % 3;
+    const int rest = idx / 3;
+    const int pairId = rest % Nij;
+    const int col = rest / Nij;
+
+    const int left = ind1(col);
+    const int right = ind2(col);
+    const int atom = l_idxi(pairId);
+    const int kLeft = comp + 3 * pairId + 3 * Nij * left;
+    const int kRight = comp + 3 * pairId + 3 * Nij * right;
+    dd12(idx) = d1(atom + Ni * left) * dd2(kRight) + dd1(kLeft) * d2(atom + Ni * right);
+  });
+}
+
+// Set the first N entries of the view a to zero with a parallel kernel.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::set_array_to_zero(t_pod_1d a, int N)
+{
+  Kokkos::parallel_for("initialize_array", N, KOKKOS_LAMBDA(int entry) {
+    a(entry) = 0.0;
+  });
+}
+
+// Compute all base descriptors (bd) and their derivatives (bdd) for a block of
+// Ni atoms and Nij pairs.  bd and bdd are split into consecutive sections for
+// the 2-, 3-, 4-body and the 23/33/34/44 product descriptors; every stage is
+// launched in the order below, and the timed stages are fenced and added to
+// comptime[10..21] in milliseconds.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::blockatom_base_descriptors(t_pod_1d bd, t_pod_1d bdd, int Ni, int Nij)
+{
+  using timer = std::chrono::high_resolution_clock;
+  // wall time elapsed since t0, in milliseconds
+  auto elapsedMs = [](timer::time_point t0) {
+    const auto t1 = timer::now();
+    return std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0).count()/1e6;
+  };
+
+  // first descriptor column of each section
+  const int c3 = nl2;
+  const int c4 = c3 + nl3;
+  const int c23 = c4 + nl4;
+  const int c33 = c23 + nl23;
+  const int c34 = c33 + nl33;
+  const int c44 = c34 + nl34;
+  const int cEnd = c44 + nl44;
+  const int M = 3 * Nij;  // derivative entries per descriptor column
+
+  auto d2 = Kokkos::subview(bd, std::make_pair(0, Ni * c3));
+  auto d3 = Kokkos::subview(bd, std::make_pair(Ni * c3, Ni * c4));
+  auto d4 = Kokkos::subview(bd, std::make_pair(Ni * c4, Ni * c23));
+  auto d23 = Kokkos::subview(bd, std::make_pair(Ni * c23, Ni * c33));
+  auto d33 = Kokkos::subview(bd, std::make_pair(Ni * c33, Ni * c34));
+  auto d34 = Kokkos::subview(bd, std::make_pair(Ni * c34, Ni * c44));
+  auto d44 = Kokkos::subview(bd, std::make_pair(Ni * c44, Ni * cEnd));
+  auto dd2 = Kokkos::subview(bdd, std::make_pair(0, M * c3));
+  auto dd3 = Kokkos::subview(bdd, std::make_pair(M * c3, M * c4));
+  auto dd4 = Kokkos::subview(bdd, std::make_pair(M * c4, M * c23));
+  auto dd23 = Kokkos::subview(bdd, std::make_pair(M * c23, M * c33));
+  auto dd33 = Kokkos::subview(bdd, std::make_pair(M * c33, M * c34));
+  auto dd34 = Kokkos::subview(bdd, std::make_pair(M * c34, M * c44));
+  auto dd44 = Kokkos::subview(bdd, std::make_pair(M * c44, M * cEnd));
+
+  // radial basis functions and derivatives (stored in the abf* work arrays)
+  auto t0 = timer::now();
+  radialbasis(abf, abfx, abfy, abfz, rij, besselparams, rin, rmax,
+        besseldegree, inversedegree, nbesselpars, ns, Nij);
+  Kokkos::fence();
+  comptime[10] += elapsedMs(t0);
+
+  // multiply the basis functions by Phi to obtain rbf and its derivatives
+  t0 = timer::now();
+  matrixMultiply(abf,  Phi, rbf, Nij, ns,  nrbfmax);
+  matrixMultiply(abfx, Phi, rbfx, Nij, ns,  nrbfmax);
+  matrixMultiply(abfy, Phi, rbfy, Nij, ns,  nrbfmax);
+  matrixMultiply(abfz, Phi, rbfz, Nij, ns,  nrbfmax);
+  Kokkos::fence();
+  comptime[11] += elapsedMs(t0);
+
+  // two-body descriptors (d2 is accumulated atomically, so it is zeroed first)
+  t0 = timer::now();
+  set_array_to_zero(d2, Ni*nl2);
+  set_array_to_zero(dd2, 3*Nij*nl2);
+  twobodydescderiv(d2, dd2, rbf, rbfx, rbfy, rbfz, idxi, tj, nrbfmax, nrbf2, Ni, Nij);
+  Kokkos::fence();
+  comptime[12] += elapsedMs(t0);
+
+  // three-body descriptors
+  if ((nl3 > 0) && (Nij>1)) {
+    t0 = timer::now();
+    angularbasis(abf, abfx, abfy, abfz, rij, pq3, K3, Nij);
+    Kokkos::fence();
+    comptime[13] += elapsedMs(t0);
+
+    t0 = timer::now();
+    set_array_to_zero(sumU, nelements * nrbf3 * K3 * Ni);
+    radialangularsum(sumU, rbf, abf, tj, numij, nelements, nrbf3, K3, Ni, Nij);
+    Kokkos::fence();
+    comptime[14] += elapsedMs(t0);
+
+    t0 = timer::now();
+    //set_array_to_zero(d3, Ni*nl3);
+    threebodydesc(d3, sumU, pc3, pn3, nelements, nrbf3, nabf3, K3, Ni);
+    Kokkos::fence();
+    comptime[15] += elapsedMs(t0);
+
+    t0 = timer::now();
+    set_array_to_zero(dd3, 3*Nij*nl3);
+    threebodydescderiv(dd3, rbf, rbfx, rbfy, rbfz, abf, abfx, abfy, abfz, sumU,
+          idxi, tj, pc3, pn3, elemindex, nelements, nrbfmax, nrbf3, nabf3, K3, Ni, Nij);
+    Kokkos::fence();
+    comptime[16] += elapsedMs(t0);
+  }
+
+  // four-body descriptors
+  if ((nl4 > 0) && (Nij>2)) {
+    t0 = timer::now();
+    //set_array_to_zero(d4, Ni*nl4);
+    fourbodydesc(d4, sumU, pa4, pb4, pc4, nelements, nrbf3, nrbf4, nabf4, K3, Q4, Ni);
+    Kokkos::fence();
+    comptime[17] += elapsedMs(t0);
+
+    t0 = timer::now();
+    //set_array_to_zero(dd4, 3*Nij*nl4);
+    fourbodydescderiv(dd4, rbf, rbfx, rbfy, rbfz, abf, abfx, abfy, abfz, sumU, idxi, tj,
+      pa4, pb4, pc4, elemindex, nelements, nrbfmax, nrbf3, nrbf4, nabf4, K3, Q4, Ni, Nij);
+    Kokkos::fence();
+    comptime[18] += elapsedMs(t0);
+  }
+
+  // two-body x three-body products (neither timed nor fenced)
+  if ((nl23>0) && (Nij>2)) {
+    fourbodydesc23(d23, d2, d3, ind23, ind32, n23, n32, Ni);
+    fourbodydescderiv23(dd23, d2, d3, dd2, dd3, idxi, ind23, ind32, n23, n32, Ni, Nij);
+  }
+
+  // three-body x three-body products
+  if ((nl33>0) && (Nij>3)) {
+    t0 = timer::now();
+    crossdesc(d33, d3, d3, ind33l, ind33r, nl33, Ni);
+    crossdescderiv(dd33, d3, d3, dd3, dd3, idxi, ind33l, ind33r, nl33, Ni, Nij);
+    Kokkos::fence();
+    comptime[19] += elapsedMs(t0);
+  }
+
+  // three-body x four-body products
+  if ((nl34>0) && (Nij>4)) {
+    t0 = timer::now();
+    crossdesc(d34, d3, d4, ind34l, ind34r, nl34, Ni);
+    crossdescderiv(dd34, d3, d4, dd3, dd4, idxi, ind34l, ind34r, nl34, Ni, Nij);
+    Kokkos::fence();
+    comptime[20] += elapsedMs(t0);
+  }
+
+  // four-body x four-body products
+  if ((nl44>0) && (Nij>5)) {
+    t0 = timer::now();
+    crossdesc(d44, d4, d4, ind44l, ind44r, nl44, Ni);
+    crossdescderiv(dd44, d4, d4, dd4, dd4, idxi, ind44l, ind44r, nl44, Ni, Nij);
+    Kokkos::fence();
+    comptime[21] += elapsedMs(t0);
+  }
+}
+
+// Environment-dependent energies and descriptor coefficients for a block of
+// Ni atoms, computed from the base descriptors B (indexed atom + Ni*m):
+//   1. project B onto nComponents components per atom,
+//   2. compute inverse squared distances to the nClusters centroids,
+//   3. normalize them into cluster probabilities,
+//   4. evaluate the atomic energies ei,
+//   5. fill cb with dE/dB, including the dependence of the probabilities on B.
+// The abf/abfx/abfy/abfz/rbf member arrays are reused as scratch storage.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::blockatomenv_descriptors(t_pod_1d ei, t_pod_1d cb, t_pod_1d B, int Ni)
+{
+  // scratch views borrowed from the basis-function work arrays
+  auto prob = abf;         // cluster probabilities, Ni*nClusters
+  auto envCoef = abfx;     // per-cluster energy coefficients, Ni*nClusters
+  auto comps = abfy;       // projected components, Ni*nComponents
+  auto invDist = abfz;     // inverse squared distances, Ni*nClusters
+  auto invDistSum = rbf;   // sum of invDist over clusters, Ni
+
+  // local copies so the kernels do not capture the object itself
+  auto projMat = Proj;
+  auto centers = Centroids;
+  auto coeffs = coefficients;
+  auto atomType = typeai;
+
+  const int nComp = nComponents;
+  const int nClus = nClusters;
+  const int nDesc = Mdesc;
+  const int nCoef = nCoeffPerElement;
+
+  const int workComp = Ni*nComp;
+  const int workClus = Ni*nClus;
+  const int workDesc = Ni*nDesc;
+
+  Kokkos::parallel_for("pca", workComp, KOKKOS_LAMBDA(int idx) {
+    const int a = idx % Ni;
+    const int c = idx / Ni;
+    const int t = atomType[a]-1;
+    double acc = 0.0;
+    for (int m = 0; m < nDesc; m++) {
+      acc += projMat[c + nComp*m + nComp*nDesc*t] * B[a + Ni*m];
+    }
+    comps[a + Ni*c] = acc;
+  });
+
+  Kokkos::parallel_for("inverse_square_distances", workClus, KOKKOS_LAMBDA(int idx) {
+    const int a = idx % Ni;
+    const int cl = idx / Ni;
+    const int t = atomType[a]-1;
+    double dist2 = 1e-20;  // small offset keeps the reciprocal finite
+    for (int c = 0; c < nComp; c++) {
+      const double center = centers[c + cl * nComp + nClus*nComp*t];
+      const double value = comps[a + Ni*c];
+      dist2 += (value - center) * (value - center);
+    }
+    invDist[a + Ni*cl] = 1.0 / dist2;
+  });
+
+  Kokkos::parallel_for("Probabilities", Ni, KOKKOS_LAMBDA(int a) {
+    double total = 0;
+    for (int cl = 0; cl < nClus; cl++) total += invDist[a + Ni*cl];
+    invDistSum[a] = total;
+    for (int cl = 0; cl < nClus; cl++) prob[a + Ni*cl] = invDist[a + Ni*cl]/total;
+  });
+
+  Kokkos::parallel_for("atomic_energies", Ni, KOKKOS_LAMBDA(int a) {
+    const int off = nCoef*(atomType[a]-1);
+    ei[a] = coeffs[0 + off];
+    for (int cl = 0; cl<nClus; cl++) {
+      for (int m = 0; m<nDesc; m++) {
+        ei[a] += coeffs[1 + m + nDesc*cl + off]*B[a + Ni*m]*prob[a + Ni*cl];
+      }
+    }
+  });
+
+  Kokkos::parallel_for("env_coefficients", workClus, KOKKOS_LAMBDA(int idx) {
+    const int a = idx % Ni;
+    const int cl = idx / Ni;
+    const int off = nCoef*(atomType[a]-1);
+    double acc = 0;
+    for (int m = 0; m<nDesc; m++) {
+      acc += coeffs[1 + m + cl*nDesc + off]*B[a + Ni*m];
+    }
+    envCoef[a + Ni*cl] = acc;
+  });
+
+  Kokkos::parallel_for("base_coefficients", workDesc, KOKKOS_LAMBDA(int idx) {
+    const int a = idx % Ni;
+    const int m = idx / Ni;
+    const int off = nCoef*(atomType[a]-1);
+    double acc = 0.0;
+    for (int cl = 0; cl<nClus; cl++) {
+      acc += coeffs[1 + m + cl*nDesc + off]*prob[a + Ni*cl];
+    }
+    cb[a + Ni*m] = acc;
+  });
+
+  // chain rule through probabilities -> inverse distances -> components -> B
+  Kokkos::parallel_for("base_env_coefficients", workDesc, KOKKOS_LAMBDA(int idx) {
+    const int a = idx % Ni;
+    const int m = idx / Ni;
+    const int t = atomType[a]-1;
+    const double invTotal = 1/invDistSum[a];
+    const double total2 = invDistSum[a]*invDistSum[a];
+    double acc = 0.0;
+    for (int j=0; j<nClus; j++) {
+      double dP_dB = 0.0;
+      for (int k = 0; k < nClus; k++) {
+        // d prob_j / d invDist_k
+        double dP_dD = -invDist[a + Ni*j] / total2;
+        if (k==j) dP_dD += invTotal;
+        // d invDist_k / d B_m
+        double dD_dB = 0.0;
+        const double twoD2 = 2 * invDist[a + Ni*k] * invDist[a + Ni*k];
+        for (int c = 0; c < nComp; c++) {
+          const double dD_dcomp = twoD2 * (centers[c + k * nComp + nClus*nComp*t] - comps[a + Ni*c]);
+          dD_dB += dD_dcomp * projMat[c + m * nComp + nComp*nDesc*t];
+        }
+        dP_dB += dP_dD * dD_dB;
+      }
+      acc += envCoef[a + Ni*j]*dP_dB;
+    }
+    cb[a + Ni*m] += acc;
+  });
+}
+
+// Compute the atomic energies (ei) and the per-pair force components (fij)
+// for a block of Ni atoms and Nij pairs.  The base descriptors are evaluated
+// first; with more than one cluster the environment-dependent path is taken,
+// otherwise energies and forces are linear in the descriptors.  Each stage is
+// fenced and its wall time (ms) is added to comptime[22..26].
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::blockatomenergyforce(int Ni, int Nij)
+{
+  using timer = std::chrono::high_resolution_clock;
+  // wall time elapsed since t0, in milliseconds
+  auto elapsedMs = [](timer::time_point t0) {
+    const auto t1 = timer::now();
+    return std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0).count()/1e6;
+  };
+
+  // base descriptors and their derivatives with respect to atom coordinates
+  auto t0 = timer::now();
+  blockatom_base_descriptors(bd, bdd, Ni, Nij);
+  Kokkos::fence();
+  comptime[22] += elapsedMs(t0);
+
+  // local copies of member data so the kernels do not capture the object
+  auto l_coefficients = coefficients;
+  auto l_typeai = typeai;
+  auto l_ei = ei;
+  auto l_fij = fij;
+  auto l_bd = bd;
+  auto l_bdd = bdd;
+  auto l_nCoeffPerElement = nCoeffPerElement;
+  auto l_Mdesc = Mdesc;
+  auto l_ti = ti;
+  auto l_idxi = idxi;
+
+  const int N3 = 3*Nij;  // number of force components, also the column stride of bdd
+
+  if (nClusters > 1) {
+    // environment-dependent model: sumU is reused to hold dE/dB per atom
+    t0 = timer::now();
+    auto cb = sumU;
+    blockatomenv_descriptors(ei, cb, bd, Ni);
+    Kokkos::fence();
+    comptime[25] += elapsedMs(t0);
+
+    t0 = timer::now();
+    Kokkos::parallel_for("compute_fij", N3, KOKKOS_LAMBDA(const int idx) {
+      const int pairId = idx / 3;
+      const int atom = l_idxi[pairId];
+      double force = 0.0;
+      for (int m = 0; m < l_Mdesc; m++) {
+        force += cb(atom + Ni*m) * l_bdd(idx + N3*m);
+      }
+      l_fij(idx) = force;
+    });
+    Kokkos::fence();
+    comptime[26] += elapsedMs(t0);
+
+    return;
+  }
+
+  // linear model: energy = constant term + coefficients . descriptors
+  t0 = timer::now();
+  Kokkos::parallel_for("compute_ei", Ni, KOKKOS_LAMBDA(const int n) {
+    const int off = l_nCoeffPerElement * (l_typeai(n) - 1);
+    double energy = l_coefficients(0 + off);
+    for (int m = 0; m < l_Mdesc; ++m) {
+      energy += l_coefficients(1 + m + off) * l_bd(n + Ni * m);
+    }
+    l_ei(n) = energy;
+  });
+  Kokkos::fence();
+  comptime[23] += elapsedMs(t0);
+
+  // linear model: force = coefficients . descriptor derivatives
+  t0 = timer::now();
+  Kokkos::parallel_for("compute_fij", N3, KOKKOS_LAMBDA(const int idx) {
+    const int pairId = idx / 3;
+    const int off = l_nCoeffPerElement * (l_ti(pairId) - 1);
+    double force = 0.0;
+    for (int m = 0; m < l_Mdesc; m++) {
+      force += l_coefficients(1 + m + off) * l_bdd(idx + N3*m);
+    }
+    l_fij(idx) = force;
+  });
+  Kokkos::fence();
+  comptime[24] += elapsedMs(t0);
+}
+
+// Scatter the pair forces stored in fij onto the per-atom force array f.
+// For every pair p (0 <= p < Nij) the three components fij(3*p + d) are added
+// to atom ai(p) and subtracted from atom aj(p) using Kokkos atomics.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::tallyforce(int Nij) {
+  // local handles, captured by value by the lambda below
+  auto forces = f;
+  auto pairForces = fij;
+  auto atomI = ai;
+  auto atomJ = aj;
+  Kokkos::parallel_for("TallyForce", Nij, KOKKOS_LAMBDA(int p) {
+    const int iatom = atomI(p);
+    const int jatom = atomJ(p);
+    for (int d = 0; d < 3; ++d) {
+      const double fd = pairForces(3*p + d);
+      Kokkos::atomic_add(&forces(iatom, d), fd);
+      Kokkos::atomic_sub(&forces(jatom, d), fd);
+    }
+  });
+}
+
+// Tally the per-atom energies ei(0..Ni-1) of the current atom block.
+//   istart : offset added to the block-local index k when writing d_eatom
+//   Ni     : number of entries of ei to tally
+// With eflag_global set, the sum of ei is added to eng_vdwl; with eflag_atom
+// set, ei(k) is added to d_eatom(istart + k).
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::tallyenergy(int istart, int Ni)
+{
+  auto l_ei = ei;
+  auto l_eatom = d_eatom;
+
+  // For global energy tally
+  if (eflag_global) {
+    // the result variable uses the same type as the reducer argument (E_FLOAT),
+    // just as tallystress pairs an F_FLOAT result with an F_FLOAT reducer
+    E_FLOAT local_eng_vdwl = 0.0;
+    Kokkos::parallel_reduce("GlobalEnergyTally", Ni, KOKKOS_LAMBDA(int k, E_FLOAT& update) {
+        update += l_ei(k);
+      }, local_eng_vdwl);
+
+    // Update global energy on the host after the parallel region
+    eng_vdwl += local_eng_vdwl;
+  }
+
+  // For per-atom energy tally
+  if (eflag_atom) {
+    Kokkos::parallel_for("PerAtomEnergyTally", Ni, KOKKOS_LAMBDA(int k) {
+        l_eatom(istart + k) += l_ei(k);
+      });
+  }
+}
+
+// Tally the virial of the Nij pairs in the current block from rij and fij.
+// Component c (0..5) multiplies rij entry rcomp[c] with fij entry fcomp[c] of
+// each pair, i.e. (0,0), (1,1), (2,2), (0,1), (0,2), (1,2).
+//  - vflag_global: the sum over all pairs is subtracted from virial[c]
+//  - vflag_atom:   -0.5 * rij * fij is added atomically to d_vatom of both
+//                  atoms ai and aj of the pair
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::tallystress(int Nij)
+{
+  auto pairForce = fij;
+  auto pairDisp = rij;
+  auto atomI = ai;
+  auto atomJ = aj;
+  auto perAtomVirial = d_vatom;
+
+  if (vflag_global) {
+    const int rcomp[6] = {0, 1, 2, 0, 0, 1};
+    const int fcomp[6] = {0, 1, 2, 1, 2, 2};
+
+    // one reduction per virial component, issued in the order 0..5
+    for (int c = 0; c < 6; ++c) {
+      const int a = rcomp[c];
+      const int b = fcomp[c];
+      F_FLOAT total = 0.0;
+      Kokkos::parallel_reduce("GlobalStressTally", Nij, KOKKOS_LAMBDA(int p, F_FLOAT& acc) {
+          const int base = 3*p;
+          acc += pairDisp(a + base) * pairForce(b + base);
+        }, total);
+      virial[c] -= total;
+    }
+  }
+
+  if (vflag_atom) {
+    Kokkos::parallel_for("PerAtomStressTally", Nij, KOKKOS_LAMBDA(int p) {
+        const int iatom = atomI(p);
+        const int jatom = atomJ(p);
+        const int base = 3*p;
+
+        double w[6];
+        w[0] = -pairDisp(base) * pairForce(base + 0);
+        w[1] = -pairDisp(base + 1) * pairForce(base + 1);
+        w[2] = -pairDisp(base + 2) * pairForce(base + 2);
+        w[3] = -pairDisp(base + 0) * pairForce(base + 1);
+        w[4] = -pairDisp(base + 0) * pairForce(base + 2);
+        w[5] = -pairDisp(base + 1) * pairForce(base + 2);
+
+        // each atom of the pair receives half of the pair contribution
+        for (int c = 0; c < 6; ++c) {
+          Kokkos::atomic_add(&perAtomVirial(iatom, c), 0.5 * w[c]);
+          Kokkos::atomic_add(&perAtomVirial(jatom, c), 0.5 * w[c]);
+        }
+      });
+  }
+}
+
+// Copy the view d_A to a host mirror and dump it to a binary file.
+// File layout: two doubles (nrows, ncols) followed by the first nrows*ncols
+// doubles of the view.
+//   filename     : path of the file to create or overwrite
+//   d_A          : view to dump; must hold at least nrows*ncols values
+//   nrows, ncols : matrix shape recorded in the file header
+// Stops with error->one() if the file cannot be opened.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::savematrix2binfile(std::string filename, t_pod_1d d_A, int nrows, int ncols)
+{
+  auto A = Kokkos::create_mirror_view(d_A);
+  Kokkos::deep_copy(A, d_A);
+
+  FILE *fp = fopen(filename.c_str(), "wb");
+  // fopen() returns a null pointer on failure; writing through it would crash
+  if (fp == nullptr)
+    error->one(FLERR, "Cannot open file {} for writing: {}", filename, utils::getsyserror());
+
+  double sz[2];
+  sz[0] = (double) nrows;
+  sz[1] = (double) ncols;
+  // count the values in size_t so that nrows*ncols cannot overflow int
+  const size_t nvalues = static_cast<size_t>(nrows) * static_cast<size_t>(ncols);
+  fwrite(sz, sizeof(double), 2, fp);
+  fwrite(A.data(), sizeof(double), nvalues, fp);
+  fclose(fp);
+}
+
+// Copy the integer view d_A to a host mirror and dump it to a binary file.
+// File layout: two ints (nrows, ncols) followed by the first nrows*ncols
+// ints of the view.
+//   filename     : path of the file to create or overwrite
+//   d_A          : view to dump; must hold at least nrows*ncols values
+//   nrows, ncols : matrix shape recorded in the file header
+// Stops with error->one() if the file cannot be opened.
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::saveintmatrix2binfile(std::string filename, t_pod_1i d_A, int nrows, int ncols)
+{
+  auto A = Kokkos::create_mirror_view(d_A);
+  Kokkos::deep_copy(A, d_A);
+
+  FILE *fp = fopen(filename.c_str(), "wb");
+  // fopen() returns a null pointer on failure; writing through it would crash
+  if (fp == nullptr)
+    error->one(FLERR, "Cannot open file {} for writing: {}", filename, utils::getsyserror());
+
+  int sz[2];
+  sz[0] = nrows;
+  sz[1] = ncols;
+  // count the values in size_t so that nrows*ncols cannot overflow int
+  const size_t nvalues = static_cast<size_t>(nrows) * static_cast<size_t>(ncols);
+  fwrite(sz, sizeof(int), 2, fp);
+  fwrite(A.data(), sizeof(int), nvalues, fp);
+  fclose(fp);
+}
+
+// Debugging aid: write the current work views to podkk*.bin files (relative
+// paths) and then stop the run through error->all().
+template<class DeviceType>
+void PairPODKokkos<DeviceType>::savedatafordebugging()
+{
+  // integer arrays: per-atom (ni or ni+1 entries) and per-pair (nij entries)
+  saveintmatrix2binfile("podkktypeai.bin", typeai, ni, 1);
+  saveintmatrix2binfile("podkknumij.bin", numij, ni+1, 1);
+  saveintmatrix2binfile("podkkai.bin", ai, nij, 1);
+  saveintmatrix2binfile("podkkaj.bin", aj, nij, 1);
+  saveintmatrix2binfile("podkkti.bin", ti, nij, 1);
+  saveintmatrix2binfile("podkktj.bin", tj, nij, 1);
+  saveintmatrix2binfile("podkkidxi.bin", idxi, nij, 1);
+
+  // radial basis functions and their derivatives
+  savematrix2binfile("podkkrbf.bin", rbf, nrbfmax, nij);
+  savematrix2binfile("podkkrbfx.bin", rbfx, nrbfmax, nij);
+  savematrix2binfile("podkkrbfy.bin", rbfy, nrbfmax, nij);
+  savematrix2binfile("podkkrbfz.bin", rbfz, nrbfmax, nij);
+
+  // angular basis functions and their derivatives; rows = larger of K3 and ns
+  const int abfRows = (ns < K3) ? K3 : ns;
+  savematrix2binfile("podkkabf.bin", abf,   abfRows, nij);
+  savematrix2binfile("podkkabfx.bin", abfx, abfRows, nij);
+  savematrix2binfile("podkkabfy.bin", abfy, abfRows, nij);
+  savematrix2binfile("podkkabfz.bin", abfz, abfRows, nij);
+
+  // descriptors, their derivatives, and the remaining work arrays
+  savematrix2binfile("podkkbdd.bin", bdd, 3*nij, Mdesc);
+  savematrix2binfile("podkkbd.bin", bd, ni, Mdesc);
+  savematrix2binfile("podkksumU.bin", sumU, nelements * K3 * nrbfmax, ni);
+  savematrix2binfile("podkkrij.bin", rij, 3, nij);
+  savematrix2binfile("podkkfij.bin", fij, 3, nij);
+  savematrix2binfile("podkkei.bin", ei, ni, 1);
+
+  // deliberate stop once everything is on disk
+  error->all(FLERR, "Save data and stop the run for debugging");
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairPODKokkos<DeviceType>::memory_usage()
+{
+  // no allocation is accounted for here; the reported size is always zero
+  return 0.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+namespace LAMMPS_NS {
+template class PairPODKokkos<LMPDeviceType>;    // explicit instantiation for LMPDeviceType
+#ifdef LMP_KOKKOS_GPU
+template class PairPODKokkos<LMPHostType>;    // additional LMPHostType instantiation, only when LMP_KOKKOS_GPU is defined
+#endif
+}
diff --git a/src/KOKKOS/pair_pod_kokkos.h b/src/KOKKOS/pair_pod_kokkos.h
new file mode 100644
index 00000000000..944f6c2a9f6
--- /dev/null
+++ b/src/KOKKOS/pair_pod_kokkos.h
@@ -0,0 +1,231 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(pod/kk,PairPODKokkos<LMPDeviceType>);
+PairStyle(pod/kk/device,PairPODKokkos<LMPDeviceType>);
+PairStyle(pod/kk/host,PairPODKokkos<LMPHostType>);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_PAIR_POD_KOKKOS_H
+#define LMP_PAIR_POD_KOKKOS_H
+
+#include "eapod.h"
+#include "pair_pod.h"
+#include "kokkos_type.h"
+#include "pair_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<class DeviceType>
+class PairPODKokkos : public PairPOD {    // Kokkos variant of PairPOD, templated on the device type
+ public:   
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  PairPODKokkos(class LAMMPS *);
+  ~PairPODKokkos() override;
+
+  void compute(int, int) override;
+  void coeff(int, char **) override;
+  void init_style() override;
+  double init_one(int, int) override;
+  
+ //protected:  (commented out, so every member below remains public)
+  int inum, maxneigh;
+  int host_flag;
+
+  int eflag, vflag;
+  int neighflag;
+
+  typename AT::t_neighbors_2d d_neighbors;
+  typename AT::t_int_1d d_ilist;
+  typename AT::t_int_1d d_numneigh;  
+//   typename AT::t_int_1d_randomread d_ilist;
+//   typename AT::t_int_1d_randomread d_numneigh;
+
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  typename AT::t_efloat_1d d_eatom;    // per-atom energy, incremented by tallyenergy()
+  typename AT::t_virial_array d_vatom;    // per-atom virial, incremented by tallystress()
+
+  typename AT::t_x_array_randomread x;
+  typename AT::t_f_array f;    // per-atom forces, updated atomically by tallyforce()
+  typename AT::t_int_1d_randomread type;
+
+  typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
+  tdual_fparams k_cutsq, k_scale;
+  typedef Kokkos::View<F_FLOAT**, DeviceType> t_fparams;
+  t_fparams d_cutsq, d_scale;
+  typename AT::t_int_1d d_map;
+
+  friend void pair_virial_fdotr_compute<PairPODKokkos>(PairPODKokkos*);
+
+  void grow(int, int);  
+  void copy_from_pod_class(EAPOD *podptr);
+  void divideInterval(int *intervals, int N, int M);
+  int calculateNumberOfIntervals(int N, int intervalSize);     
+  void grow_atoms(int Ni);
+  void grow_pairs(int Nij);
+   
+  void allocate() override;
+  double memory_usage() override;    // currently always returns 0
+
+  typedef Kokkos::View<int*, DeviceType> t_pod_1i;
+  typedef Kokkos::View<int**, DeviceType> t_pod_2i;
+  typedef Kokkos::View<double*, DeviceType> t_pod_1d;
+  typedef Kokkos::View<double**, DeviceType> t_pod_2d;
+  typedef Kokkos::View<double**[3], DeviceType> t_pod_3d3;
+  
+    
+  int atomBlockSize;        // size of each atom block
+  int nAtomBlocks;          // number of atom blocks
+  int atomBlocks[101];      // atom blocks (fixed capacity of 101 entries)
+  double comptime[100];     // accumulated timings in milliseconds, one slot per timed section
+  int timing;
+  
+  int ni;            // number of atoms i in the current atom block 
+  int nij;           // number of pairs (i,j) in the current atom block 
+  int nimax;         // maximum number of atoms i
+  int nijmax;        // maximum number of pairs (i,j) 
+  
+  int nelements; // number of elements 
+  int onebody;   // one-body descriptors
+  int besseldegree; // degree of Bessel functions
+  int inversedegree; // degree of inverse functions
+  int nbesselpars;  // number of Bessel parameters
+  int nCoeffPerElement; // number of coefficients per element = (nl1 + Mdesc*nClusters)
+  int ns;      // number of snapshots for radial basis functions
+  int nl1, nl2, nl3, nl4, nl23, nl33, nl34, nl44, n23, n32, nl;   // number of local descriptors
+  int nrbf2, nrbf3, nrbf4, nrbfmax;            // number of radial basis functions
+  int nabf3, nabf4;                            // number of angular basis functions  
+  int K3, K4, Q4;                                  // number of monomials
+    
+  // environment-descriptor variables
+  int nClusters; // number of environment clusters
+  int nComponents; // number of principal components
+  int Mdesc; // number of base descriptors 
+
+  double rin;  // inner cut-off radius
+  double rcut; // outer cut-off radius
+  double rmax; // rcut - rin  
+  double rcutsq;    // presumably rcut*rcut -- confirm where it is assigned
+  
+  t_pod_1d rij;         // (xj - xi) for all pairs (I, J)
+  t_pod_1d fij;         // force for all pairs (I, J)
+  t_pod_1d ei;          // energy for each atom I
+  t_pod_1i typeai;         // types of atoms I only
+  t_pod_1i numij;          // number of pairs (I, J) for each atom I   
+  t_pod_1i idxi;           // storing linear indices of atom I for all pairs (I, J)
+  t_pod_1i ai;             // IDs of atoms I for all pairs (I, J)
+  t_pod_1i aj;             // IDs of atoms J for all pairs (I, J)
+  t_pod_1i ti;             // types of atoms I for all pairs (I, J)
+  t_pod_1i tj;             // types of atoms J for all pairs (I, J)  
+
+  t_pod_1d besselparams;
+  t_pod_1d Phi;  // eigenvectors matrix ns x ns
+  t_pod_1d rbf;  // radial basis functions nij x nrbfmax  
+  t_pod_1d rbfx; // x-derivatives of radial basis functions nij x nrbfmax 
+  t_pod_1d rbfy; // y-derivatives of radial basis functions nij x nrbfmax
+  t_pod_1d rbfz; // z-derivatives of radial basis functions nij x nrbfmax   
+  t_pod_1d abf;  // angular basis functions nij x K3
+  t_pod_1d abfx; // x-derivatives of angular basis functions nij x K3
+  t_pod_1d abfy; // y-derivatives of angular basis functions nij x K3  
+  t_pod_1d abfz; // z-derivatives of angular basis functions nij x K3
+  t_pod_1d sumU; // sum of radial basis functions ni x K3 x nrbfmax x nelements
+  t_pod_1d Proj; // PCA Projection matrix
+  t_pod_1d Centroids; // centroids of the clusters  
+  t_pod_1d bd;   // base descriptors ni x Mdesc
+  t_pod_1d bdd;  // base descriptors derivatives 3 x nij x Mdesc 
+  t_pod_1d coefficients; // coefficients nCoeffPerElement x nelements
+  t_pod_1i pq3, pn3, pc3; // arrays to compute 3-body angular basis functions
+  t_pod_1i pa4, pb4, pc4; // arrays to compute 4-body angular basis functions  
+  t_pod_1i ind23; // n23 
+  t_pod_1i ind32; // n32
+  t_pod_1i ind33l, ind33r; // nl33
+  t_pod_1i ind34l, ind34r; // nl34
+  t_pod_1i ind44l, ind44r; // nl44
+  t_pod_1i elemindex;  
+  
+  void set_array_to_zero(t_pod_1d a, int N);
+  
+  int NeighborCount(t_pod_1i, double, int, int);
+  int NeighborCount(t_pod_1i, int);
+    
+  void NeighborList(t_pod_1d l_rij, t_pod_1i l_numij,  t_pod_1i l_typeai, t_pod_1i l_idxi, 
+    t_pod_1i l_ai, t_pod_1i l_aj, t_pod_1i l_ti, t_pod_1i l_tj, double l_rcutsq, int gi1, int Ni);
+   
+  void radialbasis(t_pod_1d rbft, t_pod_1d rbftx, t_pod_1d rbfty, t_pod_1d rbftz, 
+    t_pod_1d rij, t_pod_1d l_besselparams, double l_rin, double l_rmax, int l_besseldegree, 
+    int l_inversedegree, int l_nbesselpars, int l_ns,  int Nij); 
+      
+  void matrixMultiply(t_pod_1d a, t_pod_1d b, t_pod_1d c, int r1, int c1, int c2); 
+  
+  void angularbasis(t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz,
+        t_pod_1d l_rij, t_pod_1i l_pq3, int l_K3, int N);  
+   
+  void radialangularsum(t_pod_1d l_sumU, t_pod_1d l_rbf, t_pod_1d l_abf, t_pod_1i l_tj, 
+    t_pod_1i l_numij, const int l_nelements, const int l_nrbf3, const int l_K3, const int Ni, const int Nij);
+  
+  void twobodydescderiv(t_pod_1d d2, t_pod_1d dd2, t_pod_1d l_rbf, t_pod_1d l_rbfx, t_pod_1d l_rbfy, 
+    t_pod_1d l_rbfz, t_pod_1i l_idxi, t_pod_1i l_tj, int l_nrbfmax, int l_nrbf2, const int Ni, const int Nij); 
+  
+  void threebodydesc(t_pod_1d d3, t_pod_1d l_sumU, t_pod_1i l_pc3, t_pod_1i l_pn3, 
+        int l_nelements, int l_nrbf3, int l_nabf3, int l_K3, const int Ni);
+  
+  void threebodydescderiv(t_pod_1d dd3, t_pod_1d l_rbf, t_pod_1d l_rbfx, 
+    t_pod_1d l_rbfy, t_pod_1d l_rbfz, t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, 
+    t_pod_1d l_sumU, t_pod_1i l_idxi, t_pod_1i l_tj, t_pod_1i l_pc3, t_pod_1i l_pn3, t_pod_1i l_elemindex, 
+    int l_nelements, int l_nrbfmax, int l_nrbf3, int l_nabf3, int l_K3, int Ni, int Nij);
+    
+  void fourbodydesc(t_pod_1d d4,  t_pod_1d l_sumU, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, 
+      int l_nelements, int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni);
+    
+  void fourbodydescderiv(t_pod_1d dd4, t_pod_1d l_rbf, t_pod_1d l_rbfx, t_pod_1d l_rbfy, t_pod_1d l_rbfz, 
+    t_pod_1d l_abf, t_pod_1d l_abfx, t_pod_1d l_abfy, t_pod_1d l_abfz, t_pod_1d l_sumU, t_pod_1i l_idxi, 
+    t_pod_1i l_tj, t_pod_1i l_pa4, t_pod_1i l_pb4, t_pod_1i l_pc4, t_pod_1i l_elemindex, int l_nelements, 
+    int l_nrbfmax,  int l_nrbf3, int l_nrbf4, int l_nabf4, int l_K3, int l_Q4, int Ni, int Nij);
+  
+  void fourbodydesc23(t_pod_1d d23, t_pod_1d d2, t_pod_1d d3, t_pod_1i l_ind23,
+    t_pod_1i l_ind32, int l_n23, int l_n32, int Ni);
+  
+  void fourbodydescderiv23(t_pod_1d dd23, t_pod_1d d2, t_pod_1d d3, t_pod_1d dd2, 
+    t_pod_1d dd3,  t_pod_1i l_idxi, t_pod_1i l_ind23, t_pod_1i l_ind32, int l_n23, int l_n32, int Ni, int N);
+  
+  void crossdesc(t_pod_1d d12, t_pod_1d d1, t_pod_1d d2, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni);
+  
+  void crossdescderiv(t_pod_1d dd12, t_pod_1d d1, t_pod_1d d2, t_pod_1d dd1, t_pod_1d dd2,
+        t_pod_1i l_idxi, t_pod_1i ind1, t_pod_1i ind2, int n12, int Ni, int Nij);
+  
+  void blockatom_base_descriptors(t_pod_1d bd, t_pod_1d bdd, int Ni, int Nij);
+  void blockatomenv_descriptors(t_pod_1d ei, t_pod_1d cb, t_pod_1d B, int Ni);
+  
+  void blockatomenergyforce(int Ni, int Nij);
+  
+  void tallyforce(int Nij);    // add fij to f of atom ai and subtract it from f of atom aj (atomic updates)
+  
+  void tallyenergy(int istart, int Ni);    // add ei to eng_vdwl (eflag_global) and to d_eatom at offset istart (eflag_atom)
+
+  void tallystress(int Nij);    // subtract rij*fij sums from virial (vflag_global); add -0.5*rij*fij to d_vatom of both atoms (vflag_atom)
+  
+  void savematrix2binfile(std::string filename, t_pod_1d d_A, int nrows, int ncols);    // write (nrows, ncols) as doubles, then nrows*ncols doubles of d_A
+  void saveintmatrix2binfile(std::string filename, t_pod_1i d_A, int nrows, int ncols);    // write (nrows, ncols) as ints, then nrows*ncols ints of d_A
+  void savedatafordebugging();    // dump work arrays to podkk*.bin files, then stop via error->all()
+};
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/ML-POD/compute_pod_atom.cpp b/src/ML-POD/compute_pod_atom.cpp
new file mode 100644
index 00000000000..df298257210
--- /dev/null
+++ b/src/ML-POD/compute_pod_atom.cpp
@@ -0,0 +1,274 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "compute_pod_atom.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "pair.h"
+#include "eapod.h"
+#include "update.h"
+
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+enum{SCALAR,VECTOR,ARRAY};
+
+/* ----------------------------------------------------------------------
+   arguments: arg[3] = pod input file, arg[4] = projection file,
+   arg[5] = centroid file, arg[6..] = one element name per atom type
+   (see map_element2type()); no coefficient file is passed to EAPOD
+------------------------------------------------------------------------- */
+
+ComputePODAtom::ComputePODAtom(LAMMPS *lmp, int narg, char **arg) :
+  Compute(lmp, narg, arg), list(nullptr), podptr(nullptr), pod(nullptr), elements(nullptr),
+  map(nullptr)
+{
+  int nargmin = 7;
+
+  if (narg < nargmin) error->all(FLERR, "Illegal compute {} command", style);
+  if (comm->nprocs > 1) error->all(FLERR, "compute command does not support multi processors");
+
+  // scratch pointers are pointed into podptr's buffers by compute_peratom();
+  // give them and the counters a defined value until then
+  tmpmem = nullptr;
+  rij = nullptr;
+  ai = aj = ti = tj = nullptr;
+  nij = 0;
+  nmax = 0;
+  nijmax = 0;
+
+  std::string pod_file = std::string(arg[3]);         // pod input file
+  std::string coeff_file = "";                        // coefficient input file (none)
+  std::string proj_file = std::string(arg[4]);        // projection input file
+  std::string centroid_file = std::string(arg[5]);    // centroid input file
+  podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file);
+
+  int ntypes = atom->ntypes;
+  memory->create(map, ntypes + 1, "compute_pod_atom:map");
+  map_element2type(narg - 6, arg + 6, podptr->nelements);
+
+  cutmax = podptr->rcut;
+
+  // one column per (base descriptor, cluster) combination
+  size_peratom_cols = podptr->Mdesc * podptr->nClusters;
+  peratom_flag = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+ComputePODAtom::~ComputePODAtom()
+{
+  memory->destroy(map);
+  memory->destroy(pod);
+  delete podptr;
+
+  // elements is allocated by map_element2type() with atom->ntypes slots,
+  // unused slots being nullptr; free the names and then the table itself
+  if (elements) {
+    for (int i = 0; i < atom->ntypes; i++) delete[] elements[i];
+    delete[] elements;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Verify that a pair style exists whose cutoff covers cutmax, request an
+// occasional full neighbor list, and warn on rank 0 when more than one
+// compute matching "pod" is defined.
+void ComputePODAtom::init()
+{
+  const Pair *pairstyle = force->pair;
+
+  if (!pairstyle)
+    error->all(FLERR,"Compute pod requires a pair style be defined");
+
+  if (pairstyle->cutforce < cutmax)
+    error->all(FLERR,"Compute pod cutoff is longer than pairwise cutoff");
+
+  // need an occasional full neighbor list
+
+  neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
+
+  const auto npod = modify->get_compute_by_style("pod").size();
+  if (npod > 1 && comm->me == 0)
+    error->warning(FLERR,"More than one compute pod");
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+void ComputePODAtom::init_list(int /*id*/, NeighList *ptr)
+{
+  list = ptr;    // remember the neighbor list passed in; the id argument is unused
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Fill the per-atom array with POD descriptors for every atom of the
+// neighbor list: pod[i][m] = bd[m] for a single cluster, and
+// pod[i][m + Mdesc*k] = pd[k]*bd[m] when nClusters > 1.
+// The descriptor derivatives (bdd, pdd) are computed by EAPOD but not stored.
+void ComputePODAtom::compute_peratom()
+{
+  invoked_peratom = update->ntimestep;
+
+  // grow pod array if necessary; it is sized by the per-atom array capacity
+  // atom->nmax instead of narrowing the global count atom->natoms to int
+
+  if (atom->nmax > nmax) {
+    memory->destroy(pod);
+    nmax = atom->nmax;
+    memory->create(pod, nmax, size_peratom_cols, "pod/atom:pod");
+    array_atom = pod;
+  }
+
+  // invoke full neighbor list (will copy or build if necessary)
+
+  neighbor->build_one(list);
+
+  double **x = atom->x;
+  int **firstneigh = list->firstneigh;
+  int *numneigh = list->numneigh;
+  int *type = atom->type;
+  int *ilist = list->ilist;
+  int inum = list->inum;
+  int nClusters = podptr->nClusters;
+  int Mdesc = podptr->Mdesc;
+  double rcutsq = podptr->rcut*podptr->rcut;
+
+  for (int ii = 0; ii < inum; ii++) {
+    int i = ilist[ii];
+    int jnum = numneigh[i];
+
+    // grow the EAPOD scratch memory when this atom has more neighbors than
+    // any atom seen so far
+    if (nijmax < jnum) {
+      nijmax = jnum;
+      podptr->free_temp_memory();
+      podptr->allocate_temp_memory(nijmax);
+    }
+
+    // partition the scratch buffers: 3*nijmax doubles for rij, then four
+    // integer arrays of nijmax entries each
+    rij = &podptr->tmpmem[0];
+    tmpmem = &podptr->tmpmem[3*nijmax];
+    ai = &podptr->tmpint[0];
+    aj = &podptr->tmpint[nijmax];
+    ti = &podptr->tmpint[2*nijmax];
+    tj = &podptr->tmpint[3*nijmax];
+
+    // get neighbor list for atom i (sets nij)
+    lammpsNeighborList(x, firstneigh, atom->tag, type, numneigh, rcutsq, i);
+
+    // peratom base descriptors
+    double *bd = &podptr->bd[0];
+    double *bdd = &podptr->bdd[0];
+    podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij);
+
+    if (nClusters>1) {
+      // peratom env descriptors
+      double *pd = &podptr->pd[0];
+      double *pdd = &podptr->pdd[0];
+
+      // element index of atom i; lammpsNeighborList() stores map[type[i]] + 1
+      // in ti[], but ti[0] is stale or unset when no neighbor is inside the
+      // cutoff (nij == 0), so take it from the map directly
+      const int ielem = map[type[i]];
+      podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ielem, nij);
+
+      for (int k = 0; k < nClusters; k++)
+        for (int m = 0; m < Mdesc; m++)
+          pod[i][m + Mdesc*k] = pd[k]*bd[m];
+    }
+    else {
+      for (int m = 0; m < Mdesc; m++) pod[i][m] = bd[m];
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage
+------------------------------------------------------------------------- */
+
+double ComputePODAtom::memory_usage()
+{
+  // pod is allocated in compute_peratom() as nmax rows of size_peratom_cols doubles
+  double bytes = (double) nmax * size_peratom_cols * sizeof(double);
+
+  // map is allocated in the constructor with atom->ntypes + 1 ints
+  bytes += (double) (atom->ntypes + 1) * sizeof(int);
+
+  // scratch memory owned by the EAPOD object is not included
+  return bytes;
+}
+
+
+// Collect the neighbors of atom gi that lie inside the cutoff.
+//   x          : atom coordinates
+//   firstneigh : neighbor index lists, one per atom
+//   atomid     : atom IDs (1-based; stored 0-based in ai/aj)
+//   atomtypes  : atom types, translated through map[] to element indices
+//   numneigh   : number of neighbors per atom
+//   rcutsq     : squared cutoff distance
+//   gi         : index of the central atom
+// On return nij is the number of accepted pairs and rij, ai, aj, ti, tj hold
+// their displacements (xj - xi), IDs and 1-based element types. Pairs with
+// rsq <= 1e-20 are skipped.
+void ComputePODAtom::lammpsNeighborList(double **x, int **firstneigh, int *atomid, int *atomtypes, 
+                               int *numneigh, double rcutsq, int gi)
+{
+  nij = 0;
+  int itype = map[atomtypes[gi]] + 1;
+  int m = numneigh[gi];
+  for (int l = 0; l < m; l++) {           // loop over each atom around atom i
+    // neighbor list entries may carry special-bond flags in their upper bits;
+    // mask them off before using the value as an atom index
+    int gj = firstneigh[gi][l] & NEIGHMASK;    // atom j
+    double delx = x[gj][0] - x[gi][0];    // xj - xi
+    double dely = x[gj][1] - x[gi][1];    // yj - yi
+    double delz = x[gj][2] - x[gi][2];    // zj - zi
+    double rsq = delx * delx + dely * dely + delz * delz;
+    if (rsq < rcutsq && rsq > 1e-20) {
+      rij[nij * 3 + 0] = delx;
+      rij[nij * 3 + 1] = dely;
+      rij[nij * 3 + 2] = delz;
+      ai[nij] = atomid[gi]-1;
+      aj[nij] = atomid[gj]-1;
+      ti[nij] = itype;
+      tj[nij] = map[atomtypes[gj]] + 1;
+      nij++;
+    }
+  }
+}
+
+// Build the atom-type -> element-index table from the element names given
+// on the command line.
+//   narg      : number of names; must equal atom->ntypes
+//   arg       : the names, arg[i-1] belonging to atom type i; "NULL" maps to -1
+//   nelements : used only as a local counter of unique names; the value
+//               passed in is ignored and the result is not returned
+// Sets map[0] = -1, map[i] = index of the name among the unique names in
+// order of first appearance, and stores those names in elements[].
+void ComputePODAtom::map_element2type(int narg, char **arg, int nelements)
+{
+  int i,j;
+  const int ntypes = atom->ntypes;
+
+  // read args that map atom types to elements in potential file
+  // map[i] = which element the Ith atom type is, -1 if "NULL"
+  // nelements = # of unique elements
+  // elements = list of element names
+
+  if (narg != ntypes)
+    error->all(FLERR, "Number of element to type mappings does not match number of atom types");
+
+  // release a previous table: it always has ntypes slots and unused slots
+  // are nullptr, so walk all of them instead of trusting the caller's count
+  if (elements) {
+    for (i = 0; i < ntypes; i++) delete[] elements[i];
+    delete[] elements;
+  }
+  elements = new char*[ntypes];
+  for (i = 0; i < ntypes; i++) elements[i] = nullptr;
+
+  nelements = 0;
+  map[0] = -1;
+  for (i = 1; i <= narg; i++) {
+    std::string entry = arg[i-1];
+    if (entry == "NULL") {
+      map[i] = -1;
+      continue;
+    }
+    // look for the name among those already seen
+    for (j = 0; j < nelements; j++)
+      if (entry == elements[j]) break;
+    map[i] = j;
+    if (j == nelements) {
+      // first occurrence: append it to the list of unique names
+      elements[j] = utils::strdup(entry);
+      nelements++;
+    }
+  }
+}
diff --git a/src/ML-POD/compute_pod_atom.h b/src/ML-POD/compute_pod_atom.h
new file mode 100644
index 00000000000..0dcd46cbba2
--- /dev/null
+++ b/src/ML-POD/compute_pod_atom.h
@@ -0,0 +1,61 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef COMPUTE_CLASS
+// clang-format off
+ComputeStyle(pod/atom,ComputePODAtom);
+// clang-format on
+#else
+
+#ifndef LMP_COMPUTE_POD_ATOM_H
+#define LMP_COMPUTE_POD_ATOM_H
+
+#include "compute.h"
+
+namespace LAMMPS_NS {
+
+class ComputePODAtom : public Compute {    // per-atom POD descriptors, registered as compute style pod/atom
+ public:
+  ComputePODAtom(class LAMMPS *, int, char **);
+  ~ComputePODAtom() override;
+  void init() override;
+  void init_list(int, class NeighList *) override;
+  void compute_peratom() override;
+  double memory_usage() override;
+  void lammpsNeighborList(double **x, int **firstneigh, int *atomid, int *atomtype, int *numneigh,
+                        double rcutsq, int i);    // fills rij, ai, aj, ti, tj and nij for atom i
+  void map_element2type(int narg, char **arg, int nelements);    // builds map[] and elements[] from element names
+  
+ private:
+  class NeighList *list;    // neighbor list received in init_list()
+  class EAPOD *podptr;    // EAPOD object created in the constructor, deleted in the destructor
+  double **pod;    // per-atom descriptor array exposed through array_atom
+  double cutmax;    // set to podptr->rcut in the constructor
+  int nmax;    // number of rows currently allocated for pod
+  int nij;    // number of pairs found by the last lammpsNeighborList() call
+  int nijmax;    // largest neighbor count the EAPOD scratch memory was sized for
+
+  double *tmpmem;      // temporary memory
+  double *rij;         // (xj - xi) for all pairs (I, J)  
+  char **elements;    // unique element names, filled by map_element2type()
+  int *map;    // map[type] = element index, -1 for "NULL"
+  int *ai;             // IDs of atoms I for all pairs (I, J)
+  int *aj;             // IDs of atoms J for all pairs (I, J)
+  int *ti;             // types of atoms I for all pairs (I, J)
+  int *tj;             // types of atoms J  for all pairs (I, J)  
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
\ No newline at end of file
diff --git a/src/ML-POD/compute_pod_global.cpp b/src/ML-POD/compute_pod_global.cpp
new file mode 100644
index 00000000000..f8cff059b43
--- /dev/null
+++ b/src/ML-POD/compute_pod_global.cpp
@@ -0,0 +1,279 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "compute_pod_global.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "pair.h"
+#include "eapod.h"
+#include "update.h"
+
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+enum{SCALAR,VECTOR,ARRAY};
+
+/* ----------------------------------------------------------------------
+   Set up the compute from its arguments:
+     arg[3] = POD input file, arg[4] = projection file, arg[5] = centroid file,
+     arg[6..] = one element name (or NULL) per atom type.
+   Creates the EAPOD object, builds the type -> element map and sets the
+   dimensions of the global array.  Only single-processor runs are accepted.
+------------------------------------------------------------------------- */
+
+ComputePODGlobal::ComputePODGlobal(LAMMPS *lmp, int narg, char **arg) :
+  Compute(lmp, narg, arg), list(nullptr), map(nullptr), pod(nullptr), elements(nullptr)
+{
+  array_flag = 1;
+  extarray = 0;
+
+  const int nargmin = 7;
+
+  if (narg < nargmin) error->all(FLERR, "Illegal compute {} command", style);
+  if (comm->nprocs > 1) error->all(FLERR, "compute command does not support multi processors");
+
+  std::string pod_file = std::string(arg[3]);         // pod input file
+  std::string coeff_file = "";                        // coefficient input file (none for this compute)
+  std::string proj_file = std::string(arg[4]);        // projection input file
+  std::string centroid_file = std::string(arg[5]);    // centroid input file
+  podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file);
+
+  const int ntypes = atom->ntypes;
+  memory->create(map, ntypes + 1, "compute_pod_global:map");
+  map_element2type(narg - 6, arg + 6, podptr->nelements);
+
+  // The element names are only needed while map is being built.  Release them
+  // here instead of merely resetting the pointer, which would leak the table.
+  // map_element2type() allocates ntypes slots and null-initializes unused ones.
+  if (elements) {
+    for (int i = 0; i < ntypes; i++) delete[] elements[i];
+    delete[] elements;
+    elements = nullptr;
+  }
+
+  // row 0 plus three rows per atom; one column per POD coefficient
+  size_array_rows = 1 + 3*atom->natoms;
+  size_array_cols = podptr->nCoeffAll;
+  cutmax = podptr->rcut;
+
+  // per-pair scratch state is (re)assigned in compute_array()
+  nij = 0;
+  nijmax = 0;
+  tmpmem = nullptr;
+  rij = nullptr;
+  ai = nullptr;
+  aj = nullptr;
+  ti = nullptr;
+  tj = nullptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+ComputePODGlobal::~ComputePODGlobal()
+{
+  // the POD engine was created with new in the constructor
+  delete podptr;
+
+  // arrays obtained through the Memory class
+  memory->destroy(pod);
+  memory->destroy(map);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Check that a pair style with a sufficient cutoff exists, request an
+   occasional full neighbor list and (re)allocate the global array.
+------------------------------------------------------------------------- */
+
+void ComputePODGlobal::init()
+{
+  if (force->pair == nullptr)
+    error->all(FLERR,"Compute pod requires a pair style be defined");
+
+  if (cutmax > force->pair->cutforce)
+    error->all(FLERR,"Compute pod cutoff is longer than pairwise cutoff");
+
+  // need an occasional full neighbor list
+
+  neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
+
+  if (modify->get_compute_by_style("pod").size() > 1 && comm->me == 0)
+    error->warning(FLERR,"More than one compute pod");
+
+  // allocate memory for global array
+  // release the array of a previous init() first, otherwise it would be leaked
+  memory->destroy(pod);
+  memory->create(pod,size_array_rows,size_array_cols,
+                 "compute_pod_global:pod");
+  array = pod;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputePODGlobal::init_list(int /*id*/, NeighList *ptr)
+{
+  // keep the list pointer; compute_array() builds the list on demand
+  this->list = ptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Fill the global array pod:
+     row 0         accumulates the descriptors, summed over all atoms
+     row 1+3*a+d   accumulates the bdd/pdd terms (presumably descriptor
+                   derivatives) for direction d of the atom with tag a+1
+   Columns are grouped per element in blocks of nCoeffPerElement; the first
+   column of each block is the one-body descriptor.
+------------------------------------------------------------------------- */
+
+void ComputePODGlobal::compute_array()
+{
+  // this method produces the global array, so stamp invoked_array
+  invoked_array = update->ntimestep;
+
+  // clear global array
+
+  for (int irow = 0; irow < size_array_rows; irow++)
+    for (int icoeff = 0; icoeff < size_array_cols; icoeff++)
+      pod[irow][icoeff] = 0.0;
+
+  // invoke full neighbor list (will copy or build if necessary)
+
+  neighbor->build_one(list);
+
+  double **x = atom->x;
+  int **firstneigh = list->firstneigh;
+  int *numneigh = list->numneigh;
+  int *type = atom->type;
+  int *ilist = list->ilist;
+  const int inum = list->inum;
+  const int nClusters = podptr->nClusters;
+  const int Mdesc = podptr->Mdesc;
+  const int nCoeffPerElement = podptr->nCoeffPerElement;
+
+  const double rcutsq = podptr->rcut*podptr->rcut;
+
+  for (int ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    const int jnum = numneigh[i];
+
+    // grow the EAPOD temporary memory when this atom has more neighbors
+    // than any atom processed so far
+    if (nijmax < jnum) {
+      nijmax = jnum;
+      podptr->free_temp_memory();
+      podptr->allocate_temp_memory(nijmax);
+    }
+
+    // carve the per-pair work arrays out of the EAPOD temporary memory
+    rij = &podptr->tmpmem[0];
+    tmpmem = &podptr->tmpmem[3*nijmax];
+    ai = &podptr->tmpint[0];
+    aj = &podptr->tmpint[nijmax];
+    ti = &podptr->tmpint[2*nijmax];
+    tj = &podptr->tmpint[3*nijmax];
+
+    // get neighbor list for atom i
+    lammpsNeighborList(x, firstneigh, atom->tag, type, numneigh, rcutsq, i);
+
+    // peratom base descriptors
+    double *bd = &podptr->bd[0];
+    double *bdd = &podptr->bdd[0];
+    podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij);
+
+    // zero-based element index of atom i.  Equal to ti[0]-1 whenever nij > 0,
+    // but ti[0] is never written for an atom without neighbors inside the cutoff.
+    const int ielem = map[type[i]];
+    const int kbase = nCoeffPerElement*ielem;
+
+    pod[0][kbase] += 1.0; // one-body descriptor
+
+    if (nClusters>1) {
+      // peratom env descriptors
+      double *pd = &podptr->pd[0];
+      double *pdd = &podptr->pdd[0];
+      podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ielem, nij);
+
+      for (int j = 0; j < nClusters; j++) {
+        for (int m=0; m<Mdesc; m++) {
+          const int k = kbase + 1 + m + j*Mdesc; // increment by 1 because of the one-body descriptor
+          pod[0][k] += pd[j]*bd[m];
+          for (int n=0; n<nij; n++) {
+            const int ain = 3*ai[n];
+            const int ajn = 3*aj[n];
+            const int nm = 3*n + 3*nij*m;
+            const int nj = 3*n + 3*nij*j;
+            for (int d = 0; d < 3; d++) {
+              // added to the rows of atom I and subtracted from the rows of atom J
+              const double dval = bdd[d + nm]*pd[j] + bd[m]*pdd[d + nj];
+              pod[1+d+ain][k] += dval;
+              pod[1+d+ajn][k] -= dval;
+            }
+          }
+        }
+      }
+    }
+    else {
+      for (int m=0; m<Mdesc; m++) {
+        const int k = kbase + 1 + m; // increment by 1 because of the one-body descriptor
+        pod[0][k] += bd[m];
+        for (int n=0; n<nij; n++) {
+          const int ain = 3*ai[n];
+          const int ajn = 3*aj[n];
+          const int nm = 3*n + 3*nij*m;
+          for (int d = 0; d < 3; d++) {
+            pod[1+d+ain][k] += bdd[d + nm];
+            pod[1+d+ajn][k] -= bdd[d + nm];
+          }
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage
+------------------------------------------------------------------------- */
+
+double ComputePODGlobal::memory_usage()
+{
+  // pod is allocated in init(); before that this compute holds no array
+  if (pod == nullptr) return 0.0;
+
+  // bytes of the size_array_rows x size_array_cols doubles of the global array
+  return static_cast<double>(size_array_rows) * size_array_cols * sizeof(double);
+}
+
+
+/* ----------------------------------------------------------------------
+   Collect the neighbors of atom gi that lie inside the cutoff.
+   For every accepted pair the displacement goes to rij, (tag - 1) of both
+   atoms to ai/aj and (element index + 1) of both atoms to ti/tj.
+   nij is reset and ends up as the number of accepted pairs.
+------------------------------------------------------------------------- */
+
+void ComputePODGlobal::lammpsNeighborList(double **x, int **firstneigh, int *atomid, int *atomtypes, 
+                               int *numneigh, double rcutsq, int gi)
+{
+  const int elem_i = map[atomtypes[gi]] + 1;
+  const int nneigh = numneigh[gi];
+
+  nij = 0;
+  for (int l = 0; l < nneigh; l++) {
+    const int gj = firstneigh[gi][l];
+
+    // displacement xj - xi
+    const double dx = x[gj][0] - x[gi][0];
+    const double dy = x[gj][1] - x[gi][1];
+    const double dz = x[gj][2] - x[gi][2];
+    const double dist2 = dx * dx + dy * dy + dz * dz;
+
+    // skip pairs outside the cutoff and (nearly) coincident atoms
+    if (!(dist2 < rcutsq && dist2 > 1e-20)) continue;
+
+    double *rvec = &rij[nij * 3];
+    rvec[0] = dx;
+    rvec[1] = dy;
+    rvec[2] = dz;
+    ai[nij] = atomid[gi]-1;
+    aj[nij] = atomid[gj]-1;
+    ti[nij] = elem_i;
+    tj[nij] = map[atomtypes[gj]] + 1;
+    nij++;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Translate the per-type element names in arg into map:
+     map[t] = index of the element assigned to atom type t, -1 if "NULL"
+   Distinct names are stored in elements in order of first appearance.
+   The incoming nelements is only used to free a table left by an earlier
+   call; afterwards it counts the distinct names found.
+------------------------------------------------------------------------- */
+
+void ComputePODGlobal::map_element2type(int narg, char **arg, int nelements)
+{
+  const int ntypes = atom->ntypes;
+
+  if (narg != ntypes)
+    error->all(FLERR, "Number of element to type mappings does not match number of atom types");
+
+  // drop any previous name table before building a new one
+  if (elements) {
+    for (int n = 0; n < nelements; n++) delete[] elements[n];
+    delete[] elements;
+  }
+  elements = new char*[ntypes];
+  for (int n = 0; n < ntypes; n++) elements[n] = nullptr;
+
+  nelements = 0;
+  map[0] = -1;
+  for (int itype = 1; itype <= narg; itype++) {
+    const std::string entry = arg[itype-1];
+    if (entry == "NULL") {
+      map[itype] = -1;
+      continue;
+    }
+
+    // linear search for a name that was already registered
+    int ielem = 0;
+    while (ielem < nelements && !(entry == elements[ielem])) ielem++;
+    map[itype] = ielem;
+
+    // first occurrence of this name: append it
+    if (ielem == nelements) {
+      elements[ielem] = utils::strdup(entry);
+      nelements++;
+    }
+  }
+}
diff --git a/src/ML-POD/compute_pod_global.h b/src/ML-POD/compute_pod_global.h
new file mode 100644
index 00000000000..001c01eb5ec
--- /dev/null
+++ b/src/ML-POD/compute_pod_global.h
@@ -0,0 +1,60 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef COMPUTE_CLASS
+// clang-format off
+ComputeStyle(pod/gdd,ComputePODGlobal);
+// clang-format on
+#else
+
+#ifndef LMP_COMPUTE_POD_GLOBAL_H
+#define LMP_COMPUTE_POD_GLOBAL_H
+
+#include "compute.h"
+
+namespace LAMMPS_NS {
+
+class ComputePODGlobal : public Compute {    // compute style pod/gdd: global array filled from EAPOD descriptors
+ public:
+  ComputePODGlobal(class LAMMPS *, int, char **);    // file names from arg[3..5], element names from arg[6..]
+  ~ComputePODGlobal() override;
+  void init() override;    // checks pair style and cutoff, requests a full occasional neighbor list, allocates pod
+  void init_list(int, class NeighList *) override;    // stores the neighbor list pointer
+  void compute_array() override;    // zeroes and refills pod
+  double memory_usage() override;
+  void lammpsNeighborList(double **x, int **firstneigh, int *atomid, int *atomtype, int *numneigh,
+                        double rcutsq, int i);    // fills rij/ai/aj/ti/tj and nij for atom i
+  void map_element2type(int narg, char **arg, int nelements);    // builds map from the element-name arguments
+  
+ private:
+  class NeighList *list;    // neighbor list, set by init_list()
+  class EAPOD *podptr;      // POD engine, created in the constructor and deleted in the destructor
+  double **pod;             // size_array_rows x size_array_cols array, exposed as array
+  double cutmax;            // podptr->rcut; init() rejects it if larger than the pair cutoff
+  int nij;                  // number of pairs stored by the last lammpsNeighborList() call
+  int nijmax;               // neighbor count the EAPOD temporary memory was last allocated for
+     
+  double *tmpmem;      // temporary memory (podptr->tmpmem from offset 3*nijmax)
+  double *rij;         // (xj - xi) for all pairs (I, J)
+  char **elements;     // element names collected by map_element2type()
+  int *map;            // map[type] = element index of that atom type, -1 if "NULL"
+  int *ai;             // IDs of atoms I for all pairs (I, J), stored as tag - 1
+  int *aj;             // IDs of atoms J for all pairs (I, J), stored as tag - 1
+  int *ti;             // types of atoms I for all pairs (I, J), stored as map[type] + 1
+  int *tj;             // types of atoms J for all pairs (I, J), stored as map[type] + 1
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/ML-POD/compute_pod_local.cpp b/src/ML-POD/compute_pod_local.cpp
new file mode 100644
index 00000000000..a563448d5e1
--- /dev/null
+++ b/src/ML-POD/compute_pod_local.cpp
@@ -0,0 +1,276 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "compute_pod_local.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "pair.h"
+#include "eapod.h"
+#include "update.h"
+
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+enum{SCALAR,VECTOR,ARRAY};
+
+/* ----------------------------------------------------------------------
+   Set up the compute from its arguments:
+     arg[3] = POD input file, arg[4] = projection file, arg[5] = centroid file,
+     arg[6..] = one element name (or NULL) per atom type.
+   Creates the EAPOD object, builds the type -> element map and sets the
+   dimensions of the global array.  Only single-processor runs are accepted.
+------------------------------------------------------------------------- */
+
+ComputePODLocal::ComputePODLocal(LAMMPS *lmp, int narg, char **arg) :
+  Compute(lmp, narg, arg), list(nullptr), map(nullptr), pod(nullptr), elements(nullptr)
+{
+  array_flag = 1;
+  extarray = 0;
+
+  const int nargmin = 7;
+
+  if (narg < nargmin) error->all(FLERR, "Illegal compute {} command", style);
+  if (comm->nprocs > 1) error->all(FLERR, "compute command does not support multi processors");
+
+  std::string pod_file = std::string(arg[3]);         // pod input file
+  std::string coeff_file = "";                        // coefficient input file (none for this compute)
+  std::string proj_file = std::string(arg[4]);        // projection input file
+  std::string centroid_file = std::string(arg[5]);    // centroid input file
+  podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file);
+
+  const int ntypes = atom->ntypes;
+  memory->create(map, ntypes + 1, "compute_pod_local:map");
+
+  map_element2type(narg - 6, arg + 6, podptr->nelements);
+
+  // The element names are only needed while map is being built.  Release them
+  // here instead of merely resetting the pointer, which would leak the table.
+  // map_element2type() allocates ntypes slots and null-initializes unused ones.
+  if (elements) {
+    for (int i = 0; i < ntypes; i++) delete[] elements[i];
+    delete[] elements;
+    elements = nullptr;
+  }
+
+  // row 0 plus three rows per atom; one block of numdesc columns per atom
+  const int numdesc = podptr->Mdesc * podptr->nClusters;
+  size_array_rows = 1 + 3*atom->natoms;
+  size_array_cols = atom->natoms*numdesc;
+  cutmax = podptr->rcut;
+
+  // per-pair scratch state is (re)assigned in compute_array()
+  nij = 0;
+  nijmax = 0;
+  tmpmem = nullptr;
+  rij = nullptr;
+  ai = nullptr;
+  aj = nullptr;
+  ti = nullptr;
+  tj = nullptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+ComputePODLocal::~ComputePODLocal()
+{
+  // the POD engine was created with new in the constructor
+  delete podptr;
+
+  // arrays obtained through the Memory class
+  memory->destroy(pod);
+  memory->destroy(map);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Check that a pair style with a sufficient cutoff exists, request an
+   occasional full neighbor list and (re)allocate the global array.
+------------------------------------------------------------------------- */
+
+void ComputePODLocal::init()
+{
+  if (force->pair == nullptr)
+    error->all(FLERR,"Compute pod requires a pair style be defined");
+
+  if (cutmax > force->pair->cutforce)
+    error->all(FLERR,"Compute pod cutoff is longer than pairwise cutoff");
+
+  // need an occasional full neighbor list
+
+  neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
+
+  if (modify->get_compute_by_style("pod").size() > 1 && comm->me == 0)
+    error->warning(FLERR,"More than one compute pod");
+
+  // allocate memory for global array
+  // release the array of a previous init() first, otherwise it would be leaked
+  memory->destroy(pod);
+  memory->create(pod,size_array_rows,size_array_cols,
+                 "compute_pod_local:pod");
+  array = pod;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputePODLocal::init_list(int /*id*/, NeighList *ptr)
+{
+  // keep the list pointer; compute_array() builds the list on demand
+  this->list = ptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Fill the global array pod.  Every atom i (local index) owns a block of
+   Mdesc*nClusters columns:
+     row 0         holds the descriptors of atom i
+     row 1+3*a+d   accumulates the bdd/pdd terms (presumably descriptor
+                   derivatives) for direction d of the atom with tag a+1
+------------------------------------------------------------------------- */
+
+void ComputePODLocal::compute_array()
+{
+  // this method produces the global array, so stamp invoked_array
+  invoked_array = update->ntimestep;
+
+  // clear global array
+
+  for (int irow = 0; irow < size_array_rows; irow++)
+    for (int icoeff = 0; icoeff < size_array_cols; icoeff++)
+      pod[irow][icoeff] = 0.0;
+
+  // invoke full neighbor list (will copy or build if necessary)
+
+  neighbor->build_one(list);
+
+  double **x = atom->x;
+  int **firstneigh = list->firstneigh;
+  int *numneigh = list->numneigh;
+  int *type = atom->type;
+  int *ilist = list->ilist;
+  const int inum = list->inum;
+  const int nClusters = podptr->nClusters;
+  const int Mdesc = podptr->Mdesc;
+
+  const double rcutsq = podptr->rcut*podptr->rcut;
+
+  for (int ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    const int jnum = numneigh[i];
+
+    // grow the EAPOD temporary memory when this atom has more neighbors
+    // than any atom processed so far
+    if (nijmax < jnum) {
+      nijmax = jnum;
+      podptr->free_temp_memory();
+      podptr->allocate_temp_memory(nijmax);
+    }
+
+    // carve the per-pair work arrays out of the EAPOD temporary memory
+    rij = &podptr->tmpmem[0];
+    tmpmem = &podptr->tmpmem[3*nijmax];
+    ai = &podptr->tmpint[0];
+    aj = &podptr->tmpint[nijmax];
+    ti = &podptr->tmpint[2*nijmax];
+    tj = &podptr->tmpint[3*nijmax];
+
+    // get neighbor list for atom i
+    lammpsNeighborList(x, firstneigh, atom->tag, type, numneigh, rcutsq, i);
+
+    // peratom base descriptors
+    double *bd = &podptr->bd[0];
+    double *bdd = &podptr->bdd[0];
+    podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij);
+
+    if (nClusters>1) {
+      // peratom env descriptors
+      double *pd = &podptr->pd[0];
+      double *pdd = &podptr->pdd[0];
+
+      // zero-based element index of atom i.  Equal to ti[0]-1 whenever nij > 0,
+      // but ti[0] is never written for an atom without neighbors inside the cutoff.
+      const int ielem = map[type[i]];
+      podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ielem, nij);
+
+      // first column of the block that belongs to atom i
+      const int icol = Mdesc*nClusters*i;
+
+      for (int k = 0; k < nClusters; k++)
+        for (int m = 0; m < Mdesc; m++) {
+          const int imk = m + Mdesc*k + icol;
+          pod[0][imk] = pd[k]*bd[m];
+          for (int n=0; n<nij; n++) {
+            const int ain = 3*ai[n];
+            const int ajn = 3*aj[n];
+            const int nm = 3*n + 3*nij*m;
+            const int nk = 3*n + 3*nij*k;
+            for (int d = 0; d < 3; d++) {
+              // added to the rows of atom I and subtracted from the rows of atom J
+              const double dval = bdd[d + nm]*pd[k] + bd[m]*pdd[d + nk];
+              pod[1 + d + ain][imk] += dval;
+              pod[1 + d + ajn][imk] -= dval;
+            }
+          }
+        }
+    }
+    else {
+      for (int m = 0; m < Mdesc; m++) {
+        const int im = m + Mdesc*i;
+        pod[0][im] = bd[m];
+        for (int n=0; n<nij; n++) {
+          const int ain = 3*ai[n];
+          const int ajn = 3*aj[n];
+          const int nm = 3*n + 3*nij*m;
+          for (int d = 0; d < 3; d++) {
+            pod[1 + d + ain][im] += bdd[d + nm];
+            pod[1 + d + ajn][im] -= bdd[d + nm];
+          }
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage
+------------------------------------------------------------------------- */
+
+double ComputePODLocal::memory_usage()
+{
+  // pod is allocated in init(); before that this compute holds no array
+  if (pod == nullptr) return 0.0;
+
+  // bytes of the size_array_rows x size_array_cols doubles of the global array
+  return static_cast<double>(size_array_rows) * size_array_cols * sizeof(double);
+}
+
+
+/* ----------------------------------------------------------------------
+   Collect the neighbors of atom gi that lie inside the cutoff.
+   For every accepted pair the displacement goes to rij, (tag - 1) of both
+   atoms to ai/aj and (element index + 1) of both atoms to ti/tj.
+   nij is reset and ends up as the number of accepted pairs.
+------------------------------------------------------------------------- */
+
+void ComputePODLocal::lammpsNeighborList(double **x, int **firstneigh, int *atomid, int *atomtypes, 
+                               int *numneigh, double rcutsq, int gi)
+{
+  const int elem_i = map[atomtypes[gi]] + 1;
+  const int nneigh = numneigh[gi];
+
+  nij = 0;
+  for (int l = 0; l < nneigh; l++) {
+    const int gj = firstneigh[gi][l];
+
+    // displacement xj - xi
+    const double dx = x[gj][0] - x[gi][0];
+    const double dy = x[gj][1] - x[gi][1];
+    const double dz = x[gj][2] - x[gi][2];
+    const double dist2 = dx * dx + dy * dy + dz * dz;
+
+    // skip pairs outside the cutoff and (nearly) coincident atoms
+    if (!(dist2 < rcutsq && dist2 > 1e-20)) continue;
+
+    double *rvec = &rij[nij * 3];
+    rvec[0] = dx;
+    rvec[1] = dy;
+    rvec[2] = dz;
+    ai[nij] = atomid[gi]-1;
+    aj[nij] = atomid[gj]-1;
+    ti[nij] = elem_i;
+    tj[nij] = map[atomtypes[gj]] + 1;
+    nij++;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Translate the per-type element names in arg into map:
+     map[t] = index of the element assigned to atom type t, -1 if "NULL"
+   Distinct names are stored in elements in order of first appearance.
+   The incoming nelements is only used to free a table left by an earlier
+   call; afterwards it counts the distinct names found.
+------------------------------------------------------------------------- */
+
+void ComputePODLocal::map_element2type(int narg, char **arg, int nelements)
+{
+  const int ntypes = atom->ntypes;
+
+  if (narg != ntypes)
+    error->all(FLERR, "Number of element to type mappings does not match number of atom types");
+
+  // drop any previous name table before building a new one
+  if (elements) {
+    for (int n = 0; n < nelements; n++) delete[] elements[n];
+    delete[] elements;
+  }
+  elements = new char*[ntypes];
+  for (int n = 0; n < ntypes; n++) elements[n] = nullptr;
+
+  nelements = 0;
+  map[0] = -1;
+  for (int itype = 1; itype <= narg; itype++) {
+    const std::string entry = arg[itype-1];
+    if (entry == "NULL") {
+      map[itype] = -1;
+      continue;
+    }
+
+    // linear search for a name that was already registered
+    int ielem = 0;
+    while (ielem < nelements && !(entry == elements[ielem])) ielem++;
+    map[itype] = ielem;
+
+    // first occurrence of this name: append it
+    if (ielem == nelements) {
+      elements[ielem] = utils::strdup(entry);
+      nelements++;
+    }
+  }
+}
diff --git a/src/ML-POD/compute_pod_local.h b/src/ML-POD/compute_pod_local.h
new file mode 100644
index 00000000000..516d666e5ec
--- /dev/null
+++ b/src/ML-POD/compute_pod_local.h
@@ -0,0 +1,60 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef COMPUTE_CLASS
+// clang-format off
+ComputeStyle(pod/ldd,ComputePODLocal);
+// clang-format on
+#else
+
+#ifndef LMP_COMPUTE_POD_LOCAL_H
+#define LMP_COMPUTE_POD_LOCAL_H
+
+#include "compute.h"
+
+namespace LAMMPS_NS {
+
+class ComputePODLocal : public Compute {    // compute style pod/ldd: global array with one column block per atom
+ public:
+  ComputePODLocal(class LAMMPS *, int, char **);    // file names from arg[3..5], element names from arg[6..]
+  ~ComputePODLocal() override;
+  void init() override;    // checks pair style and cutoff, requests a full occasional neighbor list, allocates pod
+  void init_list(int, class NeighList *) override;    // stores the neighbor list pointer
+  void compute_array() override;    // zeroes and refills pod
+  double memory_usage() override;
+  void lammpsNeighborList(double **x, int **firstneigh, int *atomid, int *atomtype, int *numneigh,
+                        double rcutsq, int i);    // fills rij/ai/aj/ti/tj and nij for atom i
+  void map_element2type(int narg, char **arg, int nelements);    // builds map from the element-name arguments
+  
+ private:
+  class NeighList *list;    // neighbor list, set by init_list()
+  class EAPOD *podptr;      // POD engine, created in the constructor and deleted in the destructor
+  double **pod;             // size_array_rows x size_array_cols array, exposed as array
+  double cutmax;            // podptr->rcut; init() rejects it if larger than the pair cutoff
+  int nij;                  // number of pairs stored by the last lammpsNeighborList() call
+  int nijmax;               // neighbor count the EAPOD temporary memory was last allocated for
+     
+  double *tmpmem;      // temporary memory (podptr->tmpmem from offset 3*nijmax)
+  double *rij;         // (xj - xi) for all pairs (I, J)
+  char **elements;     // element names collected by map_element2type()
+  int *map;            // map[type] = element index of that atom type, -1 if "NULL"
+  int *ai;             // IDs of atoms I for all pairs (I, J), stored as tag - 1
+  int *aj;             // IDs of atoms J for all pairs (I, J), stored as tag - 1
+  int *ti;             // types of atoms I for all pairs (I, J), stored as map[type] + 1
+  int *tj;             // types of atoms J for all pairs (I, J), stored as map[type] + 1
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/ML-POD/compute_podd_atom.cpp b/src/ML-POD/compute_podd_atom.cpp
new file mode 100644
index 00000000000..9e3f8c45d63
--- /dev/null
+++ b/src/ML-POD/compute_podd_atom.cpp
@@ -0,0 +1,275 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "compute_podd_atom.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "pair.h"
+#include "eapod.h"
+#include "update.h"
+
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+enum{SCALAR,VECTOR,ARRAY};
+
+/* ----------------------------------------------------------------------
+   Set up the compute from its arguments:
+     arg[3] = POD input file, arg[4] = projection file, arg[5] = centroid file,
+     arg[6..] = one element name (or NULL) per atom type.
+   Creates the EAPOD object, builds the type -> element map and sets the
+   number of per-atom columns.  Only single-processor runs are accepted.
+------------------------------------------------------------------------- */
+
+ComputePODDAtom::ComputePODDAtom(LAMMPS *lmp, int narg, char **arg) :
+  Compute(lmp, narg, arg), list(nullptr), map(nullptr), pod(nullptr), elements(nullptr)
+{
+  const int nargmin = 7;
+
+  if (narg < nargmin) error->all(FLERR, "Illegal compute {} command", style);
+  if (comm->nprocs > 1) error->all(FLERR, "compute command does not support multi processors");
+
+  std::string pod_file = std::string(arg[3]);         // pod input file
+  std::string coeff_file = "";                        // coefficient input file (none for this compute)
+  std::string proj_file = std::string(arg[4]);        // projection input file
+  std::string centroid_file = std::string(arg[5]);    // centroid input file
+  podptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file);
+
+  const int ntypes = atom->ntypes;
+  // label names this compute, so allocation errors point at the right class
+  memory->create(map, ntypes + 1, "compute_podd_atom:map");
+  map_element2type(narg - 6, arg + 6, podptr->nelements);
+
+  // The element names are only needed while map is being built.  Release them
+  // here instead of merely resetting the pointer, which would leak the table.
+  // map_element2type() allocates ntypes slots and null-initializes unused ones.
+  if (elements) {
+    for (int i = 0; i < ntypes; i++) delete[] elements[i];
+    delete[] elements;
+    elements = nullptr;
+  }
+
+  cutmax = podptr->rcut;
+
+  // per-atom array and per-pair scratch state are set up in compute_peratom()
+  nmax = 0;
+  nij = 0;
+  nijmax = 0;
+  tmpmem = nullptr;
+  rij = nullptr;
+  ai = nullptr;
+  aj = nullptr;
+  ti = nullptr;
+  tj = nullptr;
+
+  // one column per (descriptor, cluster, direction, atom) combination
+  size_peratom_cols = podptr->Mdesc * podptr->nClusters*3*atom->natoms;
+  peratom_flag = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+ComputePODDAtom::~ComputePODDAtom()
+{
+  // the POD engine was created with new in the constructor
+  delete podptr;
+
+  // arrays obtained through the Memory class
+  memory->destroy(pod);
+  memory->destroy(map);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Check that a pair style with a sufficient cutoff exists and request an
+   occasional full neighbor list.
+------------------------------------------------------------------------- */
+
+void ComputePODDAtom::init()
+{
+  const auto *active_pair = force->pair;
+  if (!active_pair)
+    error->all(FLERR,"Compute pod requires a pair style be defined");
+
+  if (active_pair->cutforce < cutmax)
+    error->all(FLERR,"Compute pod cutoff is longer than pairwise cutoff");
+
+  // the list is full and only built when compute_peratom() asks for it
+  neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
+
+  // warn once (rank 0 only) when several computes match the style "pod"
+  const bool several = modify->get_compute_by_style("pod").size() > 1;
+  if (several && comm->me == 0)
+    error->warning(FLERR,"More than one compute pod");
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+void ComputePODDAtom::init_list(int /*id*/, NeighList *ptr)
+{
+  // keep the list pointer; compute_peratom() builds the list on demand
+  this->list = ptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Fill the per-atom array pod.  Row i belongs to atom i (local index).
+   With M = Mdesc*nClusters, column mk + M*d + M*3*a accumulates the
+   bdd/pdd term (presumably a descriptor derivative) of descriptor mk of
+   atom i for direction d of the atom with tag a+1.
+------------------------------------------------------------------------- */
+
+void ComputePODDAtom::compute_peratom()
+{
+  invoked_peratom = update->ntimestep;
+
+  // grow pod array if necessary
+
+  if (atom->natoms > nmax) {
+    memory->destroy(pod);
+    nmax = atom->natoms;
+    int numdesc = podptr->Mdesc * podptr->nClusters * 3 * atom->natoms;
+    // label names this compute, so allocation errors point at the right class
+    memory->create(pod, nmax, numdesc,"podd/atom:pod");
+    array_atom = pod;
+  }
+
+  for (int i = 0; i < atom->natoms; i++)
+    for (int icoeff = 0; icoeff < size_peratom_cols; icoeff++) {
+      pod[i][icoeff] = 0.0;
+    }
+
+  // invoke full neighbor list (will copy or build if necessary)
+
+  neighbor->build_one(list);
+
+  double **x = atom->x;
+  int **firstneigh = list->firstneigh;
+  int *numneigh = list->numneigh;
+  int *type = atom->type;
+  int *ilist = list->ilist;
+  const int inum = list->inum;
+  const int nClusters = podptr->nClusters;
+  const int Mdesc = podptr->Mdesc;
+  const double rcutsq = podptr->rcut*podptr->rcut;
+
+  for (int ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    const int jnum = numneigh[i];
+
+    // grow the EAPOD temporary memory when this atom has more neighbors
+    // than any atom processed so far
+    if (nijmax < jnum) {
+      nijmax = jnum;
+      podptr->free_temp_memory();
+      podptr->allocate_temp_memory(nijmax);
+    }
+
+    // carve the per-pair work arrays out of the EAPOD temporary memory
+    rij = &podptr->tmpmem[0];
+    tmpmem = &podptr->tmpmem[3*nijmax];
+    ai = &podptr->tmpint[0];
+    aj = &podptr->tmpint[nijmax];
+    ti = &podptr->tmpint[2*nijmax];
+    tj = &podptr->tmpint[3*nijmax];
+
+    // get neighbor list for atom i
+    lammpsNeighborList(x, firstneigh, atom->tag, type, numneigh, rcutsq, i);
+
+    // peratom base descriptors
+    double *bd = &podptr->bd[0];
+    double *bdd = &podptr->bdd[0];
+    podptr->peratombase_descriptors(bd, bdd, rij, tmpmem, ti, tj, nij);
+
+    if (nClusters>1) {
+      // peratom env descriptors
+      double *pd = &podptr->pd[0];
+      double *pdd = &podptr->pdd[0];
+
+      // zero-based element index of atom i.  Equal to ti[0]-1 whenever nij > 0,
+      // but ti[0] is never written for an atom without neighbors inside the cutoff.
+      const int ielem = map[type[i]];
+      podptr->peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ielem, nij);
+
+      // number of columns per (atom, direction) slot
+      const int M = Mdesc*nClusters;
+
+      for (int n=0; n<nij; n++) {
+        const int ain = 3*ai[n];
+        const int ajn = 3*aj[n];
+        for (int k = 0; k < nClusters; k++) {
+          for (int m = 0; m < Mdesc; m++) {
+            const int mk = m + Mdesc*k;
+            const int nm = 3*n + 3*nij*m;
+            const int nk = 3*n + 3*nij*k;
+            for (int d = 0; d < 3; d++) {
+              // added to the slot of atom I and subtracted from the slot of atom J
+              const double dval = bdd[d + nm]*pd[k] + bd[m]*pdd[d + nk];
+              pod[i][mk + M*d + M*ain] += dval;
+              pod[i][mk + M*d + M*ajn] -= dval;
+            }
+          }
+        }
+      }
+    }
+    else {
+      for (int n=0; n<nij; n++) {
+        const int ain = 3*ai[n];
+        const int ajn = 3*aj[n];
+        for (int m = 0; m < Mdesc; m++) {
+          const int nm = 3*n + 3*nij*m;
+          for (int d = 0; d < 3; d++) {
+            pod[i][m + Mdesc*d + Mdesc*ain] += bdd[d + nm];
+            pod[i][m + Mdesc*d + Mdesc*ajn] -= bdd[d + nm];
+          }
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage
+------------------------------------------------------------------------- */
+
+double ComputePODDAtom::memory_usage()
+{
+  // pod is allocated in compute_peratom(); before that this compute holds no array
+  if (pod == nullptr) return 0.0;
+
+  // nmax rows of size_peratom_cols doubles; size_peratom_cols uses the same
+  // formula as the column count passed to memory->create() in compute_peratom()
+  return static_cast<double>(nmax) * size_peratom_cols * sizeof(double);
+}
+
+
+/* ----------------------------------------------------------------------
+   Collect the neighbors of atom gi that lie inside the cutoff.
+   For every accepted pair the displacement goes to rij, (tag - 1) of both
+   atoms to ai/aj and (element index + 1) of both atoms to ti/tj.
+   nij is reset and ends up as the number of accepted pairs.
+------------------------------------------------------------------------- */
+
+void ComputePODDAtom::lammpsNeighborList(double **x, int **firstneigh, int *atomid, int *atomtypes, 
+                               int *numneigh, double rcutsq, int gi)
+{
+  const int elem_i = map[atomtypes[gi]] + 1;
+  const int nneigh = numneigh[gi];
+
+  nij = 0;
+  for (int l = 0; l < nneigh; l++) {
+    const int gj = firstneigh[gi][l];
+
+    // displacement xj - xi
+    const double dx = x[gj][0] - x[gi][0];
+    const double dy = x[gj][1] - x[gi][1];
+    const double dz = x[gj][2] - x[gi][2];
+    const double dist2 = dx * dx + dy * dy + dz * dz;
+
+    // skip pairs outside the cutoff and (nearly) coincident atoms
+    if (!(dist2 < rcutsq && dist2 > 1e-20)) continue;
+
+    double *rvec = &rij[nij * 3];
+    rvec[0] = dx;
+    rvec[1] = dy;
+    rvec[2] = dz;
+    ai[nij] = atomid[gi]-1;
+    aj[nij] = atomid[gj]-1;
+    ti[nij] = elem_i;
+    tj[nij] = map[atomtypes[gj]] + 1;
+    nij++;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Translate the per-type element names in arg into map:
+     map[t] = index of the element assigned to atom type t, -1 if "NULL"
+   Distinct names are stored in elements in order of first appearance.
+   The incoming nelements is only used to free a table left by an earlier
+   call; afterwards it counts the distinct names found.
+------------------------------------------------------------------------- */
+
+void ComputePODDAtom::map_element2type(int narg, char **arg, int nelements)
+{
+  const int ntypes = atom->ntypes;
+
+  if (narg != ntypes)
+    error->all(FLERR, "Number of element to type mappings does not match number of atom types");
+
+  // drop any previous name table before building a new one
+  if (elements) {
+    for (int n = 0; n < nelements; n++) delete[] elements[n];
+    delete[] elements;
+  }
+  elements = new char*[ntypes];
+  for (int n = 0; n < ntypes; n++) elements[n] = nullptr;
+
+  nelements = 0;
+  map[0] = -1;
+  for (int itype = 1; itype <= narg; itype++) {
+    const std::string entry = arg[itype-1];
+    if (entry == "NULL") {
+      map[itype] = -1;
+      continue;
+    }
+
+    // linear search for a name that was already registered
+    int ielem = 0;
+    while (ielem < nelements && !(entry == elements[ielem])) ielem++;
+    map[itype] = ielem;
+
+    // first occurrence of this name: append it
+    if (ielem == nelements) {
+      elements[ielem] = utils::strdup(entry);
+      nelements++;
+    }
+  }
+}
diff --git a/src/ML-POD/compute_podd_atom.h b/src/ML-POD/compute_podd_atom.h
new file mode 100644
index 00000000000..1339ed9d269
--- /dev/null
+++ b/src/ML-POD/compute_podd_atom.h
@@ -0,0 +1,61 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef COMPUTE_CLASS
+// clang-format off
+ComputeStyle(podd/atom,ComputePODDAtom);
+// clang-format on
+#else
+
+#ifndef LMP_COMPUTE_PODD_ATOM_H
+#define LMP_COMPUTE_PODD_ATOM_H
+
+#include "compute.h"
+
+namespace LAMMPS_NS {
+
+class ComputePODDAtom : public Compute {
+ public:
+  ComputePODDAtom(class LAMMPS *, int, char **);
+  ~ComputePODDAtom() override;
+  void init() override;
+  void init_list(int, class NeighList *) override;
+  void compute_peratom() override;
+  double memory_usage() override;
+  void lammpsNeighborList(double **x, int **firstneigh, int *atomid, int *atomtype, int *numneigh,
+                        double rcutsq, int i);    // fills rij/ai/aj/ti/tj and nij for atom i
+  void map_element2type(int narg, char **arg, int nelements);    // fills map and elements from arg
+  
+ private:
+  class NeighList *list;    // NOTE(review): presumably stored by init_list() -- confirm
+  class EAPOD *podptr;
+  double **pod;
+  double cutmax;
+  int nmax;
+  int nij;             // number of pairs (I, J) stored by the last lammpsNeighborList() call
+  int nijmax;          // NOTE(review): looks like the capacity of the pair arrays -- confirm
+
+  double *tmpmem;      // temporary memory
+  double *rij;         // (xj - xi) for all pairs (I, J)  
+  char **elements;     // unique element names, built by map_element2type()
+  int *map;            // map[t] = element index of atom type t, -1 if "NULL"
+  int *ai;             // IDs of atoms I for all pairs (I, J)
+  int *aj;             // IDs of atoms J for all pairs (I, J)
+  int *ti;             // types of atoms I for all pairs (I, J)
+  int *tj;             // types of atoms J  for all pairs (I, J)  
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
\ No newline at end of file
diff --git a/src/ML-POD/eapod.cpp b/src/ML-POD/eapod.cpp
new file mode 100644
index 00000000000..414b9616d31
--- /dev/null
+++ b/src/ML-POD/eapod.cpp
@@ -0,0 +1,4201 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/ Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Ngoc Cuong Nguyen (MIT)
+------------------------------------------------------------------------- */
+
+// POD header file
+#include "eapod.h"
+
+// LAMMPS header files
+
+#include "comm.h"
+#include "error.h"
+#include "math_const.h"
+#include "math_special.h"
+#include "memory.h"
+#include "tokenizer.h"
+
+#include <cmath>
+
+using namespace LAMMPS_NS;
+using MathConst::MY_PI;
+using MathSpecial::cube;
+using MathSpecial::powint;
+
+#define MAXLINE 1024
+
+// constructor: set default hyperparameters, read the POD file, then any optional data files
+EAPOD::EAPOD(LAMMPS *_lmp, const std::string &pod_file, const std::string &coeff_file, const std::string &proj_file, const std::string &centroids_file) :
+        Pointers(_lmp), elemindex(nullptr), Phi(nullptr), Lambda(nullptr), Proj(nullptr),
+        Centroids(nullptr),  bd(nullptr), bdd(nullptr), pd(nullptr), pdd(nullptr), coeff(nullptr), tmpmem(nullptr), tmpint(nullptr),
+        pn3(nullptr), pq3(nullptr), pc3(nullptr), pq4(nullptr), pa4(nullptr), pb4(nullptr), pc4(nullptr),
+        ind23(nullptr), ind32(nullptr), ind33(nullptr), ind34(nullptr), ind43(nullptr), ind44(nullptr)
+{
+  ind33l = nullptr;    // cross-index tables are only created when the matching nrbf33/34/44 > 0
+  ind33r = nullptr;
+  ind34l = nullptr;
+  ind34r = nullptr;
+  ind44l = nullptr;
+  ind44r = nullptr;
+  // defaults; read_pod_file() below overwrites whichever of them the POD file specifies
+  rin = 0.5;
+  rcut = 5.0;
+  nClusters = 1;
+  nComponents = 1;
+  nelements = 1;
+  onebody = 1;
+  besseldegree = 4;
+  inversedegree = 8;
+  nbesselpars = 3;
+  true4BodyDesc = 1;
+  ns = nbesselpars*besseldegree + inversedegree;
+  Njmax = 100;
+  nrbf2 = 6;
+  nrbf3 = 5;
+  nrbf4 = 4;
+  nabf3 = 5;
+  nabf4 = 4;
+  nrbf23 = 0;
+  nabf23 = 0;
+  nrbf33 = 0;
+  nabf33 = 0;
+  nrbf34 = 0;
+  nabf34 = 0;
+  nabf43 = 0;
+  nrbf44 = 0;
+  nabf44 = 0;
+  P3 = 4;
+  P4 = 3;
+  P23 = 0;
+  P33 = 0;
+  P34 = 0;
+  P44 = 0;
+  pdegree[0] = besseldegree;
+  pdegree[1] = inversedegree;
+  pbc[0] = 1;
+  pbc[1] = 1;
+  pbc[2] = 1;
+  besselparams[0] = 1e-3;
+  besselparams[1] = 2.0;
+  besselparams[2] = 4.0;
+
+  // read pod input file to podstruct
+  read_pod_file(pod_file);
+
+  // read pod coefficient file to podstruct (skipped when no file name is given)
+  if (coeff_file != "") {
+    ncoeff = read_coeff_file(coeff_file);
+    if (ncoeff != nCoeffAll)
+      error->all(FLERR,"number of coefficients in the coefficient file is not correct");
+  }
+  if (nClusters > 1) {
+    // read projection matrix file to podstruct
+    if (proj_file != "") {
+      nproj = read_projection_matrix(proj_file);
+      if (nproj != nComponents*Mdesc*nelements)
+        error->all(FLERR,"number of coefficients in the projection file is not correct");
+    }
+
+    // read centroids file to podstruct
+    if (centroids_file != "") {
+      ncentroids = read_centroids(centroids_file);
+      if (ncentroids != nComponents*nClusters*nelements)
+        error->all(FLERR,"number of coefficients in the centroids file is not correct");
+    }
+  }
+}
+
+// destructor: return every array held by this object to the LAMMPS memory manager
+EAPOD::~EAPOD()
+{
+  memory->destroy(tmpmem);      // scratch buffers
+  memory->destroy(tmpint);
+  memory->destroy(bd);          // descriptor work arrays
+  memory->destroy(bdd);
+  memory->destroy(pd);
+  memory->destroy(pdd);
+  memory->destroy(coeff);       // arrays created by the read_* file routines
+  memory->destroy(Proj);
+  memory->destroy(Centroids);
+  memory->destroy(elemindex);
+  memory->destroy(Phi);
+  memory->destroy(Lambda);
+  memory->destroy(pn3);
+  memory->destroy(pq3);
+  memory->destroy(pc3);
+  memory->destroy(pq4);
+  memory->destroy(pa4);
+  memory->destroy(pb4);
+  memory->destroy(pc4);
+  memory->destroy(ind23);       // index maps created in read_pod_file()
+  memory->destroy(ind32);
+  memory->destroy(ind33);
+  memory->destroy(ind34);
+  memory->destroy(ind43);
+  memory->destroy(ind44);
+  memory->destroy(ind33l);      // left/right cross-index pairs
+  memory->destroy(ind33r);
+  memory->destroy(ind34l);
+  memory->destroy(ind34r);
+  memory->destroy(ind44l);
+  memory->destroy(ind44r);
+}
+
+// Read the POD parameter file on rank 0 (each line is broadcast to all ranks), validate the
+// settings, then derive the descriptor counts and index tables and call allocate_temp_memory().
+// Raises a LAMMPS error for an unreadable file, a malformed line, or inconsistent settings.
+void EAPOD::read_pod_file(std::string pod_file)
+{
+  std::string podfilename = pod_file;
+  FILE *fppod = nullptr;
+  if (comm->me == 0) {
+    fppod = utils::open_potential(podfilename,lmp,nullptr);
+    if (fppod == nullptr)
+      error->one(FLERR,"Cannot open POD coefficient file {}: {}",
+                                   podfilename, utils::getsyserror());
+  }
+
+  // loop through lines of POD file and parse keywords
+
+  char line[MAXLINE],*ptr;
+  int eof = 0;
+
+  while (true) {
+    if (comm->me == 0) {
+      ptr = fgets(line,MAXLINE,fppod);
+      if (ptr == nullptr) {
+        eof = 1;
+        fclose(fppod);
+      }
+    }
+    MPI_Bcast(&eof,1,MPI_INT,0,world);    // all ranks leave the loop together
+    if (eof) break;
+    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+
+    // words = ptrs to all words in line
+    // strip single and double quotes from words
+
+    std::vector<std::string> words;
+    try {
+      words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector();
+    } catch (TokenizerException &) {
+      // ignore
+    }
+
+    if (words.size() == 0) continue;
+
+    auto keywd = words[0];
+
+    if (keywd == "species") {
+      nelements = words.size()-1;
+      for (int ielem = 1; ielem <= nelements; ielem++) {
+        species.push_back(words[ielem]);
+      }
+    }
+
+    if (keywd == "pbc") {
+      if (words.size() != 4)
+        error->one(FLERR,"Improper POD file.", utils::getsyserror());
+      pbc[0] = utils::inumeric(FLERR,words[1],false,lmp);
+      pbc[1] = utils::inumeric(FLERR,words[2],false,lmp);
+      pbc[2] = utils::inumeric(FLERR,words[3],false,lmp);
+    }
+
+    if ((keywd != "#") && (keywd != "species") && (keywd != "pbc")) {
+      if (words.size() != 2)
+        error->one(FLERR,"Improper POD file.", utils::getsyserror());
+      if (keywd == "rin") rin = utils::numeric(FLERR,words[1],false,lmp);
+      if (keywd == "rcut") rcut = utils::numeric(FLERR,words[1],false,lmp);
+      if (keywd == "number_of_enviroment_clusters")
+        nClusters = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "number_of_principal_components")
+        nComponents = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "bessel_polynomial_degree")
+        besseldegree = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "inverse_polynomial_degree")
+        inversedegree = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "onebody") onebody = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "twobody_number_radial_basis_functions")
+        nrbf2 = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "threebody_number_radial_basis_functions")
+        nrbf3 = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "threebody_angular_degree")
+        P3 = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "fourbody_number_radial_basis_functions")
+        nrbf4 = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "fourbody_angular_degree")
+        P4 = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "true4BodyDesc")
+        true4BodyDesc = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "fivebody_number_radial_basis_functions")
+        nrbf33 = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "fivebody_angular_degree")
+        P33 = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "sixbody_number_radial_basis_functions")
+        nrbf34 = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "sixbody_angular_degree")
+        P34 = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "sevenbody_number_radial_basis_functions")
+        nrbf44 = utils::inumeric(FLERR,words[1],false,lmp);
+      if (keywd == "sevenbody_angular_degree")
+        P44 = utils::inumeric(FLERR,words[1],false,lmp);
+    }
+  }
+  // if (nrbf2 < nrbf3) error->all(FLERR,"number of three-body radial basis functions must be equal or less than number of two-body radial basis functions");
+  if (nrbf3 < nrbf4) error->all(FLERR,"number of four-body radial basis functions must be equal or less than number of three-body radial basis functions");
+  if (nrbf4 < nrbf33) error->all(FLERR,"number of five-body radial basis functions must be equal or less than number of four-body radial basis functions");
+  if (nrbf4 < nrbf34) error->all(FLERR,"number of six-body radial basis functions must be equal or less than number of four-body radial basis functions");
+  if (nrbf4 < nrbf44) error->all(FLERR,"number of seven-body radial basis functions must be equal or less than number of four-body radial basis functions");
+  nrbfmax = (nrbf2 < nrbf3) ? nrbf3 : nrbf2;
+  nrbfmax = (nrbfmax < nrbf4) ? nrbf4 : nrbfmax;
+  nrbfmax = (nrbfmax < nrbf33) ? nrbf33 : nrbfmax;
+  nrbfmax = (nrbfmax < nrbf34) ? nrbf34 : nrbfmax;
+  nrbfmax = (nrbfmax < nrbf44) ? nrbf44 : nrbfmax;
+
+  if (P3 < P4) error->all(FLERR,"four-body angular degree must be equal or less than three-body angular degree");
+  if (P4 < P33) error->all(FLERR,"five-body angular degree must be equal or less than four-body angular degree");
+  if (P4 < P34) error->all(FLERR,"six-body angular degree must be equal or less than four-body angular degree");
+  if (P4 < P44) error->all(FLERR,"seven-body angular degree must be equal or less than four-body angular degree");
+  // P34 and P44 index the 7-entry table nb[] further down, so they must not exceed 6
+  if (P3 > 12) error->all(FLERR,"three-body angular degree must be equal or less than 12");
+  if (P34 > 6) error->all(FLERR,"six-body angular degree must be equal or less than 6");
+  if (P44 > 6) error->all(FLERR,"seven-body angular degree must be equal or less than 6");
+
+  // four-body potential
+  if ((nrbf4 > 0) && (nrbf33 == 0)) {
+    if (P4 > 6) {
+      nrbf23 = nrbf4;
+      P23 = P4;
+      nrbf4 = 0;
+      P4 = 0;
+    }
+    else {
+      if (true4BodyDesc == 1) {
+        nrbf23 = 0;
+        P23 = 0;
+      }
+      else {
+        nrbf23 = nrbf4;
+        P23 = P4;
+        nrbf4 = 0;
+        P4 = 0;
+      }
+    }
+  }
+
+  // five-body potential
+  if ((nrbf33 > 0) && (nrbf34 == 0)) {
+    if (true4BodyDesc == 1) {
+      nrbf23 = 0;
+      P23 = 0;
+    }
+    else {
+      nrbf23 = nrbf4;
+      P23 = P4;
+      nrbf4 = 0;
+      P4 = 0;
+    }
+  }
+
+  // six-body potential or seven-body potential
+  if (nrbf34 > 0) {
+    if (true4BodyDesc == 1) {
+      nrbf23 = 0;
+      P23 = 0;
+    }
+    else {
+      nrbf23 = nrbf4;
+      P23 = P4;
+      nrbf4 = nrbf34;
+      P4 = P34;
+    }
+  }
+
+  int Ne = nelements;
+
+  memory->create(elemindex, Ne*Ne, "elemindex");
+  int k = 0;    // running index shared by the symmetric entries (i1,i2) and (i2,i1)
+  for (int i1 = 0; i1<Ne; i1++)
+    for (int i2 = i1; i2<Ne; i2++) {
+      elemindex[i2 + Ne*i1] = k;
+      elemindex[i1 + Ne*i2] = k;
+      k += 1;
+    }
+
+  init2body();
+  init3body(P3);
+  init4body(P4);
+
+  int nb[] = {1,     2,     4,     7,    11,    16,    23};    // looked up with P34 and P44 (0..6)
+  nabf23 = P23+1;
+  nabf33 = P33+1;
+  nabf34 = P34+1;
+  nabf43 = nb[P34];
+  nabf44 = nb[P44];
+
+  if (onebody==0)
+    nd1 = 0;
+  else
+    nd1 = Ne;
+
+  nl1 = nd1/Ne;
+  nl2 = nrbf2*Ne;
+  nl3 = nabf3*nrbf3*Ne*(Ne+1)/2;
+  nl4 = nabf4*nrbf4*Ne*(Ne+1)*(Ne+2)/6;
+
+  nd2 = nl2*Ne;
+  nd3 = nl3*Ne;
+  nd4 = nl4*Ne;
+
+  n23 = nrbf23*Ne;
+  n32 = nabf23*nrbf23*Ne*(Ne+1)/2;
+  n33 = nabf33*nrbf33*Ne*(Ne+1)/2;
+  n34 = nabf34*nrbf34*Ne*(Ne+1)/2;
+  n43 = nabf43*nrbf34*Ne*(Ne+1)*(Ne+2)/6;
+  n44 = nabf44*nrbf44*Ne*(Ne+1)*(Ne+2)/6;
+
+  nl23 = n23*n32;
+  nl34 = n34*n43;
+  nl33 = n33*(n33+1)/2;
+  nl44 = n44*(n44+1)/2;
+
+  nd23 = nl23*Ne;
+  nd33 = nl33*Ne;
+  nd34 = nl34*Ne;
+  nd44 = nl44*Ne;
+
+  memory->create(ind23, n23, "ind23");
+  memory->create(ind32, n32, "ind32");
+  memory->create(ind33, n33, "ind33");
+  memory->create(ind34, n34, "ind34");
+  memory->create(ind43, n43, "ind43");
+  memory->create(ind44, n44, "ind44");
+
+  indexmap3(ind23, 1, nrbf23, Ne, 1, nrbf2);
+  indexmap3(ind32, nabf23, nrbf23, Ne*(Ne+1)/2, nabf3, nrbf3);
+  indexmap3(ind33, nabf33, nrbf33, Ne*(Ne+1)/2, nabf3, nrbf3);
+  indexmap3(ind34, nabf34, nrbf34, Ne*(Ne+1)/2, nabf3, nrbf3);
+  indexmap3(ind43, nabf43, nrbf34, Ne*(Ne+1)*(Ne+2)/6, nabf4, nrbf4);
+  indexmap3(ind44, nabf44, nrbf44, Ne*(Ne+1)*(Ne+2)/6, nabf4, nrbf4);
+
+  nld33 = 0;
+  nld34 = 0;
+  nld44 = 0;
+  int nebf3 = Ne*(Ne+1)/2;
+  int nebf4 = Ne*(Ne+1)*(Ne+2)/6;
+  int dabf3[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  int dabf4[] = {0, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6};
+  if (nrbf33>0) {    // first call returns the count, second call fills the index arrays
+    nld33 = crossindices(dabf3, nabf3, nrbf3, nebf3, dabf3, nabf3, nrbf3, nebf3, P33, nrbf33);
+    memory->create(ind33l, nld33, "ind33l");
+    memory->create(ind33r, nld33, "ind33r");
+    crossindices(ind33l, ind33r, dabf3, nabf3, nrbf3, nebf3, dabf3, nabf3, nrbf3, nebf3, P33, nrbf33);
+  }
+  if (nrbf34>0) {
+    nld34 = crossindices(dabf3, nabf3, nrbf3, nebf3, dabf4, nabf4, nrbf4, nebf4, P34, nrbf34);
+    memory->create(ind34l, nld34, "ind34l");
+    memory->create(ind34r, nld34, "ind34r");
+    crossindices(ind34l, ind34r, dabf3, nabf3, nrbf3, nebf3, dabf4, nabf4, nrbf4, nebf4, P34, nrbf34);
+  }
+  if (nrbf44>0) {
+    nld44 = crossindices(dabf4, nabf4, nrbf4, nebf4, dabf4, nabf4, nrbf4, nebf4, P44, nrbf44);
+    memory->create(ind44l, nld44, "ind44l");
+    memory->create(ind44r, nld44, "ind44r");
+    crossindices(ind44l, ind44r, dabf4, nabf4, nrbf4, nebf4, dabf4, nabf4, nrbf4, nebf4, P44, nrbf44);
+  }
+  ngd33 = nld33*Ne;
+  ngd34 = nld34*Ne;
+  ngd44 = nld44*Ne;
+  nl33 = nld33;    // the cross-index counts replace the nl33/nl34/nl44 values computed above
+  nl34 = nld34;
+  nl44 = nld44;
+  nd33 = ngd33;
+  nd34 = ngd34;
+  nd44 = ngd44;
+
+  Mdesc = nl2 + nl3 + nl4 + nl23 + nl33 + nl34 + nl44;
+  nl = nl1 + nl2 + nl3 + nl4 + nl23 + nl33 + nl34 + nl44;
+  nd = nd1 + nd2 + nd3 + nd4 + nd23 + nd33 + nd34 + nd44;
+  nCoeffPerElement = nl1 + Mdesc*nClusters;
+  nCoeffAll = nCoeffPerElement*nelements;
+
+  allocate_temp_memory(Njmax);
+
+  if (comm->me == 0) {
+    utils::logmesg(lmp, "**************** Begin of POD Potentials ****************\n");
+    utils::logmesg(lmp, "species: ");
+    for (int i=0; i<nelements; i++)
+      utils::logmesg(lmp, "{} ", species[i]);
+    utils::logmesg(lmp, "\n");
+    utils::logmesg(lmp, "periodic boundary conditions: {} {} {}\n", pbc[0], pbc[1], pbc[2]);
+    utils::logmesg(lmp, "number of enviroment clusters: {}\n", nClusters);
+    utils::logmesg(lmp, "number of principal compoments: {}\n", nComponents);
+    utils::logmesg(lmp, "inner cut-off radius: {}\n", rin);
+    utils::logmesg(lmp, "outer cut-off radius: {}\n", rcut);
+    utils::logmesg(lmp, "bessel polynomial degree: {}\n", besseldegree);
+    utils::logmesg(lmp, "inverse polynomial degree: {}\n",inversedegree);
+    utils::logmesg(lmp, "one-body potential: {}\n", onebody);
+    utils::logmesg(lmp, "two-body radial basis functions: {}\n", nrbf2);
+    utils::logmesg(lmp, "three-body radial basis functions: {}\n", nrbf3);
+    utils::logmesg(lmp, "three-body angular degree: {}\n", P3);
+    if (P23 < P4) {
+      utils::logmesg(lmp, "four-body radial basis functions: {}\n", nrbf4);
+      utils::logmesg(lmp, "four-body angular degree: {}\n", P4);
+    }
+    else {
+      utils::logmesg(lmp, "four-body radial basis functions: {}\n", nrbf23);
+      utils::logmesg(lmp, "four-body angular degree: {}\n", P23);
+    }
+    utils::logmesg(lmp, "five-body radial basis functions: {}\n", nrbf33);
+    utils::logmesg(lmp, "five-body angular degree: {}\n", P33);
+    utils::logmesg(lmp, "six-body radial basis functions: {}\n", nrbf34);
+    utils::logmesg(lmp, "six-body angular degree: {}\n", P34);
+    utils::logmesg(lmp, "seven-body radial basis functions: {}\n", nrbf44);
+    utils::logmesg(lmp, "seven-body angular degree: {}\n", P44);
+    utils::logmesg(lmp, "number of local descriptors per element for one-body potential: {}\n", nl1);
+    utils::logmesg(lmp, "number of local descriptors per element for two-body potential: {}\n", nl2);
+    utils::logmesg(lmp, "number of local descriptors per element for three-body potential: {}\n", nl3);
+    utils::logmesg(lmp, "number of local descriptors per element for four-body potential: {}\n", nl4+nl23);
+    utils::logmesg(lmp, "number of local descriptors per element for five-body potential: {}\n", nl33);
+    utils::logmesg(lmp, "number of local descriptors per element for six-body potential: {}\n", nl34);
+    utils::logmesg(lmp, "number of local descriptors per element for seven-body potential: {}\n", nl44);
+    utils::logmesg(lmp, "number of local descriptors per element for all potentials: {}\n", nl);    
+    utils::logmesg(lmp, "number of global descriptors: {}\n", nCoeffAll);
+    utils::logmesg(lmp, "**************** End of POD Potentials ****************\n\n");
+  }
+}
+
+// Read the POD coefficient file: a two-word header (label, count) followed by one value
+// per line. Rank 0 reads and broadcasts each line; returns the count from the header.
+int EAPOD::read_coeff_file(std::string coeff_file)
+{
+  std::string coefffilename = coeff_file;
+  FILE *fpcoeff = nullptr;
+  if (comm->me == 0) {
+    fpcoeff = utils::open_potential(coefffilename,lmp,nullptr);
+    if (fpcoeff == nullptr)
+      error->one(FLERR,"Cannot open POD coefficient file {}: {}",
+                                   coefffilename, utils::getsyserror());
+  }
+
+  // find the first non-blank line (the header) and check its format
+  char line[MAXLINE],*ptr;
+  int eof = 0;
+  int nwords = 0;
+  while (nwords == 0) {
+    if (comm->me == 0) {
+      ptr = fgets(line,MAXLINE,fpcoeff);
+      if (ptr == nullptr) {
+        eof = 1;
+        fclose(fpcoeff);
+      }
+    }
+    MPI_Bcast(&eof,1,MPI_INT,0,world);
+    if (eof) break;
+    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+
+    // strip comment, skip line if blank
+
+    nwords = utils::count_words(utils::trim_comment(line));
+  }
+
+  if (nwords != 2)
+    error->all(FLERR,"Incorrect format in POD coefficient file");
+
+  // strip single and double quotes from words
+
+  int ncoeffall = 0;
+  std::string tmp_str;
+  try {
+    ValueTokenizer words(utils::trim_comment(line),"\"' \t\n\r\f");
+    tmp_str = words.next_string();
+    ncoeffall = words.next_int();
+  } catch (TokenizerException &e) {
+    error->all(FLERR,"Incorrect format in POD coefficient file: {}", e.what());
+  }
+
+  // loop over single block of coefficients and insert values in coeff
+
+  memory->create(coeff, ncoeffall, "pod:pod_coeff");
+
+  for (int icoeff = 0; icoeff < ncoeffall; icoeff++) {
+    if (comm->me == 0) {
+      ptr = fgets(line,MAXLINE,fpcoeff);
+      if (ptr == nullptr) {
+        eof = 1;
+        fclose(fpcoeff);
+      }
+    }
+
+    MPI_Bcast(&eof,1,MPI_INT,0,world);
+    if (eof)
+      error->all(FLERR,"Incorrect format in POD coefficient file");
+    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+
+    try {
+      ValueTokenizer cff(utils::trim_comment(line));
+      if (cff.count() != 1)
+        error->all(FLERR,"Incorrect format in POD coefficient file");
+
+      coeff[icoeff] = cff.next_double();
+    } catch (TokenizerException &e) {
+      error->all(FLERR,"Incorrect format in POD coefficient file: {}", e.what());
+    }
+  }
+  if (comm->me == 0) {
+    if (!eof) fclose(fpcoeff);    // on EOF the file was already closed inside the loops
+  }
+
+  if (comm->me == 0) {
+    utils::logmesg(lmp, "**************** Begin of POD Coefficients ****************\n");
+    utils::logmesg(lmp, "total number of coefficients for POD potential: {}\n", ncoeffall);
+    utils::logmesg(lmp, "**************** End of POD Coefficients ****************\n\n");
+  }
+
+  return ncoeffall;
+}
+
+// Read the PCA projection matrix file: a two-word header (label, count) followed by one
+// value per line. Rank 0 reads and broadcasts each line; returns the count from the header.
+int EAPOD::read_projection_matrix(std::string proj_file)
+{
+  std::string projfilename = proj_file;
+  FILE *fpproj = nullptr;
+  if (comm->me == 0) {
+    fpproj = utils::open_potential(projfilename,lmp,nullptr);
+    if (fpproj == nullptr)
+      error->one(FLERR,"Cannot open PCA projection matrix file {}: {}",
+                                   projfilename, utils::getsyserror());
+  }
+
+  // check format for first line of file
+
+  char line[MAXLINE],*ptr;
+  int eof = 0;
+  int nwords = 0;
+  while (nwords == 0) {
+    if (comm->me == 0) {
+      ptr = fgets(line,MAXLINE,fpproj);
+      if (ptr == nullptr) {
+        eof = 1;
+        fclose(fpproj);
+      }
+    }
+    MPI_Bcast(&eof,1,MPI_INT,0,world);
+    if (eof) break;
+    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+
+    // strip comment, skip line if blank
+
+    nwords = utils::count_words(utils::trim_comment(line));
+  }
+
+  if (nwords != 2)
+    error->all(FLERR,"Incorrect format in PCA projection matrix file");
+
+  // strip single and double quotes from words
+
+  int nprojall = 0;
+  std::string tmp_str;
+  try {
+    ValueTokenizer words(utils::trim_comment(line),"\"' \t\n\r\f");
+    tmp_str = words.next_string();
+    nprojall = words.next_int();
+  } catch (TokenizerException &e) {
+    error->all(FLERR,"Incorrect format in PCA projection matrix file: {}", e.what());
+  }
+
+  // loop over single block of values and insert them in Proj
+
+  memory->create(Proj, nprojall, "pod:pca_proj");
+
+  for (int iproj = 0; iproj < nprojall; iproj++) {
+    if (comm->me == 0) {
+      ptr = fgets(line,MAXLINE,fpproj);
+      if (ptr == nullptr) {
+        eof = 1;
+        fclose(fpproj);
+      }
+    }
+
+    MPI_Bcast(&eof,1,MPI_INT,0,world);
+    if (eof)
+      error->all(FLERR,"Incorrect format in PCA projection matrix file");
+    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+
+    try {
+      ValueTokenizer cff(utils::trim_comment(line));
+      if (cff.count() != 1)
+        error->all(FLERR,"Incorrect format in PCA projection matrix file");
+
+      Proj[iproj] = cff.next_double();
+    } catch (TokenizerException &e) {
+      error->all(FLERR,"Incorrect format in PCA projection matrix file: {}", e.what());
+    }
+  }
+  if (comm->me == 0) {
+    if (!eof) fclose(fpproj);    // on EOF the file was already closed inside the loops
+  }
+
+  if (comm->me == 0) {
+    utils::logmesg(lmp, "**************** Begin of PCA projection matrix ****************\n");
+    utils::logmesg(lmp, "total number of elements for PCA projection matrix: {}\n", nprojall);
+    utils::logmesg(lmp, "**************** End of PCA projection matrix ****************\n\n");
+  }
+
+  return nprojall;
+}
+
+// Read the PCA centroids file: a two-word header (label, count) followed by one value
+// per line. Rank 0 reads and broadcasts each line; returns the count from the header.
+int EAPOD::read_centroids(std::string centroids_file)
+{
+  std::string centfilename = centroids_file;
+  FILE *fpcent = nullptr;
+  if (comm->me == 0) {
+    fpcent = utils::open_potential(centfilename,lmp,nullptr);
+    if (fpcent == nullptr)
+      error->one(FLERR,"Cannot open PCA centroids file {}: {}",
+                                   centfilename, utils::getsyserror());
+  }
+
+  // check format for first line of file
+
+  char line[MAXLINE],*ptr;
+  int eof = 0;
+  int nwords = 0;
+  while (nwords == 0) {
+    if (comm->me == 0) {
+      ptr = fgets(line,MAXLINE,fpcent);
+      if (ptr == nullptr) {
+        eof = 1;
+        fclose(fpcent);
+      }
+    }
+    MPI_Bcast(&eof,1,MPI_INT,0,world);
+    if (eof) break;
+    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+
+    // strip comment, skip line if blank
+
+    nwords = utils::count_words(utils::trim_comment(line));
+  }
+
+  if (nwords != 2)
+    error->all(FLERR,"Incorrect format in PCA centroids file");
+
+  // strip single and double quotes from words
+
+  int ncentall = 0;
+  std::string tmp_str;
+  try {
+    ValueTokenizer words(utils::trim_comment(line),"\"' \t\n\r\f");
+    tmp_str = words.next_string();
+    ncentall = words.next_int();
+  } catch (TokenizerException &e) {
+    error->all(FLERR,"Incorrect format in PCA centroids file: {}", e.what());
+  }
+
+  // loop over single block of values and insert them in Centroids
+
+  memory->create(Centroids, ncentall, "pod:pca_cent");
+
+  for (int icent = 0; icent < ncentall; icent++) {
+    if (comm->me == 0) {
+      ptr = fgets(line,MAXLINE,fpcent);
+      if (ptr == nullptr) {
+        eof = 1;
+        fclose(fpcent);
+      }
+    }
+
+    MPI_Bcast(&eof,1,MPI_INT,0,world);
+    if (eof)
+      error->all(FLERR,"Incorrect format in PCA centroids file");
+    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+
+    try {
+      ValueTokenizer cff(utils::trim_comment(line));
+      if (cff.count() != 1)
+        error->all(FLERR,"Incorrect format in PCA centroids file");
+
+      Centroids[icent] = cff.next_double();
+    } catch (TokenizerException &e) {
+      error->all(FLERR,"Incorrect format in PCA centroids file: {}", e.what());
+    }
+  }
+  if (comm->me == 0) {
+    if (!eof) fclose(fpcent);    // on EOF the file was already closed inside the loops
+  }
+
+  if (comm->me == 0) {
+    utils::logmesg(lmp, "**************** Begin of PCA centroids ****************\n");
+    utils::logmesg(lmp, "total number of elements for PCA centroids: {}\n", ncentall);
+    utils::logmesg(lmp, "**************** End of PCA centroids ****************\n\n");
+  }
+
+  return ncentall;
+}
+
+
+// Evaluate the base descriptors of one atom (bd1, Mdesc values) and their derivatives
+// (bdd1, 3*Nj*Mdesc values) from the Nj neighbor displacement vectors in rij.
+// temp is scratch space carved up below; ti is accepted but not read by this routine.
+void EAPOD::peratombase_descriptors(double *bd1, double *bdd1, double *rij, double *temp,
+        int *ti, int *tj, int Nj)
+{
+  for (int i=0; i<Mdesc; i++) bd1[i] = 0.0;    // clear the caller's buffers, not the bd/bdd members
+  for (int i=0; i<3*Nj*Mdesc; i++) bdd1[i] = 0.0;
+
+  double *d2 =  &bd1[0]; // nl2
+  double *d3 =  &bd1[nl2]; // nl3
+  double *d4 =  &bd1[nl2 + nl3]; // nl4
+  double *d23 =  &bd1[nl2 + nl3 + nl4]; // nl23
+  double *d33 =  &bd1[nl2 + nl3 + nl4 + nl23]; // nl33
+  double *d34 =  &bd1[nl2 + nl3 + nl4 + nl23 + nl33]; // nl34
+  double *d44 =  &bd1[nl2 + nl3 + nl4 + nl23 + nl33 + nl34]; // nl44
+
+  double *dd2 = &bdd1[0]; // 3*Nj*nl2  
+  double *dd3 = &bdd1[3*Nj*nl2]; // 3*Nj*nl3  
+  double *dd4 = &bdd1[3*Nj*(nl2+nl3)]; // 3*Nj*nl4
+  double *dd23 = &bdd1[3*Nj*(nl2+nl3+nl4)]; // 3*Nj*nl23
+  double *dd33 = &bdd1[3*Nj*(nl2+nl3+nl4+nl23)]; // 3*Nj*nl33
+  double *dd34 = &bdd1[3*Nj*(nl2+nl3+nl4+nl23+nl33)]; // 3*Nj*nl34
+  double *dd44 = &bdd1[3*Nj*(nl2+nl3+nl4+nl23+nl33+nl34)]; // 3*Nj*nl44
+
+  int n1 = Nj*K3*nrbf3;
+  int n2 = Nj*nrbfmax;
+  int n3 = Nj*ns;
+  int n4 = Nj*K3;
+  int n5 = K3*nrbf3*nelements;
+
+  double *U = &temp[0]; // Nj*K3*nrbf3
+  double *Ux = &temp[n1]; // Nj*K3*nrbf3
+  double *Uy = &temp[2*n1]; // Nj*K3*nrbf3
+  double *Uz = &temp[3*n1]; // Nj*K3*nrbf3
+  double *sumU = &temp[4*n1]; // K3*nrbf3*nelements
+
+  double *rbf = &temp[4*n1 + n5]; // Nj*nrbf2
+  double *rbfx = &temp[4*n1 + n5 + n2]; // Nj*nrbf2
+  double *rbfy = &temp[4*n1 + n5 + 2*n2]; // Nj*nrbf2
+  double *rbfz = &temp[4*n1 + n5 + 3*n2]; // Nj*nrbf2
+
+  double *rbft = &temp[4*n1 + n5 + 4*n2]; // Nj*ns
+  double *rbfxt = &temp[4*n1 + n5 + 4*n2 + n3]; // Nj*ns
+  double *rbfyt = &temp[4*n1 + n5 + 4*n2 + 2*n3]; // Nj*ns
+  double *rbfzt = &temp[4*n1 + n5 + 4*n2 + 3*n3]; // Nj*ns
+
+  radialbasis(rbft, rbfxt, rbfyt, rbfzt, rij, besselparams, rin, rcut-rin, pdegree[0], pdegree[1], nbesselpars, Nj);
+
+  char chn = 'N';
+  double alpha = 1.0, beta = 0.0;
+  DGEMM(&chn, &chn, &Nj, &nrbfmax, &ns, &alpha, rbft, &Nj, Phi, &ns, &beta, rbf, &Nj);
+  DGEMM(&chn, &chn, &Nj, &nrbfmax, &ns, &alpha, rbfxt, &Nj, Phi, &ns, &beta, rbfx, &Nj);
+  DGEMM(&chn, &chn, &Nj, &nrbfmax, &ns, &alpha, rbfyt, &Nj, Phi, &ns, &beta, rbfy, &Nj);
+  DGEMM(&chn, &chn, &Nj, &nrbfmax, &ns, &alpha, rbfzt, &Nj, Phi, &ns, &beta, rbfz, &Nj);
+
+  if ((nl2>0) && (Nj>0)) {
+    twobodydescderiv(d2, dd2, rbf, rbfx, rbfy, rbfz, tj, Nj);
+  }
+
+  if ((nl3 > 0) && (Nj>1)) {
+    // the angular arrays start at the same temp offset as rbft and overwrite it
+    double *abf = &temp[4*n1 + n5 + 4*n2]; // Nj*K3
+    double *abfx = &temp[4*n1 + n5 + 4*n2 + n4]; // Nj*K3
+    double *abfy = &temp[4*n1 + n5 + 4*n2 + 2*n4]; // Nj*K3
+    double *abfz = &temp[4*n1 + n5 + 4*n2 + 3*n4]; // Nj*K3
+    double *tm = &temp[4*n1 + n5 + 4*n2 + 4*n4]; // 4*K3
+
+    angularbasis(abf, abfx, abfy, abfz, rij, tm, pq3, Nj, K3);
+    radialangularbasis(sumU, U, Ux, Uy, Uz, rbf, rbfx, rbfy, rbfz,
+            abf, abfx, abfy, abfz, tm, tj, Nj, K3, nrbf3, nelements);
+    threebodydesc(d3, sumU, Nj);
+    threebodydescderiv(dd3, sumU, Ux, Uy, Uz, tj, Nj);
+
+    if ((nl23>0) && (Nj>2)) {
+      fourbodydesc23(d23, d2, d3);
+      fourbodydescderiv23(dd23, d2, d3, dd2, dd3, 3*Nj);
+    }
+
+    if ((nl33>0) && (Nj>3)) {
+      crossdesc(d33, d3, d3, ind33l, ind33r, nl33);
+      crossdescderiv(dd33, d3, d3, dd3, dd3, ind33l, ind33r, nl33, 3*Nj);
+    }
+
+    if ((nl4 > 0) && (Nj>2)) {
+      if (K4 < K3) {    // repack sumU, Ux, Uy, Uz in place from a K3 stride to a K4 stride
+        for (int m=0; m<nrbf4; m++)
+          for (int k=0; k<K4; k++)
+            for (int i=0; i<nelements; i++)
+              sumU[i + nelements*k + nelements*K4*m] = sumU[i + nelements*k + nelements*K3*m];
+
+        for (int m=0; m<nrbf4; m++)
+          for (int k=0; k<K4; k++)
+            for (int i=0; i<Nj; i++) {
+              int ii = i + Nj*k + Nj*K4*m;
+              int jj = i + Nj*k + Nj*K3*m;
+              Ux[ii] = Ux[jj];
+              Uy[ii] = Uy[jj];
+              Uz[ii] = Uz[jj];
+            }
+      }
+      fourbodydescderiv(d4, dd4, sumU, Ux, Uy, Uz, tj, Nj);
+      if ((nl34>0) && (Nj>4)) {
+        crossdesc(d34, d3, d4, ind34l, ind34r, nl34);
+        crossdescderiv(dd34, d3, d4, dd3, dd4, ind34l, ind34r, nl34, 3*Nj);
+      }
+
+      if ((nl44>0) && (Nj>5)) {
+        crossdesc(d44, d4, d4, ind44l, ind44r, nl44);
+        crossdescderiv(dd44, d4, d4, dd4, dd4, ind44l, ind44r, nl44, 3*Nj);
+      }
+    }
+  }
+}
+
+double EAPOD::peratomenergyforce(double *fij, double *rij, double *temp,
+        int *ti, int *tj, int Nj)
+{
+  // BLAS arguments shared by every DGEMV call below
+  char notrans = 'N';
+  double one = 1.0, zero = 0.0;
+  int stride = 1;
+  int nforce = 3*Nj;
+
+  for (int n = 0; n < nforce; n++) fij[n] = 0.0;
+
+  // coefficient block of the central atom's element; its first entry starts the energy sum
+  const int ielem = ti[0] - 1;
+  double *celem = &coeff[nCoeffPerElement*ielem];
+  double energy = celem[0];
+
+  // base descriptors (bd) and their derivatives with respect to atom coordinates (bdd)
+  peratombase_descriptors(bd, bdd, rij, temp, ti, tj, Nj);
+
+  if (nClusters <= 1) { // single-environment descriptors
+    for (int m = 0; m < Mdesc; m++)
+      energy += celem[1+m]*bd[m];
+    DGEMV(&notrans, &nforce, &Mdesc, &one, bdd, &nforce, &celem[1], &stride, &zero, fij, &stride);
+    return energy;
+  }
+
+  // multi-environment descriptors (pd) and their derivatives (pdd)
+  peratomenvironment_descriptors(pd, pdd, bd, bdd, temp, ielem, Nj);
+
+  for (int j = 0; j < nClusters; j++)
+    for (int m = 0; m < Mdesc; m++)
+      energy += celem[1 + m + j*Mdesc]*bd[m]*pd[j];
+
+  // cb[m] = sum over j of coeff*pd[j], cp[j] = sum over m of coeff*bd[m]; both live in temp
+  double *cb = &temp[0];
+  double *cp = &temp[Mdesc];
+  for (int m = 0; m < Mdesc; m++) cb[m] = 0.0;
+  for (int j = 0; j < nClusters; j++) cp[j] = 0.0;
+  for (int j = 0; j < nClusters; j++)
+    for (int m = 0; m < Mdesc; m++) {
+      cb[m] += celem[1 + m + j*Mdesc]*pd[j];
+      cp[j] += celem[1 + m + j*Mdesc]*bd[m];
+    }
+  DGEMV(&notrans, &nforce, &Mdesc, &one, bdd, &nforce, cb, &stride, &zero, fij, &stride);
+  DGEMV(&notrans, &nforce, &nClusters, &one, pdd, &nforce, cp, &stride, &one, fij, &stride);
+  return energy;
+}
+
+/**
+ * @brief Accumulate the total energy and the per-atom force array over all atoms.
+ *
+ * @param force      Output array of 3*natom values; zeroed here, then filled by tallyforce.
+ * @param x          Coordinates, 3 per atom, indexed by the entries of jlist.
+ * @param atomtype   1-based atom types.
+ * @param alist      Index map applied to neighbor indices (see myneighbors).
+ * @param jlist      Concatenated neighbor lists.
+ * @param pairnumsum Offsets into jlist; atom i owns [pairnumsum[i], pairnumsum[i+1]).
+ * @param natom      Number of atoms to loop over.
+ * @return Sum of the per-atom energies.
+ */
+double EAPOD::energyforce(double *force, double *x, int *atomtype, int *alist,
+          int *jlist, int *pairnumsum, int natom)
+{
+  for (int n = 0; n < 3*natom; ++n) force[n] = 0.0;
+
+  double total = 0.0;
+
+  for (int i = 0; i < natom; ++i) {
+    const int nneigh = pairnumsum[i+1] - pairnumsum[i];
+
+    // an atom without neighbors only contributes its one-body coefficient
+    if (nneigh == 0) {
+      total += coeff[nCoeffPerElement*(atomtype[i]-1)];
+      continue;
+    }
+
+    // grow the scratch buffers when this neighborhood is the largest seen so far
+    if (nneigh > Njmax) {
+      Njmax = nneigh;
+      free_temp_memory();
+      allocate_temp_memory(Njmax);
+    }
+
+    // carve the scratch buffers (pointers taken after any reallocation)
+    double *rij  = tmpmem;              // 3*nneigh
+    double *fij  = tmpmem + 3*nneigh;   // 3*nneigh
+    double *work = tmpmem + 6*nneigh;   // remaining scratch
+    int *ai = tmpint;                   // nneigh
+    int *aj = tmpint + nneigh;          // nneigh
+    int *ti = tmpint + 2*nneigh;        // nneigh
+    int *tj = tmpint + 3*nneigh;        // nneigh
+
+    myneighbors(rij, x, ai, aj, ti, tj, jlist, pairnumsum, atomtype, alist, i);
+
+    total += peratomenergyforce(fij, rij, work, ti, tj, nneigh);
+
+    tallyforce(force, fij, ai, aj, nneigh);
+  }
+
+  return total;
+}
+
+/**
+ * @brief Compute the base descriptors of every atom and store them column-wise.
+ *
+ * @param basedesc   Output of natom*Mdesc values; descriptor m of atom i is stored
+ *                   at basedesc[i + natom*m]. Atoms without neighbors keep zeros.
+ * @param x          Coordinates, 3 per atom.
+ * @param atomtype   1-based atom types.
+ * @param alist      Index map applied to neighbor indices (see myneighbors).
+ * @param jlist      Concatenated neighbor lists.
+ * @param pairnumsum Offsets into jlist; atom i owns [pairnumsum[i], pairnumsum[i+1]).
+ * @param natom      Number of atoms to loop over.
+ */
+void EAPOD::base_descriptors(double *basedesc, double *x,
+        int *atomtype, int *alist, int *jlist, int *pairnumsum, int natom)
+{
+  const int nvalues = natom*Mdesc;
+  for (int n = 0; n < nvalues; ++n) basedesc[n] = 0.0;
+
+  for (int i = 0; i < natom; ++i) {
+    const int nneigh = pairnumsum[i+1] - pairnumsum[i];
+    if (nneigh <= 0) continue;
+
+    // grow the scratch buffers when this neighborhood is the largest seen so far
+    if (nneigh > Njmax) {
+      Njmax = nneigh;
+      free_temp_memory();
+      allocate_temp_memory(Njmax);
+      printf("reallocate temporary memory with Njmax = %d ...\n", Njmax);
+    }
+
+    double *rij  = tmpmem;            // 3*nneigh
+    double *work = tmpmem + 3*nneigh; // scratch for the descriptor routine
+    int *ai = tmpint;                 // nneigh
+    int *aj = tmpint + nneigh;        // nneigh
+    int *ti = tmpint + 2*nneigh;      // nneigh
+    int *tj = tmpint + 3*nneigh;      // nneigh
+
+    myneighbors(rij, x, ai, aj, ti, tj, jlist, pairnumsum, atomtype, alist, i);
+
+    // many-body descriptors of atom i land in bd (derivatives in bdd)
+    peratombase_descriptors(bd, bdd, rij, work, ti, tj, nneigh);
+
+    double *column = basedesc + i;
+    for (int m = 0; m < Mdesc; ++m)
+      column[natom*m] = bd[m];
+  }
+}
+
+/**
+ * @brief Accumulate global descriptors and their coordinate derivatives
+ *        (single-environment variant).
+ *
+ * @param gd         Output of nd values: descriptors summed over atoms.
+ * @param gdd        Output of 3*natom*nd values; entry [c + 3*a + 3*natom*k] is the
+ *                   contribution to component c of atom a for global descriptor k.
+ * @param basedesc   Output of natom*Mdesc values, basedesc[i + natom*m] = descriptor m of atom i.
+ * @param x          Coordinates, 3 per atom.
+ * @param atomtype   1-based atom types.
+ * @param alist      Index map applied to neighbor indices (see myneighbors).
+ * @param jlist      Concatenated neighbor lists.
+ * @param pairnumsum Offsets into jlist; atom i owns [pairnumsum[i], pairnumsum[i+1]).
+ * @param natom      Number of atoms to loop over.
+ */
+void EAPOD::descriptors(double *gd, double *gdd, double *basedesc, double *x,
+        int *atomtype, int *alist, int *jlist, int *pairnumsum, int natom)
+{
+  for (int n = 0; n < nd; ++n) gd[n] = 0.0;
+  for (int n = 0; n < 3*natom*nd; ++n) gdd[n] = 0.0;
+  for (int n = 0; n < natom*Mdesc; ++n) basedesc[n] = 0.0;
+
+  for (int i = 0; i < natom; ++i) {
+    const int nneigh = pairnumsum[i+1] - pairnumsum[i];
+
+    // one-body descriptor: count the atoms of each element
+    if (nd1 > 0)
+      gd[nCoeffPerElement*(atomtype[i]-1)] += 1.0;
+
+    if (nneigh <= 0) continue;
+
+    // grow the scratch buffers when this neighborhood is the largest seen so far
+    if (nneigh > Njmax) {
+      Njmax = nneigh;
+      free_temp_memory();
+      allocate_temp_memory(Njmax);
+      printf("reallocate temporary memory with Njmax = %d ...\n", Njmax);
+    }
+
+    double *rij  = tmpmem;            // 3*nneigh
+    double *work = tmpmem + 3*nneigh; // scratch for the descriptor routine
+    int *ai = tmpint;                 // nneigh
+    int *aj = tmpint + nneigh;        // nneigh
+    int *ti = tmpint + 2*nneigh;      // nneigh
+    int *tj = tmpint + 3*nneigh;      // nneigh
+
+    myneighbors(rij, x, ai, aj, ti, tj, jlist, pairnumsum, atomtype, alist, i);
+
+    // many-body descriptors of atom i
+    peratombase_descriptors(bd, bdd, rij, work, ti, tj, nneigh);
+
+    // first global slot of this element, shifted by nl1 because of the one-body descriptor
+    const int kbase = nCoeffPerElement*(ti[0]-1) + nl1;
+
+    for (int m = 0; m < Mdesc; ++m) {
+      basedesc[i + natom*m] = bd[m];
+
+      const int k = kbase + m;
+      gd[k] += bd[m];
+
+      // each pair derivative is added to the central atom and subtracted from the neighbor
+      for (int n = 0; n < nneigh; ++n) {
+        double *gi = &gdd[3*ai[n] + 3*natom*k];
+        double *gj = &gdd[3*aj[n] + 3*natom*k];
+        const double *src = &bdd[3*n + 3*nneigh*m];
+        for (int c = 0; c < 3; ++c) {
+          gi[c] += src[c];
+          gj[c] -= src[c];
+        }
+      }
+    }
+  }
+}
+
+/**
+ * @brief Accumulate global descriptors and their coordinate derivatives
+ *        (multi-environment variant: base descriptors weighted by pd).
+ *
+ * @param gd         Output of nCoeffAll values: sums of pd[j]*bd[m] over atoms.
+ * @param gdd        Output of 3*natom*nCoeffAll values; entry [c + 3*a + 3*natom*k] is the
+ *                   contribution to component c of atom a for global descriptor k.
+ * @param basedesc   Output of natom*Mdesc values, basedesc[i + natom*m] = bd[m] of atom i.
+ * @param probdesc   Output of natom*nClusters values, probdesc[i + natom*j] = pd[j] of atom i.
+ * @param x          Coordinates, 3 per atom.
+ * @param atomtype   1-based atom types.
+ * @param alist      Index map applied to neighbor indices (see myneighbors).
+ * @param jlist      Concatenated neighbor lists.
+ * @param pairnumsum Offsets into jlist; atom i owns [pairnumsum[i], pairnumsum[i+1]).
+ * @param natom      Number of atoms to loop over.
+ */
+void EAPOD::descriptors(double *gd, double *gdd, double *basedesc, double *probdesc, double *x,
+        int *atomtype, int *alist, int *jlist, int *pairnumsum, int natom)
+{
+  for (int n = 0; n < nCoeffAll; ++n) gd[n] = 0.0;
+  for (int n = 0; n < 3*natom*nCoeffAll; ++n) gdd[n] = 0.0;
+  for (int n = 0; n < natom*Mdesc; ++n) basedesc[n] = 0.0;
+  for (int n = 0; n < natom*nClusters; ++n) probdesc[n] = 0.0;
+
+  for (int i = 0; i < natom; ++i) {
+    const int nneigh = pairnumsum[i+1] - pairnumsum[i];
+
+    // one-body descriptor: count the atoms of each element
+    if (nd1 > 0)
+      gd[nCoeffPerElement*(atomtype[i]-1)] += 1.0;
+
+    if (nneigh <= 0) continue;
+
+    // grow the scratch buffers when this neighborhood is the largest seen so far
+    if (nneigh > Njmax) {
+      Njmax = nneigh;
+      free_temp_memory();
+      allocate_temp_memory(Njmax);
+      printf("reallocate temporary memory with Njmax = %d ...\n", Njmax);
+    }
+
+    double *rij = tmpmem;          // 3*nneigh
+    int *ai = tmpint;              // nneigh
+    int *aj = tmpint + nneigh;     // nneigh
+    int *ti = tmpint + 2*nneigh;   // nneigh
+    int *tj = tmpint + 3*nneigh;   // nneigh
+
+    myneighbors(rij, x, ai, aj, ti, tj, jlist, pairnumsum, atomtype, alist, i);
+
+    // many-body descriptors of atom i
+    peratombase_descriptors(bd, bdd, rij, &tmpmem[3*nneigh], ti, tj, nneigh);
+
+    // multi-environment descriptors and their derivatives with respect to atom coordinates;
+    // the scratch passed here starts at tmpmem[0], i.e. it overlaps rij, which is not read again below
+    peratomenvironment_descriptors(pd, pdd, bd, bdd, tmpmem, ti[0] - 1, nneigh);
+
+    // first global slot of this element, shifted by nl1 because of the one-body descriptor
+    const int kbase = nCoeffPerElement*(ti[0]-1) + nl1;
+
+    for (int j = 0; j < nClusters; ++j) {
+      probdesc[i + natom*j] = pd[j];
+
+      for (int m = 0; m < Mdesc; ++m) {
+        basedesc[i + natom*m] = bd[m];
+
+        const int k = kbase + m + j*Mdesc;
+        gd[k] += pd[j]*bd[m];
+
+        // product rule on pd[j]*bd[m]; added to the central atom, subtracted from the neighbor
+        for (int n = 0; n < nneigh; ++n) {
+          double *gi = &gdd[3*ai[n] + 3*natom*k];
+          double *gj = &gdd[3*aj[n] + 3*natom*k];
+          const double *db = &bdd[3*n + 3*nneigh*m];
+          const double *dp = &pdd[3*n + 3*nneigh*j];
+          for (int c = 0; c < 3; ++c) {
+            const double g = db[c]*pd[j] + bd[m]*dp[c];
+            gi[c] += g;
+            gj[c] -= g;
+          }
+        }
+      }
+    }
+  }
+}
+
+/**
+ * @brief Form the n23 x n32 table of products of selected d2 and d3 entries.
+ *
+ * @param d23 Output of n23*n32 values, d23[i + n23*j] = d2[ind23[i]] * d3[ind32[j]].
+ * @param d2  Source descriptors indexed through ind23.
+ * @param d3  Source descriptors indexed through ind32.
+ */
+void EAPOD::fourbodydesc23(double *d23, double *d2, double *d3)
+{
+  for (int j = 0; j < n32; ++j) {
+    const int right = ind32[j];
+    double *column = d23 + n23*j;
+    for (int i = 0; i < n23; ++i) {
+      const int left = ind23[i];
+      column[i] = d2[left]*d3[right];
+    }
+  }
+}
+
+/**
+ * @brief Product-rule derivatives of the d2*d3 products built by fourbodydesc23.
+ *
+ * @param dd23 Output of N*n23*n32 values, laid out as dd23[n + N*i + N*n23*j].
+ * @param d2   Descriptors indexed through ind23.
+ * @param d3   Descriptors indexed through ind32.
+ * @param dd2  Derivatives of d2, N values per descriptor.
+ * @param dd3  Derivatives of d3, N values per descriptor.
+ * @param N    Number of derivative components per descriptor.
+ */
+void EAPOD::fourbodydescderiv23(double* dd23, double *d2, double *d3, double *dd2, double *dd3, int N)
+{
+  for (int j = 0; j < n32; ++j) {
+    const int right = ind32[j];
+    for (int i = 0; i < n23; ++i) {
+      const int left = ind23[i];
+      double *out = dd23 + N*i + N*n23*j;
+      for (int n = 0; n < N; ++n)
+        out[n] = d2[left]*dd3[n + N*right] + dd2[n + N*left]*d3[right];
+    }
+  }
+}
+
+/**
+ * @brief Element-wise products of two descriptor sets selected through index lists.
+ *
+ * @param d12  Output of n12 values, d12[i] = d1[ind1[i]] * d2[ind2[i]].
+ * @param d1   First descriptor array.
+ * @param d2   Second descriptor array.
+ * @param ind1 Indices into d1, n12 entries.
+ * @param ind2 Indices into d2, n12 entries.
+ * @param n12  Number of products to form.
+ */
+void EAPOD::crossdesc(double *d12, double *d1, double *d2, int *ind1, int *ind2, int n12)
+{
+  for (int i = 0; i < n12; ++i) {
+    const int left = ind1[i];
+    const int right = ind2[i];
+    d12[i] = d1[left]*d2[right];
+  }
+}
+
+/**
+ * @brief Product-rule derivatives of the products formed by crossdesc.
+ *
+ * @param dd12 Output of N*n12 values, laid out as dd12[n + N*i].
+ * @param d1   First descriptor array.
+ * @param d2   Second descriptor array.
+ * @param dd1  Derivatives of d1, N values per descriptor.
+ * @param dd2  Derivatives of d2, N values per descriptor.
+ * @param ind1 Indices into d1/dd1, n12 entries.
+ * @param ind2 Indices into d2/dd2, n12 entries.
+ * @param n12  Number of products.
+ * @param N    Number of derivative components per descriptor.
+ */
+void EAPOD::crossdescderiv(double *dd12, double *d1, double *d2, double *dd1, double *dd2,
+        int *ind1, int *ind2, int n12, int N)
+{
+  for (int i = 0; i < n12; ++i) {
+    const int left = ind1[i];
+    const int right = ind2[i];
+    double *out = dd12 + N*i;
+    for (int n = 0; n < N; ++n)
+      out[n] = d1[left]*dd2[n + N*right] + dd1[n + N*left]*d2[right];
+  }
+}
+
+/**
+ * @brief Gather the neighbor data of atom i into contiguous per-pair arrays.
+ *
+ * @param rij        Output, 3 values per neighbor: x of neighbor j minus x of atom i.
+ * @param x          Coordinates, 3 per atom, indexed by the raw jlist entries.
+ * @param ai         Output, central atom index (always i) for each pair.
+ * @param aj         Output, alist-mapped neighbor index for each pair.
+ * @param ti         Output, type of atom i for each pair.
+ * @param tj         Output, type of the alist-mapped neighbor for each pair.
+ * @param jlist      Concatenated neighbor lists.
+ * @param pairnumsum Offsets into jlist; atom i owns [pairnumsum[i], pairnumsum[i+1]).
+ * @param atomtype   Atom types.
+ * @param alist      Index map applied to neighbor indices before they are stored in aj
+ *                   or used to look up a type.
+ * @param i          Central atom.
+ */
+void EAPOD::myneighbors(double *rij, double *x, int *ai, int *aj, int *ti, int *tj,
+        int *jlist, int *pairnumsum, int *atomtype, int *alist, int i)
+{
+  const int first = pairnumsum[i];
+  const int count = pairnumsum[i+1] - first; // number of neighbors around i
+  const int itype = atomtype[i];
+
+  for (int l = 0; l < count; ++l) {
+    const int j = jlist[first + l];  // raw neighbor index, used for coordinates
+    const int jmapped = alist[j];    // mapped index, used for bookkeeping and type
+
+    ai[l] = i;
+    aj[l] = jmapped;
+    ti[l] = itype;
+    tj[l] = atomtype[jmapped];
+
+    for (int c = 0; c < 3; ++c)
+      rij[c + 3*l] = x[c + 3*j] - x[c + 3*i];
+  }
+}
+
+/**
+ * @brief Compute the four-body descriptors and their derivatives from the
+ *        summed basis functions and the per-neighbor basis derivatives.
+ *
+ * Each descriptor is a sum over the terms listed in pa4/pb4/pc4 of
+ * c * sumU(j1) * sumU(j2) * sumU(j3); the derivative applies the product rule,
+ * one factor at a time. With more than one element, the factors are taken for
+ * every element triple i1 <= i2 <= i3 and a neighbor only contributes to the
+ * factors whose element equals its own type.
+ *
+ * @param d4       Output of nabf4*nrbf4*Me values (Me = number of element triples).
+ * @param dd4      Output of 3*N*nabf4*nrbf4*Me values, 3*N per descriptor.
+ * @param sumU     Summed basis functions, indexed [element + nelements*(k + K4*m)].
+ * @param Ux       x-derivatives of the basis functions, indexed [j + N*(k + K4*m)].
+ * @param Uy       y-derivatives, same layout as Ux.
+ * @param Uz       z-derivatives, same layout as Ux.
+ * @param atomtype 1-based neighbor types (read only when nelements > 1).
+ * @param N        Number of neighbors.
+ */
+void EAPOD::fourbodydescderiv(double *d4, double *dd4, double *sumU, double *Ux, double *Uy,
+        double *Uz, int *atomtype, int N)
+{
+  const int Me = nelements*(nelements+1)*(nelements+2)/6; // count of triples i1 <= i2 <= i3
+  const int ndesc = nabf4*nrbf4*Me;
+
+  for (int idx = 0; idx < ndesc; ++idx) d4[idx] = 0.0;
+  for (int idx = 0; idx < 3*N*ndesc; ++idx) dd4[idx] = 0.0;
+
+  const int Q = pa4[nabf4]; // stride between the three index columns stored in pb4
+
+  // adds w * grad U(neighbor j, angular index ja, radial index m) to the
+  // three components of neighbor j in the derivative row starting at dd4[row]
+  auto addgrad = [&](int row, int j, int ja, int m, double w) {
+    const int src = j + N*ja + N*K4*m;
+    double *dst = &dd4[3*j + row];
+    dst[0] += w*Ux[src];
+    dst[1] += w*Uy[src];
+    dst[2] += w*Uz[src];
+  };
+
+  for (int m = 0; m < nrbf4; ++m) {
+    for (int p = 0; p < nabf4; ++p) {
+      for (int q = pa4[p]; q < pa4[p+1]; ++q) {
+        int c = pc4[q];
+        const int j1 = pb4[q];
+        const int j2 = pb4[q + Q];
+        const int j3 = pb4[q + 2*Q];
+
+        if (nelements == 1) {
+          const double c1 = c*sumU[j1 + K4*m];
+          const double c2 = c*sumU[j2 + K4*m];
+          const double t12 = c1*sumU[j2 + K4*m];
+          const double c3 = sumU[j3 + K4*m];
+          const double t13 = c1*c3;
+          const double t23 = c2*c3;
+
+          const int kk = p + nabf4*m;
+          const int row = 3*N*kk;
+          d4[kk] += t12*c3;
+
+          for (int j = 0; j < N; ++j) {
+            addgrad(row, j, j3, m, t12);
+            addgrad(row, j, j2, m, t13);
+            addgrad(row, j, j1, m, t23);
+          }
+          continue;
+        }
+
+        // k enumerates the element triples in the order they are visited
+        int k = 0;
+        for (int i1 = 0; i1 < nelements; ++i1) {
+          const double c1 = c*sumU[i1 + nelements*j1 + nelements*K4*m];
+          for (int i2 = i1; i2 < nelements; ++i2) {
+            const double c2 = c*sumU[i2 + nelements*j2 + nelements*K4*m];
+            const double t12 = c1*sumU[i2 + nelements*j2 + nelements*K4*m];
+            for (int i3 = i2; i3 < nelements; ++i3) {
+              const double c3 = sumU[i3 + nelements*j3 + nelements*K4*m];
+              const double t13 = c1*c3;
+              const double t23 = c2*c3;
+
+              const int kk = p + nabf4*m + nabf4*nrbf4*k;
+              const int row = 3*N*kk;
+              d4[kk] += t12*c3;
+
+              for (int j = 0; j < N; ++j) {
+                const int jtype = atomtype[j] - 1;
+                if (jtype == i3) addgrad(row, j, j3, m, t12);
+                if (jtype == i2) addgrad(row, j, j2, m, t13);
+                if (jtype == i1) addgrad(row, j, j1, m, t23);
+              }
+              k += 1;
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+/**
+ * @brief Compute the three-body descriptors from the summed basis functions.
+ *
+ * For every radial index m and angular index p, the descriptor sums
+ * pc3[q] * sumU(q) * sumU(q) over the terms q in [pn3[p], pn3[p+1]); with more
+ * than one element the two factors are taken for every element pair i1 <= i2.
+ *
+ * @param d3   Output of nabf3*nrbf3*Me values (Me = number of element pairs).
+ * @param sumU Summed basis functions, indexed [element + nelements*(q + K3*m)].
+ * @param N    Not used by this routine.
+ */
+void EAPOD::threebodydesc(double *d3, double *sumU, int N)
+{
+  const int Me = nelements*(nelements+1)/2; // count of pairs i1 <= i2
+  const int ndesc = nabf3*nrbf3*Me;
+  for (int idx = 0; idx < ndesc; ++idx) d3[idx] = 0.0;
+
+  for (int m = 0; m < nrbf3; ++m) {
+    for (int p = 0; p < nabf3; ++p) {
+      for (int q = pn3[p]; q < pn3[p+1]; ++q) {
+
+        if (nelements == 1) {
+          const double t1 = pc3[q]*sumU[q + K3*m];
+          d3[p + nabf3*m] += t1*sumU[q + K3*m];
+          continue;
+        }
+
+        // k enumerates the element pairs in the order they are visited
+        const int base = nelements*q + nelements*K3*m;
+        int k = 0;
+        for (int i1 = 0; i1 < nelements; ++i1) {
+          const double t1 = pc3[q]*sumU[i1 + base];
+          for (int i2 = i1; i2 < nelements; ++i2) {
+            d3[p + nabf3*m + nabf3*nrbf3*k] += t1*sumU[i2 + base];
+            k += 1;
+          }
+        }
+      }
+    }
+  }
+}
+
+/**
+ * @brief Compute the derivatives of the three-body descriptors.
+ *
+ * @param dd3      Output of 3*N*nabf3*nrbf3*Me values, 3*N per descriptor.
+ * @param sumU     Summed basis functions, indexed [element + nelements*(q + K3*m)].
+ * @param Ux       x-derivatives of the basis functions, indexed [j + N*(q + K3*m)].
+ * @param Uy       y-derivatives, same layout as Ux.
+ * @param Uz       z-derivatives, same layout as Ux.
+ * @param atomtype 1-based neighbor types (read only when nelements > 1).
+ * @param N        Number of neighbors.
+ */
+void EAPOD::threebodydescderiv(double *dd3, double *sumU, double *Ux, double *Uy, double *Uz,
+        int *atomtype, int N)
+{
+  const int Me = nelements*(nelements+1)/2; // count of pairs i1 <= i2
+  const int ndesc = nabf3*nrbf3*Me;
+  for (int idx = 0; idx < 3*N*ndesc; ++idx) dd3[idx] = 0.0;
+
+  for (int m = 0; m < nrbf3; ++m) {
+    for (int p = 0; p < nabf3; ++p) {
+      for (int q = pn3[p]; q < pn3[p+1]; ++q) {
+        const int src0 = N*q + N*K3*m; // offset of term q, radial index m in Ux/Uy/Uz
+
+        if (nelements == 1) {
+          // derivative of t*U*U with a single element: factor 2 for every neighbor
+          const double f = 2.0*(pc3[q]*sumU[q + K3*m]);
+          double *row = &dd3[3*N*(p + nabf3*m)];
+          for (int j = 0; j < N; ++j) {
+            row[0 + 3*j] += f*Ux[j + src0];
+            row[1 + 3*j] += f*Uy[j + src0];
+            row[2 + 3*j] += f*Uz[j + src0];
+          }
+          continue;
+        }
+
+        for (int i1 = 0; i1 < nelements; ++i1) {
+          const double t1 = pc3[q]*sumU[i1 + nelements*q + nelements*K3*m];
+          for (int j = 0; j < N; ++j) {
+            const int i2 = atomtype[j] - 1;
+            const int k = elemindex[i2 + nelements*i1]; // descriptor block of the pair (i1, i2)
+            // same-element pairs pick up both product-rule terms at once
+            const double f = (i1 == i2) ? 2.0*t1 : t1;
+            double *out = &dd3[3*j + 3*N*(p + nabf3*m + nabf3*nrbf3*k)];
+            out[0] += f*Ux[j + src0];
+            out[1] += f*Uy[j + src0];
+            out[2] += f*Uz[j + src0];
+          }
+        }
+      }
+    }
+  }
+}
+
+/**
+ * @brief Compute the two-body descriptors and their derivatives.
+ *
+ * The descriptor for radial function m and neighbor element t is the sum of
+ * rbf over all neighbors of that element; its derivative with respect to a
+ * neighbor is that neighbor's rbf derivative.
+ *
+ * @param d2   Output of nl2 values, indexed [m + nrbf2*t].
+ * @param dd2  Output of 3*N*nl2 values, indexed [c + 3*(n + N*m + N*nrbf2*t)].
+ * @param rbf  Radial basis functions, indexed [n + N*m].
+ * @param rbfx Derivatives of rbf with respect to x, same layout.
+ * @param rbfy Derivatives of rbf with respect to y, same layout.
+ * @param rbfz Derivatives of rbf with respect to z, same layout.
+ * @param tj   1-based types of the neighboring atoms.
+ * @param N    Number of neighboring atoms.
+ */
+void EAPOD::twobodydescderiv(double *d2, double *dd2, double *rbf, double *rbfx,
+        double *rbfy, double *rbfz, int *tj, int N)
+{
+  for (int idx = 0; idx < nl2; ++idx) d2[idx] = 0.0;
+  for (int idx = 0; idx < 3*N*nl2; ++idx) dd2[idx] = 0.0;
+
+  // neighbor-major traversal: each d2 entry still receives its neighbors in
+  // increasing order, and each dd2 entry is touched exactly once
+  for (int n = 0; n < N; ++n) {
+    const int t = tj[n] - 1; // zero-based element of neighbor n
+    for (int m = 0; m < nrbf2; ++m) {
+      const int src = n + N*m;
+      double *grad = &dd2[3*(src + N*nrbf2*t)];
+
+      d2[m + nrbf2*t] += rbf[src];
+      grad[0] += rbfx[src];
+      grad[1] += rbfy[src];
+      grad[2] += rbfz[src];
+    }
+  }
+}
+
+/**
+ * @brief Calculates the radial basis functions and their derivatives.
+ *
+ * Two families are produced for every neighbor n:
+ *  - Bessel-type functions b*fcut*sin(a*x_j(r))/r for each scaling parameter j and
+ *    degree i, stored at index n + N*i + N*besseldegree*j;
+ *  - inverse powers fcut/dij^(i+1), stored at index n + N*(besseldegree*nbesselpars + i).
+ *
+ * The Bessel part is evaluated with a loop over all nbesselpars parameters, so any
+ * parameter count is supported. Scaling parameters with |alpha| <= 1e-6 are replaced
+ * by 1e-3, exactly as in snapshots(), which avoids 0/0 in the scaled coordinate and
+ * keeps both routines evaluating the same functions.
+ *
+ * @param rbf           Output: radial basis functions.
+ * @param rbfx          Output: derivatives of rbf with respect to x.
+ * @param rbfy          Output: derivatives of rbf with respect to y.
+ * @param rbfz          Output: derivatives of rbf with respect to z.
+ * @param rij           Relative positions of the neighbors, 3 values per neighbor.
+ * @param besselparams  Bessel scaling parameters; must hold nbesselpars entries.
+ * @param rin           Inner distance subtracted from dij before scaling.
+ * @param rmax          Length used to normalize (dij - rin); snapshots() uses rcut - rin.
+ * @param besseldegree  Number of Bessel degrees per scaling parameter.
+ * @param inversedegree Number of inverse-power functions.
+ * @param nbesselpars   Number of Bessel scaling parameters.
+ * @param N             Number of neighboring atoms.
+ */
+void EAPOD::radialbasis(double *rbf, double *rbfx, double *rbfy, double *rbfz, double *rij, double *besselparams, double rin,
+        double rmax, int besseldegree, int inversedegree, int nbesselpars, int N)
+{
+  // quantities that do not depend on the neighbor
+  const double expm1 = exp(-1.0);            // normalizes the cutoff so that fcut(r=0) is ~1
+  const double dfscale = 3.0/(rmax*expm1);   // prefactor of the cutoff derivative
+  const double norm = sqrt(2.0/(rmax));      // Bessel normalization before division by the degree
+
+  // Loop over all neighboring atoms
+  for (int n=0; n<N; n++) {
+    const double xij1 = rij[0+3*n];
+    const double xij2 = rij[1+3*n];
+    const double xij3 = rij[2+3*n];
+
+    // distance and unit vector (chain-rule factors d(dij)/dx, /dy, /dz)
+    const double dij = sqrt(xij1*xij1 + xij2*xij2 + xij3*xij3);
+    const double dr1 = xij1/dij;
+    const double dr2 = xij2/dij;
+    const double dr3 = xij3/dij;
+
+    const double r = dij - rin;
+    const double y = r/rmax;
+    const double y2 = y*y;
+
+    // smooth cutoff exp(-1/sqrt((1-y^3)^2 + 1e-6)) / exp(-1)
+    const double y3 = 1.0 - y2*y;
+    const double y4 = y3*y3 + 1e-6;
+    const double y5 = sqrt(y4);
+    const double y6 = exp(-1.0/y5);
+    const double y7 = y4*y5;
+
+    const double fcut = y6/expm1;
+    const double dfcut = (dfscale*(y2)*y6*(y*y2 - 1.0))/y7;
+
+    // fcut/r, fcut/r^2, and dfcut/r
+    const double f1 = fcut/r;
+    const double f2 = f1/r;
+    const double df1 = dfcut/r;
+
+    // Bessel-type functions, one group of besseldegree functions per scaling parameter
+    for (int j=0; j<nbesselpars; j++) {
+      double alpha = besselparams[j];
+      if (fabs(alpha) <= 1.0e-6) alpha = 1e-3; // same guard as snapshots(): avoid 0/0 below
+
+      // scaled coordinate x(r) in [0,1] and its derivative with respect to r
+      const double t1 = (1.0-exp(-alpha));
+      const double t2 = exp(-alpha*r/rmax);
+      const double xs =  (1.0 - t2)/t1;
+      const double dxs = (alpha/rmax)*t2/t1;
+
+      for (int i=0; i<besseldegree; i++) {
+        const double a = (i+1)*MY_PI;
+        const double b = (norm/(i+1));
+        const double af1 = a*f1;
+
+        const double sinax = sin(a*xs);
+        const int nij = n + N*i + N*besseldegree*j;
+
+        rbf[nij] = b*f1*sinax;
+
+        // d/dr of b*fcut*sin(a*x)/r
+        const double drbfdr = b*(df1*sinax - f2*sinax + af1*cos(a*xs)*dxs);
+        rbfx[nij] = drbfdr*dr1;
+        rbfy[nij] = drbfdr*dr2;
+        rbfz[nij] = drbfdr*dr3;
+      }
+    }
+
+    // inverse-power functions fcut/dij^(i+1)
+    const double fcutd = fcut/dij;
+    for (int i=0; i<inversedegree; i++) {
+      const int p = besseldegree*nbesselpars + i;
+      const int nij = n + N*p;
+      const double a = pow(dij, (double) (i+1.0));
+
+      rbf[nij] = fcut/a;
+
+      // d/d(dij) of fcut/dij^(i+1)
+      const double drbfdr = (dfcut - (i+1.0)*fcutd)/a;
+      rbfx[nij] = drbfdr*dr1;
+      rbfy[nij] = drbfdr*dr2;
+      rbfz[nij] = drbfdr*dr3;
+    }
+  }
+}
+
+/**
+ * @brief Calculates the angular basis functions and their derivatives.
+ *
+ * The functions are monomials in the unit vector (u,v,w) of each neighbor,
+ * built by recursion: function n is function pq[n]-1 multiplied by u, v or w
+ * depending on pq[n + K] being 1, 2 or 3. Function 0 is the constant 1.
+ *
+ * @param abf   Output: angular basis functions, indexed [j + N*n].
+ * @param abfx  Output: derivatives of abf with respect to x.
+ * @param abfy  Output: derivatives of abf with respect to y.
+ * @param abfz  Output: derivatives of abf with respect to z.
+ * @param rij   Relative positions of the neighbors, 3 values per neighbor.
+ * @param tm    Work array of at least 4*K values (function values and their
+ *              partial derivatives with respect to u, v, w).
+ * @param pq    Recursion table: parent index (1-based) in [0,K), direction in [K,2K).
+ * @param N     Number of neighboring atoms.
+ * @param K     Number of angular basis functions.
+ */
+void EAPOD::angularbasis(double *abf, double *abfx, double *abfy, double *abfz, double *rij, double *tm, int *pq, int N, int K)
+{
+  // split the work array: value, d/du, d/dv, d/dw
+  double *val = tm;
+  double *du = tm + K;
+  double *dv = tm + 2*K;
+  double *dw = tm + 3*K;
+
+  // seed of the recursion: the constant function
+  val[0] = 1.0;
+  du[0] = 0.0;
+  dv[0] = 0.0;
+  dw[0] = 0.0;
+
+  for (int j = 0; j < N; ++j) {
+    const double x = rij[0+3*j];
+    const double y = rij[1+3*j];
+    const double z = rij[2+3*j];
+
+    const double xx = x*x;
+    const double yy = y*y;
+    const double zz = z*z;
+    const double xy = x*y;
+    const double xz = x*z;
+    const double yz = y*z;
+
+    // unit vector of neighbor j
+    const double dij = sqrt(xx + yy + zz);
+    const double u = x/dij;
+    const double v = y/dij;
+    const double w = z/dij;
+
+    // Jacobian of (u,v,w) with respect to (x,y,z); it is symmetric, so the
+    // off-diagonal entries are shared
+    const double dij3 = dij*dij*dij;
+    const double dudx = (yy+zz)/dij3;
+    const double dvdy = (xx+zz)/dij3;
+    const double dwdz = (xx+yy)/dij3;
+    const double dudy = -xy/dij3;
+    const double dudz = -xz/dij3;
+    const double dvdz = -yz/dij3;
+    const double dvdx = dudy;
+    const double dwdx = dudz;
+    const double dwdy = dvdz;
+
+    // constant function: value from the seed, zero gradient
+    abf[j] = val[0];
+    abfx[j] = 0.0;
+    abfy[j] = 0.0;
+    abfz[j] = 0.0;
+
+    for (int n = 1; n < K; ++n) {
+      const int parent = pq[n]-1;
+      const int dir = pq[n + K];
+
+      // multiply the parent by u, v or w; product rule for the partials
+      switch (dir) {
+        case 1:
+          val[n] = val[parent]*u;
+          du[n] = du[parent]*u + val[parent];
+          dv[n] = dv[parent]*u;
+          dw[n] = dw[parent]*u;
+          break;
+        case 2:
+          val[n] = val[parent]*v;
+          du[n] = du[parent]*v;
+          dv[n] = dv[parent]*v + val[parent];
+          dw[n] = dw[parent]*v;
+          break;
+        case 3:
+          val[n] = val[parent]*w;
+          du[n] = du[parent]*w;
+          dv[n] = dv[parent]*w;
+          dw[n] = dw[parent]*w + val[parent];
+          break;
+      }
+
+      // chain rule from (u,v,w) to (x,y,z)
+      const int out = j + N*n;
+      abf[out] = val[n];
+      abfx[out] = du[n]*dudx + dv[n]*dvdx + dw[n]*dwdx;
+      abfy[out] = du[n]*dudy + dv[n]*dvdy + dw[n]*dwdy;
+      abfz[out] = du[n]*dudz + dv[n]*dvdz + dw[n]*dwdz;
+    }
+  }
+}
+
+/**
+ * @brief Calculates the radial-angular basis functions and their derivatives.
+ *
+ * U is the product of a radial and an angular basis function for each neighbor;
+ * sumU is U summed over the neighbors, kept separately per neighbor element
+ * when there is more than one element.
+ *
+ * @param sumU  Output of Ne*K*M values, indexed [element + Ne*k + Ne*K*m]
+ *              (just [k + K*m] when Ne is 1).
+ * @param U     Output: radial-angular basis functions, indexed [n + N*k + N*K*m].
+ * @param Ux    Output: derivative of U with respect to x, same layout as U.
+ * @param Uy    Output: derivative of U with respect to y, same layout as U.
+ * @param Uz    Output: derivative of U with respect to z, same layout as U.
+ * @param rbf   Radial basis functions, indexed [n + N*m].
+ * @param rbfx  Derivative of rbf with respect to x.
+ * @param rbfy  Derivative of rbf with respect to y.
+ * @param rbfz  Derivative of rbf with respect to z.
+ * @param abf   Angular basis functions, indexed [n + N*k].
+ * @param abfx  Derivative of abf with respect to x.
+ * @param abfy  Derivative of abf with respect to y.
+ * @param abfz  Derivative of abf with respect to z.
+ * @param tm    Temporary memory array; not used by this routine.
+ * @param atomtype  1-based neighbor types (read only when Ne is not 1).
+ * @param N     Number of neighboring atoms.
+ * @param K     Number of angular basis functions.
+ * @param M     Number of radial basis functions.
+ * @param Ne    Number of elements.
+ */
+void EAPOD::radialangularbasis(double *sumU, double *U, double *Ux, double *Uy, double *Uz,
+                 double *rbf, double *rbfx, double *rbfy, double *rbfz,
+                 double *abf, double *abfx, double *abfy, double *abfz,
+                 double *tm, int *atomtype, int N, int K, int M, int Ne)
+{
+  std::fill(sumU, sumU + Ne * K * M, 0.0);
+
+  const bool single = (Ne == 1);
+
+  for (int m = 0; m < M; m++) {
+    for (int k = 0; k < K; k++) {
+      double acc = 0.0; // neighbor sum, used only in the single-element case
+
+      for (int n = 0; n < N; n++) {
+        const int ia = n + N * k;     // angular index
+        const int ib = n + N * m;     // radial index
+        const int ii = ia + N * K * m;
+
+        const double radial = rbf[ib];
+        const double angular = abf[ia];
+
+        // product and product-rule derivatives
+        U[ii] = radial * angular;
+        Ux[ii] = abfx[ia] * radial + angular * rbfx[ib];
+        Uy[ii] = abfy[ia] * radial + angular * rbfy[ib];
+        Uz[ii] = abfz[ia] * radial + angular * rbfz[ib];
+
+        if (single) {
+          acc += radial * angular;
+        } else {
+          // atomtype is 1-based, so shift to a zero-based element slot
+          const int tn = atomtype[n] - 1;
+          sumU[tn + Ne * k + Ne * K * m] += radial * angular;
+        }
+      }
+
+      if (single) sumU[k + K * m] += acc;
+    }
+  }
+}
+
+/**
+ * @brief Scatter the pair contributions in fij onto the per-atom force array.
+ *
+ * Each pair contribution is added to atom ai[n] and subtracted from atom aj[n].
+ *
+ * @param force In/out array, 3 values per atom; accumulated into, not reset.
+ * @param fij   Pair contributions, 3 values per pair.
+ * @param ai    First atom index of each pair.
+ * @param aj    Second (neighbor) atom index of each pair.
+ * @param N     Number of pairs.
+ */
+void EAPOD::tallyforce(double *force, double *fij,  int *ai, int *aj, int N)
+{
+  for (int n = 0; n < N; ++n) {
+    double *fi = force + 3*ai[n];      // slot of atom i
+    double *fj = force + 3*aj[n];      // slot of atom j
+    const double *pair = fij + 3*n;    // contribution of pair n
+
+    for (int c = 0; c < 3; ++c) {
+      fi[c] += pair[c];
+      fj[c] -= pair[c];
+    }
+  }
+}
+
+/**
+ * @brief Allocate the coeff member array and fill it with a copy of c.
+ *
+ * @param c  Source coefficients; the first nc entries are copied.
+ * @param nc Number of coefficients to allocate and copy.
+ */
+void EAPOD::mknewcoeff(double *c, int nc)
+{
+  // storage for the copy is owned through the memory helper
+  memory->create(coeff, nc, "coeff");
+
+  for (int idx = 0; idx < nc; ++idx) {
+    coeff[idx] = c[idx];
+  }
+}
+
+/**
+ * @brief Evaluate every radial basis function at a list of distances.
+ *
+ * Column layout of the output: Bessel-type function of parameter j and degree i
+ * is stored at n + N*i + N*besseldegree*j, followed by the inverse-power
+ * functions at n + N*(besseldegree*nbesselpars + i).
+ *
+ * @param rbf Output array of N*(besseldegree*nbesselpars + inversedegree) values.
+ * @param xij Distances at which the functions are evaluated, N entries.
+ * @param N   Number of distances.
+ */
+void EAPOD::snapshots(double *rbf, double *xij, int N)
+{
+  // quantities shared by all sample points
+  const double rmax = rcut-rin;
+  const double expm1 = exp(-1.0);
+  const double norm = sqrt(2.0/(rmax));
+
+  for (int n = 0; n < N; ++n) {
+    const double dij = xij[n];
+    const double r = dij - rin;
+
+    // smooth cutoff exp(-1/sqrt((1-y^3)^2 + 1e-6)) / exp(-1) with y = r/rmax
+    const double y = r/rmax;
+    const double y2 = y*y;
+    const double y3 = 1.0 - y2*y;
+    const double y4 = y3*y3 + 1e-6;
+    const double y5 = pow(y4, 0.5);
+    const double y6 = exp(-1.0/y5);
+    const double fcut = y6/expm1;
+
+    // Bessel-type functions, one group per scaling parameter
+    for (int j = 0; j < nbesselpars; ++j) {
+      double alpha = besselparams[j];
+      if (fabs(alpha) <= 1.0e-6) alpha = 1e-3; // keeps the denominator below away from zero
+      const double x =  (1.0 - exp(-alpha*r/rmax))/(1.0-exp(-alpha));
+
+      for (int i = 0; i < besseldegree; ++i) {
+        const double a = (i+1)*MY_PI;
+        const double b = (norm/(i+1));
+        rbf[n + N*i + N*besseldegree*j] = b*fcut*sin(a*x)/r;
+      }
+    }
+
+    // inverse-power functions fcut/dij^(i+1)
+    const int first = besseldegree*nbesselpars;
+    for (int i = 0; i < inversedegree; ++i) {
+      const double a = powint(dij, i+1);
+      rbf[n + N*(first + i)] = fcut/a;
+    }
+  }
+}
+
+
+/**
+ * @brief Diagonalize the Gram matrix of the radial snapshots and return normalized eigenvectors.
+ *
+ * The snapshot matrix S (N x ns, ns = besseldegree*nbesselpars + inversedegree)
+ * is evaluated by snapshots() on N equally spaced points from rin+1e-6 to rcut.
+ * A = S^T*S/N is diagonalized with DSYEV and the eigenpairs are stored in
+ * descending eigenvalue order. Each eigenvector is then divided by the square
+ * root of the trapezoidal integral of (S*Phi)^2 over the grid, and its sign is
+ * flipped if necessary so that Phi[m + ns*m] is non-negative.
+ *
+ * @param Phi Output array of ns*ns eigenvector entries (column-major, one eigenvector per column).
+ * @param Lambda Output array of ns eigenvalues, largest first.
+ * @param N Number of grid points in [rin, rcut]; the grid spacing divides by N-1.
+ */
+void EAPOD::eigenvaluedecomposition(double *Phi, double *Lambda, int N)
+{
+  int ns = besseldegree*nbesselpars + inversedegree;
+
+  // Workspace length for DSYEV: LAPACK requires lwork >= max(1, 3*ns-1).
+  // ns*ns only satisfies this for ns >= 3, so raise it for smaller problems.
+  int lwork = ns * ns;
+  if (lwork < 3*ns - 1) lwork = 3*ns - 1;
+  if (lwork < 1) lwork = 1;
+
+  // Allocate memory for temporary arrays. The DSYEV workspace is taken from the
+  // heap as well: a variable-length stack array is not standard C++.
+  double *xij = (double *) malloc(N*sizeof(double));
+  double *S = (double *) malloc(N*ns*sizeof(double));
+  double *Q = (double *) malloc(N*ns*sizeof(double));
+  double *A = (double *) malloc(ns*ns*sizeof(double));
+  double *b = (double *) malloc(ns*sizeof(double));
+  double *work = (double *) malloc(lwork*sizeof(double));
+
+  // Generate the xij array: N equally spaced points from rin+1e-6 to rcut
+  for (int i=0; i<N; i++)
+    xij[i] = (rin+1e-6) + (rcut-rin-1e-6)*(i*1.0/(N-1));
+
+  // Compute the snapshots matrix S
+  snapshots(S, xij, N);
+
+  // Compute the matrix A = S^T * S
+  char chn = 'N';
+  char cht = 'T';
+  double alpha = 1.0, beta = 0.0;
+  DGEMM(&cht, &chn, &ns, &ns, &N, &alpha, S, &N, S, &N, &beta, A, &ns);
+
+  // Normalize the matrix A by dividing by N
+  for (int i=0; i<ns*ns; i++)
+    A[i] = A[i]*(1.0/N);
+
+  // Compute the eigenvectors and eigenvalues of A (eigenvectors overwrite A)
+  // NOTE(review): info is not inspected after the call; a nonzero value means
+  // DSYEV rejected an argument or failed to converge.
+  int info = 1;     // = 0:  successful exit
+  char chv = 'V';
+  char chu = 'U';
+  DSYEV(&chv, &chu, &ns, A, &ns, b, work, &lwork, &info);
+
+  // DSYEV returns eigenvalues in ascending order; reverse the column order so
+  // that eigenvalues and eigenvectors run from largest to smallest
+  for (int j=0; j<ns; j++)
+    for (int i=0; i<ns; i++)
+      Phi[i + ns*(ns-j-1)] = A[i + ns*j];
+
+  for (int i=0; i<ns; i++)
+    Lambda[(ns-i-1)] = b[i];
+
+  // Compute the matrix Q = S * Phi
+  DGEMM(&chn, &chn, &N, &ns, &ns, &alpha, S, &N, Phi, &ns, &beta, Q, &N);
+
+  // Reuse xij to hold the grid spacings, then normalize each eigenvector by the
+  // square root of the trapezoidal integral of its projected function squared
+  for (int i=0; i<(N-1); i++)
+    xij[i] = xij[i+1] - xij[i];
+  double area;
+  for (int m=0; m<ns; m++) {
+    area = 0.0;
+    for (int i=0; i<(N-1); i++)
+      area += 0.5*xij[i]*(Q[i + N*m]*Q[i + N*m] + Q[i+1 + N*m]*Q[i+1 + N*m]);
+    for (int i=0; i<ns; i++)
+      Phi[i + ns*m] = Phi[i + ns*m]/sqrt(area);
+  }
+
+  // Enforce consistent signs for the eigenvectors
+  for (int m=0; m<ns; m++) {
+    if (Phi[m + ns*m] < 0.0) {
+      for (int i=0; i<ns; i++)
+        Phi[i + ns*m] = -Phi[i + ns*m];
+    }
+  }
+
+  // Free temporary arrays
+  free(xij); free(S); free(A); free(b); free(Q); free(work);
+}
+
+/**
+ * @brief Set up the radial basis data: record the basis degrees, size the
+ *        snapshot set, and compute the eigenvectors/eigenvalues (Phi, Lambda).
+ */
+void EAPOD::init2body()
+{
+  // Number of grid points handed to eigenvaluedecomposition()
+  const int ngrid = 2000;
+
+  // pdegree[0] is the Bessel degree, pdegree[1] the inverse-distance degree
+  pdegree[1] = inversedegree;
+  pdegree[0] = besseldegree;
+
+  // One snapshot per (Bessel parameter, Bessel degree) pair plus one per inverse degree
+  ns = pdegree[1] + nbesselpars * pdegree[0];
+
+  // Storage for the ns x ns eigenvector matrix and the ns eigenvalues
+  memory->create(Phi, ns * ns, "Phi");
+  memory->create(Lambda, ns, "Lambda");
+
+  // Fill Phi and Lambda from the snapshot matrix sampled on ngrid points
+  eigenvaluedecomposition(Phi, Lambda, ngrid);
+}
+
+/**
+ * @brief Size and fill the index/coefficient arrays used by the three-body descriptors.
+ *
+ * Sets P3, nabf3 and K3, allocates pn3, pq3 and pc3, and lets init3bodyarray()
+ * populate them.
+ *
+ * @param Pa3 The degree of the angular basis functions of the three-body descriptors.
+ *            It indexes a 14-entry table at position Pa3+1, so Pa3 must not exceed 12.
+ */
+void EAPOD::init3body(int Pa3)
+{
+  // Cumulative monomial counts: entry k is the number of monomials of total degree below k
+  int npa[] = {0, 1, 4, 10, 20, 35, 56, 84, 120, 165, 220, 286, 364, 455};
+
+  P3 = Pa3;          // degree of the angular basis functions
+  nabf3 = P3 + 1;    // number of angular basis functions (degrees 0..P3)
+  K3 = npa[nabf3];   // number of monomials up to degree P3
+
+  // pn3: monomial counts per degree; pq3: two index columns used by the recursive
+  // evaluation of the angular basis; pc3: one integer coefficient per monomial
+  memory->create(pn3, nabf3+1, "pn3");
+  memory->create(pq3, K3*2, "pq3");
+  memory->create(pc3, K3, "pc3");
+
+  // Copy the tabulated values into the freshly allocated arrays
+  init3bodyarray(pn3, pq3, pc3, P3);
+}
+
+/**
+ * @brief Size and fill the index/coefficient arrays used by the four-body descriptors.
+ *
+ * Sets P4, K4, nabf4 and Q4, allocates pq4, pa4, pb4 and pc4, and fills them via
+ * init3bodyarray() and init4bodyarray().
+ *
+ * @param Pa4 The degree of the angular basis functions of the four-body descriptors.
+ *            It indexes a 7-entry table, so Pa4 must lie in 0..6.
+ */
+void EAPOD::init4body(int Pa4)
+{
+  // Cumulative monomial counts: entry k is the number of monomials of total degree below k
+  int monomialCount[] = {0, 1, 4, 10, 20, 35, 56, 84, 120, 165, 220, 286, 364, 455};
+
+  // Number of angular basis functions for each degree 0..6
+  int basisCount[] = {1,     2,     4,     7,    11,    16,    23};
+
+  // Cumulative number of terms needed to compute the angular basis functions
+  int termCount[] = {0, 1, 4, 10, 19, 29, 47, 74, 89, 119, 155, 209, 230, 275, 335, 425, 533, 561, 624, 714, 849, 949, 1129, 1345};
+
+  P4 = Pa4;                      // degree of the angular basis functions
+  K4 = monomialCount[Pa4+1];     // number of monomials up to degree Pa4
+
+  // pq4 is kept; the other two outputs of init3bodyarray() are only scratch here
+  int *degreeCounts, *scratchCoeff;
+  memory->create(degreeCounts, Pa4+2, "pn4");
+  memory->create(pq4, K4*2, "pq4");
+  memory->create(scratchCoeff, K4, "tm4");
+
+  init3bodyarray(degreeCounts, pq4, scratchCoeff, Pa4);
+
+  // The scratch outputs are not needed beyond this point
+  memory->destroy(degreeCounts);
+  memory->destroy(scratchCoeff);
+
+  nabf4 = basisCount[Pa4];       // number of angular basis functions
+  Q4 = termCount[nabf4];         // length of pc4
+
+  // pa4: subset of the cumulative term counts; pb4: three monomial indices per term;
+  // pc4: one integer coefficient per term
+  memory->create(pa4, nabf4+1, "pa4");
+  memory->create(pb4, Q4*3, "pb4");
+  memory->create(pc4, Q4, "pc4");
+
+  init4bodyarray(pa4, pb4, pc4, Pa4);
+}
+
+
+/**
+ * @brief Work out how much scratch storage a computation with Nj neighbors needs.
+ *
+ * Stores the number of doubles in the member ndblmem and the number of ints in
+ * the member nintmem.
+ *
+ * @param Nj Number of neighboring atoms.
+ * @return int The required number of doubles (the value stored in ndblmem).
+ */
+int EAPOD::estimate_temp_memory(int Nj)
+{
+  // Largest monomial count over the three- and four-body angular bases
+  int Kmax = K3;
+  if (K4 > Kmax) Kmax = K4;
+
+  // Largest radial basis size over the two-, three- and four-body terms
+  int nrbfmax = nrbf3;
+  if (nrbf4 > nrbfmax) nrbfmax = nrbf4;
+  if (nrbf2 > nrbfmax) nrbfmax = nrbf2;
+
+  // Largest (angular x radial) product over the three- and four-body terms
+  int Knrbf34 = K3*nrbf3;
+  if (K4*nrbf4 > Knrbf34) Knrbf34 = K4*nrbf4;
+
+  // Largest number of cross descriptors
+  int nld = nl23;
+  if (nl33 > nld) nld = nl33;
+  if (nl34 > nld) nld = nl34;
+  if (nl44 > nld) nld = nl44;
+
+  // rij and fij (6*Nj), plus each descriptor set with its 3*Nj derivatives:
+  // d2/dd2, d3/dd3, d4/dd4 and the largest cross set
+  const int perDescriptor = 1 + 3*Nj;
+  const int descriptorPart = 6*Nj + perDescriptor*(nl2 + nl3 + nl4 + nld);
+
+  // U, Ux, Uy, Uz
+  const int uPart = 4*Nj*Knrbf34;
+
+  // sumU and cU
+  const int sumPart = 2*nelements*Knrbf34;
+
+  // rbf, rbfx, rbfy, rbfz
+  const int rbfPart = 4*Nj*nrbfmax;
+
+  // rbft, rbfxt, rbfyt, rbfzt and abf, abfx, abfy, abfz are budgeted as one
+  // region sized for the larger of the two
+  int sharedPart = 4*Nj*ns;
+  const int abfPart = 4*(Nj+1)*Kmax;
+  if (abfPart > sharedPart) sharedPart = abfPart;
+
+  // Total number of doubles for the descriptor computation
+  ndblmem = descriptorPart + (uPart + sumPart + rbfPart + sharedPart);
+
+  // The environment (cluster) computation has its own requirement; keep the larger
+  const int envPart = 6*Nj + nComponents + nClusters + nClusters*nComponents + 2*nClusters*Mdesc + nClusters*nClusters;
+  if (envPart > ndblmem) ndblmem = envPart;
+
+  // Total number of ints
+  nintmem = 4*Nj;
+
+  return ndblmem;
+}
+
+/**
+ * @brief Allocate the scratch buffers needed for a computation with up to Nj neighbors.
+ *
+ * Sizes tmpmem and tmpint from estimate_temp_memory(), and allocates the
+ * descriptor arrays bd/bdd and the cluster arrays pd/pdd. Each allocation is
+ * labelled with the name of the array it belongs to.
+ *
+ * @param Nj Number of neighboring atoms the buffers must accommodate.
+ */
+void EAPOD::allocate_temp_memory(int Nj)
+{
+  // Updates ndblmem and nintmem for this neighbor count
+  estimate_temp_memory(Nj);
+  memory->create(tmpmem, ndblmem, "tmpmem");
+  memory->create(tmpint, nintmem, "tmpint");
+  memory->create(bd, Mdesc, "bd");
+  memory->create(bdd, 3*Nj*Mdesc, "bdd");
+  memory->create(pd, nClusters, "pd");
+  memory->create(pdd, 3*Nj*nClusters, "pdd");
+}
+
+/**
+ * @brief Release every buffer obtained in allocate_temp_memory().
+ */
+void EAPOD::free_temp_memory()
+{
+  // Cluster arrays
+  memory->destroy(pdd);
+  memory->destroy(pd);
+
+  // Descriptor arrays
+  memory->destroy(bdd);
+  memory->destroy(bd);
+
+  // General-purpose scratch space
+  memory->destroy(tmpint);
+  memory->destroy(tmpmem);
+}
+
+/**
+ * @brief List the flat indices of an n1 x n2 x n3 sub-block of a larger 3D array.
+ *
+ * Element (i1, i2, i3) of the larger array sits at i1 + N1*i2 + N1*N2*i3. The
+ * sub-block is walked with i1 running fastest and i3 slowest.
+ *
+ * @param indx Output array receiving n1*n2*n3 flat indices.
+ * @param n1 Extent of the sub-block along the first dimension.
+ * @param n2 Extent of the sub-block along the second dimension.
+ * @param n3 Extent of the sub-block along the third dimension.
+ * @param N1 Size of the first dimension of the larger array.
+ * @param N2 Size of the second dimension of the larger array.
+ * @return int The number of indices written to indx.
+ */
+int EAPOD::indexmap3(int *indx, int n1, int n2, int n3, int N1, int N2)
+{
+  int count = 0;
+
+  for (int c = 0; c < n3; c++) {
+    for (int b = 0; b < n2; b++) {
+      // Offset contributed by the two slower dimensions
+      const int offset = N1*b + N1*N2*c;
+      for (int a = 0; a < n1; a++)
+        indx[count++] = a + offset;
+    }
+  }
+
+  return count;
+}
+
+/**
+ * @brief Count the admissible pairings between two sets of descriptors.
+ *
+ * A descriptor is identified by (element index, radial index, angular index).
+ * A pair is counted when the combined radial/angular position of the second
+ * descriptor is not below that of the first, the element index of the second is
+ * not below that of the first, the angular degrees sum to at most dabf12, and
+ * the radial indices sum to less than nrbf12.
+ *
+ * @param dabf1 Angular degree of each angular basis function in the first set.
+ * @param nabf1 Number of angular basis functions in the first set.
+ * @param nrbf1 Number of radial basis functions in the first set.
+ * @param nebf1 Number of element interactions in the first set.
+ * @param dabf2 Angular degree of each angular basis function in the second set.
+ * @param nabf2 Number of angular basis functions in the second set.
+ * @param nrbf2 Number of radial basis functions in the second set.
+ * @param nebf2 Number of element interactions in the second set.
+ * @param dabf12 Largest allowed sum of the two angular degrees.
+ * @param nrbf12 Exclusive upper bound on the sum of the two radial indices.
+ * @return int The number of admissible pairs.
+ */
+int EAPOD::crossindices(int *dabf1, int nabf1, int nrbf1, int nebf1,
+         int *dabf2, int nabf2, int nrbf2, int nebf2, int dabf12, int nrbf12)
+{
+  int count = 0;
+
+  // First set: element, radial, angular
+  for (int e1 = 0; e1 < nebf1; e1++) {
+    for (int r1 = 0; r1 < nrbf1; r1++) {
+      for (int q1 = 0; q1 < nabf1; q1++) {
+        const int pos1 = q1 + r1*nabf1;
+        const int deg1 = dabf1[q1];
+
+        // Second set: element, radial, angular
+        for (int e2 = 0; e2 < nebf2; e2++) {
+          for (int r2 = 0; r2 < nrbf2; r2++) {
+            for (int q2 = 0; q2 < nabf2; q2++) {
+              const int pos2 = q2 + r2*nabf2;
+              const int deg2 = dabf2[q2];
+
+              // Reject anything that violates one of the four conditions
+              if (pos2 < pos1) continue;
+              if (e2 < e1) continue;
+              if (deg1 + deg2 > dabf12) continue;
+              if (r1 + r2 >= nrbf12) continue;
+
+              count++;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return count;
+}
+
+/**
+ * @brief Enumerate the admissible pairings between two sets of descriptors.
+ *
+ * Applies the same four conditions as the counting overload and, for every
+ * accepted pair, records the flat index of each partner. The flat index of a
+ * descriptor is angular + radial*nabf + element*nabf*nrbf within its own set.
+ *
+ * @param ind1 Output array receiving the flat index of the first descriptor of each pair.
+ * @param ind2 Output array receiving the flat index of the second descriptor of each pair.
+ * @param dabf1 Angular degree of each angular basis function in the first set.
+ * @param nabf1 Number of angular basis functions in the first set.
+ * @param nrbf1 Number of radial basis functions in the first set.
+ * @param nebf1 Number of element interactions in the first set.
+ * @param dabf2 Angular degree of each angular basis function in the second set.
+ * @param nabf2 Number of angular basis functions in the second set.
+ * @param nrbf2 Number of radial basis functions in the second set.
+ * @param nebf2 Number of element interactions in the second set.
+ * @param dabf12 Largest allowed sum of the two angular degrees.
+ * @param nrbf12 Exclusive upper bound on the sum of the two radial indices.
+ * @return int The number of pairs written to ind1 and ind2.
+ */
+int EAPOD::crossindices(int *ind1, int *ind2, int *dabf1, int nabf1, int nrbf1, int nebf1,
+         int *dabf2, int nabf2, int nrbf2, int nebf2, int dabf12, int nrbf12)
+{
+  int count = 0;
+
+  // First set: element, radial, angular
+  for (int e1 = 0; e1 < nebf1; e1++) {
+    for (int r1 = 0; r1 < nrbf1; r1++) {
+      for (int q1 = 0; q1 < nabf1; q1++) {
+        const int pos1 = q1 + r1*nabf1;
+        const int flat1 = pos1 + e1*nabf1*nrbf1;
+        const int deg1 = dabf1[q1];
+
+        // Second set: element, radial, angular
+        for (int e2 = 0; e2 < nebf2; e2++) {
+          for (int r2 = 0; r2 < nrbf2; r2++) {
+            for (int q2 = 0; q2 < nabf2; q2++) {
+              const int pos2 = q2 + r2*nabf2;
+              const int flat2 = pos2 + e2*nabf2*nrbf2;
+              const int deg2 = dabf2[q2];
+
+              // Reject anything that violates one of the four conditions
+              if (pos2 < pos1) continue;
+              if (e2 < e1) continue;
+              if (deg1 + deg2 > dabf12) continue;
+              if (r1 + r2 >= nrbf12) continue;
+
+              ind1[count] = flat1;
+              ind2[count] = flat2;
+              count++;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return count;
+}
+
+/**
+ * @brief Print an integer matrix stored column by column to standard output.
+ *
+ * @param desc Heading printed before the matrix.
+ * @param m Number of rows to print.
+ * @param n Number of columns to print.
+ * @param a Matrix data; entry (i, j) is read from a[i + j*lda].
+ * @param lda Distance in a between consecutive columns.
+ */
+void EAPOD::print_matrix(const char* desc, int m, int n, int* a, int lda )
+{
+    printf( "\n %s\n", desc );
+
+    // One output line per matrix row
+    for (int row = 0; row < m; row++) {
+        for (int col = 0; col < n; col++)
+            printf( " %d", a[row + col*lda] );
+        printf( "\n" );
+    }
+}
+
+/**
+ * @brief Print a double-precision matrix stored column by column to standard output.
+ *
+ * @param desc Heading printed before the matrix.
+ * @param m Number of rows to print.
+ * @param n Number of columns to print.
+ * @param a Matrix data; entry (i, j) is read from a[i + j*lda].
+ * @param lda Distance in a between consecutive columns.
+ */
+void EAPOD::print_matrix(const char* desc, int m, int n, double* a, int lda )
+{
+    printf( "\n %s\n", desc );
+
+    // One output line per matrix row, twelve digits after the decimal point
+    for (int row = 0; row < m; row++) {
+        for (int col = 0; col < n; col++)
+            printf( " %6.12f", a[row + col*lda] );
+        printf( "\n" );
+    }
+}
+
+/**
+ * @brief Scale an array in place by a constant.
+ *
+ * @param d Array of N values; each is replaced by its product with c.
+ * @param c Scale factor.
+ * @param N Number of entries in d.
+ */
+void EAPOD::scalarproduct(double *d, double c, int N)
+{
+  for (int k = 0; k < N; k++) {
+    d[k] *= c;
+  }
+}
+
+/**
+ * @brief Compute the dot product of two arrays.
+ *
+ * @param c First array (ndesc entries).
+ * @param d Second array (ndesc entries).
+ * @param ndesc Number of entries in each array.
+ * @return double Sum over k of d[k]*c[k]; 0.0 when ndesc is not positive.
+ */
+double EAPOD::dotproduct(double *c, double *d, int ndesc)
+{
+  double total = 0.0;
+  int k = 0;
+  while (k < ndesc) {
+    total += d[k]*c[k];
+    k++;
+  }
+  return total;
+}
+
+/**
+ * @brief Accumulate a matrix-vector product into fij: fij += dd * c.
+ *
+ * @param fij Array of N values that the product is added to (not cleared first).
+ * @param c Vector of ndesc coefficients.
+ * @param dd N x ndesc matrix stored column by column; entry (n, m) is dd[n + N*m].
+ * @param N Number of rows of dd and entries of fij.
+ * @param ndesc Number of columns of dd and entries of c.
+ */
+void EAPOD::mvproduct(double *fij, double *c, double *dd, int N, int ndesc)
+{
+  // Add one scaled column of dd at a time
+  for (int col = 0; col < ndesc; col++) {
+    for (int row = 0; row < N; row++) {
+      fij[row] += dd[row + N*col]*c[col];
+    }
+  }
+}
+
+/**
+ * @brief Multiply two column-major matrices: c = a * b.
+ *
+ * @param c Output matrix, r1 x c2; overwritten.
+ * @param a Left factor, r1 x c1.
+ * @param b Right factor, c1 x c2.
+ * @param r1 Number of rows of a and c.
+ * @param c1 Number of columns of a and rows of b.
+ * @param c2 Number of columns of b and c.
+ */
+void EAPOD::MatMul(double *c, double *a, double *b, int r1, int c1, int c2)
+{
+  // First pass: clear the whole output
+  for (int col = 0; col < c2; col++) {
+    for (int row = 0; row < r1; row++) {
+      c[row + r1*col] = 0.0;
+    }
+  }
+
+  // Second pass: accumulate the inner products, one output column at a time
+  for (int col = 0; col < c2; col++) {
+    for (int row = 0; row < r1; row++) {
+      for (int inner = 0; inner < c1; inner++) {
+        c[row + r1*col] += a[row + r1*inner] * b[inner + c1*col];
+      }
+    }
+  }
+}
+
+/**
+ * @brief Compute the cluster probabilities of one atom and their derivatives
+ *        with respect to atomic coordinates.
+ *
+ * The local descriptors B are projected onto nComponents principal components,
+ * the inverse squared distance to each of the nClusters centroids is formed, and
+ * the probabilities are those inverse distances normalized to sum to one. The
+ * derivatives are chained through the same steps and finally contracted with
+ * dB_dR.
+ *
+ * @param P Output: nClusters probabilities.
+ * @param dP_dR Output: (3*nNeighbors) x nClusters matrix, column-major.
+ * @param B Input: Mdesc local descriptors.
+ * @param dB_dR Input: (3*nNeighbors) x Mdesc matrix, column-major.
+ * @param tmp Scratch space holding, back to back, nComponents + nClusters +
+ *            nClusters*nComponents + 2*nClusters*Mdesc + nClusters*nClusters doubles.
+ * @param elem Index selecting the per-element slice of Proj and Centroids.
+ * @param nNeighbors Number of neighbors of the atom.
+ */
+void EAPOD::peratomenvironment_descriptors(double *P, double *dP_dR, double *B, double *dB_dR, double *tmp, int elem, int nNeighbors)
+{
+  // Per-element projection matrix (nComponents x Mdesc) and centroids (nComponents x nClusters)
+  double *ProjMat = &Proj[nComponents*Mdesc*elem];
+  double *centroids = &Centroids[nComponents*nClusters*elem];
+
+  // Carve the scratch buffer into consecutive work arrays
+  int offset = 0;
+  double *pca = &tmp[offset];       offset += nComponents;
+  double *D = &tmp[offset];         offset += nClusters;
+  double *dD_dpca = &tmp[offset];   offset += nClusters*nComponents;
+  double *dD_dB = &tmp[offset];     offset += nClusters*Mdesc;
+  double *dP_dD = &tmp[offset];     offset += nClusters*nClusters;
+  double *dP_dB = &tmp[offset];
+
+  // Principal components: pca = ProjMat * B
+  for (int comp = 0; comp < nComponents; comp++) {
+    pca[comp] = 0.0;
+    for (int m = 0; m < Mdesc; m++)
+      pca[comp] += ProjMat[comp + nComponents*m] * B[m];
+  }
+
+  // Inverse squared distance from pca to every centroid, and their sum
+  double sumD = 0.0;
+  for (int cl = 0; cl < nClusters; cl++) {
+    const double *center = &centroids[cl * nComponents];
+    double dist2 = 1e-20; // fix for zero distances
+    for (int comp = 0; comp < nComponents; comp++)
+      dist2 += (pca[comp] - center[comp]) * (pca[comp] - center[comp]);
+    D[cl] = 1.0 / dist2;
+    sumD += D[cl];
+  }
+
+  // Probabilities: inverse distances normalized to sum to one
+  for (int cl = 0; cl < nClusters; cl++)
+    P[cl] = D[cl] / sumD;
+
+  // dD_dpca (nClusters x nComponents): derivative of each inverse distance
+  // with respect to each principal component
+  for (int comp = 0; comp < nComponents; comp++) {
+    for (int cl = 0; cl < nClusters; cl++) {
+      dD_dpca[cl + comp * nClusters] = 2 * D[cl] * D[cl] * (centroids[comp + cl * nComponents] - pca[comp]);
+    }
+  }
+
+  // dD_dB = dD_dpca * ProjMat (nClusters x Mdesc)
+  char chn = 'N';
+  char cht = 'T';
+  double alpha = 1.0, beta = 0.0;
+  DGEMM(&chn, &chn, &nClusters, &Mdesc, &nComponents, &alpha, dD_dpca, &nClusters, ProjMat, &nComponents, &beta, dD_dB, &nClusters);
+
+  // dP_dD (nClusters x nClusters): -D[j]/sumD^2 everywhere, plus 1/sumD on the diagonal
+  double S1 = 1 / sumD;
+  double S2 = sumD * sumD;
+  for (int col = 0; col < nClusters; col++) {
+    for (int row = 0; row < nClusters; row++) {
+      double entry = -D[row] / S2;
+      if (row == col) entry += S1;
+      dP_dD[row + col * nClusters] = entry;
+    }
+  }
+
+  // dP_dB = dP_dD * dD_dB: derivatives of probabilities with respect to local descriptors
+  DGEMM(&chn, &chn, &nClusters, &Mdesc, &nClusters, &alpha, dP_dD, &nClusters, dD_dB, &nClusters, &beta, dP_dB, &nClusters);
+
+  // dP_dR = dB_dR * dP_dB^T: derivatives of probabilities with respect to atomic coordinates
+  int N = 3*nNeighbors;
+  DGEMM(&chn, &cht, &N, &nClusters, &Mdesc, &alpha, dB_dR, &N, dP_dB, &nClusters, &beta, dP_dR, &N);
+}
+
+/**
+ * @brief Copy the tabulated monomial data for angular degree up to Pa into caller-provided arrays.
+ *
+ * @param np Output: receives the first Pa+2 entries of the cumulative count table npa.
+ * @param pq Output: 2*nmax entries, where nmax = npa[Pa+1]; the first nmax entries are
+ *           column 3 of the table, the next nmax entries are column 4.
+ * @param pc Output: nmax entries taken from column 5 of the table.
+ * @param Pa Angular degree; npa has 14 entries, so Pa+1 must not exceed 13.
+ */
+void EAPOD::init3bodyarray(int *np, int *pq, int *pc, int Pa)
+{
+  // Cumulative row counts of the table below: npa[k] rows cover every degree below k
+  int npa[] = {0, 1, 4, 10, 20, 35, 56, 84, 120, 165, 220, 286, 364, 455};
+
+  // One row per monomial, grouped by increasing total degree (0 through 12).
+  // Only columns 3, 4 and 5 are read by this function.
+  // NOTE(review): columns 0-2 look like the exponents (a, b, c) of x^a y^b z^c,
+  // column 3 like the 1-based row of a monomial one degree lower, column 4 like
+  // the coordinate (1, 2 or 3) that raises it to this row, and column 5 matches
+  // the multinomial coefficient (a+b+c)!/(a! b! c!) -- confirm against the code
+  // that consumes pq and pc.
+  int poly[455][6] =
+  {
+    {0, 0, 0, 0, 0, 1},
+    {1, 0, 0, 1, 1, 1},
+    {0, 1, 0, 1, 2, 1},
+    {0, 0, 1, 1, 3, 1},
+    {2, 0, 0, 2, 1, 1},
+    {1, 1, 0, 2, 2, 2},
+    {0, 2, 0, 3, 2, 1},
+    {1, 0, 1, 2, 3, 2},
+    {0, 1, 1, 3, 3, 2},
+    {0, 0, 2, 4, 3, 1},
+    {3, 0, 0, 5, 1, 1},
+    {2, 1, 0, 5, 2, 3},
+    {1, 2, 0, 6, 2, 3},
+    {0, 3, 0, 7, 2, 1},
+    {2, 0, 1, 5, 3, 3},
+    {1, 1, 1, 6, 3, 6},
+    {0, 2, 1, 7, 3, 3},
+    {1, 0, 2, 8, 3, 3},
+    {0, 1, 2, 9, 3, 3},
+    {0, 0, 3, 10, 3, 1},
+    {4, 0, 0, 11, 1, 1},
+    {3, 1, 0, 11, 2, 4},
+    {2, 2, 0, 12, 2, 6},
+    {1, 3, 0, 13, 2, 4},
+    {0, 4, 0, 14, 2, 1},
+    {3, 0, 1, 11, 3, 4},
+    {2, 1, 1, 12, 3, 12},
+    {1, 2, 1, 13, 3, 12},
+    {0, 3, 1, 14, 3, 4},
+    {2, 0, 2, 15, 3, 6},
+    {1, 1, 2, 16, 3, 12},
+    {0, 2, 2, 17, 3, 6},
+    {1, 0, 3, 18, 3, 4},
+    {0, 1, 3, 19, 3, 4},
+    {0, 0, 4, 20, 3, 1},
+    {5, 0, 0, 21, 1, 1},
+    {4, 1, 0, 21, 2, 5},
+    {3, 2, 0, 22, 2, 10},
+    {2, 3, 0, 23, 2, 10},
+    {1, 4, 0, 24, 2, 5},
+    {0, 5, 0, 25, 2, 1},
+    {4, 0, 1, 21, 3, 5},
+    {3, 1, 1, 22, 3, 20},
+    {2, 2, 1, 23, 3, 30},
+    {1, 3, 1, 24, 3, 20},
+    {0, 4, 1, 25, 3, 5},
+    {3, 0, 2, 26, 3, 10},
+    {2, 1, 2, 27, 3, 30},
+    {1, 2, 2, 28, 3, 30},
+    {0, 3, 2, 29, 3, 10},
+    {2, 0, 3, 30, 3, 10},
+    {1, 1, 3, 31, 3, 20},
+    {0, 2, 3, 32, 3, 10},
+    {1, 0, 4, 33, 3, 5},
+    {0, 1, 4, 34, 3, 5},
+    {0, 0, 5, 35, 3, 1},
+    {6, 0, 0, 36, 1, 1},
+    {5, 1, 0, 36, 2, 6},
+    {4, 2, 0, 37, 2, 15},
+    {3, 3, 0, 38, 2, 20},
+    {2, 4, 0, 39, 2, 15},
+    {1, 5, 0, 40, 2, 6},
+    {0, 6, 0, 41, 2, 1},
+    {5, 0, 1, 36, 3, 6},
+    {4, 1, 1, 37, 3, 30},
+    {3, 2, 1, 38, 3, 60},
+    {2, 3, 1, 39, 3, 60},
+    {1, 4, 1, 40, 3, 30},
+    {0, 5, 1, 41, 3, 6},
+    {4, 0, 2, 42, 3, 15},
+    {3, 1, 2, 43, 3, 60},
+    {2, 2, 2, 44, 3, 90},
+    {1, 3, 2, 45, 3, 60},
+    {0, 4, 2, 46, 3, 15},
+    {3, 0, 3, 47, 3, 20},
+    {2, 1, 3, 48, 3, 60},
+    {1, 2, 3, 49, 3, 60},
+    {0, 3, 3, 50, 3, 20},
+    {2, 0, 4, 51, 3, 15},
+    {1, 1, 4, 52, 3, 30},
+    {0, 2, 4, 53, 3, 15},
+    {1, 0, 5, 54, 3, 6},
+    {0, 1, 5, 55, 3, 6},
+    {0, 0, 6, 56, 3, 1},
+    {7, 0, 0, 57, 1, 1},
+    {6, 1, 0, 57, 2, 7},
+    {5, 2, 0, 58, 2, 21},
+    {4, 3, 0, 59, 2, 35},
+    {3, 4, 0, 60, 2, 35},
+    {2, 5, 0, 61, 2, 21},
+    {1, 6, 0, 62, 2, 7},
+    {0, 7, 0, 63, 2, 1},
+    {6, 0, 1, 57, 3, 7},
+    {5, 1, 1, 58, 3, 42},
+    {4, 2, 1, 59, 3, 105},
+    {3, 3, 1, 60, 3, 140},
+    {2, 4, 1, 61, 3, 105},
+    {1, 5, 1, 62, 3, 42},
+    {0, 6, 1, 63, 3, 7},
+    {5, 0, 2, 64, 3, 21},
+    {4, 1, 2, 65, 3, 105},
+    {3, 2, 2, 66, 3, 210},
+    {2, 3, 2, 67, 3, 210},
+    {1, 4, 2, 68, 3, 105},
+    {0, 5, 2, 69, 3, 21},
+    {4, 0, 3, 70, 3, 35},
+    {3, 1, 3, 71, 3, 140},
+    {2, 2, 3, 72, 3, 210},
+    {1, 3, 3, 73, 3, 140},
+    {0, 4, 3, 74, 3, 35},
+    {3, 0, 4, 75, 3, 35},
+    {2, 1, 4, 76, 3, 105},
+    {1, 2, 4, 77, 3, 105},
+    {0, 3, 4, 78, 3, 35},
+    {2, 0, 5, 79, 3, 21},
+    {1, 1, 5, 80, 3, 42},
+    {0, 2, 5, 81, 3, 21},
+    {1, 0, 6, 82, 3, 7},
+    {0, 1, 6, 83, 3, 7},
+    {0, 0, 7, 84, 3, 1},
+    {8, 0, 0, 85, 1, 1},
+    {7, 1, 0, 85, 2, 8},
+    {6, 2, 0, 86, 2, 28},
+    {5, 3, 0, 87, 2, 56},
+    {4, 4, 0, 88, 2, 70},
+    {3, 5, 0, 89, 2, 56},
+    {2, 6, 0, 90, 2, 28},
+    {1, 7, 0, 91, 2, 8},
+    {0, 8, 0, 92, 2, 1},
+    {7, 0, 1, 85, 3, 8},
+    {6, 1, 1, 86, 3, 56},
+    {5, 2, 1, 87, 3, 168},
+    {4, 3, 1, 88, 3, 280},
+    {3, 4, 1, 89, 3, 280},
+    {2, 5, 1, 90, 3, 168},
+    {1, 6, 1, 91, 3, 56},
+    {0, 7, 1, 92, 3, 8},
+    {6, 0, 2, 93, 3, 28},
+    {5, 1, 2, 94, 3, 168},
+    {4, 2, 2, 95, 3, 420},
+    {3, 3, 2, 96, 3, 560},
+    {2, 4, 2, 97, 3, 420},
+    {1, 5, 2, 98, 3, 168},
+    {0, 6, 2, 99, 3, 28},
+    {5, 0, 3, 100, 3, 56},
+    {4, 1, 3, 101, 3, 280},
+    {3, 2, 3, 102, 3, 560},
+    {2, 3, 3, 103, 3, 560},
+    {1, 4, 3, 104, 3, 280},
+    {0, 5, 3, 105, 3, 56},
+    {4, 0, 4, 106, 3, 70},
+    {3, 1, 4, 107, 3, 280},
+    {2, 2, 4, 108, 3, 420},
+    {1, 3, 4, 109, 3, 280},
+    {0, 4, 4, 110, 3, 70},
+    {3, 0, 5, 111, 3, 56},
+    {2, 1, 5, 112, 3, 168},
+    {1, 2, 5, 113, 3, 168},
+    {0, 3, 5, 114, 3, 56},
+    {2, 0, 6, 115, 3, 28},
+    {1, 1, 6, 116, 3, 56},
+    {0, 2, 6, 117, 3, 28},
+    {1, 0, 7, 118, 3, 8},
+    {0, 1, 7, 119, 3, 8},
+    {0, 0, 8, 120, 3, 1},
+    {9, 0, 0, 121, 1, 1},
+    {8, 1, 0, 121, 2, 9},
+    {7, 2, 0, 122, 2, 36},
+    {6, 3, 0, 123, 2, 84},
+    {5, 4, 0, 124, 2, 126},
+    {4, 5, 0, 125, 2, 126},
+    {3, 6, 0, 126, 2, 84},
+    {2, 7, 0, 127, 2, 36},
+    {1, 8, 0, 128, 2, 9},
+    {0, 9, 0, 129, 2, 1},
+    {8, 0, 1, 121, 3, 9},
+    {7, 1, 1, 122, 3, 72},
+    {6, 2, 1, 123, 3, 252},
+    {5, 3, 1, 124, 3, 504},
+    {4, 4, 1, 125, 3, 630},
+    {3, 5, 1, 126, 3, 504},
+    {2, 6, 1, 127, 3, 252},
+    {1, 7, 1, 128, 3, 72},
+    {0, 8, 1, 129, 3, 9},
+    {7, 0, 2, 130, 3, 36},
+    {6, 1, 2, 131, 3, 252},
+    {5, 2, 2, 132, 3, 756},
+    {4, 3, 2, 133, 3, 1260},
+    {3, 4, 2, 134, 3, 1260},
+    {2, 5, 2, 135, 3, 756},
+    {1, 6, 2, 136, 3, 252},
+    {0, 7, 2, 137, 3, 36},
+    {6, 0, 3, 138, 3, 84},
+    {5, 1, 3, 139, 3, 504},
+    {4, 2, 3, 140, 3, 1260},
+    {3, 3, 3, 141, 3, 1680},
+    {2, 4, 3, 142, 3, 1260},
+    {1, 5, 3, 143, 3, 504},
+    {0, 6, 3, 144, 3, 84},
+    {5, 0, 4, 145, 3, 126},
+    {4, 1, 4, 146, 3, 630},
+    {3, 2, 4, 147, 3, 1260},
+    {2, 3, 4, 148, 3, 1260},
+    {1, 4, 4, 149, 3, 630},
+    {0, 5, 4, 150, 3, 126},
+    {4, 0, 5, 151, 3, 126},
+    {3, 1, 5, 152, 3, 504},
+    {2, 2, 5, 153, 3, 756},
+    {1, 3, 5, 154, 3, 504},
+    {0, 4, 5, 155, 3, 126},
+    {3, 0, 6, 156, 3, 84},
+    {2, 1, 6, 157, 3, 252},
+    {1, 2, 6, 158, 3, 252},
+    {0, 3, 6, 159, 3, 84},
+    {2, 0, 7, 160, 3, 36},
+    {1, 1, 7, 161, 3, 72},
+    {0, 2, 7, 162, 3, 36},
+    {1, 0, 8, 163, 3, 9},
+    {0, 1, 8, 164, 3, 9},
+    {0, 0, 9, 165, 3, 1},
+    {10, 0, 0, 166, 1, 1},
+    {9, 1, 0, 166, 2, 10},
+    {8, 2, 0, 167, 2, 45},
+    {7, 3, 0, 168, 2, 120},
+    {6, 4, 0, 169, 2, 210},
+    {5, 5, 0, 170, 2, 252},
+    {4, 6, 0, 171, 2, 210},
+    {3, 7, 0, 172, 2, 120},
+    {2, 8, 0, 173, 2, 45},
+    {1, 9, 0, 174, 2, 10},
+    {0, 10, 0, 175, 2, 1},
+    {9, 0, 1, 166, 3, 10},
+    {8, 1, 1, 167, 3, 90},
+    {7, 2, 1, 168, 3, 360},
+    {6, 3, 1, 169, 3, 840},
+    {5, 4, 1, 170, 3, 1260},
+    {4, 5, 1, 171, 3, 1260},
+    {3, 6, 1, 172, 3, 840},
+    {2, 7, 1, 173, 3, 360},
+    {1, 8, 1, 174, 3, 90},
+    {0, 9, 1, 175, 3, 10},
+    {8, 0, 2, 176, 3, 45},
+    {7, 1, 2, 177, 3, 360},
+    {6, 2, 2, 178, 3, 1260},
+    {5, 3, 2, 179, 3, 2520},
+    {4, 4, 2, 180, 3, 3150},
+    {3, 5, 2, 181, 3, 2520},
+    {2, 6, 2, 182, 3, 1260},
+    {1, 7, 2, 183, 3, 360},
+    {0, 8, 2, 184, 3, 45},
+    {7, 0, 3, 185, 3, 120},
+    {6, 1, 3, 186, 3, 840},
+    {5, 2, 3, 187, 3, 2520},
+    {4, 3, 3, 188, 3, 4200},
+    {3, 4, 3, 189, 3, 4200},
+    {2, 5, 3, 190, 3, 2520},
+    {1, 6, 3, 191, 3, 840},
+    {0, 7, 3, 192, 3, 120},
+    {6, 0, 4, 193, 3, 210},
+    {5, 1, 4, 194, 3, 1260},
+    {4, 2, 4, 195, 3, 3150},
+    {3, 3, 4, 196, 3, 4200},
+    {2, 4, 4, 197, 3, 3150},
+    {1, 5, 4, 198, 3, 1260},
+    {0, 6, 4, 199, 3, 210},
+    {5, 0, 5, 200, 3, 252},
+    {4, 1, 5, 201, 3, 1260},
+    {3, 2, 5, 202, 3, 2520},
+    {2, 3, 5, 203, 3, 2520},
+    {1, 4, 5, 204, 3, 1260},
+    {0, 5, 5, 205, 3, 252},
+    {4, 0, 6, 206, 3, 210},
+    {3, 1, 6, 207, 3, 840},
+    {2, 2, 6, 208, 3, 1260},
+    {1, 3, 6, 209, 3, 840},
+    {0, 4, 6, 210, 3, 210},
+    {3, 0, 7, 211, 3, 120},
+    {2, 1, 7, 212, 3, 360},
+    {1, 2, 7, 213, 3, 360},
+    {0, 3, 7, 214, 3, 120},
+    {2, 0, 8, 215, 3, 45},
+    {1, 1, 8, 216, 3, 90},
+    {0, 2, 8, 217, 3, 45},
+    {1, 0, 9, 218, 3, 10},
+    {0, 1, 9, 219, 3, 10},
+    {0, 0, 10, 220, 3, 1},
+    {11, 0, 0, 221, 1, 1},
+    {10, 1, 0, 221, 2, 11},
+    {9, 2, 0, 222, 2, 55},
+    {8, 3, 0, 223, 2, 165},
+    {7, 4, 0, 224, 2, 330},
+    {6, 5, 0, 225, 2, 462},
+    {5, 6, 0, 226, 2, 462},
+    {4, 7, 0, 227, 2, 330},
+    {3, 8, 0, 228, 2, 165},
+    {2, 9, 0, 229, 2, 55},
+    {1, 10, 0, 230, 2, 11},
+    {0, 11, 0, 231, 2, 1},
+    {10, 0, 1, 221, 3, 11},
+    {9, 1, 1, 222, 3, 110},
+    {8, 2, 1, 223, 3, 495},
+    {7, 3, 1, 224, 3, 1320},
+    {6, 4, 1, 225, 3, 2310},
+    {5, 5, 1, 226, 3, 2772},
+    {4, 6, 1, 227, 3, 2310},
+    {3, 7, 1, 228, 3, 1320},
+    {2, 8, 1, 229, 3, 495},
+    {1, 9, 1, 230, 3, 110},
+    {0, 10, 1, 231, 3, 11},
+    {9, 0, 2, 232, 3, 55},
+    {8, 1, 2, 233, 3, 495},
+    {7, 2, 2, 234, 3, 1980},
+    {6, 3, 2, 235, 3, 4620},
+    {5, 4, 2, 236, 3, 6930},
+    {4, 5, 2, 237, 3, 6930},
+    {3, 6, 2, 238, 3, 4620},
+    {2, 7, 2, 239, 3, 1980},
+    {1, 8, 2, 240, 3, 495},
+    {0, 9, 2, 241, 3, 55},
+    {8, 0, 3, 242, 3, 165},
+    {7, 1, 3, 243, 3, 1320},
+    {6, 2, 3, 244, 3, 4620},
+    {5, 3, 3, 245, 3, 9240},
+    {4, 4, 3, 246, 3, 11550},
+    {3, 5, 3, 247, 3, 9240},
+    {2, 6, 3, 248, 3, 4620},
+    {1, 7, 3, 249, 3, 1320},
+    {0, 8, 3, 250, 3, 165},
+    {7, 0, 4, 251, 3, 330},
+    {6, 1, 4, 252, 3, 2310},
+    {5, 2, 4, 253, 3, 6930},
+    {4, 3, 4, 254, 3, 11550},
+    {3, 4, 4, 255, 3, 11550},
+    {2, 5, 4, 256, 3, 6930},
+    {1, 6, 4, 257, 3, 2310},
+    {0, 7, 4, 258, 3, 330},
+    {6, 0, 5, 259, 3, 462},
+    {5, 1, 5, 260, 3, 2772},
+    {4, 2, 5, 261, 3, 6930},
+    {3, 3, 5, 262, 3, 9240},
+    {2, 4, 5, 263, 3, 6930},
+    {1, 5, 5, 264, 3, 2772},
+    {0, 6, 5, 265, 3, 462},
+    {5, 0, 6, 266, 3, 462},
+    {4, 1, 6, 267, 3, 2310},
+    {3, 2, 6, 268, 3, 4620},
+    {2, 3, 6, 269, 3, 4620},
+    {1, 4, 6, 270, 3, 2310},
+    {0, 5, 6, 271, 3, 462},
+    {4, 0, 7, 272, 3, 330},
+    {3, 1, 7, 273, 3, 1320},
+    {2, 2, 7, 274, 3, 1980},
+    {1, 3, 7, 275, 3, 1320},
+    {0, 4, 7, 276, 3, 330},
+    {3, 0, 8, 277, 3, 165},
+    {2, 1, 8, 278, 3, 495},
+    {1, 2, 8, 279, 3, 495},
+    {0, 3, 8, 280, 3, 165},
+    {2, 0, 9, 281, 3, 55},
+    {1, 1, 9, 282, 3, 110},
+    {0, 2, 9, 283, 3, 55},
+    {1, 0, 10, 284, 3, 11},
+    {0, 1, 10, 285, 3, 11},
+    {0, 0, 11, 286, 3, 1},
+    {12, 0, 0, 287, 1, 1},
+    {11, 1, 0, 287, 2, 12},
+    {10, 2, 0, 288, 2, 66},
+    {9, 3, 0, 289, 2, 220},
+    {8, 4, 0, 290, 2, 495},
+    {7, 5, 0, 291, 2, 792},
+    {6, 6, 0, 292, 2, 924},
+    {5, 7, 0, 293, 2, 792},
+    {4, 8, 0, 294, 2, 495},
+    {3, 9, 0, 295, 2, 220},
+    {2, 10, 0, 296, 2, 66},
+    {1, 11, 0, 297, 2, 12},
+    {0, 12, 0, 298, 2, 1},
+    {11, 0, 1, 287, 3, 12},
+    {10, 1, 1, 288, 3, 132},
+    {9, 2, 1, 289, 3, 660},
+    {8, 3, 1, 290, 3, 1980},
+    {7, 4, 1, 291, 3, 3960},
+    {6, 5, 1, 292, 3, 5544},
+    {5, 6, 1, 293, 3, 5544},
+    {4, 7, 1, 294, 3, 3960},
+    {3, 8, 1, 295, 3, 1980},
+    {2, 9, 1, 296, 3, 660},
+    {1, 10, 1, 297, 3, 132},
+    {0, 11, 1, 298, 3, 12},
+    {10, 0, 2, 299, 3, 66},
+    {9, 1, 2, 300, 3, 660},
+    {8, 2, 2, 301, 3, 2970},
+    {7, 3, 2, 302, 3, 7920},
+    {6, 4, 2, 303, 3, 13860},
+    {5, 5, 2, 304, 3, 16632},
+    {4, 6, 2, 305, 3, 13860},
+    {3, 7, 2, 306, 3, 7920},
+    {2, 8, 2, 307, 3, 2970},
+    {1, 9, 2, 308, 3, 660},
+    {0, 10, 2, 309, 3, 66},
+    {9, 0, 3, 310, 3, 220},
+    {8, 1, 3, 311, 3, 1980},
+    {7, 2, 3, 312, 3, 7920},
+    {6, 3, 3, 313, 3, 18480},
+    {5, 4, 3, 314, 3, 27720},
+    {4, 5, 3, 315, 3, 27720},
+    {3, 6, 3, 316, 3, 18480},
+    {2, 7, 3, 317, 3, 7920},
+    {1, 8, 3, 318, 3, 1980},
+    {0, 9, 3, 319, 3, 220},
+    {8, 0, 4, 320, 3, 495},
+    {7, 1, 4, 321, 3, 3960},
+    {6, 2, 4, 322, 3, 13860},
+    {5, 3, 4, 323, 3, 27720},
+    {4, 4, 4, 324, 3, 34650},
+    {3, 5, 4, 325, 3, 27720},
+    {2, 6, 4, 326, 3, 13860},
+    {1, 7, 4, 327, 3, 3960},
+    {0, 8, 4, 328, 3, 495},
+    {7, 0, 5, 329, 3, 792},
+    {6, 1, 5, 330, 3, 5544},
+    {5, 2, 5, 331, 3, 16632},
+    {4, 3, 5, 332, 3, 27720},
+    {3, 4, 5, 333, 3, 27720},
+    {2, 5, 5, 334, 3, 16632},
+    {1, 6, 5, 335, 3, 5544},
+    {0, 7, 5, 336, 3, 792},
+    {6, 0, 6, 337, 3, 924},
+    {5, 1, 6, 338, 3, 5544},
+    {4, 2, 6, 339, 3, 13860},
+    {3, 3, 6, 340, 3, 18480},
+    {2, 4, 6, 341, 3, 13860},
+    {1, 5, 6, 342, 3, 5544},
+    {0, 6, 6, 343, 3, 924},
+    {5, 0, 7, 344, 3, 792},
+    {4, 1, 7, 345, 3, 3960},
+    {3, 2, 7, 346, 3, 7920},
+    {2, 3, 7, 347, 3, 7920},
+    {1, 4, 7, 348, 3, 3960},
+    {0, 5, 7, 349, 3, 792},
+    {4, 0, 8, 350, 3, 495},
+    {3, 1, 8, 351, 3, 1980},
+    {2, 2, 8, 352, 3, 2970},
+    {1, 3, 8, 353, 3, 1980},
+    {0, 4, 8, 354, 3, 495},
+    {3, 0, 9, 355, 3, 220},
+    {2, 1, 9, 356, 3, 660},
+    {1, 2, 9, 357, 3, 660},
+    {0, 3, 9, 358, 3, 220},
+    {2, 0, 10, 359, 3, 66},
+    {1, 1, 10, 360, 3, 132},
+    {0, 2, 10, 361, 3, 66},
+    {1, 0, 11, 362, 3, 12},
+    {0, 1, 11, 363, 3, 12},
+    {0, 0, 12, 364, 3, 1}
+  };
+
+  // Cumulative counts for degrees 0..Pa (Pa+2 entries, starting with 0)
+  for (int i = 0; i<= Pa+1; i++)
+    np[i] = npa[i];
+
+  // Number of table rows needed for degree Pa
+  int nmax = np[Pa+1];
+
+  // pq stores columns 3 and 4 back to back; pc stores column 5
+  for (int i=0; i<nmax; i++) {
+    pq[i]        = poly[i][3];
+    pq[i+nmax]   = poly[i][4];
+    pc[i]        = poly[i][5];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Copy the leading part of the built-in tables into caller-provided arrays.
+
+   ns4  receives ns[0..nb[Pa]], i.e. nb[Pa]+1 cumulative row offsets
+   pb4  receives three consecutive segments of nmax entries each, holding
+        columns 0, 1 and 2 of the first nmax rows of poly, shifted down by
+        one (the table stores these indices starting from 1)
+   pc4  receives column 3 of the same nmax rows
+   Pa   selects how much of the tables is used; it indexes nb[], so it must
+        lie in 0..6 -- no range check is made here
+
+   nmax = ns[nb[Pa]] is at most 1345, so the largest sizes ever written are
+   24 (ns4), 3*1345 (pb4) and 1345 (pc4).
+------------------------------------------------------------------------- */
+
+void EAPOD::init4bodyarray(int *ns4, int *pb4, int *pc4, int Pa)
+{
+  // static const: the tables are read-only, so they live in static storage
+  // instead of being rebuilt on the stack (1345x4 ints) at every call
+  // nb[Pa] = index into ns[] of the last offset copied for a given Pa
+  static const int nb[] = {1,     2,     4,     7,    11,    16,    23};
+
+  // cumulative row counts of poly; ns[nb[Pa]] is the number of rows used
+  static const int ns[] =
+  {
+    0, 1, 4, 10, 19, 29, 47, 74, 89, 119, 155, 209,
+    230, 275, 335, 425, 533, 561, 624, 714, 849, 949, 1129, 1345
+  };
+
+  // each row: three indices (stored starting from 1) and a fourth integer for pc4
+  static const int poly[1345][4] =
+  {
+    {1, 1, 1, 1},
+    {2, 2, 1, 1},
+    {3, 3, 1, 1},
+    {4, 4, 1, 1},
+    {5, 5, 1, 1},
+    {6, 6, 1, 2},
+    {8, 8, 1, 2},
+    {7, 7, 1, 1},
+    {9, 9, 1, 2},
+    {10, 10, 1, 1},
+    {5, 2, 2, 1},
+    {6, 2, 3, 1},
+    {6, 3, 2, 1},
+    {8, 2, 4, 1},
+    {8, 4, 2, 1},
+    {7, 3, 3, 1},
+    {9, 3, 4, 1},
+    {9, 4, 3, 1},
+    {10, 4, 4, 1},
+    {11, 11, 1, 1},
+    {12, 12, 1, 3},
+    {15, 15, 1, 3},
+    {13, 13, 1, 3},
+    {16, 16, 1, 6},
+    {18, 18, 1, 3},
+    {14, 14, 1, 1},
+    {17, 17, 1, 3},
+    {19, 19, 1, 3},
+    {20, 20, 1, 1},
+    {11, 5, 2, 1},
+    {12, 5, 3, 1},
+    {12, 6, 2, 2},
+    {15, 5, 4, 1},
+    {15, 8, 2, 2},
+    {13, 6, 3, 2},
+    {13, 7, 2, 1},
+    {16, 6, 4, 2},
+    {16, 8, 3, 2},
+    {16, 9, 2, 2},
+    {18, 8, 4, 2},
+    {18, 10, 2, 1},
+    {14, 7, 3, 1},
+    {17, 7, 4, 1},
+    {17, 9, 3, 2},
+    {19, 9, 4, 2},
+    {19, 10, 3, 1},
+    {20, 10, 4, 1},
+    {5, 5, 5, 1},
+    {5, 6, 6, 1},
+    {5, 8, 8, 1},
+    {6, 5, 6, 1},
+    {6, 6, 5, 1},
+    {6, 6, 7, 1},
+    {6, 8, 9, 1},
+    {6, 7, 6, 1},
+    {6, 9, 8, 1},
+    {8, 5, 8, 1},
+    {8, 6, 9, 1},
+    {8, 8, 5, 1},
+    {8, 8, 10, 1},
+    {8, 9, 6, 1},
+    {8, 10, 8, 1},
+    {7, 6, 6, 1},
+    {7, 7, 7, 1},
+    {7, 9, 9, 1},
+    {9, 6, 8, 1},
+    {9, 8, 6, 1},
+    {9, 7, 9, 1},
+    {9, 9, 7, 1},
+    {9, 9, 10, 1},
+    {9, 10, 9, 1},
+    {10, 8, 8, 1},
+    {10, 9, 9, 1},
+    {10, 10, 10, 1},
+    {21, 21, 1, 1},
+    {22, 22, 1, 4},
+    {26, 26, 1, 4},
+    {23, 23, 1, 6},
+    {27, 27, 1, 12},
+    {30, 30, 1, 6},
+    {24, 24, 1, 4},
+    {28, 28, 1, 12},
+    {31, 31, 1, 12},
+    {33, 33, 1, 4},
+    {25, 25, 1, 1},
+    {29, 29, 1, 4},
+    {32, 32, 1, 6},
+    {34, 34, 1, 4},
+    {35, 35, 1, 1},
+    {21, 11, 2, 1},
+    {22, 11, 3, 1},
+    {22, 12, 2, 3},
+    {26, 11, 4, 1},
+    {26, 15, 2, 3},
+    {23, 12, 3, 3},
+    {23, 13, 2, 3},
+    {27, 12, 4, 3},
+    {27, 15, 3, 3},
+    {27, 16, 2, 6},
+    {30, 15, 4, 3},
+    {30, 18, 2, 3},
+    {24, 13, 3, 3},
+    {24, 14, 2, 1},
+    {28, 13, 4, 3},
+    {28, 16, 3, 6},
+    {28, 17, 2, 3},
+    {31, 16, 4, 6},
+    {31, 18, 3, 3},
+    {31, 19, 2, 3},
+    {33, 18, 4, 3},
+    {33, 20, 2, 1},
+    {25, 14, 3, 1},
+    {29, 14, 4, 1},
+    {29, 17, 3, 3},
+    {32, 17, 4, 3},
+    {32, 19, 3, 3},
+    {34, 19, 4, 3},
+    {34, 20, 3, 1},
+    {35, 20, 4, 1},
+    {21, 5, 5, 1},
+    {22, 5, 6, 2},
+    {22, 6, 5, 2},
+    {26, 5, 8, 2},
+    {26, 8, 5, 2},
+    {23, 5, 7, 1},
+    {23, 6, 6, 4},
+    {23, 7, 5, 1},
+    {27, 5, 9, 2},
+    {27, 6, 8, 4},
+    {27, 8, 6, 4},
+    {27, 9, 5, 2},
+    {30, 5, 10, 1},
+    {30, 8, 8, 4},
+    {30, 10, 5, 1},
+    {24, 6, 7, 2},
+    {24, 7, 6, 2},
+    {28, 6, 9, 4},
+    {28, 8, 7, 2},
+    {28, 7, 8, 2},
+    {28, 9, 6, 4},
+    {31, 6, 10, 2},
+    {31, 8, 9, 4},
+    {31, 9, 8, 4},
+    {31, 10, 6, 2},
+    {33, 8, 10, 2},
+    {33, 10, 8, 2},
+    {25, 7, 7, 1},
+    {29, 7, 9, 2},
+    {29, 9, 7, 2},
+    {32, 7, 10, 1},
+    {32, 9, 9, 4},
+    {32, 10, 7, 1},
+    {34, 9, 10, 2},
+    {34, 10, 9, 2},
+    {35, 10, 10, 1},
+    {11, 11, 5, 1},
+    {11, 12, 6, 1},
+    {11, 15, 8, 1},
+    {12, 11, 6, 1},
+    {12, 12, 5, 2},
+    {12, 12, 7, 1},
+    {12, 15, 9, 1},
+    {12, 13, 6, 2},
+    {12, 16, 8, 2},
+    {15, 11, 8, 1},
+    {15, 12, 9, 1},
+    {15, 15, 5, 2},
+    {15, 15, 10, 1},
+    {15, 16, 6, 2},
+    {15, 18, 8, 2},
+    {13, 12, 6, 2},
+    {13, 13, 5, 1},
+    {13, 13, 7, 2},
+    {13, 16, 9, 2},
+    {13, 14, 6, 1},
+    {13, 17, 8, 1},
+    {16, 12, 8, 2},
+    {16, 15, 6, 2},
+    {16, 13, 9, 2},
+    {16, 16, 5, 2},
+    {16, 16, 7, 2},
+    {16, 16, 10, 2},
+    {16, 18, 9, 2},
+    {16, 17, 6, 2},
+    {16, 19, 8, 2},
+    {18, 15, 8, 2},
+    {18, 16, 9, 2},
+    {18, 18, 5, 1},
+    {18, 18, 10, 2},
+    {18, 19, 6, 1},
+    {18, 20, 8, 1},
+    {14, 13, 6, 1},
+    {14, 14, 7, 1},
+    {14, 17, 9, 1},
+    {17, 13, 8, 1},
+    {17, 16, 6, 2},
+    {17, 14, 9, 1},
+    {17, 17, 7, 2},
+    {17, 17, 10, 1},
+    {17, 19, 9, 2},
+    {19, 16, 8, 2},
+    {19, 18, 6, 1},
+    {19, 17, 9, 2},
+    {19, 19, 7, 1},
+    {19, 19, 10, 2},
+    {19, 20, 9, 1},
+    {20, 18, 8, 1},
+    {20, 19, 9, 1},
+    {20, 20, 10, 1},
+    {36, 36, 1, 1},
+    {37, 37, 1, 5},
+    {42, 42, 1, 5},
+    {38, 38, 1, 10},
+    {43, 43, 1, 20},
+    {47, 47, 1, 10},
+    {39, 39, 1, 10},
+    {44, 44, 1, 30},
+    {48, 48, 1, 30},
+    {51, 51, 1, 10},
+    {40, 40, 1, 5},
+    {45, 45, 1, 20},
+    {49, 49, 1, 30},
+    {52, 52, 1, 20},
+    {54, 54, 1, 5},
+    {41, 41, 1, 1},
+    {46, 46, 1, 5},
+    {50, 50, 1, 10},
+    {53, 53, 1, 10},
+    {55, 55, 1, 5},
+    {56, 56, 1, 1},
+    {36, 21, 2, 1},
+    {37, 21, 3, 1},
+    {37, 22, 2, 4},
+    {42, 21, 4, 1},
+    {42, 26, 2, 4},
+    {38, 22, 3, 4},
+    {38, 23, 2, 6},
+    {43, 22, 4, 4},
+    {43, 26, 3, 4},
+    {43, 27, 2, 12},
+    {47, 26, 4, 4},
+    {47, 30, 2, 6},
+    {39, 23, 3, 6},
+    {39, 24, 2, 4},
+    {44, 23, 4, 6},
+    {44, 27, 3, 12},
+    {44, 28, 2, 12},
+    {48, 27, 4, 12},
+    {48, 30, 3, 6},
+    {48, 31, 2, 12},
+    {51, 30, 4, 6},
+    {51, 33, 2, 4},
+    {40, 24, 3, 4},
+    {40, 25, 2, 1},
+    {45, 24, 4, 4},
+    {45, 28, 3, 12},
+    {45, 29, 2, 4},
+    {49, 28, 4, 12},
+    {49, 31, 3, 12},
+    {49, 32, 2, 6},
+    {52, 31, 4, 12},
+    {52, 33, 3, 4},
+    {52, 34, 2, 4},
+    {54, 33, 4, 4},
+    {54, 35, 2, 1},
+    {41, 25, 3, 1},
+    {46, 25, 4, 1},
+    {46, 29, 3, 4},
+    {50, 29, 4, 4},
+    {50, 32, 3, 6},
+    {53, 32, 4, 6},
+    {53, 34, 3, 4},
+    {55, 34, 4, 4},
+    {55, 35, 3, 1},
+    {56, 35, 4, 1},
+    {36, 11, 5, 1},
+    {37, 11, 6, 2},
+    {37, 12, 5, 3},
+    {42, 11, 8, 2},
+    {42, 15, 5, 3},
+    {38, 11, 7, 1},
+    {38, 12, 6, 6},
+    {38, 13, 5, 3},
+    {43, 11, 9, 2},
+    {43, 12, 8, 6},
+    {43, 15, 6, 6},
+    {43, 16, 5, 6},
+    {47, 11, 10, 1},
+    {47, 15, 8, 6},
+    {47, 18, 5, 3},
+    {39, 12, 7, 3},
+    {39, 13, 6, 6},
+    {39, 14, 5, 1},
+    {44, 12, 9, 6},
+    {44, 15, 7, 3},
+    {44, 13, 8, 6},
+    {44, 16, 6, 12},
+    {44, 17, 5, 3},
+    {48, 12, 10, 3},
+    {48, 15, 9, 6},
+    {48, 16, 8, 12},
+    {48, 18, 6, 6},
+    {48, 19, 5, 3},
+    {51, 15, 10, 3},
+    {51, 18, 8, 6},
+    {51, 20, 5, 1},
+    {40, 13, 7, 3},
+    {40, 14, 6, 2},
+    {45, 13, 9, 6},
+    {45, 16, 7, 6},
+    {45, 14, 8, 2},
+    {45, 17, 6, 6},
+    {49, 13, 10, 3},
+    {49, 16, 9, 12},
+    {49, 18, 7, 3},
+    {49, 17, 8, 6},
+    {49, 19, 6, 6},
+    {52, 16, 10, 6},
+    {52, 18, 9, 6},
+    {52, 19, 8, 6},
+    {52, 20, 6, 2},
+    {54, 18, 10, 3},
+    {54, 20, 8, 2},
+    {41, 14, 7, 1},
+    {46, 14, 9, 2},
+    {46, 17, 7, 3},
+    {50, 14, 10, 1},
+    {50, 17, 9, 6},
+    {50, 19, 7, 3},
+    {53, 17, 10, 3},
+    {53, 19, 9, 6},
+    {53, 20, 7, 1},
+    {55, 19, 10, 3},
+    {55, 20, 9, 2},
+    {56, 20, 10, 1},
+    {21, 21, 5, 1},
+    {21, 22, 6, 1},
+    {21, 26, 8, 1},
+    {22, 21, 6, 1},
+    {22, 22, 5, 3},
+    {22, 22, 7, 1},
+    {22, 26, 9, 1},
+    {22, 23, 6, 3},
+    {22, 27, 8, 3},
+    {26, 21, 8, 1},
+    {26, 22, 9, 1},
+    {26, 26, 5, 3},
+    {26, 26, 10, 1},
+    {26, 27, 6, 3},
+    {26, 30, 8, 3},
+    {23, 22, 6, 3},
+    {23, 23, 5, 3},
+    {23, 23, 7, 3},
+    {23, 27, 9, 3},
+    {23, 24, 6, 3},
+    {23, 28, 8, 3},
+    {27, 22, 8, 3},
+    {27, 26, 6, 3},
+    {27, 23, 9, 3},
+    {27, 27, 5, 6},
+    {27, 27, 7, 3},
+    {27, 27, 10, 3},
+    {27, 30, 9, 3},
+    {27, 28, 6, 6},
+    {27, 31, 8, 6},
+    {30, 26, 8, 3},
+    {30, 27, 9, 3},
+    {30, 30, 5, 3},
+    {30, 30, 10, 3},
+    {30, 31, 6, 3},
+    {30, 33, 8, 3},
+    {24, 23, 6, 3},
+    {24, 24, 5, 1},
+    {24, 24, 7, 3},
+    {24, 28, 9, 3},
+    {24, 25, 6, 1},
+    {24, 29, 8, 1},
+    {28, 23, 8, 3},
+    {28, 27, 6, 6},
+    {28, 24, 9, 3},
+    {28, 28, 5, 3},
+    {28, 28, 7, 6},
+    {28, 28, 10, 3},
+    {28, 31, 9, 6},
+    {28, 29, 6, 3},
+    {28, 32, 8, 3},
+    {31, 27, 8, 6},
+    {31, 30, 6, 3},
+    {31, 28, 9, 6},
+    {31, 31, 5, 3},
+    {31, 31, 7, 3},
+    {31, 31, 10, 6},
+    {31, 33, 9, 3},
+    {31, 32, 6, 3},
+    {31, 34, 8, 3},
+    {33, 30, 8, 3},
+    {33, 31, 9, 3},
+    {33, 33, 5, 1},
+    {33, 33, 10, 3},
+    {33, 34, 6, 1},
+    {33, 35, 8, 1},
+    {25, 24, 6, 1},
+    {25, 25, 7, 1},
+    {25, 29, 9, 1},
+    {29, 24, 8, 1},
+    {29, 28, 6, 3},
+    {29, 25, 9, 1},
+    {29, 29, 7, 3},
+    {29, 29, 10, 1},
+    {29, 32, 9, 3},
+    {32, 28, 8, 3},
+    {32, 31, 6, 3},
+    {32, 29, 9, 3},
+    {32, 32, 7, 3},
+    {32, 32, 10, 3},
+    {32, 34, 9, 3},
+    {34, 31, 8, 3},
+    {34, 33, 6, 1},
+    {34, 32, 9, 3},
+    {34, 34, 7, 1},
+    {34, 34, 10, 3},
+    {34, 35, 9, 1},
+    {35, 33, 8, 1},
+    {35, 34, 9, 1},
+    {35, 35, 10, 1},
+    {21, 11, 11, 1},
+    {21, 12, 12, 1},
+    {21, 15, 15, 1},
+    {22, 11, 12, 2},
+    {22, 12, 11, 2},
+    {22, 12, 13, 2},
+    {22, 15, 16, 2},
+    {22, 13, 12, 2},
+    {22, 16, 15, 2},
+    {26, 11, 15, 2},
+    {26, 12, 16, 2},
+    {26, 15, 11, 2},
+    {26, 15, 18, 2},
+    {26, 16, 12, 2},
+    {26, 18, 15, 2},
+    {23, 11, 13, 1},
+    {23, 12, 12, 4},
+    {23, 12, 14, 1},
+    {23, 15, 17, 1},
+    {23, 13, 11, 1},
+    {23, 13, 13, 4},
+    {23, 16, 16, 4},
+    {23, 14, 12, 1},
+    {23, 17, 15, 1},
+    {27, 11, 16, 2},
+    {27, 12, 15, 4},
+    {27, 12, 17, 2},
+    {27, 15, 12, 4},
+    {27, 15, 19, 2},
+    {27, 13, 16, 4},
+    {27, 16, 11, 2},
+    {27, 16, 13, 4},
+    {27, 16, 18, 4},
+    {27, 18, 16, 4},
+    {27, 17, 12, 2},
+    {27, 19, 15, 2},
+    {30, 11, 18, 1},
+    {30, 12, 19, 1},
+    {30, 15, 15, 4},
+    {30, 15, 20, 1},
+    {30, 16, 16, 4},
+    {30, 18, 11, 1},
+    {30, 18, 18, 4},
+    {30, 19, 12, 1},
+    {30, 20, 15, 1},
+    {24, 12, 13, 2},
+    {24, 13, 12, 2},
+    {24, 13, 14, 2},
+    {24, 16, 17, 2},
+    {24, 14, 13, 2},
+    {24, 17, 16, 2},
+    {28, 12, 16, 4},
+    {28, 15, 13, 2},
+    {28, 13, 15, 2},
+    {28, 13, 17, 4},
+    {28, 16, 12, 4},
+    {28, 16, 14, 2},
+    {28, 16, 19, 4},
+    {28, 18, 17, 2},
+    {28, 14, 16, 2},
+    {28, 17, 13, 4},
+    {28, 17, 18, 2},
+    {28, 19, 16, 4},
+    {31, 12, 18, 2},
+    {31, 15, 16, 4},
+    {31, 13, 19, 2},
+    {31, 16, 15, 4},
+    {31, 16, 17, 4},
+    {31, 16, 20, 2},
+    {31, 18, 12, 2},
+    {31, 18, 19, 4},
+    {31, 17, 16, 4},
+    {31, 19, 13, 2},
+    {31, 19, 18, 4},
+    {31, 20, 16, 2},
+    {33, 15, 18, 2},
+    {33, 16, 19, 2},
+    {33, 18, 15, 2},
+    {33, 18, 20, 2},
+    {33, 19, 16, 2},
+    {33, 20, 18, 2},
+    {25, 13, 13, 1},
+    {25, 14, 14, 1},
+    {25, 17, 17, 1},
+    {29, 13, 16, 2},
+    {29, 16, 13, 2},
+    {29, 14, 17, 2},
+    {29, 17, 14, 2},
+    {29, 17, 19, 2},
+    {29, 19, 17, 2},
+    {32, 13, 18, 1},
+    {32, 16, 16, 4},
+    {32, 18, 13, 1},
+    {32, 14, 19, 1},
+    {32, 17, 17, 4},
+    {32, 17, 20, 1},
+    {32, 19, 14, 1},
+    {32, 19, 19, 4},
+    {32, 20, 17, 1},
+    {34, 16, 18, 2},
+    {34, 18, 16, 2},
+    {34, 17, 19, 2},
+    {34, 19, 17, 2},
+    {34, 19, 20, 2},
+    {34, 20, 19, 2},
+    {35, 18, 18, 1},
+    {35, 19, 19, 1},
+    {35, 20, 20, 1},
+    {57, 57, 1, 1},
+    {58, 58, 1, 6},
+    {64, 64, 1, 6},
+    {59, 59, 1, 15},
+    {65, 65, 1, 30},
+    {70, 70, 1, 15},
+    {60, 60, 1, 20},
+    {66, 66, 1, 60},
+    {71, 71, 1, 60},
+    {75, 75, 1, 20},
+    {61, 61, 1, 15},
+    {67, 67, 1, 60},
+    {72, 72, 1, 90},
+    {76, 76, 1, 60},
+    {79, 79, 1, 15},
+    {62, 62, 1, 6},
+    {68, 68, 1, 30},
+    {73, 73, 1, 60},
+    {77, 77, 1, 60},
+    {80, 80, 1, 30},
+    {82, 82, 1, 6},
+    {63, 63, 1, 1},
+    {69, 69, 1, 6},
+    {74, 74, 1, 15},
+    {78, 78, 1, 20},
+    {81, 81, 1, 15},
+    {83, 83, 1, 6},
+    {84, 84, 1, 1},
+    {57, 36, 2, 1},
+    {58, 36, 3, 1},
+    {58, 37, 2, 5},
+    {64, 36, 4, 1},
+    {64, 42, 2, 5},
+    {59, 37, 3, 5},
+    {59, 38, 2, 10},
+    {65, 37, 4, 5},
+    {65, 42, 3, 5},
+    {65, 43, 2, 20},
+    {70, 42, 4, 5},
+    {70, 47, 2, 10},
+    {60, 38, 3, 10},
+    {60, 39, 2, 10},
+    {66, 38, 4, 10},
+    {66, 43, 3, 20},
+    {66, 44, 2, 30},
+    {71, 43, 4, 20},
+    {71, 47, 3, 10},
+    {71, 48, 2, 30},
+    {75, 47, 4, 10},
+    {75, 51, 2, 10},
+    {61, 39, 3, 10},
+    {61, 40, 2, 5},
+    {67, 39, 4, 10},
+    {67, 44, 3, 30},
+    {67, 45, 2, 20},
+    {72, 44, 4, 30},
+    {72, 48, 3, 30},
+    {72, 49, 2, 30},
+    {76, 48, 4, 30},
+    {76, 51, 3, 10},
+    {76, 52, 2, 20},
+    {79, 51, 4, 10},
+    {79, 54, 2, 5},
+    {62, 40, 3, 5},
+    {62, 41, 2, 1},
+    {68, 40, 4, 5},
+    {68, 45, 3, 20},
+    {68, 46, 2, 5},
+    {73, 45, 4, 20},
+    {73, 49, 3, 30},
+    {73, 50, 2, 10},
+    {77, 49, 4, 30},
+    {77, 52, 3, 20},
+    {77, 53, 2, 10},
+    {80, 52, 4, 20},
+    {80, 54, 3, 5},
+    {80, 55, 2, 5},
+    {82, 54, 4, 5},
+    {82, 56, 2, 1},
+    {63, 41, 3, 1},
+    {69, 41, 4, 1},
+    {69, 46, 3, 5},
+    {74, 46, 4, 5},
+    {74, 50, 3, 10},
+    {78, 50, 4, 10},
+    {78, 53, 3, 10},
+    {81, 53, 4, 10},
+    {81, 55, 3, 5},
+    {83, 55, 4, 5},
+    {83, 56, 3, 1},
+    {84, 56, 4, 1},
+    {57, 21, 5, 1},
+    {58, 21, 6, 2},
+    {58, 22, 5, 4},
+    {64, 21, 8, 2},
+    {64, 26, 5, 4},
+    {59, 21, 7, 1},
+    {59, 22, 6, 8},
+    {59, 23, 5, 6},
+    {65, 21, 9, 2},
+    {65, 22, 8, 8},
+    {65, 26, 6, 8},
+    {65, 27, 5, 12},
+    {70, 21, 10, 1},
+    {70, 26, 8, 8},
+    {70, 30, 5, 6},
+    {60, 22, 7, 4},
+    {60, 23, 6, 12},
+    {60, 24, 5, 4},
+    {66, 22, 9, 8},
+    {66, 26, 7, 4},
+    {66, 23, 8, 12},
+    {66, 27, 6, 24},
+    {66, 28, 5, 12},
+    {71, 22, 10, 4},
+    {71, 26, 9, 8},
+    {71, 27, 8, 24},
+    {71, 30, 6, 12},
+    {71, 31, 5, 12},
+    {75, 26, 10, 4},
+    {75, 30, 8, 12},
+    {75, 33, 5, 4},
+    {61, 23, 7, 6},
+    {61, 24, 6, 8},
+    {61, 25, 5, 1},
+    {67, 23, 9, 12},
+    {67, 27, 7, 12},
+    {67, 24, 8, 8},
+    {67, 28, 6, 24},
+    {67, 29, 5, 4},
+    {72, 23, 10, 6},
+    {72, 27, 9, 24},
+    {72, 30, 7, 6},
+    {72, 28, 8, 24},
+    {72, 31, 6, 24},
+    {72, 32, 5, 6},
+    {76, 27, 10, 12},
+    {76, 30, 9, 12},
+    {76, 31, 8, 24},
+    {76, 33, 6, 8},
+    {76, 34, 5, 4},
+    {79, 30, 10, 6},
+    {79, 33, 8, 8},
+    {79, 35, 5, 1},
+    {62, 24, 7, 4},
+    {62, 25, 6, 2},
+    {68, 24, 9, 8},
+    {68, 28, 7, 12},
+    {68, 25, 8, 2},
+    {68, 29, 6, 8},
+    {73, 24, 10, 4},
+    {73, 28, 9, 24},
+    {73, 31, 7, 12},
+    {73, 29, 8, 8},
+    {73, 32, 6, 12},
+    {77, 28, 10, 12},
+    {77, 31, 9, 24},
+    {77, 33, 7, 4},
+    {77, 32, 8, 12},
+    {77, 34, 6, 8},
+    {80, 31, 10, 12},
+    {80, 33, 9, 8},
+    {80, 34, 8, 8},
+    {80, 35, 6, 2},
+    {82, 33, 10, 4},
+    {82, 35, 8, 2},
+    {63, 25, 7, 1},
+    {69, 25, 9, 2},
+    {69, 29, 7, 4},
+    {74, 25, 10, 1},
+    {74, 29, 9, 8},
+    {74, 32, 7, 6},
+    {78, 29, 10, 4},
+    {78, 32, 9, 12},
+    {78, 34, 7, 4},
+    {81, 32, 10, 6},
+    {81, 34, 9, 8},
+    {81, 35, 7, 1},
+    {83, 34, 10, 4},
+    {83, 35, 9, 2},
+    {84, 35, 10, 1},
+    {36, 36, 5, 1},
+    {36, 37, 6, 1},
+    {36, 42, 8, 1},
+    {37, 36, 6, 1},
+    {37, 37, 5, 4},
+    {37, 37, 7, 1},
+    {37, 42, 9, 1},
+    {37, 38, 6, 4},
+    {37, 43, 8, 4},
+    {42, 36, 8, 1},
+    {42, 37, 9, 1},
+    {42, 42, 5, 4},
+    {42, 42, 10, 1},
+    {42, 43, 6, 4},
+    {42, 47, 8, 4},
+    {38, 37, 6, 4},
+    {38, 38, 5, 6},
+    {38, 38, 7, 4},
+    {38, 43, 9, 4},
+    {38, 39, 6, 6},
+    {38, 44, 8, 6},
+    {43, 37, 8, 4},
+    {43, 42, 6, 4},
+    {43, 38, 9, 4},
+    {43, 43, 5, 12},
+    {43, 43, 7, 4},
+    {43, 43, 10, 4},
+    {43, 47, 9, 4},
+    {43, 44, 6, 12},
+    {43, 48, 8, 12},
+    {47, 42, 8, 4},
+    {47, 43, 9, 4},
+    {47, 47, 5, 6},
+    {47, 47, 10, 4},
+    {47, 48, 6, 6},
+    {47, 51, 8, 6},
+    {39, 38, 6, 6},
+    {39, 39, 5, 4},
+    {39, 39, 7, 6},
+    {39, 44, 9, 6},
+    {39, 40, 6, 4},
+    {39, 45, 8, 4},
+    {44, 38, 8, 6},
+    {44, 43, 6, 12},
+    {44, 39, 9, 6},
+    {44, 44, 5, 12},
+    {44, 44, 7, 12},
+    {44, 44, 10, 6},
+    {44, 48, 9, 12},
+    {44, 45, 6, 12},
+    {44, 49, 8, 12},
+    {48, 43, 8, 12},
+    {48, 47, 6, 6},
+    {48, 44, 9, 12},
+    {48, 48, 5, 12},
+    {48, 48, 7, 6},
+    {48, 48, 10, 12},
+    {48, 51, 9, 6},
+    {48, 49, 6, 12},
+    {48, 52, 8, 12},
+    {51, 47, 8, 6},
+    {51, 48, 9, 6},
+    {51, 51, 5, 4},
+    {51, 51, 10, 6},
+    {51, 52, 6, 4},
+    {51, 54, 8, 4},
+    {40, 39, 6, 4},
+    {40, 40, 5, 1},
+    {40, 40, 7, 4},
+    {40, 45, 9, 4},
+    {40, 41, 6, 1},
+    {40, 46, 8, 1},
+    {45, 39, 8, 4},
+    {45, 44, 6, 12},
+    {45, 40, 9, 4},
+    {45, 45, 5, 4},
+    {45, 45, 7, 12},
+    {45, 45, 10, 4},
+    {45, 49, 9, 12},
+    {45, 46, 6, 4},
+    {45, 50, 8, 4},
+    {49, 44, 8, 12},
+    {49, 48, 6, 12},
+    {49, 45, 9, 12},
+    {49, 49, 5, 6},
+    {49, 49, 7, 12},
+    {49, 49, 10, 12},
+    {49, 52, 9, 12},
+    {49, 50, 6, 6},
+    {49, 53, 8, 6},
+    {52, 48, 8, 12},
+    {52, 51, 6, 4},
+    {52, 49, 9, 12},
+    {52, 52, 5, 4},
+    {52, 52, 7, 4},
+    {52, 52, 10, 12},
+    {52, 54, 9, 4},
+    {52, 53, 6, 4},
+    {52, 55, 8, 4},
+    {54, 51, 8, 4},
+    {54, 52, 9, 4},
+    {54, 54, 5, 1},
+    {54, 54, 10, 4},
+    {54, 55, 6, 1},
+    {54, 56, 8, 1},
+    {41, 40, 6, 1},
+    {41, 41, 7, 1},
+    {41, 46, 9, 1},
+    {46, 40, 8, 1},
+    {46, 45, 6, 4},
+    {46, 41, 9, 1},
+    {46, 46, 7, 4},
+    {46, 46, 10, 1},
+    {46, 50, 9, 4},
+    {50, 45, 8, 4},
+    {50, 49, 6, 6},
+    {50, 46, 9, 4},
+    {50, 50, 7, 6},
+    {50, 50, 10, 4},
+    {50, 53, 9, 6},
+    {53, 49, 8, 6},
+    {53, 52, 6, 4},
+    {53, 50, 9, 6},
+    {53, 53, 7, 4},
+    {53, 53, 10, 6},
+    {53, 55, 9, 4},
+    {55, 52, 8, 4},
+    {55, 54, 6, 1},
+    {55, 53, 9, 4},
+    {55, 55, 7, 1},
+    {55, 55, 10, 4},
+    {55, 56, 9, 1},
+    {56, 54, 8, 1},
+    {56, 55, 9, 1},
+    {56, 56, 10, 1},
+    {57, 11, 11, 1},
+    {58, 11, 12, 3},
+    {58, 12, 11, 3},
+    {64, 11, 15, 3},
+    {64, 15, 11, 3},
+    {59, 11, 13, 3},
+    {59, 12, 12, 9},
+    {59, 13, 11, 3},
+    {65, 11, 16, 6},
+    {65, 12, 15, 9},
+    {65, 15, 12, 9},
+    {65, 16, 11, 6},
+    {70, 11, 18, 3},
+    {70, 15, 15, 9},
+    {70, 18, 11, 3},
+    {60, 11, 14, 1},
+    {60, 12, 13, 9},
+    {60, 13, 12, 9},
+    {60, 14, 11, 1},
+    {66, 11, 17, 3},
+    {66, 12, 16, 18},
+    {66, 15, 13, 9},
+    {66, 13, 15, 9},
+    {66, 16, 12, 18},
+    {66, 17, 11, 3},
+    {71, 11, 19, 3},
+    {71, 12, 18, 9},
+    {71, 15, 16, 18},
+    {71, 16, 15, 18},
+    {71, 18, 12, 9},
+    {71, 19, 11, 3},
+    {75, 11, 20, 1},
+    {75, 15, 18, 9},
+    {75, 18, 15, 9},
+    {75, 20, 11, 1},
+    {61, 12, 14, 3},
+    {61, 13, 13, 9},
+    {61, 14, 12, 3},
+    {67, 12, 17, 9},
+    {67, 15, 14, 3},
+    {67, 13, 16, 18},
+    {67, 16, 13, 18},
+    {67, 14, 15, 3},
+    {67, 17, 12, 9},
+    {72, 12, 19, 9},
+    {72, 15, 17, 9},
+    {72, 13, 18, 9},
+    {72, 16, 16, 36},
+    {72, 18, 13, 9},
+    {72, 17, 15, 9},
+    {72, 19, 12, 9},
+    {76, 12, 20, 3},
+    {76, 15, 19, 9},
+    {76, 16, 18, 18},
+    {76, 18, 16, 18},
+    {76, 19, 15, 9},
+    {76, 20, 12, 3},
+    {79, 15, 20, 3},
+    {79, 18, 18, 9},
+    {79, 20, 15, 3},
+    {62, 13, 14, 3},
+    {62, 14, 13, 3},
+    {68, 13, 17, 9},
+    {68, 16, 14, 6},
+    {68, 14, 16, 6},
+    {68, 17, 13, 9},
+    {73, 13, 19, 9},
+    {73, 16, 17, 18},
+    {73, 18, 14, 3},
+    {73, 14, 18, 3},
+    {73, 17, 16, 18},
+    {73, 19, 13, 9},
+    {77, 13, 20, 3},
+    {77, 16, 19, 18},
+    {77, 18, 17, 9},
+    {77, 17, 18, 9},
+    {77, 19, 16, 18},
+    {77, 20, 13, 3},
+    {80, 16, 20, 6},
+    {80, 18, 19, 9},
+    {80, 19, 18, 9},
+    {80, 20, 16, 6},
+    {82, 18, 20, 3},
+    {82, 20, 18, 3},
+    {63, 14, 14, 1},
+    {69, 14, 17, 3},
+    {69, 17, 14, 3},
+    {74, 14, 19, 3},
+    {74, 17, 17, 9},
+    {74, 19, 14, 3},
+    {78, 14, 20, 1},
+    {78, 17, 19, 9},
+    {78, 19, 17, 9},
+    {78, 20, 14, 1},
+    {81, 17, 20, 3},
+    {81, 19, 19, 9},
+    {81, 20, 17, 3},
+    {83, 19, 20, 3},
+    {83, 20, 19, 3},
+    {84, 20, 20, 1},
+    {36, 21, 11, 1},
+    {36, 22, 12, 1},
+    {36, 26, 15, 1},
+    {37, 21, 12, 2},
+    {37, 22, 11, 3},
+    {37, 22, 13, 2},
+    {37, 26, 16, 2},
+    {37, 23, 12, 3},
+    {37, 27, 15, 3},
+    {42, 21, 15, 2},
+    {42, 22, 16, 2},
+    {42, 26, 11, 3},
+    {42, 26, 18, 2},
+    {42, 27, 12, 3},
+    {42, 30, 15, 3},
+    {38, 21, 13, 1},
+    {38, 22, 12, 6},
+    {38, 22, 14, 1},
+    {38, 26, 17, 1},
+    {38, 23, 11, 3},
+    {38, 23, 13, 6},
+    {38, 27, 16, 6},
+    {38, 24, 12, 3},
+    {38, 28, 15, 3},
+    {43, 21, 16, 2},
+    {43, 22, 15, 6},
+    {43, 22, 17, 2},
+    {43, 26, 12, 6},
+    {43, 26, 19, 2},
+    {43, 23, 16, 6},
+    {43, 27, 11, 6},
+    {43, 27, 13, 6},
+    {43, 27, 18, 6},
+    {43, 30, 16, 6},
+    {43, 28, 12, 6},
+    {43, 31, 15, 6},
+    {47, 21, 18, 1},
+    {47, 22, 19, 1},
+    {47, 26, 15, 6},
+    {47, 26, 20, 1},
+    {47, 27, 16, 6},
+    {47, 30, 11, 3},
+    {47, 30, 18, 6},
+    {47, 31, 12, 3},
+    {47, 33, 15, 3},
+    {39, 22, 13, 3},
+    {39, 23, 12, 6},
+    {39, 23, 14, 3},
+    {39, 27, 17, 3},
+    {39, 24, 11, 1},
+    {39, 24, 13, 6},
+    {39, 28, 16, 6},
+    {39, 25, 12, 1},
+    {39, 29, 15, 1},
+    {44, 22, 16, 6},
+    {44, 26, 13, 3},
+    {44, 23, 15, 6},
+    {44, 23, 17, 6},
+    {44, 27, 12, 12},
+    {44, 27, 14, 3},
+    {44, 27, 19, 6},
+    {44, 30, 17, 3},
+    {44, 24, 16, 6},
+    {44, 28, 11, 3},
+    {44, 28, 13, 12},
+    {44, 28, 18, 6},
+    {44, 31, 16, 12},
+    {44, 29, 12, 3},
+    {44, 32, 15, 3},
+    {48, 22, 18, 3},
+    {48, 26, 16, 6},
+    {48, 23, 19, 3},
+    {48, 27, 15, 12},
+    {48, 27, 17, 6},
+    {48, 27, 20, 3},
+    {48, 30, 12, 6},
+    {48, 30, 19, 6},
+    {48, 28, 16, 12},
+    {48, 31, 11, 3},
+    {48, 31, 13, 6},
+    {48, 31, 18, 12},
+    {48, 33, 16, 6},
+    {48, 32, 12, 3},
+    {48, 34, 15, 3},
+    {51, 26, 18, 3},
+    {51, 27, 19, 3},
+    {51, 30, 15, 6},
+    {51, 30, 20, 3},
+    {51, 31, 16, 6},
+    {51, 33, 11, 1},
+    {51, 33, 18, 6},
+    {51, 34, 12, 1},
+    {51, 35, 15, 1},
+    {40, 23, 13, 3},
+    {40, 24, 12, 2},
+    {40, 24, 14, 3},
+    {40, 28, 17, 3},
+    {40, 25, 13, 2},
+    {40, 29, 16, 2},
+    {45, 23, 16, 6},
+    {45, 27, 13, 6},
+    {45, 24, 15, 2},
+    {45, 24, 17, 6},
+    {45, 28, 12, 6},
+    {45, 28, 14, 6},
+    {45, 28, 19, 6},
+    {45, 31, 17, 6},
+    {45, 25, 16, 2},
+    {45, 29, 13, 6},
+    {45, 29, 18, 2},
+    {45, 32, 16, 6},
+    {49, 23, 18, 3},
+    {49, 27, 16, 12},
+    {49, 30, 13, 3},
+    {49, 24, 19, 3},
+    {49, 28, 15, 6},
+    {49, 28, 17, 12},
+    {49, 28, 20, 3},
+    {49, 31, 12, 6},
+    {49, 31, 14, 3},
+    {49, 31, 19, 12},
+    {49, 33, 17, 3},
+    {49, 29, 16, 6},
+    {49, 32, 13, 6},
+    {49, 32, 18, 6},
+    {49, 34, 16, 6},
+    {52, 27, 18, 6},
+    {52, 30, 16, 6},
+    {52, 28, 19, 6},
+    {52, 31, 15, 6},
+    {52, 31, 17, 6},
+    {52, 31, 20, 6},
+    {52, 33, 12, 2},
+    {52, 33, 19, 6},
+    {52, 32, 16, 6},
+    {52, 34, 13, 2},
+    {52, 34, 18, 6},
+    {52, 35, 16, 2},
+    {54, 30, 18, 3},
+    {54, 31, 19, 3},
+    {54, 33, 15, 2},
+    {54, 33, 20, 3},
+    {54, 34, 16, 2},
+    {54, 35, 18, 2},
+    {41, 24, 13, 1},
+    {41, 25, 14, 1},
+    {41, 29, 17, 1},
+    {46, 24, 16, 2},
+    {46, 28, 13, 3},
+    {46, 25, 17, 2},
+    {46, 29, 14, 3},
+    {46, 29, 19, 2},
+    {46, 32, 17, 3},
+    {50, 24, 18, 1},
+    {50, 28, 16, 6},
+    {50, 31, 13, 3},
+    {50, 25, 19, 1},
+    {50, 29, 17, 6},
+    {50, 29, 20, 1},
+    {50, 32, 14, 3},
+    {50, 32, 19, 6},
+    {50, 34, 17, 3},
+    {53, 28, 18, 3},
+    {53, 31, 16, 6},
+    {53, 33, 13, 1},
+    {53, 29, 19, 3},
+    {53, 32, 17, 6},
+    {53, 32, 20, 3},
+    {53, 34, 14, 1},
+    {53, 34, 19, 6},
+    {53, 35, 17, 1},
+    {55, 31, 18, 3},
+    {55, 33, 16, 2},
+    {55, 32, 19, 3},
+    {55, 34, 17, 2},
+    {55, 34, 20, 3},
+    {55, 35, 19, 2},
+    {56, 33, 18, 1},
+    {56, 34, 19, 1},
+    {56, 35, 20, 1},
+    {21, 21, 21, 1},
+    {21, 22, 22, 2},
+    {21, 26, 26, 2},
+    {21, 23, 23, 1},
+    {21, 27, 27, 2},
+    {21, 30, 30, 1},
+    {22, 21, 22, 2},
+    {22, 22, 21, 2},
+    {22, 22, 23, 4},
+    {22, 26, 27, 4},
+    {22, 23, 22, 4},
+    {22, 23, 24, 2},
+    {22, 27, 26, 4},
+    {22, 27, 28, 4},
+    {22, 30, 31, 2},
+    {22, 24, 23, 2},
+    {22, 28, 27, 4},
+    {22, 31, 30, 2},
+    {26, 21, 26, 2},
+    {26, 22, 27, 4},
+    {26, 26, 21, 2},
+    {26, 26, 30, 4},
+    {26, 23, 28, 2},
+    {26, 27, 22, 4},
+    {26, 27, 31, 4},
+    {26, 30, 26, 4},
+    {26, 30, 33, 2},
+    {26, 28, 23, 2},
+    {26, 31, 27, 4},
+    {26, 33, 30, 2},
+    {23, 21, 23, 1},
+    {23, 22, 22, 4},
+    {23, 22, 24, 2},
+    {23, 26, 28, 2},
+    {23, 23, 21, 1},
+    {23, 23, 23, 8},
+    {23, 23, 25, 1},
+    {23, 27, 27, 8},
+    {23, 27, 29, 2},
+    {23, 30, 32, 1},
+    {23, 24, 22, 2},
+    {23, 24, 24, 4},
+    {23, 28, 26, 2},
+    {23, 28, 28, 8},
+    {23, 31, 31, 4},
+    {23, 25, 23, 1},
+    {23, 29, 27, 2},
+    {23, 32, 30, 1},
+    {27, 21, 27, 2},
+    {27, 22, 26, 4},
+    {27, 22, 28, 4},
+    {27, 26, 22, 4},
+    {27, 26, 31, 4},
+    {27, 23, 27, 8},
+    {27, 23, 29, 2},
+    {27, 27, 21, 2},
+    {27, 27, 23, 8},
+    {27, 27, 30, 8},
+    {27, 27, 32, 4},
+    {27, 30, 27, 8},
+    {27, 30, 34, 2},
+    {27, 24, 28, 4},
+    {27, 28, 22, 4},
+    {27, 28, 24, 4},
+    {27, 28, 31, 8},
+    {27, 31, 26, 4},
+    {27, 31, 28, 8},
+    {27, 31, 33, 4},
+    {27, 33, 31, 4},
+    {27, 29, 23, 2},
+    {27, 32, 27, 4},
+    {27, 34, 30, 2},
+    {30, 21, 30, 1},
+    {30, 22, 31, 2},
+    {30, 26, 26, 4},
+    {30, 26, 33, 2},
+    {30, 23, 32, 1},
+    {30, 27, 27, 8},
+    {30, 27, 34, 2},
+    {30, 30, 21, 1},
+    {30, 30, 30, 8},
+    {30, 30, 35, 1},
+    {30, 28, 28, 4},
+    {30, 31, 22, 2},
+    {30, 31, 31, 8},
+    {30, 33, 26, 2},
+    {30, 33, 33, 4},
+    {30, 32, 23, 1},
+    {30, 34, 27, 2},
+    {30, 35, 30, 1},
+    {24, 22, 23, 2},
+    {24, 23, 22, 2},
+    {24, 23, 24, 4},
+    {24, 27, 28, 4},
+    {24, 24, 23, 4},
+    {24, 24, 25, 2},
+    {24, 28, 27, 4},
+    {24, 28, 29, 4},
+    {24, 31, 32, 2},
+    {24, 25, 24, 2},
+    {24, 29, 28, 4},
+    {24, 32, 31, 2},
+    {28, 22, 27, 4},
+    {28, 26, 23, 2},
+    {28, 23, 26, 2},
+    {28, 23, 28, 8},
+    {28, 27, 22, 4},
+    {28, 27, 24, 4},
+    {28, 27, 31, 8},
+    {28, 30, 28, 4},
+    {28, 24, 27, 4},
+    {28, 24, 29, 4},
+    {28, 28, 23, 8},
+    {28, 28, 30, 4},
+    {28, 28, 25, 2},
+    {28, 28, 32, 8},
+    {28, 31, 27, 8},
+    {28, 31, 29, 4},
+    {28, 31, 34, 4},
+    {28, 33, 32, 2},
+    {28, 25, 28, 2},
+    {28, 29, 24, 4},
+    {28, 29, 31, 4},
+    {28, 32, 28, 8},
+    {28, 32, 33, 2},
+    {28, 34, 31, 4},
+    {31, 22, 30, 2},
+    {31, 26, 27, 4},
+    {31, 23, 31, 4},
+    {31, 27, 26, 4},
+    {31, 27, 28, 8},
+    {31, 27, 33, 4},
+    {31, 30, 22, 2},
+    {31, 30, 31, 8},
+    {31, 24, 32, 2},
+    {31, 28, 27, 8},
+    {31, 28, 29, 4},
+    {31, 28, 34, 4},
+    {31, 31, 23, 4},
+    {31, 31, 30, 8},
+    {31, 31, 32, 8},
+    {31, 31, 35, 2},
+    {31, 33, 27, 4},
+    {31, 33, 34, 4},
+    {31, 29, 28, 4},
+    {31, 32, 24, 2},
+    {31, 32, 31, 8},
+    {31, 34, 28, 4},
+    {31, 34, 33, 4},
+    {31, 35, 31, 2},
+    {33, 26, 30, 2},
+    {33, 27, 31, 4},
+    {33, 30, 26, 2},
+    {33, 30, 33, 4},
+    {33, 28, 32, 2},
+    {33, 31, 27, 4},
+    {33, 31, 34, 4},
+    {33, 33, 30, 4},
+    {33, 33, 35, 2},
+    {33, 32, 28, 2},
+    {33, 34, 31, 4},
+    {33, 35, 33, 2},
+    {25, 23, 23, 1},
+    {25, 24, 24, 2},
+    {25, 28, 28, 2},
+    {25, 25, 25, 1},
+    {25, 29, 29, 2},
+    {25, 32, 32, 1},
+    {29, 23, 27, 2},
+    {29, 27, 23, 2},
+    {29, 24, 28, 4},
+    {29, 28, 24, 4},
+    {29, 28, 31, 4},
+    {29, 31, 28, 4},
+    {29, 25, 29, 2},
+    {29, 29, 25, 2},
+    {29, 29, 32, 4},
+    {29, 32, 29, 4},
+    {29, 32, 34, 2},
+    {29, 34, 32, 2},
+    {32, 23, 30, 1},
+    {32, 27, 27, 4},
+    {32, 30, 23, 1},
+    {32, 24, 31, 2},
+    {32, 28, 28, 8},
+    {32, 28, 33, 2},
+    {32, 31, 24, 2},
+    {32, 31, 31, 8},
+    {32, 33, 28, 2},
+    {32, 25, 32, 1},
+    {32, 29, 29, 4},
+    {32, 29, 34, 2},
+    {32, 32, 25, 1},
+    {32, 32, 32, 8},
+    {32, 32, 35, 1},
+    {32, 34, 29, 2},
+    {32, 34, 34, 4},
+    {32, 35, 32, 1},
+    {34, 27, 30, 2},
+    {34, 30, 27, 2},
+    {34, 28, 31, 4},
+    {34, 31, 28, 4},
+    {34, 31, 33, 4},
+    {34, 33, 31, 4},
+    {34, 29, 32, 2},
+    {34, 32, 29, 2},
+    {34, 32, 34, 4},
+    {34, 34, 32, 4},
+    {34, 34, 35, 2},
+    {34, 35, 34, 2},
+    {35, 30, 30, 1},
+    {35, 31, 31, 2},
+    {35, 33, 33, 2},
+    {35, 32, 32, 1},
+    {35, 34, 34, 2},
+    {35, 35, 35, 1}
+  };
+
+  // cumulative offsets, including the terminating entry ns[nb4]
+  const int nb4 = nb[Pa];
+  for (int i = 0; i <= nb4; i++)
+    ns4[i] = ns[i];
+
+  // number of table rows covered by the selected offsets
+  const int nmax = ns4[nb4];
+  for (int i = 0; i < nmax; i++) {
+    pb4[i]        = poly[i][0]-1;
+    pb4[i+nmax]   = poly[i][1]-1;
+    pb4[i+2*nmax] = poly[i][2]-1;
+    pc4[i]        = poly[i][3];
+  }
+}
diff --git a/src/ML-POD/eapod.h b/src/ML-POD/eapod.h
new file mode 100644
index 00000000000..70c6b5e61e3
--- /dev/null
+++ b/src/ML-POD/eapod.h
@@ -0,0 +1,225 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/ Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_EAPOD_H
+#define LMP_EAPOD_H
+
+#include "pointers.h"
+
+#define DDOT ddot_    // upper-case aliases for the trailing-underscore symbols declared below
+#define DGEMV dgemv_
+#define DGEMM dgemm_
+#define DGETRF dgetrf_
+#define DGETRI dgetri_
+#define DSYEV dsyev_
+#define DPOSV dposv_
+
+extern "C" {    // C-linkage prototypes; presumably the Fortran BLAS/LAPACK routines of the same names - confirm against the link setup
+  double DDOT(int *, double *, int *, double *, int *);
+  void DGEMV(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
+             int *);
+  void DGEMM(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *,
+             double *, double *, int *);
+  void DGETRF(int *, int *, double *, int *, int *, int *);
+  void DGETRI(int *, double *, int *, int *, double *, int *, int *);
+  void DSYEV(char *, char *, int *, double *, int *, double *, double *, int *, int *);
+  void DPOSV(char *, int *, int *, double *, int *, double *, int *, int *);
+}
+
+namespace LAMMPS_NS {
+
+class EAPOD : protected Pointers {    // holds POD model parameters and declares the descriptor and energy/force routines
+private:
+  int indexmap3(int *indx, int n1, int n2, int n3, int N1, int N2);
+  int crossindices(int *dabf1, int nabf1, int nrbf1, int nebf1,
+         int *dabf2, int nabf2, int nrbf2, int nebf2, int dabf12, int nrbf12);
+  int crossindices(int *ind1, int *ind2, int *dabf1, int nabf1, int nrbf1, int nebf1,
+         int *dabf2, int nabf2, int nrbf2, int nebf2, int dabf12, int nrbf12);
+
+  void init3bodyarray(int *np, int *pq, int *pc, int Pa3);
+
+  void init4bodyarray(int *pa4, int *pb4, int *pc4, int Pa4);
+
+  void init2body();
+
+  void init3body(int Pa3);
+
+  void init4body(int Pa4);
+
+  void snapshots(double *rbf, double *xij, int N);
+
+  void eigenvaluedecomposition(double *Phi, double *Lambda, int N);
+
+  void myneighbors(double *rij, double *x, int *ai, int *aj, int *ti, int *tj,
+        int *jlist, int *pairnumsum, int *atomtype, int *alist, int i);
+
+  void radialbasis(double *rbf, double *rbfx, double *rbfy, double *rbfz, double *rij, double *besselparams, double rin,
+        double rmax, int besseldegree, int inversedegree, int nbesselpars, int N);
+
+  void angularbasis(double *abf, double *abfx, double *abfy, double *abfz, double *rij, double *tm, int *pq, int N, int K);
+
+  void radialangularbasis(double *sumU, double *U, double *Ux, double *Uy, double *Uz,
+        double *rbf, double *rbfx, double *rbfy, double *rbfz, double *abf,
+        double *abfx, double *abfy, double *abfz, double *tm, int *atomtype, int N, int K, int M, int Ne);
+
+  void MatMul(double *c, double *a, double *b, int r1, int c1, int c2);
+
+  void scalarproduct(double *d, double c, int N);
+
+  double dotproduct(double *c, double *d, int ndesc);
+
+  void mvproduct(double *fij, double *c, double *dd, int N, int ndesc);
+
+public:
+  std::vector<std::string> species;
+
+  double rin;     // presumably the inner cutoff radius - confirm in eapod.cpp
+  double rcut;    // presumably the outer cutoff radius - confirm in eapod.cpp
+  int true4BodyDesc;
+  
+
+  int nelements; // number of elements
+  int pbc[3];
+  int *elemindex ;
+
+  int onebody;   // one-body descriptors
+  int besseldegree;
+  int inversedegree;
+  int pdegree[2];
+  int nbesselpars;
+  int timing;
+  double comptime[20];
+  double besselparams[3];
+  double *Phi ;    // eigenvectors
+  double *Lambda ; // eigenvalues
+  double *coeff;  // coefficients
+  //double *newcoeff ;  // coefficients
+  double *tmpmem;  // temporary double array (ndblmem entries, see ndblmem below)
+
+  // multi-environment (cluster) variables
+  int nClusters; // number of environment clusters
+  int nComponents; // number of principal components
+  //int nNeighbors; // number of neighbors
+  int Mdesc; // number of base descriptors 
+  
+  double *Proj; // PCA Projection matrix
+  double *Centroids; // centroids of the clusters
+  double *bd; // base descriptors
+  double *bdd; // derivatives of the base descriptors with respect to the atomic positions
+  double *pd; //  multi-environment descriptors
+  double *pdd; // derivative of the multi-environment descriptors with respect to the atomic positions
+
+  int nproj; // number of elements in projection matrix (nComponents * Mdesc * nelements)
+  int ncentroids; // number of centroids (nComponents * nClusters * nelements)
+
+  int Njmax;
+  int nCoeffPerElement; // number of coefficients per element = (nl1 + Mdesc*nClusters)
+  int nCoeffAll; // number of coefficients for all elements = (nl1 + Mdesc*nClusters)*nelements
+  int ncoeff;  // number of coefficients in the input file 
+  int ns;      // number of snapshots for radial basis functions
+  int nd1, nd2, nd3, nd4, nd5, nd6, nd7, nd;   // number of global descriptors
+  int nl1, nl2, nl3, nl4, nl5, nl6, nl7, nl;   // number of local descriptors
+  int nrbf2, nrbf3, nrbf4, nrbfmax;            // number of radial basis functions
+  int nabf3, nabf4;                            // number of angular basis functions
+  int P3, P4;                                  // angular polynomial degrees
+  int K3, K4, Q4;                              // number of monomials
+  int *pn3, *pq3, *pc3;          // arrays to compute 3-body angular basis functions
+  int *pq4, *pa4, *pb4, *pc4; // arrays to compute 4-body angular basis functions
+  int *tmpint;  // temporary integer array (nintmem entries, see nintmem below)
+  int nintmem; // number of integers in tmpint array
+  int ndblmem; // number of doubles in tmpmem array
+
+  // four-body descriptors
+  int *ind23, *ind32, nrbf23, nabf23, P23, n23, n32, nl23, nd23;
+
+  // five-body descriptors
+  int *ind33, nrbf33, nabf33, P33, n33, nl33, nd33;
+
+  // six-body descriptors
+  int *ind34, *ind43, nrbf34, nabf34, nabf43, P34, n34, n43, nl34, nd34;
+
+  // seven-body descriptors
+  int *ind44, nrbf44, nabf44, P44, n44, nl44, nd44;
+
+  int nld33, nld34, nld44, ngd33, ngd34, ngd44;
+  int *ind33l, *ind33r, *ind34l, *ind34r, *ind44l, *ind44r;
+
+  EAPOD(LAMMPS *, const std::string &pod_file, const std::string &coeff_file, const std::string &proj_file, const std::string &centroids_file);
+
+  EAPOD(LAMMPS *lmp) : Pointers(lmp){};  // NOTE(review): leaves every raw-pointer and scalar member uninitialized - confirm ~EAPOD() tolerates that
+  ~EAPOD() override;
+
+  void print_matrix(const char* desc, int m, int n, int* a, int lda );
+  void print_matrix(const char* desc, int m, int n, double* a, int lda );
+
+  void read_pod_file(std::string pod_file);
+  int read_coeff_file(std::string coeff_file);
+  int read_projection_matrix(std::string proj_file);
+  int read_centroids(std::string centroids_file);
+
+  int estimate_temp_memory(int Nj);    
+  void free_temp_memory();  
+  void allocate_temp_memory(int Nj);
+
+  //void mknewcoeff();
+
+  void mknewcoeff(double *c, int nc);
+
+  void twobodydescderiv(double *d2, double *dd2, double *rbf, double *rbfx,
+        double *rbfy, double *rbfz, int *tj, int N);
+
+  void threebodydesc(double *d3, double *sumU, int N);
+
+  void threebodydescderiv(double *dd3, double *sumU, double *Ux, double *Uy, double *Uz,
+        int *atomtype, int N);
+
+  void fourbodydescderiv(double *d4, double *dd4, double *sumU, double *Ux, double *Uy, double *Uz,
+      int *atomtype, int N);
+
+  void descriptors(double *gd, double *gdd, double *basedesc, double *probdesc, double *x, int *atomtype, int *alist,
+          int *jlist, int *pairnumsum, int natom);
+
+  void descriptors(double *gd, double *gdd, double *basedesc, double *x, int *atomtype, int *alist,
+          int *jlist, int *pairnumsum, int natom);
+
+  void peratombase_descriptors(double *bd, double *bdd, double *rij, double *temp,
+        int *ti, int *tj, int Nj);
+
+  void peratomenvironment_descriptors(double *P, double *dP_dR, double *B, double *dB_dR, double *tmp, int elem, int nNeighbors);
+
+  void base_descriptors(double *basedesc, double *x, int *atomtype, int *alist,
+          int *jlist, int *pairnumsum, int natom);
+
+  void descriptors(double *basedesc, double *probdesc, double *x, int *atomtype, int *alist,
+          int *jlist, int *pairnumsum, int natom);
+  
+  double peratomenergyforce(double *fij, double *rij, double *temp, int *ti, int *tj, int Nj);
+
+  double energyforce(double *force, double *x, int *atomtype, int *alist,
+          int *jlist, int *pairnumsum, int natom);
+
+  void tallyforce(double *force, double *fij,  int *ai, int *aj, int N);
+
+  void fourbodydesc23(double* d23, double* d2, double *d3);
+  void fourbodydescderiv23(double* dd23, double* d2, double *d3, double* dd2, double *dd3, int N);
+
+  void crossdesc(double *d12, double *d1, double *d2, int *ind1, int *ind2, int n12);
+  void crossdescderiv(double *dd12, double *d1, double *d2, double *dd1, double *dd2,
+        int *ind1, int *ind2, int n12, int N);  
+
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+
diff --git a/src/ML-POD/fitpod_command.cpp b/src/ML-POD/fitpod_command.cpp
index ef39962e0bf..b21bb8cb37a 100644
--- a/src/ML-POD/fitpod_command.cpp
+++ b/src/ML-POD/fitpod_command.cpp
@@ -15,9 +15,10 @@
    Contributing authors: Ngoc Cuong Nguyen (MIT) and Andrew Rohskopf (SNL)
 ------------------------------------------------------------------------- */
 
+
 #include "fitpod_command.h"
 
-#include "mlpod.h"
+#include "eapod.h"
 
 #include "comm.h"
 #include "error.h"
@@ -28,15 +29,18 @@
 #include <algorithm>
 #include <cmath>
 #include <random>
-#include <utility>
+#include <string>
+#include <vector>
+#include <unordered_map>
 
 using namespace LAMMPS_NS;
 using MathSpecial::powint;
 
-static constexpr int MAXLINE = 1024;
+#define MAXLINE 1024
+
 static constexpr double SMALL = 1.0e-10;
 
-FitPOD::FitPOD(LAMMPS *_lmp) : Command(_lmp), podptr(nullptr)
+FitPOD::FitPOD(LAMMPS *_lmp) : Command(_lmp), fastpodptr(nullptr)  // the EAPOD object itself is allocated in command()
 {
 }
 
@@ -46,48 +50,115 @@ void FitPOD::command(int narg, char **arg)
 
   std::string pod_file = std::string(arg[0]);  // pod input file
   std::string data_file = std::string(arg[1]); // data input file
-  std::string coeff_file; // coefficient input file
+  std::string coeff_file, proj_file, cent_file; // coefficient input files
 
   if (narg > 2)
     coeff_file = std::string(arg[2]); // coefficient input file
   else
     coeff_file = "";
+  if (narg > 3)
+    proj_file = std::string(arg[3]); // projection input file
+  else
+    proj_file = "";
+  if (narg > 4)
+    cent_file = std::string(arg[4]); // centroid input file
+  else
+    cent_file = "";
+  
+  fastpodptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, cent_file);        
+  
+  desc.nCoeffAll = fastpodptr->nCoeffAll;
+  desc.nClusters = fastpodptr->nClusters;
+  read_data_files(data_file, fastpodptr->species);
+
+  estimate_memory_neighborstruct(traindata, fastpodptr->pbc, fastpodptr->rcut, fastpodptr->nelements);
+  estimate_memory_neighborstruct(testdata, fastpodptr->pbc, fastpodptr->rcut, fastpodptr->nelements);
+  if (desc.nClusters > 1) estimate_memory_neighborstruct(envdata, fastpodptr->pbc, fastpodptr->rcut, fastpodptr->nelements);
+  allocate_memory_neighborstruct();
+  estimate_memory_fastpod(traindata);
+  estimate_memory_fastpod(testdata);  
+  allocate_memory_descriptorstruct(fastpodptr->nCoeffAll);
+
+  if (coeff_file != "") podArrayCopy(desc.c, fastpodptr->coeff, fastpodptr->nCoeffAll);
+  
+  if (compute_descriptors==0) {
+
+    if (((int) envdata.data_path.size() > 1) && (desc.nClusters > 1)) {
+      environment_cluster_calculation(envdata);
+      //error->all(FLERR, "stop after enviroment_cluster_calculation");
+        memory->destroy(envdata.lattice);
+        memory->destroy(envdata.energy);
+        memory->destroy(envdata.stress);
+        memory->destroy(envdata.position);
+        memory->destroy(envdata.force);
+        memory->destroy(envdata.atomtype);
+        memory->destroy(envdata.we);
+        memory->destroy(envdata.wf);
+    }
 
-  podptr = new MLPOD(lmp, pod_file, coeff_file);
-  read_data_files(data_file, podptr->pod.species);
-
-  if ((int) traindata.data_path.size() > 1)
-    allocate_memory(traindata);
-  else if ((int) testdata.data_path.size() > 1)
-    allocate_memory(testdata);
+    // compute POD coefficients using least-squares method
 
-  // get POD coefficients from an input file
+    if (coeff_file == "") least_squares_fit(traindata);
+    //error->all(FLERR, "stop after least_squares_fit");
 
-  if (coeff_file != "") podArrayCopy(desc.c, podptr->pod.coeff, podptr->pod.nd);
+    // calculate errors for the training data set
 
-  // compute POD coefficients using least-squares method
+    if ((traindata.training_analysis) && ((int) traindata.data_path.size() > 1) )
+      error_analysis(traindata, desc.c);
 
-  least_squares_fit(traindata);
+    //error->all(FLERR, "stop after error_analysis");
 
-  // calculate errors for the training data set
+    // calculate energy and force for the training data set
 
-  if ((traindata.training_analysis) && ((int) traindata.data_path.size() > 1) )
-    error_analysis(traindata, desc.c);
+    if ((traindata.training_calculation) && ((int) traindata.data_path.size() > 1) )
+      energyforce_calculation(traindata, desc.c);
 
-  // calculate errors for the test data set
+    if (!((testdata.data_path == traindata.data_path) && (testdata.fraction == 1.0) && (traindata.fraction == 1.0)))
+    {
+      // calculate errors for the test data set
 
-  if ((testdata.test_analysis) && ((int) testdata.data_path.size() > 1) && (testdata.data_path != traindata.data_path))
-    error_analysis(testdata, desc.c);
+      if ((testdata.test_analysis) && ((int) testdata.data_path.size() > 1) && (testdata.fraction > 0) ) {
+        error_analysis(testdata, desc.c);
+      }
 
-  // calculate energy and force for the training data set
+      // calculate energy and force for the test data set
 
-  if ((traindata.training_calculation) && ((int) traindata.data_path.size() > 1) )
-    energyforce_calculation(traindata, desc.c);
+      if ((testdata.test_analysis) && (testdata.test_calculation) && ((int) testdata.data_path.size() > 1) && (testdata.fraction > 0) )
+        energyforce_calculation(testdata, desc.c);
 
-  // calculate energy and force for the test data set
+      // deallocate testing data
 
-  if ((testdata.test_calculation) && ((int) testdata.data_path.size() > 1) && (testdata.data_path != traindata.data_path) )
-    energyforce_calculation(testdata, desc.c);
+      if ((int) testdata.data_path.size() > 1 && (testdata.test_analysis) && (testdata.fraction > 0) ){
+        memory->destroy(testdata.lattice);
+        memory->destroy(testdata.energy);
+        memory->destroy(testdata.stress);
+        memory->destroy(testdata.position);
+        memory->destroy(testdata.force);
+        memory->destroy(testdata.atomtype);
+        memory->destroy(testdata.we);
+        memory->destroy(testdata.wf);
+      }
+    }
+  }
+  else if (compute_descriptors>0) {
+    // compute and save POD descriptors
+    descriptors_calculation(traindata);
+
+    if (!((testdata.data_path == traindata.data_path) && (testdata.fraction == 1.0)))
+    {
+      if ((int) testdata.data_path.size() > 1){
+          descriptors_calculation(testdata);
+          memory->destroy(testdata.lattice);
+          memory->destroy(testdata.energy);
+          memory->destroy(testdata.stress);
+          memory->destroy(testdata.position);
+          memory->destroy(testdata.force);
+          memory->destroy(testdata.atomtype);
+          memory->destroy(testdata.we);
+          memory->destroy(testdata.wf);
+        }
+    }
+  }
 
   // deallocate training data
 
@@ -98,44 +169,36 @@ void FitPOD::command(int narg, char **arg)
     memory->destroy(traindata.position);
     memory->destroy(traindata.force);
     memory->destroy(traindata.atomtype);
-  }
-
-  // deallocate testing data
-
-  if ((int) testdata.data_path.size() > 1 && (testdata.data_path != traindata.data_path)){
-    memory->destroy(testdata.lattice);
-    memory->destroy(testdata.energy);
-    memory->destroy(testdata.stress);
-    memory->destroy(testdata.position);
-    memory->destroy(testdata.force);
-    memory->destroy(testdata.atomtype);
+    memory->destroy(traindata.we);
+    memory->destroy(traindata.wf);
   }
 
   // deallocate descriptors
 
-  memory->destroy(desc.gd);
-  memory->destroy(desc.gdd);
   memory->destroy(desc.A);
   memory->destroy(desc.b);
   memory->destroy(desc.c);
-  memory->destroy(desc.tmpint);
-
-  // deallocate neighbor data
+  memory->destroy(desc.bd);
+  memory->destroy(desc.pd);
+  memory->destroy(desc.gd);
+  memory->destroy(desc.gdd);
 
+  // deallocate neighbor data
   memory->destroy(nb.alist);
   memory->destroy(nb.pairnum);
   memory->destroy(nb.pairnum_cumsum);
   memory->destroy(nb.pairlist);
   memory->destroy(nb.y);
-  delete podptr;
-}
 
-/* ---------------------------------------------------------------------- */
+  delete fastpodptr;
+}
 
 int FitPOD::read_data_file(double *fitting_weights, std::string &file_format,
-                             std::string &file_extension, std::string &test_path,
+                             std::string &file_extension, std::string &env_path, std::string &test_path,
                              std::string &training_path, std::string &filenametag,
-                             const std::string &data_file)
+                             const std::string &data_file, std::string &group_weight_type,
+                             std::unordered_map<std::string, double> &we_map,
+                             std::unordered_map<std::string, double> &wf_map)
 {
   int precision = 8;
 
@@ -150,8 +213,7 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format,
 
   // loop through lines of training data file and parse keywords
 
-  char line[MAXLINE] = {'\0'};
-  char *ptr;
+  char line[MAXLINE],*ptr;
   int eof = 0;
   while (true) {
     if (comm->me == 0) {
@@ -177,7 +239,7 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format,
 
     if (words.size() == 0) continue;
 
-    const auto &keywd = words[0];
+    auto keywd = words[0];
 
     if (words.size() != 2)
       error->one(FLERR,"Improper POD file.", utils::getsyserror());
@@ -197,6 +259,8 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format,
     if (keywd == "randomize_test_data_set") fitting_weights[10] = utils::numeric(FLERR,words[1],false,lmp);
     if (keywd == "fitting_regularization_parameter") fitting_weights[11] = utils::numeric(FLERR,words[1],false,lmp);
     if (keywd == "precision_for_pod_coefficients") precision = utils::inumeric(FLERR,words[1],false,lmp);
+    if (keywd == "save_pod_descriptors") save_descriptors = utils::inumeric(FLERR,words[1],false,lmp);
+    if (keywd == "compute_pod_descriptors") compute_descriptors = utils::inumeric(FLERR,words[1],false,lmp);
 
     // other settings
 
@@ -204,7 +268,64 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format,
     if (keywd == "file_extension") file_extension = words[1];
     if (keywd == "path_to_training_data_set") training_path = words[1];
     if (keywd == "path_to_test_data_set") test_path = words[1];
+    if (keywd == "path_to_enviroment_configuration_set") env_path = words[1];
     if (keywd == "basename_for_output_files") filenametag = words[1];
+
+    // group weight table
+    if (keywd == "group_weights") group_weight_type = words[1];
+    if (std::strcmp(group_weight_type.c_str(), "table") == 0){
+      // Read the table as a hash map.
+      // Get next line.
+      if (comm->me == 0) {
+        ptr = fgets(line,MAXLINE,fpdata);
+        if (ptr == nullptr) {
+          eof = 1;
+          fclose(fpdata);
+        }
+      }
+      MPI_Bcast(&eof,1,MPI_INT,0,world);
+      if (eof) break;
+      MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+      // Tokenize.
+      //std::vector<std::string> words;
+      try {
+        words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector();
+      } catch (TokenizerException &) {
+        // ignore
+      }
+      int numwords = words.size();
+
+      // Loop over group table entries.
+      while (numwords == 3){
+
+        // Insert in map.
+        double we = atof(words[1].c_str());
+        we_map[words[0]] = atof(words[1].c_str());
+        double wf = atof(words[2].c_str());
+        wf_map[words[0]] = atof(words[2].c_str());
+
+        // Get next line.
+        if (comm->me == 0) {
+          ptr = fgets(line,MAXLINE,fpdata);
+          if (ptr == nullptr) {
+            eof = 1;
+            fclose(fpdata);
+          }
+        }
+        MPI_Bcast(&eof,1,MPI_INT,0,world);
+        if (eof) break;
+        MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+        // Tokenize.
+        //std::vector<std::string> words;
+        try {
+          words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector();
+        } catch (TokenizerException &) {
+          // ignore
+        }
+        numwords = words.size();
+      }
+
+    }
   }
 
   if (comm->me == 0) {
@@ -213,6 +334,8 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format,
     utils::logmesg(lmp, "file extension: {}\n", file_extension);
     utils::logmesg(lmp, "path to training data set: {}\n", training_path);
     utils::logmesg(lmp, "path to test data set: {}\n", test_path);
+    utils::logmesg(lmp, "path to enviroment configuration set: {}\n", env_path);
+    utils::logmesg(lmp, "basename for output files: {}\n", filenametag);
     utils::logmesg(lmp, "training fraction: {}\n", fitting_weights[7]);
     utils::logmesg(lmp, "test fraction: {}\n", fitting_weights[8]);
     utils::logmesg(lmp, "randomize training data set: {}\n", fitting_weights[9]);
@@ -224,14 +347,15 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format,
     utils::logmesg(lmp, "fitting weight for energy: {}\n", fitting_weights[0]);
     utils::logmesg(lmp, "fitting weight for force: {}\n", fitting_weights[1]);
     utils::logmesg(lmp, "fitting weight for stress: {}\n", fitting_weights[2]);
-    utils::logmesg(lmp, "fitting regularization parameter: {}\n", fitting_weights[11]);
+    utils::logmesg(lmp, "save pod descriptors: {}\n", save_descriptors);
+    utils::logmesg(lmp, "compute pod descriptors: {}\n", compute_descriptors);
     utils::logmesg(lmp, "**************** End of Data File ****************\n");
   }
 
   return precision;
 }
 
-void FitPOD::get_exyz_files(std::vector<std::string>& files, const std::string &datapath,
+void FitPOD::get_exyz_files(std::vector<std::string>& files, std::vector<std::string> &group_names, const std::string &datapath,
                              const std::string &extension)
 {
   auto allfiles = platform::list_directory(datapath);
@@ -239,6 +363,11 @@ void FitPOD::get_exyz_files(std::vector<std::string>& files, const std::string &
   for (const auto &fname : allfiles) {
-    if (utils::strmatch(fname, fmt::format(".*\\.{}$", extension)))
+    if (utils::strmatch(fname, fmt::format(".*\\.{}$", extension))) {
       files.push_back(datapath + platform::filepathsep + fname);
+      // Group name = file name without the ".<extension>" suffix. Recorded only
+      // for matching files so that group_names[i] stays paired with files[i].
+      group_names.push_back(
+          fname.substr(0, fname.size() - extension.size() - 1));
+    }
   }
 }
 
@@ -252,8 +381,7 @@ int FitPOD::get_number_atom_exyz(std::vector<int>& num_atom, int& num_atom_sum,
       error->one(FLERR,"Cannot open POD coefficient file {}: ", filename, utils::getsyserror());
   }
 
-  char line[MAXLINE] = {'\0'};
-  char *ptr;
+  char line[MAXLINE],*ptr;
   int eof = 0;
   int num_configs = 0;
   num_atom_sum = 0;
@@ -313,8 +441,8 @@ int FitPOD::get_number_atoms(std::vector<int>& num_atom, std::vector<int> &num_a
   return num_atom_all;
 }
 
-void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, double *pos, double *forces,
-    int *atomtype, std::string file, std::vector<std::string> species)
+void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, double *we, double *wf, double *pos, double *forces,
+    int *atomtype, std::string file, std::vector<std::string> species, double we_group, double wf_group)
 {
 
   std::string filename = std::move(file);
@@ -325,8 +453,7 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou
       error->one(FLERR,"Cannot open POD coefficient file {}: ", filename, utils::getsyserror());
   }
 
-  char line[MAXLINE] = {'\0'};
-  char *ptr;
+  char line[MAXLINE],*ptr;
   int eof = 0;
   int cfi = 0;
   int nat = 0;
@@ -385,54 +512,62 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou
         }
       }
 
-      // find the word containing "energy"
+      if (compute_descriptors == 0) {
 
-      it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("nergy") != std::string::npos; });
+        // find the word containing "energy"
 
-      // get index of element from iterator
+        it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("nergy") != std::string::npos; });
 
-      index = std::distance(words.begin(), it);
+        // get index of element from iterator
 
-      if (words[index].find("=") != std::string::npos) {
+        index = std::distance(words.begin(), it);
 
-        // energy is after "=" inside this string
+        if (words[index].find("=") != std::string::npos) {
 
-        std::size_t found = words[index].find("=");
-        energy[cfi] = utils::numeric(FLERR,words[index].substr(found+1),false,lmp);
-      } else {
+          // energy is after "=" inside this string
 
-        // energy is at index + 2
+          std::size_t found = words[index].find("=");
+          energy[cfi] = utils::numeric(FLERR,words[index].substr(found+1),false,lmp);
+        } else {
 
-        energy[cfi] = utils::numeric(FLERR,words[index+2],false,lmp);
+          // energy is at index + 2
 
-      }
+          energy[cfi] = utils::numeric(FLERR,words[index+2],false,lmp);
+        }
 
-      // find the word containing "stress"
+        // find the word containing "stress"
 
-      it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("tress") != std::string::npos; });
+        it = std::find_if(words.begin(), words.end(), [](const std::string& str) { return str.find("tress") != std::string::npos; });
 
-      // get index of element from iterator
+        // get index of element from iterator
 
-      index = std::distance(words.begin(), it);
+        index = std::distance(words.begin(), it);
 
-      if (words[index].find("=") != std::string::npos) {
+        if (index < std::distance(words.begin(), words.end())) {
+          if (words[index].find("=") != std::string::npos) {
 
-        // stress numbers start at index + 1
+            // stress numbers start at index + 1
 
-        for (int k = 0; k < 9; k++) {
-          stress[k + 9*cfi] = utils::numeric(FLERR,words[index+1+k],false,lmp);
-        }
-      } else {
+            for (int k = 0; k < 9; k++) {
+              stress[k + 9*cfi] = utils::numeric(FLERR,words[index+1+k],false,lmp);
+            }
+          } else {
 
-        // lattice numbers start at index + 2
+            // stress numbers start at index + 2
 
-        for (int k = 0; k < 9; k++) {
-          stress[k + 9*cfi] = utils::numeric(FLERR,words[index+2+k],false,lmp);
+            for (int k = 0; k < 9; k++) {
+              stress[k + 9*cfi] = utils::numeric(FLERR,words[index+2+k],false,lmp);
+            }
+          }
         }
       }
 
-      cfi += 1;
+      // set fitting weights for this config
 
+      we[cfi] = we_group;
+      wf[cfi] = wf_group;
+
+      cfi += 1;
     }
 
     // loop over atoms
@@ -443,18 +578,25 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou
         if (species[ii] == words[0])
           atomtype[nat] = ii+1;
 
-      for (int k = 0; k < 6; k++) {
-        if (k <= 2) pos[k + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp);
-        if (k > 2 ) forces[k-3 + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp);
+      if (compute_descriptors> 0) {
+        for (int k = 0; k < 3; k++)
+          pos[k + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp);
       }
+      else {
+        for (int k = 0; k < 6; k++) {
+          if (k <= 2) pos[k + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp);
+          if (k > 2 ) forces[k-3 + 3*nat] = utils::numeric(FLERR,words[1+k],false,lmp);
+        }
+      }
+
       nat += 1;
     }
   }
 }
 
-void FitPOD::get_data(datastruct &data, const std::vector<std::string>& species)
+void FitPOD::get_data(datastruct &data, const std::vector<std::string> &species)
 {
-  get_exyz_files(data.data_files, data.data_path, data.file_extension);
+  get_exyz_files(data.data_files, data.group_names, data.data_path, data.file_extension);
   data.num_atom_sum = get_number_atoms(data.num_atom, data.num_atom_each_file, data.num_config, data.data_files);
   data.num_config_sum = data.num_atom.size();
   size_t maxname = 9;
@@ -486,18 +628,35 @@ void FitPOD::get_data(datastruct &data, const std::vector<std::string>& species)
   memory->create(data.lattice, 9*n, "fitpod:lattice");
   memory->create(data.stress, 9*n, "fitpod:stress");
   memory->create(data.energy, n, "fitpod:energy");
+  // Group weights have same size as energy.
+  memory->create(data.we, n, "fitpod:we");
+  memory->create(data.wf, n, "fitpod:wf");
+  
   n = data.num_atom_sum;
   memory->create(data.position, 3*n, "fitpod:position");
   memory->create(data.force, 3*n, "fitpod:force");
   memory->create(data.atomtype, n, "fitpod:atomtype");
 
+  double we_group, wf_group; // group weights
   int nfiles = data.data_files.size(); // number of files
   int nconfigs = 0;
   int natoms = 0;
   for (int i=0; i<nfiles; i++) {
-    read_exyz_file(&data.lattice[9*nconfigs], &data.stress[9*nconfigs], &data.energy[nconfigs],
+    std::string group_name = data.group_names[i];
+    // If weight maps have this group, assign weight based on map.
+    // Else assign weight based on global value.
+    if (data.we_map.find(group_name) != data.we_map.end())
+    {
+      we_group = data.we_map[group_name];
+      wf_group = data.wf_map[group_name];
+    } else {
+      we_group = data.fitting_weights[0];
+      wf_group = data.fitting_weights[1];
+    }
+    //utils::logmesg(lmp, "Read xyz file: {}\n", group_name);
+    read_exyz_file(&data.lattice[9*nconfigs], &data.stress[9*nconfigs], &data.energy[nconfigs], &data.we[nconfigs], &data.wf[nconfigs],
         &data.position[3*natoms], &data.force[3*natoms], &data.atomtype[natoms],
-        data.data_files[i], species);
+        data.data_files[i], species, we_group, wf_group);
     nconfigs += data.num_config[i];
     natoms += data.num_atom_each_file[i];
   }
@@ -579,8 +738,9 @@ std::vector<int> FitPOD::shuffle(int start_in, int end_in, int num_in)
   for (int i = 0; i<sz; i++)
     myvector[i] = start_in + i;
 
-  unsigned seed = (unsigned) platform::walltime()*1.0e9;
-  std::shuffle (myvector.begin(), myvector.end(), std::default_random_engine(seed));
+  //unsigned seed = (unsigned) platform::walltime()*1.0e9;
+  //std::shuffle (myvector.begin(), myvector.end(), std::default_random_engine(seed));
+  std::shuffle (myvector.begin(), myvector.end(), std::random_device());
 
   std::vector<int> shuffle_vec(num_in);
   for (int i = 0; i<num_in; i++)
@@ -644,11 +804,15 @@ void FitPOD::select_data(datastruct &newdata, const datastruct &data)
   podCumsum(&newdata.num_config_cumsum[0], &newdata.num_config[0], nfiles+1);
   newdata.num_config_sum = newdata.num_atom.size();
 
-  int n = data.num_config_sum;
+  int n = newdata.num_config_sum;
   memory->create(newdata.lattice, 9*n, "fitpod:newdata_lattice");
   memory->create(newdata.stress, 9*n, "fitpod:newdata_stress");
   memory->create(newdata.energy, n, "fitpod:newdata_energy");
-  n = data.num_atom_sum;
+  // Group weights have same size as energy.
+  memory->create(newdata.we, n, "fitpod:we");
+  memory->create(newdata.wf, n, "fitpod:wf");
+  
+  n = newdata.num_atom_sum;
   memory->create(newdata.position, 3*n, "fitpod:newdata_position");
   memory->create(newdata.force, 3*n, "fitpod:newdata_force");
   memory->create(newdata.atomtype, n, "fitpod:newdata_atomtype");
@@ -673,6 +837,8 @@ void FitPOD::select_data(datastruct &newdata, const datastruct &data)
       double *force = &data.force[dim*natom_cumsum];
 
       newdata.energy[cn] = data.energy[ci];
+      newdata.we[cn] = data.we[ci];
+      newdata.wf[cn] = data.wf[ci];
       for (int j=0; j<9; j++) {
         newdata.stress[j+9*cn] = data.stress[j+9*ci];
         newdata.lattice[j+9*cn] = data.lattice[j+9*ci];
@@ -719,8 +885,8 @@ void FitPOD::read_data_files(const std::string& data_file, const std::vector<std
 
   // read data input file to datastruct
 
-  data.precision = read_data_file(data.fitting_weights, data.file_format, data.file_extension,
-                      testdata.data_path, data.data_path, data.filenametag, data_file);
+  data.precision = read_data_file(data.fitting_weights, data.file_format, data.file_extension, envdata.data_path,
+                      testdata.data_path, data.data_path, data.filenametag, data_file, data.group_weight_type, data.we_map, data.wf_map);
 
   data.training_analysis = (int) data.fitting_weights[3];
   data.test_analysis = (int) data.fitting_weights[4];
@@ -764,25 +930,70 @@ void FitPOD::read_data_files(const std::string& data_file, const std::vector<std
     memory->destroy(data.atomtype);
   }
 
-  if (((int) testdata.data_path.size() > 1) && (testdata.data_path != traindata.data_path)) {
+  testdata.fraction = traindata.fitting_weights[8];
+  testdata.test_analysis = traindata.test_analysis;
+  testdata.filenametag =  traindata.filenametag;
+
+  if (((int) envdata.data_path.size() > 1) && (desc.nClusters > 1)) {
+    envdata.filenametag =  traindata.filenametag;
+    envdata.file_format = traindata.file_format;
+    envdata.file_extension = traindata.file_extension;
+    int tmp = compute_descriptors;
+    compute_descriptors = 1;
+    if (comm->me == 0)
+      utils::logmesg(lmp, "**************** Begin of Enviroment Configuration Set ****************\n");
+    get_data(envdata, species);    
+    if (comm->me == 0)
+      utils::logmesg(lmp, "**************** End of Enviroment Configuration Set ****************\n");
+    compute_descriptors = tmp;
+  }
+
+  if ((testdata.data_path == traindata.data_path) && (testdata.fraction == 1.0) && (traindata.fraction == 1.0)) {
+    testdata.data_path = traindata.data_path;
+  }
+  else if (((int) testdata.data_path.size() > 1) && (testdata.fraction > 0) && (testdata.test_analysis)) {
     testdata.training = 0;
     testdata.file_format = traindata.file_format;
     testdata.file_extension = traindata.file_extension;
     testdata.training_analysis = traindata.training_analysis;
-    testdata.test_analysis = traindata.test_analysis;
     testdata.training_calculation = traindata.training_calculation;
     testdata.test_calculation = traindata.test_calculation;
-    testdata.fraction = traindata.fitting_weights[8];
     testdata.randomize = (int) traindata.fitting_weights[10];
-    if (comm->me == 0)
-      utils::logmesg(lmp, "**************** Begin of Test Data Set ****************\n");
-    get_data(testdata, species);
-    if (comm->me == 0)
-      utils::logmesg(lmp, "**************** End of Test Data Set ****************\n");
+
+    if (testdata.fraction >= 1.0) {
+      if (comm->me == 0)
+        utils::logmesg(lmp, "**************** Begin of Test Data Set ****************\n");
+      get_data(testdata, species);
+      if (comm->me == 0)
+        utils::logmesg(lmp, "**************** End of Test Data Set ****************\n");
+    }
+    else {
+      datastruct datatm;
+      testdata.copydatainfo(datatm);
+
+      if (comm->me == 0)
+        utils::logmesg(lmp, "**************** Begin of Test Data Set ****************\n");
+      get_data(datatm, species);
+      if (comm->me == 0)
+        utils::logmesg(lmp, "**************** End of Test Data Set ****************\n");
+
+      if (comm->me == 0)
+        utils::logmesg(lmp, "**************** Begin of Select Test Data Set ****************\n");
+      select_data(testdata, datatm);
+      if (comm->me == 0)
+        utils::logmesg(lmp, "**************** End of Select Test Data Set ****************\n");
+
+      memory->destroy(datatm.lattice);
+      memory->destroy(datatm.energy);
+      memory->destroy(datatm.stress);
+      memory->destroy(datatm.position);
+      memory->destroy(datatm.force);
+      memory->destroy(datatm.atomtype);
+    }
   }
   else {
     testdata.data_path = traindata.data_path;
-  }
+  }    
 }
 
 int FitPOD::latticecoords(double *y, int *alist, double *x, double *a1, double *a2, double *a3, double rcut, int *pbc, int nx)
@@ -868,34 +1079,10 @@ int FitPOD::podfullneighborlist(double *y, int *alist, int *neighlist, int *numn
   return nn;
 }
 
-void FitPOD::allocate_memory(const datastruct &data)
+void FitPOD::estimate_memory_neighborstruct(const datastruct &data, int *pbc, double rcut, int nelements)
 {
-  int nd = podptr->pod.nd;
-  memory->create(desc.gd, nd, "fitpod:desc_gd");
-  memory->create(desc.A, nd*nd, "fitpod:desc_A");
-  memory->create(desc.b, nd, "fitpod:desc_b");
-  memory->create(desc.c, nd, "fitpod:desc_c");
-  podArraySetValue(desc.A, 0.0, nd*nd);
-  podArraySetValue(desc.b, 0.0, nd);
-  podArraySetValue(desc.c, 0.0, nd);
-
   int dim = 3;
   int natom_max = data.num_atom_max;
-  int nd1 = podptr->pod.nd1;
-  int nd2 = podptr->pod.nd2;
-  int nd3 = podptr->pod.nd3;
-  int nd4 = podptr->pod.nd4;
-  int nelements = podptr->pod.nelements;
-  int nbesselpars = podptr->pod.nbesselpars;
-  int nrbf2 = podptr->pod.nbf2;
-  int nabf3 = podptr->pod.nabf3;
-  int nrbf3 = podptr->pod.nrbf3;
-  int *pdegree2 = podptr->pod.twobody;
-  int *pdegree3 = podptr->pod.threebody;
-  int *pbc = podptr->pod.pbc;
-  double rcut = podptr->pod.rcut;
-
-  int Nj=0, Nij=0;
   int m=0, n=0, p=0, nl=0, ny=0, na=0, np=0;
 
   for (int ci=0; ci<(int) data.num_atom.size(); ci++)
@@ -917,22 +1104,53 @@ void FitPOD::allocate_memory(const datastruct &data)
     np = MAX(np, natom*natom*nl);
   }
 
-  memory->create(nb.y, ny, "fitpod:nb_y");
-  memory->create(nb.alist, na, "fitpod:nb_alist");
-  memory->create(nb.pairnum, natom_max, "fitpod:nb_pairnum");
-  memory->create(nb.pairnum_cumsum, natom_max+1, "fitpod:nb_pairnum_cumsum");
-  memory->create(nb.pairlist, np, "fitpod:nb_pairlist");
-
-  nb.natom_max = natom_max;
+  nb.natom_max = MAX(nb.natom_max, natom_max);
   nb.sze = nelements*nelements;
-  nb.sza = na;
-  nb.szy = ny;
-  nb.szp = np;
+  nb.sza = MAX(nb.sza, na);
+  nb.szy = MAX(nb.szy, ny);
+  nb.szp = MAX(nb.szp, np);
+}
 
-  if (comm->me == 0)
-    utils::logmesg(lmp,"**************** Begin of Memory Allocation ****************\n");
+void FitPOD::allocate_memory_neighborstruct()
+{ // allocate the neighbor-list work arrays using the sizes currently stored in nb
+  memory->create(nb.y, nb.szy, "fitpod:nb_y");
+  memory->create(nb.alist, nb.sza, "fitpod:nb_alist");
+  memory->create(nb.pairnum, nb.natom_max, "fitpod:nb_pairnum");
+  memory->create(nb.pairnum_cumsum, nb.natom_max+1, "fitpod:nb_pairnum_cumsum"); // one entry more than pairnum
+  memory->create(nb.pairlist, nb.szp, "fitpod:nb_pairlist");
+}
+
+void FitPOD::allocate_memory_descriptorstruct(int nCoeffAll)
+{ // allocate the descriptor work arrays and the zeroed nCoeffAll x nCoeffAll system (A, b, c)
+  memory->create(desc.bd, nb.natom_max*fastpodptr->Mdesc, "fitpod:desc_bd");
+  memory->create(desc.pd, nb.natom_max*fastpodptr->nClusters, "fitpod:desc_pd");
+  memory->create(desc.gd, nCoeffAll, "fitpod:desc_gd");
+  memory->create(desc.A, nCoeffAll*nCoeffAll, "fitpod:desc_A");
+  memory->create(desc.b, nCoeffAll, "fitpod:desc_b");
+  memory->create(desc.c, nCoeffAll, "fitpod:desc_c");
+  memory->create(desc.gdd, desc.szd, "fitpod:desc_gdd"); // desc.szd must already hold the required size
+  podArraySetValue(desc.A, 0.0, nCoeffAll*nCoeffAll);
+  podArraySetValue(desc.b, 0.0, nCoeffAll);
+  podArraySetValue(desc.c, 0.0, nCoeffAll);
+
+  if (comm->me == 0) { // only rank 0 reports the allocated sizes
+    utils::logmesg(lmp, "**************** Begin of Memory Allocation ****************\n");
+    utils::logmesg(lmp, "maximum number of atoms in periodic domain: {}\n", nb.natom_max);
+    utils::logmesg(lmp, "maximum number of atoms in extended domain: {}\n", nb.sza);
+    utils::logmesg(lmp, "maximum number of neighbors in extended domain: {}\n", nb.szp);
+    utils::logmesg(lmp, "size of double memory: {}\n", desc.szd);
+    utils::logmesg(lmp, "size of descriptor matrix: {} x {}\n", nCoeffAll, nCoeffAll);
+    utils::logmesg(lmp, "**************** End of Memory Allocation ****************\n");
+  }
+}
 
-  int szd = 0, szi=0, szsnap=0;
+void FitPOD::estimate_memory_fastpod(const datastruct &data)
+{
+  int dim = 3;
+  int *pbc = fastpodptr->pbc;
+  double rcut = fastpodptr->rcut;
+
+  int Nij=0, Nijmax=0;
   for (int ci=0; ci<(int) data.num_atom.size(); ci++)
   {
     int natom = data.num_atom[ci];
@@ -944,57 +1162,46 @@ void FitPOD::allocate_memory(const datastruct &data)
     double *a3 = &lattice[6];
 
     Nij = podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum, x, a1, a2, a3, rcut, pbc, natom);
-
-    int ns2 = pdegree2[0]*nbesselpars + pdegree2[1];
-    int ns3 = pdegree3[0]*nbesselpars + pdegree3[1];
-
-    int szd1 = 3*Nij+ (1+dim)*Nij*MAX(nrbf2+ns2,nrbf3+ns3) + (nabf3+1)*7;
-    int szi1 = 6*Nij + 2*natom+1 + (Nj-1)*Nj;
-    szd = MAX(szd, szd1);
-    szi = MAX(szi, szi1);
-
-    if (podptr->sna.twojmax>0) {
-      szd1 = 0;
-      szd1 += Nij*dim; // rij
-      szd1 += MAX(2*podptr->sna.idxu_max*Nij, 2*podptr->sna.idxz_max*podptr->sna.ndoubles*natom); // (Ur, Ui) and (Zr, Zi)
-      szd1 += 2*podptr->sna.idxu_max*dim*Nij; // dUr, dUi
-      szd1 += MAX(podptr->sna.idxb_max*podptr->sna.ntriples*dim*Nij, 2*podptr->sna.idxu_max*podptr->sna.nelements*natom); // dblist and (Utotr, Utoti)
-      szsnap = MAX(szsnap, szd1);
-    }
+    Nijmax = MAX(Nijmax, Nij);
   }
 
-  szd = MAX(szsnap, szd);
-  szd = MAX(natom_max*(nd1+nd2+nd3+nd4) + szd, dim*natom_max*(nd-nd1-nd2-nd3-nd4));
-  szd = dim*natom_max*(nd1+nd2+nd3+nd4) + szd;
+  desc.szd = MAX(desc.szd, 3*Nijmax*fastpodptr->nCoeffAll);
+}
 
-  // gdd includes linear descriptors derivatives, quadratic descriptors derivatives and temporary memory
+void FitPOD::local_descriptors_fastpod(const datastruct &data, int ci)
+{ // build the neighbor list of configuration ci and evaluate its POD descriptors into desc
+  int dim = 3;
+  int *pbc = fastpodptr->pbc;
+  double rcut = fastpodptr->rcut;
+  // locate configuration ci inside the flattened per-atom and per-configuration arrays
+  int natom = data.num_atom[ci];
+  int natom_cumsum = data.num_atom_cumsum[ci];
+  int *atomtype = &data.atomtype[natom_cumsum];
+  double *position = &data.position[dim*natom_cumsum];
+  double *lattice = &data.lattice[9*ci]; // 9 lattice entries per configuration, split into a1, a2, a3
+  double *a1 = &lattice[0];
+  double *a2 = &lattice[3];
+  double *a3 = &lattice[6];
 
-  memory->create(desc.gdd, szd, "fitpod:desc_gdd");
-  memory->create(desc.tmpint, szi, "fitpod:desc_tmpint");
-  desc.szd = szd;
-  desc.szi = szi;
+  // build the neighbor list of this configuration in the shared nb work arrays
+  podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum,
+          position, a1, a2, a3, rcut, pbc, natom);
 
-  if (comm->me == 0) {
-    utils::logmesg(lmp, "maximum number of atoms in periodic domain: {}\n", natom_max);
-    utils::logmesg(lmp, "maximum number of atoms in extended domain: {}\n", nb.sza);
-    utils::logmesg(lmp, "maximum number of neighbors in extended domain: {}\n", nb.szp);
-    utils::logmesg(lmp, "size of double memory: {}\n", szd);
-    utils::logmesg(lmp, "size of int memory: {}\n", szi);
-    utils::logmesg(lmp, "size of descriptor matrix: {} x {}\n", nd, nd);
-    utils::logmesg(lmp, "**************** End of Memory Allocation ****************\n");
+  if (desc.nClusters > 1) { // with several clusters the call additionally receives desc.pd
+    fastpodptr->descriptors(desc.gd, desc.gdd, desc.bd, desc.pd, nb.y, atomtype, nb.alist, nb.pairlist,
+            nb.pairnum_cumsum, natom);
+  }
+  else {
+    fastpodptr->descriptors(desc.gd, desc.gdd, desc.bd, nb.y, atomtype, nb.alist, nb.pairlist,
+            nb.pairnum_cumsum, natom);
   }
 }
 
-void FitPOD::linear_descriptors(const datastruct &data, int ci)
+void FitPOD::base_descriptors_fastpod(const datastruct &data, int ci)
 {
   int dim = 3;
-  int nd1 = podptr->pod.nd1;
-  int nd2 = podptr->pod.nd2;
-  int nd3 = podptr->pod.nd3;
-  int nd4 = podptr->pod.nd4;
-  int nd1234 = nd1+nd2+nd3+nd4;
-  int *pbc = podptr->pod.pbc;
-  double rcut = podptr->pod.rcut;
+  int *pbc = fastpodptr->pbc;
+  double rcut = fastpodptr->rcut;
 
   int natom = data.num_atom[ci];
   int natom_cumsum = data.num_atom_cumsum[ci];
@@ -1006,162 +1213,227 @@ void FitPOD::linear_descriptors(const datastruct &data, int ci)
   double *a3 = &lattice[6];
 
   // neighbor list
-  int Nij = podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum,
-        position, a1, a2, a3, rcut, pbc, natom);
-
-  int *tmpint = &desc.tmpint[0];
-  double *tmpmem = &desc.gdd[dim*natom*nd1234+natom*nd1234];
-  podptr->linear_descriptors(desc.gd, desc.gdd, nb.y, tmpmem, atomtype, nb.alist,
-      nb.pairlist, nb.pairnum, nb.pairnum_cumsum, tmpint, natom, Nij);
+  podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum,
+          position, a1, a2, a3, rcut, pbc, natom);
 
+  fastpodptr->base_descriptors(desc.bd, nb.y, atomtype, nb.alist, nb.pairlist,
+          nb.pairnum_cumsum, natom);
 }
 
-void FitPOD::quadratic_descriptors(const datastruct &data, int ci)
+void FitPOD::descriptors_calculation(const datastruct &data)
 {
-  int dim = 3;
-  int natom = data.num_atom[ci];
-  int nd1 = podptr->pod.nd1;
-  int nd2 = podptr->pod.nd2;
-  int nd3 = podptr->pod.nd3;
-  int nd4 = podptr->pod.nd4;
-  int nd22 = podptr->pod.nd22;
-  int nd23 = podptr->pod.nd23;
-  int nd24 = podptr->pod.nd24;
-  int nd33 = podptr->pod.nd33;
-  int nd34 = podptr->pod.nd34;
-  int nd44 = podptr->pod.nd44;
-  int nd123 = nd1+nd2+nd3;
-  int nd1234 = nd1+nd2+nd3+nd4;
-
-  double *fatom2 = &desc.gdd[dim*natom*(nd1)];
-  double *fatom3 = &desc.gdd[dim*natom*(nd1+nd2)];
-  double *fatom4 = &desc.gdd[dim*natom*(nd123)];
-
-  // global descriptors for four-body quadratic22 potential
-
-  if (nd22 > 0) {
-    int nq2 = podptr->pod.quadratic22[0]*podptr->pod.nc2;
-    podptr->quadratic_descriptors(&desc.gd[nd1234], &desc.gdd[dim*natom*nd1234],
-        &desc.gd[nd1], fatom2, nq2, dim*natom);
-  }
+  if (comm->me == 0)
+    utils::logmesg(lmp, "**************** Begin Calculating Descriptors ****************\n");
+  // Compute the descriptors of every configuration in data and write them to binary files in data.data_path.
+  // loop over each configuration in the given data set
 
-  // global descriptors for four-body quadratic23 potential
+  double sz[2]; // two-entry header (stored as doubles) written at the start of each file
+  for (int ci=0; ci < (int) data.num_atom.size(); ci++) {
 
-  if (nd23 > 0) {
-    int nq2 = podptr->pod.quadratic23[0]*podptr->pod.nc2;
-    int nq3 = podptr->pod.quadratic23[1]*podptr->pod.nc3;
-    podptr->quadratic_descriptors(&desc.gd[nd1234+nd22], &desc.gdd[dim*natom*(nd1234+nd22)],
-        &desc.gd[nd1], &desc.gd[nd1+nd2], fatom2, fatom3, nq2, nq3, dim*natom);
-  }
+    if ((ci % 100)==0) { // progress message every 100 configurations
+      if (comm->me == 0)
+        utils::logmesg(lmp, "Configuration: # {}\n", ci+1);
+    }
 
-  // global descriptors for five-body quadratic24 potential
+    if ((ci % comm->nprocs) == comm->me) { // configuration ci is handled by rank ci % nprocs
 
-  if (nd24 > 0) {
-    int nq2 = podptr->pod.quadratic24[0]*podptr->pod.nc2;
-    int nq4 = podptr->pod.quadratic24[1]*podptr->pod.nc4;
-    podptr->quadratic_descriptors(&desc.gd[nd1234+nd22+nd23], &desc.gdd[dim*natom*(nd1234+nd22+nd23)],
-        &desc.gd[nd1], &desc.gd[nd1+nd2+nd3], fatom2, fatom4, nq2, nq4, dim*natom);
+      // compute local POD descriptors into the desc work arrays that are written out below
+      local_descriptors_fastpod(data, ci);
+      // NOTE(review): none of the fopen() results below is checked before fwrite()/fclose()
+      std::string filename0 = data.data_path + "/basedescriptors_config" + std::to_string(ci+1) + ".bin";
+      FILE *fp0 = fopen(filename0.c_str(), "wb");
+      sz[0] = (double) data.num_atom[ci];
+      sz[1] = (double) fastpodptr->Mdesc;
+      fwrite( reinterpret_cast<char*>( sz ), sizeof(double) * (2), 1, fp0);
+      fwrite( reinterpret_cast<char*>( desc.bd ), sizeof(double) * (data.num_atom[ci]*fastpodptr->Mdesc), 1, fp0);
+      fclose(fp0);
+      // desc.pd is only written when more than one cluster is used
+      if (desc.nClusters>1) {
+        std::string filename1 = data.data_path + "/environmentdescriptors_config" + std::to_string(ci+1) + ".bin";
+        FILE *fp1 = fopen(filename1.c_str(), "wb");
+        sz[0] = (double) data.num_atom[ci];
+        sz[1] = (double) fastpodptr->nClusters;
+        fwrite( reinterpret_cast<char*>( sz ), sizeof(double) * (2), 1, fp1);
+        fwrite( reinterpret_cast<char*>( desc.pd ), sizeof(double) * (data.num_atom[ci]*fastpodptr->nClusters), 1, fp1);
+        fclose(fp1);
+      }
+      // desc.gd, optionally followed by desc.gdd, goes into the global descriptor file
+      std::string filename = data.data_path + "/globaldescriptors_config" + std::to_string(ci+1) + ".bin";
+      FILE *fp = fopen(filename.c_str(), "wb");
+
+      sz[0] = (double) data.num_atom[ci];
+      sz[1] = (double) desc.nCoeffAll;
+      fwrite( reinterpret_cast<char*>( sz ), sizeof(double) * (2), 1, fp);
+      fwrite( reinterpret_cast<char*>( desc.gd ), sizeof(double) * (desc.nCoeffAll), 1, fp);
+      if (compute_descriptors==2) { // mode 2 appends the 3*natom*nCoeffAll entries of desc.gdd
+        fwrite( reinterpret_cast<char*>( desc.gdd ), sizeof(double) * (3*data.num_atom[ci]*desc.nCoeffAll), 1, fp);
+      }
+      fclose(fp);
+    }
   }
 
-  // global descriptors for five-body quadratic33 potential
+  if (comm->me == 0)
+    utils::logmesg(lmp, "**************** End Calculating Descriptors ****************\n");
+}
 
-  if (nd33 > 0) {
-    int nq3 = podptr->pod.quadratic33[0]*podptr->pod.nc3;
-    podptr->quadratic_descriptors(&desc.gd[nd1234+nd22+nd23+nd24], &desc.gdd[dim*natom*(nd1234+nd22+nd23+nd24)],
-        &desc.gd[nd1+nd2], fatom3, nq3, dim*natom);
+void FitPOD::environment_cluster_calculation(const datastruct &data)
+{
+  if (comm->me == 0)
+    utils::logmesg(lmp, "**************** Begin Calculating Enviroment Descriptor Matrix ****************\n");
+  // For each element: gather the base descriptors of its atoms, derive a projection matrix from
+  // the eigen-decomposition of their covariance matrix, and k-means cluster the projected
+  // descriptors. Projection matrices and centroids are stored in fastpodptr and saved to files.
+  int nComponents = fastpodptr->nComponents;
+  int Mdesc = fastpodptr->Mdesc;
+  int nClusters = fastpodptr->nClusters;
+  int nelements = fastpodptr->nelements;
+  memory->create(fastpodptr->Centroids, nClusters*nComponents*nelements, "fitpod:centroids");
+  memory->create(fastpodptr->Proj, Mdesc*nComponents*nelements, "fitpod:P");
+
+  int nAtoms = 0;       // atoms handled by this rank (reused below as the per-element count)
+  int nTotalAtoms = 0;  // atoms of all configurations (reused below as the per-element global count)
+  for (int ci=0; ci < (int) data.num_atom.size(); ci++) {
+    if ((ci % comm->nprocs) == comm->me) nAtoms += data.num_atom[ci];
+    nTotalAtoms += data.num_atom[ci];
   }
 
-  // global descriptors for six-body quadratic34 potential
+  double *basedescmatrix = (double *) malloc(nAtoms*Mdesc*sizeof(double)); // Mdesc entries per atom, atoms grouped by element
+  double *pca = (double *) malloc(nAtoms*nComponents*sizeof(double));
+  double *A = (double *) malloc(Mdesc*Mdesc*sizeof(double));
+  double *b = (double *) malloc(Mdesc*sizeof(double));
+  double *Lambda = (double *) malloc(Mdesc*nelements*sizeof(double));
+  int *clusterSizes = (int *) malloc(nClusters*nelements*sizeof(int));
+  int *assignments = (int *) malloc(nAtoms*sizeof(int));
+  int *nElemAtoms = (int *) malloc(nelements*sizeof(int));
+  int *nElemAtomsCumSum = (int *) malloc((1+nelements)*sizeof(int));
+  int *nElemAtomsCount = (int *) malloc(nelements*sizeof(int));
+
+  char chn = 'N';
+  char cht = 'T';
+  char chv = 'V';
+  char chu = 'U';
+  double alpha = 1.0, beta = 0.0;
 
-  if (nd34 > 0) {
-    int nq3 = podptr->pod.quadratic34[0]*podptr->pod.nc3;
-    int nq4 = podptr->pod.quadratic34[1]*podptr->pod.nc4;
-    podptr->quadratic_descriptors(&desc.gd[nd1234+nd22+nd23+nd24+nd33], &desc.gdd[dim*natom*(nd1234+nd22+nd23+nd24+nd33)],
-        &desc.gd[nd1+nd2], &desc.gd[nd1+nd2+nd3], fatom3, fatom4, nq3, nq4, dim*natom);
+  for (int elem=0; elem < nelements; elem++) {
+    nElemAtoms[elem] = 0; // number of atoms of this element handled by this rank
   }
+  for (int ci=0; ci < (int) data.num_atom.size(); ci++) {
+    if ((ci % comm->nprocs) == comm->me) {
+      int natom = data.num_atom[ci];
+      int natom_cumsum = data.num_atom_cumsum[ci];
+      int *atomtype = &data.atomtype[natom_cumsum];
+      for (int n=0; n<natom; n++)
+        nElemAtoms[atomtype[n]-1] += 1;
+    }
+  }
+  // nElemAtomsCumSum[elem] is the index of the first atom column belonging to element elem
+  nElemAtomsCumSum[0] = 0;
+  for (int elem=0; elem < nelements; elem++) {
+    nElemAtomsCumSum[elem+1] = nElemAtomsCumSum[elem] + nElemAtoms[elem];
+    nElemAtomsCount[elem] = 0;
+  }
+
+  // loop over each configuration in the data set
+  for (int ci=0; ci < (int) data.num_atom.size(); ci++) {
+    if ((ci % 100)==0) {
+      if (comm->me == 0)
+        utils::logmesg(lmp, "Configuration: # {}\n", ci+1);
+    }
 
-  // global descriptors for seven-body quadratic44 potential
+    if ((ci % comm->nprocs) == comm->me) {
+      base_descriptors_fastpod(data, ci);
 
-  if (nd44 > 0) {
-    int nq4 = podptr->pod.quadratic44[0]*podptr->pod.nc4;
-    podptr->quadratic_descriptors(&desc.gd[nd1234+nd22+nd23+nd24+nd33+nd34], &desc.gdd[dim*natom*(nd1234+nd22+nd23+nd24+nd33+nd34)],
-        &desc.gd[nd1+nd2+nd3], fatom4, nq4, dim*natom);
+      // basedescmatrix is a Mdesc x nAtoms matrix whose columns are grouped by element
+      int natom =  data.num_atom[ci];
+      int natom_cumsum = data.num_atom_cumsum[ci];
+      int *atomtype = &data.atomtype[natom_cumsum];
+      for (int n=0; n<natom; n++) {
+        int elem = atomtype[n]-1;  // atom types are 1-based; convert to a 0-based element index
+        nElemAtomsCount[elem] += 1;
+        int k = nElemAtomsCumSum[elem] + nElemAtomsCount[elem] - 1; // column of this atom
+        for (int m=0; m<Mdesc; m++)
+          basedescmatrix[m + Mdesc*k] = desc.bd[n + natom*(m)];
+      }
+    }
   }
 
-  // normalize quadratic descriptors
+  for (int elem=0; elem < nelements; elem++) {  // loop over each element
+    nAtoms = nElemAtoms[elem];
+    nTotalAtoms = nAtoms;
+    MPI_Allreduce(MPI_IN_PLACE, &nTotalAtoms, 1, MPI_INT, MPI_SUM, world);
 
-  for (int i=0; i<(nd22+nd23+nd24+nd33+nd34+nd44); i++)
-    desc.gd[nd1234+i] = desc.gd[nd1234+i]/(natom);
+    double *descmatrix = &basedescmatrix[Mdesc*nElemAtomsCumSum[elem]]; // each atom column holds Mdesc entries
+    double *Proj = &fastpodptr->Proj[nComponents*Mdesc*elem];
+    double *centroids = &fastpodptr->Centroids[nComponents*nClusters*elem];
 
-  for (int i=0; i<dim*natom*(nd22+nd23+nd24+nd33+nd34+nd44); i++)
-    desc.gdd[dim*natom*nd1234+i] = desc.gdd[dim*natom*nd1234+i]/(natom);
-}
+    // Calculate covariance matrix A = descmatrix*descmatrix'. A is a Mdesc x Mdesc matrix
+    DGEMM(&chn, &cht, &Mdesc, &Mdesc, &nAtoms, &alpha, descmatrix, &Mdesc, descmatrix, &Mdesc, &beta, A, &Mdesc);
 
-void FitPOD::cubic_descriptors(const datastruct &data, int ci)
-{
-  int dim = 3;
-  int natom = data.num_atom[ci];
-  int nd1 = podptr->pod.nd1;
-  int nd2 = podptr->pod.nd2;
-  int nd3 = podptr->pod.nd3;
-  int nd4 = podptr->pod.nd4;
-  int nd22 = podptr->pod.nd22;
-  int nd23 = podptr->pod.nd23;
-  int nd24 = podptr->pod.nd24;
-  int nd33 = podptr->pod.nd33;
-  int nd34 = podptr->pod.nd34;
-  int nd44 = podptr->pod.nd44;
-  int nd234 = podptr->pod.nd234;
-  int nd333 = podptr->pod.nd333;
-  int nd444 = podptr->pod.nd444;
-  int nd123 = nd1+nd2+nd3;
-  int nd1234 = nd1+nd2+nd3+nd4;
-
-  // global descriptors for seven-body cubic234 potential
-  if (nd234 > 0) {
-    int nq2 = podptr->pod.cubic234[0]*podptr->pod.nc2;
-    int nq3 = podptr->pod.cubic234[1]*podptr->pod.nc3;
-    int nq4 = podptr->pod.cubic234[2]*podptr->pod.nc4;
-    int np3 = nd1234+nd22+nd23+nd24+nd33+nd34+nd44;
-    double *eatom2 = &desc.gd[nd1];
-    double *eatom3 = &desc.gd[nd1+nd2];
-    double *eatom4 = &desc.gd[nd123];
-    double *fatom2 = &desc.gdd[dim*natom*(nd1)];
-    double *fatom3 = &desc.gdd[dim*natom*(nd1+nd2)];
-    double *fatom4 = &desc.gdd[dim*natom*(nd123)];
-    podptr->cubic_descriptors(&desc.gd[np3], &desc.gdd[dim*natom*np3],
-        eatom2, eatom3, eatom4, fatom2, fatom3, fatom4, nq2, nq3, nq4, dim*natom);
-  }
+    MPI_Allreduce(MPI_IN_PLACE, A, Mdesc*Mdesc, MPI_DOUBLE, MPI_SUM, world); // sum the per-rank contributions
 
-  // global descriptors for seven-body cubic333 potential
+    if (comm->me == 0)
+      savematrix2binfile(data.filenametag + "_covariance_matrix_elem" + std::to_string(elem+1) + ".bin", A, Mdesc, Mdesc);
 
-  if (nd333 > 0) {
-    int nq3 = podptr->pod.cubic333[0]*podptr->pod.nc3;
-    int np3 = nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234;
-    double *eatom3 = &desc.gd[nd1+nd2];
-    double *fatom3 = &desc.gdd[dim*natom*(nd1+nd2)];
-    podptr->cubic_descriptors(&desc.gd[np3], &desc.gdd[dim*natom*np3],
-        eatom3, fatom3, nq3, dim*natom);
-  }
+    // Calculate eigenvalues and eigenvectors of A
+    int lwork = MAX(Mdesc*Mdesc, 3*Mdesc);  // length of the work array, must be >= max(1,3*N-1)
+    int info = 1;     // = 0:  successful exit
+    std::vector<double> work(lwork);  // heap storage instead of a variable-length stack array
+    // NOTE(review): info is not checked after the call; a nonzero value means DSYEV failed
+    DSYEV(&chv, &chu, &Mdesc, A, &Mdesc, b, work.data(), &lwork, &info);
 
-  // global descriptors for ten-body cubic444 potential
+    // order eigenvalues and eigenvectors from largest to smallest
+    for (int i=0; i<Mdesc; i++)
+      Lambda[(Mdesc-i-1)] = b[i];
 
-  if (nd444 > 0) {
-    int nq4 = podptr->pod.cubic444[0]*podptr->pod.nc4;
-    int np4 = nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234+nd333;
-    double *eatom4 = &desc.gd[nd123];
-    double *fatom4 = &desc.gdd[dim*natom*(nd123)];
-    podptr->cubic_descriptors(&desc.gd[np4], &desc.gdd[dim*natom*(np4)],
-        eatom4, fatom4, nq4, dim*natom);
+    // P is a nComponents x Mdesc matrix
+    for (int j=0; j<nComponents; j++)
+      for (int i=0; i<Mdesc; i++)
+        Proj[j + nComponents*i] = A[i + Mdesc*(Mdesc-j-1)]*sqrt(fabs(b[(Mdesc-j-1)]/Lambda[0]));
+
+    // Calculate principal component analysis matrix pca = P*descmatrix. pca is a nComponents x nAtoms matrix
+    DGEMM(&chn, &chn, &nComponents, &nAtoms, &Mdesc, &alpha, Proj, &nComponents, descmatrix, &Mdesc, &beta, pca, &nComponents);
+
+    // initialize centroids
+    for (int i = 0; i < nClusters * nComponents; i++) centroids[i] = 0.0;
+    for (int i=0; i < nAtoms; i++) {
+      int m = (i*nClusters)/nAtoms;
+      for (int j=0; j < nComponents; j++)
+        centroids[j + nComponents*m] += pca[j + nComponents*i];
+    }
+    MPI_Allreduce(MPI_IN_PLACE, centroids, nClusters * nComponents, MPI_DOUBLE, MPI_SUM, world);
+    double fac = ((double) nClusters)/((double) nTotalAtoms);
+    for (int i = 0; i < nClusters * nComponents; i++) centroids[i] = centroids[i]*fac;
+    // each centroid now holds the bin sum scaled by nClusters/nTotalAtoms (roughly the bin average)
+
+    // Calculate centroids using k-means clustering
+    int max_iter = 100;
+    KmeansClustering(pca, centroids, assignments, clusterSizes, nAtoms, nClusters, nComponents, max_iter);
+
+    if (comm->me == 0) {
+      savematrix2binfile(data.filenametag + "_eigenvector_matrix_elem" + std::to_string(elem+1) + ".bin", A, Mdesc, Mdesc);
+      savematrix2binfile(data.filenametag + "_eigenvalues_elem" + std::to_string(elem+1) + ".bin", b, Mdesc, 1);
+    }
+    savematrix2binfile(data.filenametag + "_desc_matrix_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", descmatrix, Mdesc, nAtoms);
+    savematrix2binfile(data.filenametag + "_pca_matrix_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", pca, nComponents, nAtoms);
+    saveintmatrix2binfile(data.filenametag + "_cluster_assignments_elem" + std::to_string(elem+1) + "_proc" + std::to_string(comm->me+1) + ".bin", assignments, nAtoms, 1);
   }
 
-  // normalize cubic descriptors
-  int nd = podptr->pod.nd;
-  for (int i=(nd1234+nd22+nd23+nd24+nd33+nd34+nd44); i<nd; i++)
-    desc.gd[i] = desc.gd[i]/(natom*natom);
+  savedata2textfile(data.filenametag + "_projection_matrix"  + ".pod", "projection_matrix: {}\n ", fastpodptr->Proj, nComponents*Mdesc*nelements, 1, 1);
+  savedata2textfile(data.filenametag + "_centroids"  + ".pod", "centroids: {} \n", fastpodptr->Centroids, nComponents*nClusters*nelements, 1, 1);
+
+  free(basedescmatrix);
+  free(pca);
+  free(A);
+  free(b);
+  free(clusterSizes);
+  free(Lambda);
+  free(assignments);
+  free(nElemAtoms);
+  free(nElemAtomsCumSum);
+  free(nElemAtomsCount);
 
-  for (int i=dim*natom*(nd1234+nd22+nd23+nd24+nd33+nd34+nd44); i<dim*natom*nd; i++)
-    desc.gdd[i] = desc.gdd[i]/(natom*natom);
+  if (comm->me == 0)
+    utils::logmesg(lmp, "**************** End Calculating Enviroment Descriptor Matrix ****************\n");
 }
 
 void FitPOD::least_squares_matrix(const datastruct &data, int ci)
@@ -1169,15 +1441,15 @@ void FitPOD::least_squares_matrix(const datastruct &data, int ci)
   int dim = 3;
   int natom = data.num_atom[ci];
   int natom_cumsum = data.num_atom_cumsum[ci];
-  int nd = podptr->pod.nd;
+  int nCoeffAll = desc.nCoeffAll;
   int nforce = dim*natom;
 
   // compute energy weight and force weight
 
   double normconst = 1.0;
   if (data.normalizeenergy==1) normconst = 1.0/natom;
-  double we = data.fitting_weights[0];
-  double wf = data.fitting_weights[1];
+  double we = data.we[ci];
+  double wf = data.wf[ci];
   double we2 = (we*we)*(normconst*normconst);
   double wf2 = (wf*wf);
 
@@ -1188,7 +1460,7 @@ void FitPOD::least_squares_matrix(const datastruct &data, int ci)
 
   // least-square matrix for all descriptors: A = A + (we*we)*(gd^T * gd)
 
-  podKron(desc.A, desc.gd, desc.gd, we2, nd, nd);
+  podKron(desc.A, desc.gd, desc.gd, we2, nCoeffAll, nCoeffAll);
 
   // least-square matrix for all descriptors derivatives: A =  A + (wf*wf) * (gdd^T * gdd)
 
@@ -1196,18 +1468,17 @@ void FitPOD::least_squares_matrix(const datastruct &data, int ci)
   char chn = 'N';
   double one = 1.0;
   int inc1 = 1;
-  DGEMM(&cht, &chn, &nd, &nd, &nforce, &wf2, desc.gdd, &nforce, desc.gdd, &nforce, &one, desc.A, &nd);
+  DGEMM(&cht, &chn, &nCoeffAll, &nCoeffAll, &nforce, &wf2, desc.gdd, &nforce, desc.gdd, &nforce, &one, desc.A, &nCoeffAll);
 
   // least-square vector for all descriptors: b = b + (we*we*energy)*gd
 
   double wee = we2*energy;
-  for (int i = 0; i< nd; i++)
+  for (int i = 0; i< nCoeffAll; i++)
     desc.b[i] += wee*desc.gd[i];
 
   // least-square vector for all descriptors derivatives: b = b + (wf*wf) * (gdd^T * f)
 
-  DGEMV(&cht, &nforce, &nd, &wf2, desc.gdd, &nforce, force, &inc1, &one, desc.b, &inc1);
-
+  DGEMV(&cht, &nforce, &nCoeffAll, &wf2, desc.gdd, &nforce, force, &inc1, &one, desc.b, &inc1);
 }
 
 void FitPOD::least_squares_fit(const datastruct &data)
@@ -1227,16 +1498,17 @@ void FitPOD::least_squares_fit(const datastruct &data)
     if ((ci % comm->nprocs) == comm->me) {
 
       // compute linear POD descriptors
+      local_descriptors_fastpod(data, ci);
 
-      linear_descriptors(data, ci);
-
-      // compute quadratic POD descriptors
-
-      quadratic_descriptors(data, ci);
-
-      // compute cubic POD descriptors
-
-      cubic_descriptors(data, ci);
+      if (save_descriptors > 0) {
+        std::string filename = data.data_path + "/descriptors_config" + std::to_string(ci+1) + ".bin";
+        FILE *fp = fopen(filename.c_str(), "wb");
+        fwrite( reinterpret_cast<char*>( desc.gd ), sizeof(double) * (desc.nCoeffAll), 1, fp);
+        if (save_descriptors==2) {
+          fwrite( reinterpret_cast<char*>( desc.gdd ), sizeof(double) * (3*data.num_atom[ci]*desc.nCoeffAll), 1, fp);
+        }
+        fclose(fp);
+      }
 
       // assemble the least-squares linear system
 
@@ -1244,59 +1516,62 @@ void FitPOD::least_squares_fit(const datastruct &data)
     }
   }
 
-  int nd = podptr->pod.nd;
+  int nCoeffAll = desc.nCoeffAll;
 
-  MPI_Allreduce(MPI_IN_PLACE, desc.b, nd, MPI_DOUBLE, MPI_SUM, world);
-  MPI_Allreduce(MPI_IN_PLACE, desc.A, nd*nd, MPI_DOUBLE, MPI_SUM, world);
+  MPI_Allreduce(MPI_IN_PLACE, desc.b, nCoeffAll, MPI_DOUBLE, MPI_SUM, world);
+  MPI_Allreduce(MPI_IN_PLACE, desc.A, nCoeffAll*nCoeffAll, MPI_DOUBLE, MPI_SUM, world);
 
   if (comm->me == 0) {
 
     // symmetrize A
 
-    for (int i = 0; i<nd; i++)
-      for (int j = i; j<nd; j++) {
-        double a1 = desc.A[i + nd*j];
-        double a2 = desc.A[j + nd*i];
-        desc.A[i + nd*j] = 0.5*(a1+a2);
-        desc.A[j + nd*i] = 0.5*(a1+a2);
+    for (int i = 0; i<nCoeffAll; i++)
+      for (int j = i; j<nCoeffAll; j++) {
+        double a1 = desc.A[i + nCoeffAll*j];
+        double a2 = desc.A[j + nCoeffAll*i];
+        desc.A[i + nCoeffAll*j] = 0.5*(a1+a2);
+        desc.A[j + nCoeffAll*i] = 0.5*(a1+a2);
       }
 
-    // scale A and b
-
-    double maxb = 0.0;
-    for (int i = 0; i<nd; i++)
-      maxb = (maxb > fabs(desc.b[i])) ? maxb : fabs(desc.b[i]);
-
-    maxb = 1.0/maxb;
-    for (int i = 0; i<nd; i++)
-      desc.b[i] = desc.b[i]*maxb;
-
-    for (int i = 0; i<nd*nd; i++)
-      desc.A[i] = desc.A[i]*maxb;
+//     // scale A and b
+//
+//     double maxb = 0.0;
+//     for (int i = 0; i<nCoeffAll; i++)
+//       maxb = (maxb > fabs(desc.b[i])) ? maxb : fabs(desc.b[i]);
+//
+//     maxb = 1.0/maxb;
+//     for (int i = 0; i<nCoeffAll; i++)
+//       desc.b[i] = desc.b[i]*maxb;
+//
+//     for (int i = 0; i<nCoeffAll*nCoeffAll; i++)
+//       desc.A[i] = desc.A[i]*maxb;
 
     double regularizing_parameter = data.fitting_weights[11];
 
-    for (int i = 0; i<nd; i++) {
+    for (int i = 0; i<nCoeffAll; i++) {
       desc.c[i] = desc.b[i];
-      desc.A[i + nd*i] = desc.A[i + nd*i]*(1.0 + regularizing_parameter);
-      if (desc.A[i + nd*i] < regularizing_parameter) desc.A[i + nd*i] = regularizing_parameter;
+      desc.A[i + nCoeffAll*i] = desc.A[i + nCoeffAll*i]*(1.0 + regularizing_parameter);
+      if (desc.A[i + nCoeffAll*i] < regularizing_parameter) desc.A[i + nCoeffAll*i] = regularizing_parameter;
     }
 
     // solving the linear system A * c = b
 
     int nrhs=1, info;
     char chu = 'U';
-    DPOSV(&chu, &nd, &nrhs, desc.A, &nd, desc.c, &nd, &info);
+    DPOSV(&chu, &nCoeffAll, &nrhs, desc.A, &nCoeffAll, desc.c, &nCoeffAll, &info);
   }
 
-  MPI_Bcast(desc.c, nd, MPI_DOUBLE, 0, world);
+  MPI_Bcast(desc.c, nCoeffAll, MPI_DOUBLE, 0, world);
+
+  // update coefficients in POD class to compute energy and force
+  fastpodptr->mknewcoeff(desc.c, nCoeffAll);
 
   if (comm->me == 0) {     // save coefficients into a text file
     std::string filename = data.filenametag + "_coefficients"  + ".pod";
     FILE *fp = fopen(filename.c_str(), "w");
 
-    fmt::print(fp, "POD_coefficients: {}\n", nd);
-    for (int count = 0; count < nd; count++) {
+    fmt::print(fp, "POD_coefficients: {}\n", nCoeffAll);
+    for (int count = 0; count < nCoeffAll; count++) {
       fmt::print(fp, "{:<10.{}f}\n",  desc.c[count], data.precision);
     }
     fclose(fp);
@@ -1304,12 +1579,24 @@ void FitPOD::least_squares_fit(const datastruct &data)
   }
 }
 
-double FitPOD::energyforce_calculation(double *force, double *coeff, const datastruct &data, int ci)
+double latticevolume(double *lattice)  // returns the scalar triple product (v1 x v2) . v3 of the three vectors stored in lattice[0..8]
+{
+  double *v1 = &lattice[0];  // first vector: lattice[0..2]
+  double *v2 = &lattice[3];  // second vector: lattice[3..5]
+  double *v3 = &lattice[6];  // third vector: lattice[6..8]
+
+  double b0 = v1[1] * v2[2] - v1[2] * v2[1];  // (b0, b1, b2) = v1 x v2
+  double b1 = v1[2] * v2[0] - v1[0] * v2[2];
+  double b2 = v1[0] * v2[1] - v1[1] * v2[0];
+
+  return (b0*v3[0] + b1*v3[1] + b2*v3[2]);  // no absolute value is taken, so the result is signed
+}
+
+double FitPOD::energyforce_calculation_fastpod(double *force, const datastruct &data, int ci)
 {
   int dim = 3;
-  int *pbc = podptr->pod.pbc;
-  double rcut = podptr->pod.rcut;
-  int nd1234 = podptr->pod.nd1 + podptr->pod.nd2 + podptr->pod.nd3 + podptr->pod.nd4;
+  int *pbc = fastpodptr->pbc;
+  double rcut = fastpodptr->rcut;
 
   int natom = data.num_atom[ci];
   int natom_cumsum2 = data.num_atom_cumsum[ci];
@@ -1320,26 +1607,11 @@ double FitPOD::energyforce_calculation(double *force, double *coeff, const datas
   double *a2 = &lattice[3];
   double *a3 = &lattice[6];
 
-  // neighbor list
-
-  int Nij = podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum,
-        position, a1, a2, a3, rcut, pbc, natom);
-
-  double *tmpmem = &desc.gdd[0];
-  int *tmpint = &desc.tmpint[0];
-  double *rij = &tmpmem[0]; // 3*Nij
-  int *ai = &tmpint[0];   // Nij
-  int *aj = &tmpint[Nij];   // Nij
-  int *ti = &tmpint[2*Nij]; // Nij
-  int *tj = &tmpint[3*Nij]; // Nij
-  int *idxi = &tmpint[4*Nij]; // Nij
-  podptr->podNeighPairs(rij, nb.y, idxi, ai, aj, ti, tj, nb.pairnum_cumsum, atomtype, nb.pairlist, nb.alist, natom);
+  podfullneighborlist(nb.y, nb.alist, nb.pairlist, nb.pairnum, nb.pairnum_cumsum,
+         position, a1, a2, a3, rcut, pbc, natom);
 
-  double *effectivecoeff = &tmpmem[3*Nij]; // 3*Nij
-  podArraySetValue(effectivecoeff, 0.0, nd1234);
-
-  double energy = podptr->energyforce_calculation(force, coeff, effectivecoeff, desc.gd, rij,
-    &tmpmem[3*Nij+nd1234], nb.pairnum_cumsum, atomtype, idxi, ai, aj, ti, tj, natom, Nij);
+  double energy = fastpodptr->energyforce(force, nb.y, atomtype, nb.alist, nb.pairlist,
+          nb.pairnum_cumsum, natom);
 
   return energy;
 }
@@ -1376,7 +1648,7 @@ void FitPOD::print_analysis(const datastruct &data, double *outarray, double *er
   int ci=0, m=8, nc=0, nf=0;
   for (int file = 0; file < nfiles; file++) {
     fmt::print(fp_analysis, "# {}\n", data.filenames[file]);
-    fmt::print(fp_analysis, "  config   # atoms      energy        DFT energy     energy error   "
+    fmt::print(fp_analysis, "  config   # atoms       volume        energy        DFT energy     energy error   "
                "  force          DFT force       force error\n");
 
     int nforceall = 0;
@@ -1384,6 +1656,10 @@ void FitPOD::print_analysis(const datastruct &data, double *outarray, double *er
     nc += nconfigs;
     for (int ii=0; ii < nconfigs; ii++) { // loop over each configuration in a file
       fmt::print(fp_analysis, "{:6}   {:8}    ", outarray[m*ci], outarray[1 + m*ci]);
+
+      double vol = latticevolume(&data.lattice[9*ci]);
+      fmt::print(fp_analysis, "{:<15.10} ", vol);
+
       for(int count = 2; count < m; count ++)
         fmt::print(fp_analysis, "{:<15.10} ", outarray[count + m*ci]);
       fmt::print(fp_analysis, "\n");
@@ -1421,6 +1697,7 @@ void FitPOD::print_analysis(const datastruct &data, double *outarray, double *er
 void FitPOD::error_analysis(const datastruct &data, double *coeff)
 {
   int dim = 3;
+  int nCoeffAll = desc.nCoeffAll;
   double energy;
   std::vector<double> force(dim*data.num_atom_max);
 
@@ -1440,21 +1717,8 @@ void FitPOD::error_analysis(const datastruct &data, double *coeff)
   for (int i=0; i<4*(nfiles+1); i++)
     errors[i] = 0.0;
 
-  int nd1 = podptr->pod.nd1;
-  int nd2 = podptr->pod.nd2;
-  int nd3 = podptr->pod.nd3;
-  int nd4 = podptr->pod.nd4;
-  int nd22 = podptr->pod.nd22;
-  int nd23 = podptr->pod.nd23;
-  int nd24 = podptr->pod.nd24;
-  int nd33 = podptr->pod.nd33;
-  int nd34 = podptr->pod.nd34;
-  int nd44 = podptr->pod.nd44;
-  int nd1234 = nd1+nd2+nd3+nd4;
-  int nd = podptr->pod.nd;
-
-  std::vector<double> newcoeff(nd);
-  for (int j=0; j<nd; j++)
+  std::vector<double> newcoeff(nCoeffAll);
+  for (int j=0; j<nCoeffAll; j++)
     newcoeff[j] = coeff[j];
 
   if (comm->me == 0)
@@ -1475,13 +1739,7 @@ void FitPOD::error_analysis(const datastruct &data, double *coeff)
         int natom = data.num_atom[ci];
         int nforce = dim*natom;
 
-        for (int j=nd1234; j<(nd1234+nd22+nd23+nd24+nd33+nd34+nd44); j++)
-          newcoeff[j] = coeff[j]/(natom);
-
-        for (int j=(nd1234+nd22+nd23+nd24+nd33+nd34+nd44); j<nd; j++)
-          newcoeff[j] = coeff[j]/(natom*natom);
-
-        energy = energyforce_calculation(force.data(), newcoeff.data(), data, ci);
+        energy = energyforce_calculation_fastpod(force.data(), data, ci);
 
         double DFTenergy = data.energy[ci];
         int natom_cumsum = data.num_atom_cumsum[ci];
@@ -1586,10 +1844,9 @@ void FitPOD::energyforce_calculation(const datastruct &data, double *coeff)
       int nforce = dim*natom;
 
       if ((ci % comm->nprocs) == comm->me) {
-        energy = energyforce_calculation(force.data()+1, coeff, data, ci);
+        energy = energyforce_calculation_fastpod(force.data()+1, data, ci);
 
         // save energy and force into a binary file
-
         force[0] = energy;
         std::string filename = "energyforce_config" + std::to_string(ci+1) + ".bin";
 
@@ -1817,3 +2074,117 @@ void FitPOD::triclinic_lattice_conversion(double *a, double *b, double *c, doubl
   b[0] = bx; b[1] = by;  b[2] = 0.0;
   c[0] = cx; c[1] = cy;  c[2] = cz;
 }
+
+// Function to calculate the squared Euclidean distance (no square root is taken) between two points in N-dimensional space
+double FitPOD::squareDistance(const double *a, const double *b, int DIMENSIONS) {
+  double sum = 0.0;  // running sum of squared per-coordinate differences
+  for (int i = 0; i < DIMENSIONS; i++) {
+    sum += (a[i] - b[i]) * (a[i] - b[i]);
+  }
+  return sum;  // stays 0.0 when DIMENSIONS <= 0 because the loop body never runs
+}
+
+// Function to assign each point to the cluster whose centroid has the smallest squared distance to it
+void FitPOD::assignPointsToClusters(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS) {
+  // Initialize clusterSizes to zero
+  for (int i = 0; i < NUM_CLUSTERS; i++) {
+    clusterSizes[i] = 0;
+  }
+
+  for (int i = 0; i < NUM_POINTS; i++) {  // point i starts at points[i * DIMENSIONS]; centroid j starts at centroids[j * DIMENSIONS]
+    double minDist = squareDistance(&points[i * DIMENSIONS], &centroids[0], DIMENSIONS);  // cluster 0 is the initial best candidate
+    int closestCluster = 0;
+    for (int j = 1; j < NUM_CLUSTERS; j++) {
+      double dist = squareDistance(&points[i * DIMENSIONS], &centroids[j * DIMENSIONS], DIMENSIONS);
+      if (dist < minDist) {  // strict '<': on a tie the lower cluster index is kept
+        minDist = dist;
+        closestCluster = j;
+      }
+    }
+    assignments[i] = closestCluster;  // cluster index chosen for point i
+    clusterSizes[closestCluster]++;  // counts only the points passed to this call
+  }
+}
+
+// Function to update centroids based on point assignments; centroids and clusterSizes are both overwritten with values summed over world
+void FitPOD::updateCentroids(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS) {
+  // Reset centroids for recalculation
+  for (int i = 0; i < NUM_CLUSTERS * DIMENSIONS; i++) {
+    centroids[i] = 0.0;
+  }
+
+  // Accumulate sum of points in each cluster
+  for (int i = 0; i < NUM_POINTS; i++) {
+    int cluster = assignments[i];  // cluster index previously stored for point i
+    for (int j = 0; j < DIMENSIONS; j++) {
+      centroids[cluster * DIMENSIONS + j] += points[i * DIMENSIONS + j];
+    }
+  }
+
+  // Use MPI_Allreduce to sum up the local sums and cluster sizes across all processes
+  MPI_Allreduce(MPI_IN_PLACE, centroids, NUM_CLUSTERS * DIMENSIONS, MPI_DOUBLE, MPI_SUM, world);
+  MPI_Allreduce(MPI_IN_PLACE, clusterSizes, NUM_CLUSTERS, MPI_INT, MPI_SUM, world);  // in place: clusterSizes now holds the summed counts
+
+  // Divide by number of points to get the mean (centroid)
+  for (int i = 0; i < NUM_CLUSTERS; i++) {
+    if (clusterSizes[i] != 0) {  // a cluster with zero summed size is not divided, so its centroid keeps the zeroed sum
+      for (int j = 0; j < DIMENSIONS; j++) {
+        centroids[i * DIMENSIONS + j] /= clusterSizes[i];
+      }
+    }
+  }
+}
+
+// Function for K-means clustering: always runs MAX_ITER assign/update rounds, there is no convergence test
+void FitPOD::KmeansClustering(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS, int MAX_ITER) {
+  for (int iter = 0; iter < MAX_ITER; iter++) {
+    assignPointsToClusters(points, centroids, assignments, clusterSizes, NUM_POINTS, NUM_CLUSTERS, DIMENSIONS);  // reads centroids first, so they must hold starting values on entry
+    updateCentroids(points, centroids, assignments, clusterSizes, NUM_POINTS, NUM_CLUSTERS, DIMENSIONS);  // performs MPI_Allreduce on world each round
+  }
+}
+
+void FitPOD::savematrix2binfile(std::string filename, double *A, int nrows, int ncols)  // writes a two-value size header followed by the nrows*ncols doubles of A
+{
+  FILE *fp = fopen(filename.c_str(), "wb");  // NOTE(review): fp is not checked for nullptr before the fwrite/fclose calls
+  double sz[2];  // header: nrows and ncols are stored as doubles, not ints
+  sz[0] = (double) nrows;
+  sz[1] = (double) ncols;
+  fwrite( reinterpret_cast<char*>( sz ), sizeof(double) * (2), 1, fp);  // header goes first
+  fwrite( reinterpret_cast<char*>( A ), sizeof(double) * (nrows*ncols), 1, fp);  // then the raw contents of A in memory order
+  fclose(fp);
+}
+
+void FitPOD::saveintmatrix2binfile(std::string filename, int *A, int nrows, int ncols)  // writes a two-int size header followed by the nrows*ncols ints of A
+{
+  FILE *fp = fopen(filename.c_str(), "wb");  // NOTE(review): fp is not checked for nullptr before the fwrite/fclose calls
+  int sz[2];  // header: {nrows, ncols} stored as ints (the double variant stores its header as doubles)
+  sz[0] = nrows;
+  sz[1] = ncols;
+  fwrite( reinterpret_cast<char*>( sz ), sizeof(int) * (2), 1, fp);  // header goes first
+  fwrite( reinterpret_cast<char*>( A ), sizeof(int) * (nrows*ncols), 1, fp);  // then the raw contents of A in memory order
+  fclose(fp);
+}
+
+void FitPOD::savedata2textfile(std::string filename, std::string text, double *A, int n, int m, int dim)  // text dump of A; does nothing unless comm->me == 0
+{
+  if (comm->me == 0) {     
+    int precision = 15;  // number of digits after the decimal point passed to the {:<10.{}f} formats below
+    FILE *fp = fopen(filename.c_str(), "w");  // NOTE(review): fp is not checked for nullptr before it is used
+    if (dim==1) {
+      fmt::print(fp, text, n);  // text is used as the format string with n as its only argument
+      for (int i = 0; i < n; i++)
+        fmt::print(fp, "{:<10.{}f} \n",  A[i], precision);  // one value per line
+    }
+    else if (dim==2) {
+      fmt::print(fp, text, n);  // same header line as the dim==1 case
+      fmt::print(fp, "{} \n", m);  // followed by m on its own line
+      for (int j = 0; j < n; j++) {  // one output line per j, holding m values
+        for (int i = 0; i < m; i++)
+          fmt::print(fp, "{:<10.{}f}     ",  A[j + i*n], precision);  // entry (j, i) is read from A[j + i*n]
+        fmt::print(fp, "   \n");
+      }
+    }
+    fclose(fp);  // for any other dim nothing is printed, so the file is left empty
+  }
+}
+
diff --git a/src/ML-POD/fitpod_command.h b/src/ML-POD/fitpod_command.h
index b3591302405..c6a2e167c50 100644
--- a/src/ML-POD/fitpod_command.h
+++ b/src/ML-POD/fitpod_command.h
@@ -11,6 +11,7 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
+
 #ifdef COMMAND_CLASS
 // clang-format off
 CommandStyle(fitpod,FitPOD);
@@ -21,22 +22,25 @@ CommandStyle(fitpod,FitPOD);
 #define LMP_FITPOD_COMMAND_H
 
 #include "command.h"
+#include <unordered_map>
 
 namespace LAMMPS_NS {
 
 class FitPOD : public Command {
- public:
+public:
   FitPOD(LAMMPS *);
   void command(int, char **) override;
 
- private:
+private:
   struct datastruct {
     std::string file_format = "extxyz";
     std::string file_extension = "xyz";
     std::string data_path;
-    std::vector<std::string> data_files;
+    std::vector<std::string> data_files; // sorted file names
+    std::vector<std::string> group_names; // sorted group names
     std::vector<std::string> filenames;
     std::string filenametag = "pod";
+    std::string group_weight_type = "global";
 
     std::vector<int> num_atom;
     std::vector<int> num_atom_cumsum;
@@ -48,12 +52,15 @@ class FitPOD : public Command {
     int num_atom_max;
     int num_config_sum;
 
-    double *lattice;
-    double *energy;
-    double *stress;
-    double *position;
-    double *force;
-    int *atomtype;
+    double *lattice=nullptr;
+    double *energy=nullptr;
+    double *stress=nullptr;
+    double *position=nullptr;
+    double *force=nullptr;
+    int *atomtype=nullptr;
+    // Group weights will have same size as energy.
+    double *we=nullptr;
+    double *wf=nullptr;
 
     int training = 1;
     int normalizeenergy = 1;
@@ -65,6 +72,9 @@ class FitPOD : public Command {
     int precision = 8;
     double fraction = 1.0;
 
+    std::unordered_map<std::string, double> we_map;
+    std::unordered_map<std::string, double> wf_map;
+
     double fitting_weights[12] = {100.0, 1.0, 0.0, 1, 1, 0, 0, 1, 1, 1, 1, 1e-10};
 
     void copydatainfo(datastruct &data) const
@@ -84,42 +94,50 @@ class FitPOD : public Command {
       data.precision = precision;
       data.training = training;
       data.normalizeenergy = normalizeenergy;
-      for (int i = 0; i < 12; i++) data.fitting_weights[i] = fitting_weights[i];
+      for (int i = 0; i < 12; i++)
+        data.fitting_weights[i] = fitting_weights[i];
+      data.we_map = we_map;
+      data.wf_map = wf_map;
     }
   };
 
   struct neighborstruct {
-    int *alist;
-    int *pairnum;
-    int *pairnum_cumsum;
-    int *pairlist;
-    double *y;
-
-    int natom;
-    int nalist;
-    int natom_max;
-    int sze;
-    int sza;
-    int szy;
-    int szp;
+    int *alist=nullptr;
+    int *pairnum=nullptr;
+    int *pairnum_cumsum=nullptr;
+    int *pairlist=nullptr;
+    double *y=nullptr;
+
+    //int natom;
+    //int nalist;
+    int natom_max = 0;
+    int sze = 0;
+    int sza = 0;
+    int szy = 0;
+    int szp = 0;
   };
 
   struct descriptorstruct {
-    double *gd;     // global descriptors
-    double *gdd;    // derivatives of global descriptors and peratom descriptors
-    double *A;      // least-square matrix for all descriptors
-    double *b;      // least-square vector for all descriptors
-    double *c;      // coefficents of descriptors
-    int *tmpint;
-    int szd;
-    int szi;
+    double *bd=nullptr;  // base descriptors
+    double *pd=nullptr;  // multi-environment descriptors (probabilities)
+    double *gd=nullptr;  // global descriptors
+    double *gdd=nullptr; // derivatives of global descriptors and peratom descriptors
+    double *A=nullptr;  // least-square matrix for all descriptors
+    double *b=nullptr;  // least-square vector for all descriptors
+    double *c=nullptr;  // coefficients of descriptors
+    int szd = 0;    
+    int nCoeffAll = 0; // number of global descriptors
+    int nClusters = 0; // number of environment clusters 
   };
 
+  int save_descriptors = 0;
+  int compute_descriptors = 0;  
   datastruct traindata;
   datastruct testdata;
+  datastruct envdata;
   descriptorstruct desc;
   neighborstruct nb;
-  class MLPOD *podptr;
+  class EAPOD *fastpodptr;
 
   // functions for collecting/collating arrays
 
@@ -144,18 +162,26 @@ class FitPOD : public Command {
   void matrix33_multiplication(double *xrot, double *Rmat, double *x, int natom);
   void matrix33_inverse(double *invA, double *A1, double *A2, double *A3);
 
+  double squareDistance(const double *a, const double *b, int DIMENSIONS);  
+  void assignPointsToClusters(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSION);
+  void updateCentroids(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS);
+  void KmeansClustering(double *points, double *centroids, int *assignments, int *clusterSizes, int NUM_POINTS, int NUM_CLUSTERS, int DIMENSIONS, int MAX_ITER);
+
+  void savedata2textfile(std::string filename, std::string text, double *A, int n, int m, int dim);
+  void savematrix2binfile(std::string filename, double *A, int nrows, int ncols);
+  void saveintmatrix2binfile(std::string filename, int *A, int nrows, int ncols);
+
   // functions for reading input files and fitting
 
-  int read_data_file(double *fitting_weights, std::string &file_format, std::string &file_extension,
-                     std::string &test_path, std::string &training_path, std::string &filenametag,
-                     const std::string &data_file);
-  void get_exyz_files(std::vector<std::string> &, const std::string &, const std::string &);
-  int get_number_atom_exyz(std::vector<int> &num_atom, int &num_atom_sum, std::string file);
-  int get_number_atoms(std::vector<int> &num_atom, std::vector<int> &num_atom_sum,
-                       std::vector<int> &num_config, std::vector<std::string> training_files);
-  void read_exyz_file(double *lattice, double *stress, double *energy, double *pos, double *forces,
-                      int *atomtype, std::string file, std::vector<std::string> species);
-  void get_data(datastruct &data, const std::vector<std::string>& species);
+  int read_data_file(double *fitting_weights, std::string &file_format, std::string &file_extension, std::string &env_path,
+    std::string &test_path, std::string &training_path, std::string &filenametag, const std::string &data_file, std::string &group_weight_type,
+    std::unordered_map<std::string, double> &we_map, std::unordered_map<std::string, double> &wf_map);
+  void get_exyz_files(std::vector<std::string> &, std::vector<std::string> &, const std::string &, const std::string &);
+  int get_number_atom_exyz(std::vector<int>& num_atom, int& num_atom_sum, std::string file);
+  int get_number_atoms(std::vector<int>& num_atom, std::vector<int> &num_atom_sum, std::vector<int>& num_config, std::vector<std::string> training_files);
+  void read_exyz_file(double *lattice, double *stress, double *energy, double *we, double *wf, double *pos, double *forces,
+    int *atomtype, std::string file, std::vector<std::string> species, double we_group, double wf_group);
+  void get_data(datastruct &data, const std::vector<std::string> &species);
   std::vector<int> linspace(int start_in, int end_in, int num_in);
   std::vector<int> shuffle(int start_in, int end_in, int num_in);
   std::vector<int> select(int n, double fraction, int randomize);
@@ -166,17 +192,20 @@ class FitPOD : public Command {
   int podneighborlist(int *neighlist, int *numneigh, double *r, double rcutsq, int nx, int N,
                       int dim);
   int podfullneighborlist(double *y, int *alist, int *neighlist, int *numneigh, int *numneighsum,
-                          double *x, double *a1, double *a2, double *a3, double rcut, int *pbc,
-                          int nx);
-  void allocate_memory(const datastruct &data);
-  void linear_descriptors(const datastruct &data, int ci);
-  void quadratic_descriptors(const datastruct &data, int ci);
-  void cubic_descriptors(const datastruct &data, int ci);
+    double *x, double *a1, double *a2, double *a3, double rcut, int *pbc, int nx);
+  void estimate_memory_neighborstruct(const datastruct &data, int *pbc, double rcut, int nelements);
+  void allocate_memory_neighborstruct();
+  void allocate_memory_descriptorstruct(int nd);
+  void estimate_memory_fastpod(const datastruct &data);  
+  void local_descriptors_fastpod(const datastruct &data, int ci);
+  void base_descriptors_fastpod(const datastruct &data, int ci);
   void least_squares_matrix(const datastruct &data, int ci);
   void least_squares_fit(const datastruct &data);
+  void descriptors_calculation(const datastruct &data);
+  void environment_cluster_calculation(const datastruct &data);
   void print_analysis(const datastruct &data, double *outarray, double *errors);
-  void error_analysis(const datastruct &data, double *coeff);
-  double energyforce_calculation(double *force, double *coeff, const datastruct &data, int ci);
+  void error_analysis(const datastruct &data, double *coeff);  
+  double energyforce_calculation_fastpod(double *force, const datastruct &data, int ci);
   void energyforce_calculation(const datastruct &data, double *coeff);
 };
 
diff --git a/src/ML-POD/mlpod.cpp b/src/ML-POD/mlpod.cpp
deleted file mode 100644
index 088b9abadc1..00000000000
--- a/src/ML-POD/mlpod.cpp
+++ /dev/null
@@ -1,3714 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/ Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Ngoc Cuong Nguyen (MIT) and Andrew Rohskopf (SNL)
-------------------------------------------------------------------------- */
-
-// POD header file
-
-#include "mlpod.h"
-
-// LAMMPS header files
-
-#include "comm.h"
-#include "error.h"
-#include "math_const.h"
-#include "math_special.h"
-#include "memory.h"
-#include "tokenizer.h"
-
-#include <cmath>
-
-using namespace LAMMPS_NS;
-using MathConst::MY_PI;
-using MathSpecial::cube;
-using MathSpecial::powint;
-
-static constexpr int MAXLINE = 1024;
-
-MLPOD::podstruct::podstruct() :
-    twobody{4, 8, 6}, threebody{4, 8, 5, 4}, fourbody{0, 0, 0, 0}, pbc(nullptr),
-    elemindex(nullptr), quadratic22{0, 0}, quadratic23{0, 0}, quadratic24{0, 0}, quadratic33{0, 0},
-    quadratic34{0, 0}, quadratic44{0, 0}, cubic234{0, 0, 0}, cubic333{0, 0, 0}, cubic444{0, 0, 0},
-    besselparams(nullptr), coeff(nullptr), Phi2(nullptr), Phi3(nullptr), Phi4(nullptr),
-    Lambda2(nullptr), Lambda3(nullptr), Lambda4(nullptr),
-    snapelementradius{0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5},
-    snapelementweight{1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}
-{
-  snaptwojmax = 0;
-  snapchemflag = 0;
-  snaprfac0 = 0.99363;
-}
-
-MLPOD::podstruct::~podstruct()
-{
-  delete[] pbc;
-  delete[] elemindex;
-  delete[] besselparams;
-}
-
-MLPOD::MLPOD(LAMMPS *_lmp, const std::string &pod_file, const std::string &coeff_file) :
-    Pointers(_lmp)
-{
-  // read pod input file to podstruct
-
-  read_pod(pod_file);
-
-  // read pod coefficient file to podstruct
-
-  if (coeff_file != "") read_coeff_file(coeff_file);
-
-  if (pod.snaptwojmax > 0) InitSnap();
-}
-
-MLPOD::~MLPOD()
-{
-  // deallocate pod arrays
-
-  memory->destroy(pod.coeff);
-  if (pod.ns2 > 0) {
-    memory->destroy(pod.Phi2);
-    memory->destroy(pod.Lambda2);
-  }
-  if (pod.ns3 > 0) {
-    memory->destroy(pod.Phi3);
-    memory->destroy(pod.Lambda3);
-  }
-  if (pod.ns4 > 0) {
-    memory->destroy(pod.Phi4);
-    memory->destroy(pod.Lambda4);
-  }
-
-  // deallocate snap arrays if used
-
-  if (pod.snaptwojmax > 0) {
-    memory->destroy(sna.map);
-    memory->destroy(sna.idx_max);
-    memory->destroy(sna.idxz);
-    memory->destroy(sna.idxb);
-    memory->destroy(sna.idxb_block);
-    memory->destroy(sna.idxu_block);
-    memory->destroy(sna.idxz_block);
-    memory->destroy(sna.idxcg_block);
-    memory->destroy(sna.rootpqarray);
-    memory->destroy(sna.cglist);
-    memory->destroy(sna.fac);
-    memory->destroy(sna.bzero);
-    memory->destroy(sna.wjelem);
-    memory->destroy(sna.radelem);
-    memory->destroy(sna.rcutsq);
-  }
-}
-
-// clang-format off
-
-void MLPOD::podMatMul(double *c, double *a, double *b, int r1, int c1, int c2)
-{
-  int i, j, k;
-
-  for(j = 0; j < c2; j++)
-    for(i = 0; i < r1; i++)
-      c[i + r1*j] = 0.0;
-
-  for(j = 0; j < c2; j++)
-    for(i = 0; i < r1; i++)
-      for(k = 0; k < c1; k++)
-        c[i + r1*j] += a[i + r1*k] * b[k + c1*j];
-}
-
-void MLPOD::podArrayFill(int* output, int start, int length)
-{
-        for (int j = 0; j < length; ++j)
-                output[j] = start + j;
-}
-
-void MLPOD::podArraySetValue(double *y, double a, int n)
-{
-  for (int i=0; i<n; i++)
-    y[i] = a;
-}
-
-void MLPOD::podArrayCopy(double *y, double *x, int n)
-{
-  for (int i=0; i<n; i++)
-    y[i] = x[i];
-}
-
-void podsnapshots(double *rbf, double *xij, double *besselparams, double rin, double rcut,
-    int besseldegree, int inversedegree, int nbesselpars, int N)
-{
-  double rmax = rcut-rin;
-  for (int n=0; n<N; n++) {
-    double dij = xij[n];
-
-    double r = dij - rin;
-    double y = r/rmax;
-    double y2 = y*y;
-    double y3 = 1.0 - y2*y;
-    double y4 = y3*y3 + 1e-6;
-    double y5 = sqrt(y4);
-    double y6 = exp(-1.0/y5);
-    double fcut = y6/exp(-1.0);
-
-    for (int j=0; j<nbesselpars; j++) {
-      double alpha = besselparams[j];
-      if (fabs(alpha) <= 1.0e-6) alpha = 1e-3;
-      double x =  (1.0 - exp(-alpha*r/rmax))/(1.0-exp(-alpha));
-
-      for (int i=0; i<besseldegree; i++) {
-        double a = (i+1)*MY_PI;
-        double b = (sqrt(2.0/(rmax))/(i+1));
-        int nij = n + N*i + N*besseldegree*j;
-        rbf[nij] = b*fcut*sin(a*x)/r;
-      }
-    }
-
-    for (int i=0; i<inversedegree; i++) {
-      int p = besseldegree*nbesselpars + i;
-      int nij = n + N*p;
-      double a = powint(dij, i+1);
-      rbf[nij] = fcut/a;
-    }
-  }
-}
-
-void MLPOD::podeigenvaluedecomposition(double *Phi, double *Lambda, double *besselparams, double rin, double rcut,
-    int besseldegree, int inversedegree, int nbesselpars, int N)
-{
-  int ns = besseldegree*nbesselpars + inversedegree;
-
-  // variables used in eigenvaluedecomposition
-
-  double *xij;
-  double *S;
-  double *Q;
-  double *A;
-  double *b;
-
-  memory->create(xij, N, "pod:xij");
-  memory->create(S, N*ns, "pod:S");
-  memory->create(Q, N*ns, "pod:Q");
-  memory->create(A, ns*ns, "pod:A");
-  memory->create(b, ns, "pod:ns");
-
-  for (int i=0; i<N; i++)
-    xij[i] = (rin+1e-6) + (rcut-rin-1e-6)*(i*1.0/(N-1));
-
-  podsnapshots(S, xij, besselparams, rin, rcut, besseldegree, inversedegree, nbesselpars, N);
-
-  char chn = 'N';
-  char cht = 'T';
-  char chv = 'V';
-  char chu = 'U';
-  double alpha = 1.0, beta = 0.0;
-  DGEMM(&cht, &chn, &ns, &ns, &N, &alpha, S, &N, S, &N, &beta, A, &ns);
-
-  for (int i=0; i<ns*ns; i++)
-    A[i] = A[i]*(1.0/N);
-
-  // declare function input for DSYEV
-  // char jobz = 'V';  // 'V':  Compute eigenvalues and eigenvectors
-  // char uplo = 'U';  // 'U':  Upper triangle of A is stored
-
-  int lwork = ns * ns;  // the length of the array work, lwork >= max(1,3*N-1)
-  int info = 1;     // = 0:  successful exit
-  std::vector<double> work(lwork);
-  DSYEV(&chv, &chu, &ns, A, &ns, b, work.data(), &lwork, &info);
-
-  // order eigenvalues and eigenvectors from largest to smallest
-
-  for (int j=0; j<ns; j++)
-    for (int i=0; i<ns; i++)
-      Phi[i + ns*(ns-j-1)] = A[i + ns*j];
-
-  for (int i=0; i<ns; i++)
-    Lambda[(ns-i-1)] = b[i];
-
-  DGEMM(&chn, &chn, &N, &ns, &ns, &alpha, S, &N, Phi, &ns, &beta, Q, &N);
-  for (int i=0; i<(N-1); i++)
-    xij[i] = xij[i+1] - xij[i];
-  double area;
-  for (int m=0; m<ns; m++) {
-    area = 0.0;
-    for (int i=0; i<(N-1); i++)
-      area += 0.5*xij[i]*(Q[i + N*m]*Q[i + N*m] + Q[i+1 + N*m]*Q[i+1 + N*m]);
-    for (int i=0; i<ns; i++)
-      Phi[i + ns*m] = Phi[i + ns*m]/sqrt(area);
-  }
-
- // enforce consistent signs for the eigenvectors
-
-  for (int m=0; m<ns; m++) {
-    if (Phi[m + ns*m] < 0.0) {
-      for (int i=0; i<ns; i++)
-        Phi[i + ns*m] = -Phi[i + ns*m];
-    }
-  }
-
-  memory->destroy(xij);
-  memory->destroy(S);
-  memory->destroy(A);
-  memory->destroy(b);
-  memory->destroy(Q);
-}
-
-void MLPOD::read_pod(const std::string &pod_file)
-{
-  pod.nbesselpars = 3;
-  delete[] pod.besselparams;
-  pod.besselparams = new double[3];
-  delete[] pod.pbc;
-  pod.pbc = new int[3];
-
-  pod.besselparams[0] = 0.0;
-  pod.besselparams[1] = 2.0;
-  pod.besselparams[2] = 4.0;
-
-  pod.nelements = 0;
-  pod.onebody = 1;
-  pod.besseldegree = 3;
-  pod.inversedegree = 6;
-  pod.quadraticpod = 0;
-  pod.rin = 0.5;
-  pod.rcut = 4.6;
-
-  pod.snaptwojmax = 0;
-  pod.snapchemflag = 0;
-  pod.snaprfac0 = 0.99363;
-
-  sna.twojmax = 0;
-  sna.ntypes = 0;
-
-  std::string podfilename = pod_file;
-  FILE *fppod;
-  if (comm->me == 0) {
-
-    fppod = utils::open_potential(podfilename,lmp,nullptr);
-    if (fppod == nullptr)
-      error->one(FLERR,"Cannot open POD coefficient file {}: ",
-                                   podfilename, utils::getsyserror());
-  }
-
-  // loop through lines of POD file and parse keywords
-
-  char line[MAXLINE] = {'\0'};
-  char *ptr;
-  int eof = 0;
-  while (true) {
-    if (comm->me == 0) {
-      ptr = fgets(line,MAXLINE,fppod);
-      if (ptr == nullptr) {
-        eof = 1;
-        fclose(fppod);
-      }
-    }
-    MPI_Bcast(&eof,1,MPI_INT,0,world);
-    if (eof) break;
-    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
-
-    // words = ptrs to all words in line
-    // strip single and double quotes from words
-
-    std::vector<std::string> words;
-    try {
-      words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector();
-    } catch (TokenizerException &) {
-      // ignore
-    }
-
-    if (words.size() == 0) continue;
-
-    const auto &keywd = words[0];
-
-    if (keywd == "species") {
-      pod.nelements = words.size()-1;
-      for (int ielem = 1; ielem <= pod.nelements; ielem++) {
-        pod.species.push_back(words[ielem]);
-      }
-    }
-
-    if (keywd == "pbc") {
-      if (words.size() != 4)
-        error->one(FLERR,"Improper POD file.", utils::getsyserror());
-      pod.pbc[0] = utils::inumeric(FLERR,words[1],false,lmp);
-      pod.pbc[1] = utils::inumeric(FLERR,words[2],false,lmp);
-      pod.pbc[2] = utils::inumeric(FLERR,words[3],false,lmp);
-    }
-
-    if ((keywd != "#") && (keywd != "species") && (keywd != "pbc")) {
-
-      if (words.size() != 2)
-        error->one(FLERR,"Improper POD file.", utils::getsyserror());
-
-      if (keywd == "rin") pod.rin = utils::numeric(FLERR,words[1],false,lmp);
-      if (keywd == "rcut") pod.rcut = utils::numeric(FLERR,words[1],false,lmp);
-      if (keywd == "bessel_scaling_parameter1")
-        pod.besselparams[0] = utils::numeric(FLERR,words[1],false,lmp);
-      if (keywd == "bessel_scaling_parameter2")
-        pod.besselparams[1] = utils::numeric(FLERR,words[1],false,lmp);
-      if (keywd == "bessel_scaling_parameter3")
-        pod.besselparams[2] = utils::numeric(FLERR,words[1],false,lmp);
-      if (keywd == "bessel_polynomial_degree")
-        pod.besseldegree = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "inverse_polynomial_degree")
-        pod.inversedegree = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "onebody") pod.onebody = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "twobody_bessel_polynomial_degree")
-        pod.twobody[0] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "twobody_inverse_polynomial_degree")
-        pod.twobody[1] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "twobody_number_radial_basis_functions")
-        pod.twobody[2] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "threebody_bessel_polynomial_degree")
-        pod.threebody[0] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "threebody_inverse_polynomial_degree")
-        pod.threebody[1] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "threebody_number_radial_basis_functions")
-        pod.threebody[2] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "threebody_number_angular_basis_functions")
-        pod.threebody[3] = utils::inumeric(FLERR,words[1],false,lmp)-1;
-      if (keywd == "fourbody_bessel_polynomial_degree")
-        pod.fourbody[0] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "fourbody_inverse_polynomial_degree")
-        pod.fourbody[1] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "fourbody_number_radial_basis_functions")
-        pod.fourbody[2] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "fourbody_snap_twojmax")
-        pod.snaptwojmax = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "fourbody_snap_chemflag")
-        pod.snapchemflag = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "fourbody_snap_rfac0")
-        pod.snaprfac0 = utils::numeric(FLERR,words[1],false,lmp);
-      if (keywd == "fourbody_snap_neighbor_weight1")
-        pod.snapelementweight[0] = utils::numeric(FLERR,words[1],false,lmp);
-      if (keywd == "fourbody_snap_neighbor_weight2")
-        pod.snapelementweight[1] = utils::numeric(FLERR,words[1],false,lmp);
-      if (keywd == "fourbody_snap_neighbor_weight3")
-        pod.snapelementweight[2] = utils::numeric(FLERR,words[1],false,lmp);
-      if (keywd == "fourbody_snap_neighbor_weight4")
-        pod.snapelementweight[3] = utils::numeric(FLERR,words[1],false,lmp);
-      if (keywd == "fourbody_snap_neighbor_weight5")
-        pod.snapelementweight[4] = utils::numeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic_pod_potential")
-        pod.quadraticpod = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic22_number_twobody_basis_functions")
-        pod.quadratic22[0] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic22_number_twobody_basis_functions")
-        pod.quadratic22[1] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic23_number_twobody_basis_functions")
-        pod.quadratic23[0] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic23_number_threebody_basis_functions")
-        pod.quadratic23[1] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic24_number_twobody_basis_functions")
-        pod.quadratic24[0] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic24_number_fourbody_basis_functions")
-        pod.quadratic24[1] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic33_number_threebody_basis_functions")
-        pod.quadratic33[0] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic33_number_threebody_basis_functions")
-        pod.quadratic33[1] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic34_number_threebody_basis_functions")
-        pod.quadratic34[0] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic34_number_fourbody_basis_functions")
-        pod.quadratic34[1] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic44_number_fourbody_basis_functions")
-        pod.quadratic44[0] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "quadratic44_number_fourbody_basis_functions")
-        pod.quadratic44[1] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "cubic234_number_twobody_basis_functions")
-        pod.cubic234[0] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "cubic234_number_threebody_basis_functions")
-        pod.cubic234[1] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "cubic234_number_fourbody_basis_functions")
-        pod.cubic234[2] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "cubic333_number_threebody_basis_functions")
-        pod.cubic333[0] = utils::inumeric(FLERR,words[1],false,lmp);
-      if (keywd == "cubic444_number_fourbody_basis_functions")
-        pod.cubic444[0] = utils::inumeric(FLERR,words[1],false,lmp);
-    }
-  }
-
-  pod.twobody[0] = pod.besseldegree;
-  pod.twobody[1] = pod.inversedegree;
-  pod.threebody[0] = pod.besseldegree;
-  pod.threebody[1] = pod.inversedegree;
-
-  // number of snapshots
-
-  pod.ns2 = pod.nbesselpars*pod.twobody[0] + pod.twobody[1];
-  pod.ns3 = pod.nbesselpars*pod.threebody[0] + pod.threebody[1];
-  pod.ns4 = pod.nbesselpars*pod.fourbody[0] + pod.fourbody[1];
-
-  for (int i = 0; i < pod.nbesselpars; i++)
-    if (fabs(pod.besselparams[i]) < 1e-3) pod.besselparams[i] = 1e-3;
-
-  // allocate memory for eigenvectors and eigenvalues
-
-  if (pod.ns2 > 0) {
-    memory->create(pod.Phi2, pod.ns2*pod.ns2, "pod:pod_Phi2");
-    memory->create(pod.Lambda2, pod.ns2, "pod:pod_Lambda2");
-  }
-  if (pod.ns3 > 0) {
-    memory->create(pod.Phi3, pod.ns3*pod.ns3, "pod:pod_Phi3");
-    memory->create(pod.Lambda3, pod.ns3, "pod:pod_Lambda3");
-  }
-  if (pod.ns4 > 0) {
-    memory->create(pod.Phi4, pod.ns4*pod.ns4, "pod:pod_Phi4");
-    memory->create(pod.Lambda4, pod.ns4, "pod:pod_Lambda4");
-  }
-
-  if (pod.ns2 > 0) {
-    podeigenvaluedecomposition(pod.Phi2, pod.Lambda2, pod.besselparams, pod.rin, pod.rcut,
-      pod.twobody[0], pod.twobody[1], pod.nbesselpars, 2000);
-
-//     /* Print eigenvalues */
-//     print_matrix( "Eigenvalues for two-body potential:", 1, pod.ns2, pod.Lambda2, 1 );
-//
-//     /* Print eigenvectors */
-//     print_matrix( "Eigenvectors for two-body potential:", pod.ns2, pod.ns2, pod.Phi2, pod.ns2);
-  }
-  if (pod.ns3 > 0) {
-    podeigenvaluedecomposition(pod.Phi3, pod.Lambda3, pod.besselparams, pod.rin, pod.rcut,
-      pod.threebody[0], pod.threebody[1], pod.nbesselpars, 2000);
-  }
-  if (pod.ns4 > 0) {
-    podeigenvaluedecomposition(pod.Phi4, pod.Lambda4, pod.besselparams, pod.rin, pod.rcut,
-      pod.fourbody[0], pod.fourbody[1], pod.nbesselpars, 2000);
-  }
-
-  // number of chemical combinations
-
-  pod.nc2 = pod.nelements*(pod.nelements+1)/2;
-  pod.nc3 = pod.nelements*pod.nelements*(pod.nelements+1)/2;
-  pod.nc4 = pod.snapchemflag ? pod.nelements*pod.nelements*pod.nelements*pod.nelements : pod.nelements;
-
-  // number of basis functions and descriptors for one-body potential
-
-  if (pod.onebody==1) {
-    pod.nbf1 = 1;
-    pod.nd1 = pod.nelements;
-  } else {
-    pod.nbf1 = 0;
-    pod.nd1 = 0;
-  }
-
-  // number of basis functions and descriptors for two-body potential
-
-  pod.nbf2 = pod.twobody[2];
-  pod.nd2 = pod.nbf2*pod.nc2;
-
-  // number of basis functions and descriptors for three-body potential
-
-  pod.nrbf3 = pod.threebody[2];
-  pod.nabf3 = pod.threebody[3];
-  pod.nbf3 = pod.nrbf3*(1 + pod.nabf3);
-  pod.nd3 = pod.nbf3*pod.nc3;
-
-  // number of basis functions and descriptors for four-body potential
-
-  int twojmax = pod.snaptwojmax;
-  int idxb_count = 0;
-  if (twojmax > 0) {
-    for(int j1 = 0; j1 <= twojmax; j1++)
-      for(int j2 = 0; j2 <= j1; j2++)
-        for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
-          if (j >= j1) idxb_count++;
-  }
-  pod.nbf4 = idxb_count;
-  pod.nd4 = pod.nbf4*pod.nc4;
-
-  if (pod.quadraticpod==1) {
-    pod.quadratic23[0] = pod.nbf2;
-    pod.quadratic23[1] = pod.nbf3;
-  }
-
-  pod.quadratic22[0] = MIN(pod.quadratic22[0], pod.nbf2);
-  pod.quadratic22[1] = MIN(pod.quadratic22[1], pod.nbf2);
-  pod.quadratic23[0] = MIN(pod.quadratic23[0], pod.nbf2);
-  pod.quadratic23[1] = MIN(pod.quadratic23[1], pod.nbf3);
-  pod.quadratic24[0] = MIN(pod.quadratic24[0], pod.nbf2);
-  pod.quadratic24[1] = MIN(pod.quadratic24[1], pod.nbf4);
-  pod.quadratic33[0] = MIN(pod.quadratic33[0], pod.nbf3);
-  pod.quadratic33[1] = MIN(pod.quadratic33[1], pod.nbf3);
-  pod.quadratic34[0] = MIN(pod.quadratic34[0], pod.nbf3);
-  pod.quadratic34[1] = MIN(pod.quadratic34[1], pod.nbf4);
-  pod.quadratic44[0] = MIN(pod.quadratic44[0], pod.nbf4);
-  pod.quadratic44[1] = MIN(pod.quadratic44[1], pod.nbf4);
-
-  pod.cubic234[0] = MIN(pod.cubic234[0], pod.nbf2);
-  pod.cubic234[1] = MIN(pod.cubic234[1], pod.nbf3);
-  pod.cubic234[2] = MIN(pod.cubic234[2], pod.nbf4);
-  pod.cubic333[0] = MIN(pod.cubic333[0], pod.nbf3);
-  pod.cubic333[1] = MIN(pod.cubic333[0], pod.nbf3);
-  pod.cubic333[2] = MIN(pod.cubic333[0], pod.nbf3);
-  pod.cubic444[0] = MIN(pod.cubic444[0], pod.nbf4);
-  pod.cubic444[1] = MIN(pod.cubic444[0], pod.nbf4);
-  pod.cubic444[2] = MIN(pod.cubic444[0], pod.nbf4);
-
-  // number of descriptors for quadratic POD potentials
-
-  pod.nd22 = pod.quadratic22[0]*pod.quadratic22[1]*pod.nc2*pod.nc2;
-  pod.nd23 = pod.quadratic23[0]*pod.quadratic23[1]*pod.nc2*pod.nc3;
-  pod.nd24 = pod.quadratic24[0]*pod.quadratic24[1]*pod.nc2*pod.nc4;
-  pod.nd33 = pod.quadratic33[0]*pod.quadratic33[1]*pod.nc3*pod.nc3;
-  pod.nd34 = pod.quadratic34[0]*pod.quadratic34[1]*pod.nc3*pod.nc4;
-  pod.nd44 = pod.quadratic44[0]*pod.quadratic44[1]*pod.nc4*pod.nc4;
-
-  int nq;
-  nq = pod.quadratic22[0]*pod.nc2; pod.nd22 = nq*(nq+1)/2;
-  nq = pod.quadratic33[0]*pod.nc3; pod.nd33 = nq*(nq+1)/2;
-  nq = pod.quadratic44[0]*pod.nc4; pod.nd44 = nq*(nq+1)/2;
-
-  // number of descriptors for cubic POD potentials
-
-  pod.nd234 = pod.cubic234[0]*pod.cubic234[1]*pod.cubic234[2]*pod.nc2*pod.nc3*pod.nc4;
-  nq = pod.cubic333[0]*pod.nc3; pod.nd333 = nq*(nq+1)*(nq+2)/6;
-  nq = pod.cubic444[0]*pod.nc4; pod.nd444 = nq*(nq+1)*(nq+2)/6;
-
-  // total number of descriptors for all POD potentials
-
-  pod.nd = pod.nd1 + pod.nd2 + pod.nd3 + pod.nd4 + pod.nd22 + pod.nd23 + pod.nd24 +
-       pod.nd33 + pod.nd34 + pod.nd44 + pod.nd234 + pod.nd333 + pod.nd444;
-  pod.nd1234 = pod.nd1 + pod.nd2 + pod.nd3 + pod.nd4;
-
-  int nelements = pod.nelements;
-  delete[] pod.elemindex;
-  pod.elemindex = new int[nelements*nelements];
-
-  int k = 1;
-  for (int i=0; i < nelements; i++) {
-    for (int j=i; j < nelements; j++) {
-      pod.elemindex[i + nelements*j] = k;
-      pod.elemindex[j + nelements*i] = k;
-      k += 1;
-    }
-  }
-
-  if (comm->me == 0) {
-    utils::logmesg(lmp, "**************** Begin of POD Potentials ****************\n");
-    utils::logmesg(lmp, "species: ");
-    for (int i=0; i<pod.nelements; i++)
-      utils::logmesg(lmp, "{} ", pod.species[i]);
-    utils::logmesg(lmp, "\n");
-    utils::logmesg(lmp, "periodic boundary conditions: {} {} {}\n", pod.pbc[0], pod.pbc[1], pod.pbc[2]);
-    utils::logmesg(lmp, "inner cut-off radius: {}\n", pod.rin);
-    utils::logmesg(lmp, "outer cut-off radius: {}\n", pod.rcut);
-    utils::logmesg(lmp, "bessel polynomial degree: {}\n", pod.besseldegree);
-    utils::logmesg(lmp, "inverse polynomial degree: {}\n", pod.inversedegree);
-    utils::logmesg(lmp, "one-body potential: {}\n", pod.onebody);
-    utils::logmesg(lmp, "two-body potential: {} {} {}\n", pod.twobody[0], pod.twobody[1], pod.twobody[2]);
-    utils::logmesg(lmp, "three-body potential: {} {} {} {}\n", pod.threebody[0], pod.threebody[1], pod.threebody[2], pod.threebody[3]+1);
-    utils::logmesg(lmp, "four-body SNAP potential: {} {}\n", pod.snaptwojmax, pod.snapchemflag);
-    utils::logmesg(lmp, "quadratic POD potential: {}\n", pod.quadraticpod);
-    utils::logmesg(lmp, "number of basis functions for one-body potential: {}\n", pod.nbf1);
-    utils::logmesg(lmp, "number of basis functions for two-body potential: {}\n", pod.nbf2);
-    utils::logmesg(lmp, "number of basis functions for three-body potential: {}\n", pod.nbf3);
-    utils::logmesg(lmp, "number of basis functions for four-body potential: {}\n", pod.nbf4);
-    utils::logmesg(lmp, "number of descriptors for one-body potential: {}\n", pod.nd1);
-    utils::logmesg(lmp, "number of descriptors for two-body potential: {}\n", pod.nd2);
-    utils::logmesg(lmp, "number of descriptors for three-body potential: {}\n", pod.nd3);
-    utils::logmesg(lmp, "number of descriptors for four-body potential: {}\n", pod.nd4);
-    utils::logmesg(lmp, "number of descriptors for quadratic POD potential: {}\n", pod.nd23);
-    utils::logmesg(lmp, "total number of descriptors for all potentials: {}\n", pod.nd);
-    utils::logmesg(lmp, "**************** End of POD Potentials ****************\n\n");
-  }
-}
-
-void MLPOD::read_coeff_file(const std::string &coeff_file)
-{
-
-  std::string coefffilename = coeff_file;
-  FILE *fpcoeff;
-  if (comm->me == 0) {
-
-    fpcoeff = utils::open_potential(coefffilename,lmp,nullptr);
-    if (fpcoeff == nullptr)
-      error->one(FLERR,"Cannot open POD coefficient file {}: ",
-                                   coefffilename, utils::getsyserror());
-  }
-
-  // check format for first line of file
-
-  char line[MAXLINE] = {'\0'};
-  char *ptr;
-  int eof = 0;
-  int nwords = 0;
-  while (nwords == 0) {
-    if (comm->me == 0) {
-      ptr = fgets(line,MAXLINE,fpcoeff);
-      if (ptr == nullptr) {
-        eof = 1;
-        fclose(fpcoeff);
-      }
-    }
-    MPI_Bcast(&eof,1,MPI_INT,0,world);
-    if (eof) break;
-    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
-
-    // strip comment, skip line if blank
-
-    nwords = utils::count_words(utils::trim_comment(line));
-  }
-
-  if (nwords != 2)
-    error->all(FLERR,"Incorrect format in POD coefficient file");
-
-  // strip single and double quotes from words
-
-  int ncoeffall;
-  std::string tmp_str;
-  try {
-    ValueTokenizer words(utils::trim_comment(line),"\"' \t\n\r\f");
-    tmp_str = words.next_string();
-    ncoeffall = words.next_int();
-  } catch (TokenizerException &e) {
-    error->all(FLERR,"Incorrect format in POD coefficient file: {}", e.what());
-  }
-
-  // loop over single block of coefficients and insert values in pod.coeff
-
-  memory->create(pod.coeff, ncoeffall, "pod:pod_coeff");
-
-  for (int icoeff = 0; icoeff < ncoeffall; icoeff++) {
-    if (comm->me == 0) {
-      ptr = fgets(line,MAXLINE,fpcoeff);
-      if (ptr == nullptr) {
-        eof = 1;
-        fclose(fpcoeff);
-      }
-    }
-
-    MPI_Bcast(&eof,1,MPI_INT,0,world);
-    if (eof)
-      error->all(FLERR,"Incorrect format in POD coefficient file");
-    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
-
-    try {
-      ValueTokenizer coeff(utils::trim_comment(line));
-      if (coeff.count() != 1)
-        error->all(FLERR,"Incorrect format in POD coefficient file");
-
-      pod.coeff[icoeff] = coeff.next_double();
-    } catch (TokenizerException &e) {
-      error->all(FLERR,"Incorrect format in POD coefficient file: {}", e.what());
-    }
-  }
-  if (comm->me == 0) {
-    if (!eof) fclose(fpcoeff);
-  }
-}
-
-/*********************************************************************************************************/
-
-void MLPOD::linear_descriptors(double *gd, double *efatom, double *y, double *tmpmem,
-                               int *atomtype, int *alist, int *pairlist, int * /*pairnum*/,
-                               int *pairnumsum, int *tmpint, int natom, int Nij)
-{
-  int dim = 3;
-  int nelements = pod.nelements;
-  int nbesselpars = pod.nbesselpars;
-  int nrbf2 = pod.nbf2;
-  int nabf3 = pod.nabf3;
-  int nrbf3 = pod.nrbf3;
-  int nd1 = pod.nd1;
-  int nd2 = pod.nd2;
-  int nd3 = pod.nd3;
-  int nd4 = pod.nd4;
-  int nd1234 = nd1+nd2+nd3+nd4;
-  int *pdegree2 = pod.twobody;
-  int *elemindex = pod.elemindex;
-  double rin = pod.rin;
-  double rcut = pod.rcut;
-  double *Phi2 = pod.Phi2;
-  double *besselparams = pod.besselparams;
-
-  double *fatom1 = &efatom[0];
-  double *fatom2 = &efatom[dim*natom*(nd1)];
-  double *fatom3 = &efatom[dim*natom*(nd1+nd2)];
-  double *fatom4 = &efatom[dim*natom*(nd1+nd2+nd3)];
-  double *eatom1 = &efatom[dim*natom*(nd1+nd2+nd3+nd4)];
-  double *eatom2 = &efatom[dim*natom*(nd1+nd2+nd3+nd4)+natom*nd1];
-  double *eatom3 = &efatom[dim*natom*(nd1+nd2+nd3+nd4)+natom*(nd1+nd2)];
-  double *eatom4 = &efatom[dim*natom*(nd1+nd2+nd3+nd4)+natom*(nd1+nd2+nd3)];
-
-  podArraySetValue(fatom1, 0.0, (1+dim)*natom*(nd1+nd2+nd3+nd4));
-
-  double *rij = &tmpmem[0]; // 3*Nij
-  int *ai = &tmpint[0];   // Nij
-  int *aj = &tmpint[Nij];   // Nij
-  int *ti = &tmpint[2*Nij]; // Nij
-  int *tj = &tmpint[3*Nij]; // Nij
-  podNeighPairs(rij, y, ai, aj, ti, tj, pairlist, pairnumsum, atomtype,
-        alist, natom, dim);
-
-  // peratom descriptors for one-body, two-body, and three-body linear potentials
-
-  poddesc(eatom1, fatom1, eatom2, fatom2, eatom3, fatom3, rij, Phi2, besselparams,
-      &tmpmem[3*Nij], rin, rcut, pairnumsum, atomtype, ai, aj, ti, tj, elemindex, pdegree2,
-      nbesselpars, nrbf2, nrbf3, nabf3, nelements, Nij, natom);
-
-  if (pod.snaptwojmax > 0)
-    snapdesc(eatom4, fatom4, rij, &tmpmem[3*Nij], atomtype, ai, aj, ti, tj, natom, Nij);
-
-  // global descriptors for one-body, two-body, three-body, and four-bodt linear potentials
-
-  podArraySetValue(tmpmem, 1.0, natom);
-
-  char cht = 'T';
-  double one = 1.0, zero = 0.0;
-  int inc1 = 1;
-  DGEMV(&cht, &natom, &nd1234, &one, eatom1, &natom, tmpmem, &inc1, &zero, gd, &inc1);
-}
-
-void MLPOD::quadratic_descriptors(double* d23, double *dd23, double* d2, double *d3, double* dd2, double *dd3,
-    int M2, int M3, int N)
-{
-  for (int m3 = 0; m3<M3; m3++)
-    for (int m2 = 0; m2<M2; m2++)
-    {
-      int m = m2 + M2*m3;
-      d23[m] = d2[m2]*d3[m3];
-      for (int n=0; n<N; n++)
-        dd23[n + N*m] = d2[m2]*dd3[n + N*m3] + dd2[n + N*m2]*d3[m3];
-    }
-}
-
-void MLPOD::quadratic_descriptors(double* d33, double *dd33, double *d3, double *dd3, int M3, int N)
-{
-  int m = 0;
-  for (int m3 = 0; m3<M3; m3++)
-    for (int m2 = m3; m2<M3; m2++)
-    {
-      d33[m] = d3[m2]*d3[m3];
-      for (int n=0; n<N; n++)
-        dd33[n + N*m] = d3[m2]*dd3[n + N*m3] + dd3[n + N*m2]*d3[m3];
-      m += 1;
-    }
-}
-
-void MLPOD::cubic_descriptors(double* d234, double *dd234, double* d2, double *d3, double *d4,
-    double* dd2, double *dd3, double *dd4, int M2, int M3, int M4, int N)
-{
-  for (int m4 = 0; m4<M4; m4++)
-    for (int m3 = 0; m3<M3; m3++)
-      for (int m2 = 0; m2<M2; m2++)
-      {
-        int m = m2 + M2*m3 + M2*M3*m4;
-        d234[m] = d2[m2]*d3[m3]*d4[m4];
-        for (int n=0; n<N; n++)
-          dd234[n + N*m] = d2[m2]*d3[m3]*dd4[n + N*m4] +
-                   d2[m2]*dd3[n + N*m3]*d4[m4] +
-                   dd2[n + N*m2]*d3[m3]*d4[m4];
-      }
-}
-
-void MLPOD::cubic_descriptors(double* d333, double *Dd333, double *d3, double *Dd3, int M3, int N)
-{
-  int m = 0;
-  for (int m3 = 0; m3<M3; m3++)
-    for (int m2 = m3; m2<M3; m2++)
-      for (int m1 = m2; m1<M3; m1++)
-      {
-        d333[m] = d3[m1]*d3[m2]*d3[m3];
-        for (int n=0; n<N; n++)
-          Dd333[n + N*m] = d3[m1]*d3[m2]*Dd3[n + N*m3] + d3[m1]*Dd3[n + N*m2]*d3[m3] + Dd3[n + N*m1]*d3[m2]*d3[m3];
-        m += 1;
-      }
-}
-
-double MLPOD::quadratic_coefficients(double *c2, double *c3, double *d2, double *d3,
-    double *coeff23, int *quadratic, int nc2, int nc3)
-{
-  int nd2 = quadratic[0]*nc2;
-  int nd3 = quadratic[1]*nc3;
-
-  double energy = 0.0;
-  int m = 0;
-  for (int j=0; j< nd3; j++)
-    for (int k=0; k< nd2; k++) {
-      energy += coeff23[m]*d3[j]*d2[k];
-      c2[k] += coeff23[m]*d3[j];
-      c3[j] += coeff23[m]*d2[k];
-      m += 1;
-    }
-
-  return energy;
-}
-
-double MLPOD::quadratic_coefficients(double *c3, double *d3, double *coeff33,
-    int *quadratic, int nc3)
-{
-  int nd3 = quadratic[0]*nc3;
-
-  double energy = 0.0;
-  int m = 0;
-  for (int j=0; j< nd3; j++)
-    for (int k=j; k< nd3; k++) {
-      energy += coeff33[m]*d3[j]*d3[k];
-      c3[k] += coeff33[m]*d3[j];
-      c3[j] += coeff33[m]*d3[k];
-      m += 1;
-    }
-
-  return energy;
-}
-
-double MLPOD::cubic_coefficients(double *c2, double *c3, double *c4, double *d2, double *d3, double *d4,
-    double *coeff234, int *cubic, int nc2, int nc3, int nc4)
-{
-  int nd2 = cubic[0]*nc2;
-  int nd3 = cubic[1]*nc3;
-  int nd4 = cubic[2]*nc4;
-
-  double energy = 0.0;
-  int m = 0;
-  for (int i=0; i< nd4; i++)
-    for (int j=0; j< nd3; j++)
-      for (int k=0; k< nd2; k++) {
-        energy += coeff234[m]*d4[i]*d3[j]*d2[k];
-        c2[k] += coeff234[m]*d4[i]*d3[j];
-        c3[j] += coeff234[m]*d4[i]*d2[k];
-        c4[i] += coeff234[m]*d3[j]*d2[k];
-        m += 1;
-      }
-
-  return energy;
-}
-
-double MLPOD::cubic_coefficients(double *c3, double *d3, double *coeff333, int *cubic, int nc3)
-{
-  int nd3 = cubic[0]*nc3;
-
-  double energy = 0.0;
-
-  int m = 0;
-  for (int i=0; i< nd3; i++)
-    for (int j=i; j< nd3; j++)
-      for (int k=j; k< nd3; k++) {
-        energy += coeff333[m]*d3[i]*d3[j]*d3[k];
-        c3[k] += coeff333[m]*d3[i]*d3[j];
-        c3[j] += coeff333[m]*d3[i]*d3[k];
-        c3[i] += coeff333[m]*d3[j]*d3[k];
-        m += 1;
-      }
-
-  return energy;
-}
-
-double MLPOD::quadratic_coefficients(double *ce2, double *ce3, double *c2, double *c3, double *d2, double *d3,
-    double *coeff23, int *quadratic, int nc2, int nc3)
-{
-  int nd2 = quadratic[0]*nc2;
-  int nd3 = quadratic[1]*nc3;
-
-  double energy = 0.0;
-  int m = 0;
-  for (int j=0; j< nd3; j++)
-    for (int k=0; k< nd2; k++) {
-      energy += coeff23[m]*d3[j]*d2[k];
-      c2[k] += coeff23[m]*d3[j];
-      c3[j] += coeff23[m]*d2[k];
-      ce2[k] += coeff23[m]*d3[j]/2.0;
-      ce3[j] += coeff23[m]*d2[k]/2.0;
-      m += 1;
-    }
-
-  return energy;
-}
-
-double MLPOD::quadratic_coefficients(double *ce3, double *c3, double *d3, double *coeff33,
-    int *quadratic, int nc3)
-{
-  int nd3 = quadratic[0]*nc3;
-
-  double energy = 0.0;
-  int m = 0;
-  for (int j=0; j< nd3; j++)
-    for (int k=j; k< nd3; k++) {
-      energy += coeff33[m]*d3[j]*d3[k];
-      c3[k] += coeff33[m]*d3[j];
-      c3[j] += coeff33[m]*d3[k];
-      ce3[k] += coeff33[m]*d3[j];
-      ce3[j] += coeff33[m]*d3[k];
-      m += 1;
-    }
-
-  return energy;
-}
-
-double MLPOD::cubic_coefficients(double *ce2, double *ce3, double *ce4, double *c2, double *c3, double *c4,
-    double *d2, double *d3, double *d4, double *coeff234, int *cubic, int nc2, int nc3, int nc4)
-{
-  int nd2 = cubic[0]*nc2;
-  int nd3 = cubic[1]*nc3;
-  int nd4 = cubic[2]*nc4;
-
-  double energy = 0.0;
-  int m = 0;
-  for (int i=0; i< nd4; i++)
-    for (int j=0; j< nd3; j++)
-      for (int k=0; k< nd2; k++) {
-        energy += coeff234[m]*d4[i]*d3[j]*d2[k];
-        c2[k] += coeff234[m]*d4[i]*d3[j];
-        c3[j] += coeff234[m]*d4[i]*d2[k];
-        c4[i] += coeff234[m]*d3[j]*d2[k];
-        ce2[k] += coeff234[m]*d4[i]*d3[j]/3.0;
-        ce3[j] += coeff234[m]*d4[i]*d2[k]/3.0;
-        ce4[i] += coeff234[m]*d3[j]*d2[k]/3.0;
-        m += 1;
-      }
-
-  return energy;
-}
-
-double MLPOD::cubic_coefficients(double *ce3, double *c3, double *d3, double *coeff333, int *cubic, int nc3)
-{
-  int nd3 = cubic[0]*nc3;
-
-  double energy = 0.0;
-
-  int m = 0;
-  for (int i=0; i< nd3; i++)
-    for (int j=i; j< nd3; j++)
-      for (int k=j; k< nd3; k++) {
-        energy += coeff333[m]*d3[i]*d3[j]*d3[k];
-        c3[k] += coeff333[m]*d3[i]*d3[j];
-        c3[j] += coeff333[m]*d3[i]*d3[k];
-        c3[i] += coeff333[m]*d3[j]*d3[k];
-        ce3[k] += coeff333[m]*d3[i]*d3[j];
-        ce3[j] += coeff333[m]*d3[i]*d3[k];
-        ce3[i] += coeff333[m]*d3[j]*d3[k];
-        m += 1;
-      }
-
-  return energy;
-}
-
-double MLPOD::calculate_energyforce(double *force, double *gd, double *gdd, double *coeff, double *tmp, int natom)
-{
-  int dim = 3;
-  int nforce = dim*natom;
-  int nd1 = pod.nd1;
-  int nd2 = pod.nd2;
-  int nd3 = pod.nd3;
-  int nd4 = pod.nd4;
-  int nd1234 = nd1+nd2+nd3+nd4;
-  int nd22 = pod.nd22;
-  int nd23 = pod.nd23;
-  int nd24 = pod.nd24;
-  int nd33 = pod.nd33;
-  int nd34 = pod.nd34;
-  int nd44 = pod.nd44;
-  int nd234 = pod.nd234;
-  int nd333 = pod.nd333;
-  int nd444 = pod.nd444;
-  int nc2 = pod.nc2;
-  int nc3 = pod.nc3;
-  int nc4 = pod.nc4;
-
-  // two-body, three-body, and four-body descriptors
-
-  double *d2 = &gd[nd1];
-  double *d3 = &gd[nd1+nd2];
-  double *d4 = &gd[nd1+nd2+nd3];
-
-  // quadratic and cubic POD coefficients
-
-  double *coeff22 = &coeff[nd1234];
-  double *coeff23 = &coeff[nd1234+nd22];
-  double *coeff24 = &coeff[nd1234+nd22+nd23];
-  double *coeff33 = &coeff[nd1234+nd22+nd23+nd24];
-  double *coeff34 = &coeff[nd1234+nd22+nd23+nd24+nd33];
-  double *coeff44 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34];
-  double *coeff234 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44];
-  double *coeff333 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234];
-  double *coeff444 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234+nd333];
-
-  // effective POD coefficients for calculating force
-
-  double *c1 = &tmp[0];
-  double *c2 = &tmp[nd1];
-  double *c3 = &tmp[nd1+nd2];
-  double *c4 = &tmp[nd1+nd2+nd3];
-
-  // calculate energy for linear potentials
-
-  double energy = 0.0;
-  for (int i=0; i< nd1234; i++) {
-    c1[i] = 0.0;
-    energy += coeff[i]*gd[i];
-  }
-
-  // calculate energy for quadratic22 potential
-
-  if (nd22 > 0) energy += quadratic_coefficients(c2, d2, coeff22, pod.quadratic22, nc2);
-
-  // calculate energy for quadratic23 potential
-
-  if (nd23 > 0) energy += quadratic_coefficients(c2, c3, d2, d3, coeff23, pod.quadratic23, nc2, nc3);
-
-  // calculate energy for quadratic24 potential
-
-  if (nd24 > 0) energy += quadratic_coefficients(c2, c4, d2, d4, coeff24, pod.quadratic24, nc2, nc4);
-
-  // calculate energy for quadratic33 potential
-
-  if (nd33 > 0) energy += quadratic_coefficients(c3, d3, coeff33, pod.quadratic33, nc3);
-
-  // calculate energy for quadratic34 potential
-
-  if (nd34 > 0) energy += quadratic_coefficients(c3, c4, d3, d4, coeff34, pod.quadratic34, nc3, nc4);
-
-  // calculate energy for quadratic44 potential
-
-  if (nd44 > 0) energy += quadratic_coefficients(c4, d4, coeff44, pod.quadratic44, nc4);
-
-  // calculate energy for cubic234 potential
-
-  if (nd234 > 0) energy += cubic_coefficients(c2, c3, c4, d2, d3, d4, coeff234, pod.cubic234, nc2, nc3, nc4);
-
-  // calculate energy for cubic333 potential
-
-  if (nd333 > 0) energy += cubic_coefficients(c3, d3, coeff333, pod.cubic333, nc3);
-
-  // calculate energy for cubic444 potential
-
-  if (nd444 > 0) energy += cubic_coefficients(c4, d4, coeff444, pod.cubic444, nc4);
-
-  // calculate effective POD coefficients
-
-  for (int i=0; i< nd1234; i++) c1[i] += coeff[i];
-
-  // calculate force = gdd * c1
-
-  char chn = 'N';
-  double one = 1.0, zero = 0.0;
-  int inc1 = 1;
-  DGEMV(&chn, &nforce, &nd1234, &one, gdd, &nforce, c1, &inc1, &zero, force, &inc1);
-
-  return energy;
-}
-
-double MLPOD::energyforce_calculation(double *force, double *gd, double *gdd, double *coeff, double *y,
-  int *atomtype, int *alist, int *pairlist, int *pairnum, int *pairnumsum, int *tmpint, int natom, int Nij)
-{
-  int dim = 3;
-  int nd1234 = pod.nd1+pod.nd2+pod.nd3+pod.nd4;
-  double *tmpmem = &gdd[dim*natom*nd1234+natom*nd1234];
-
-  // calculate POD and SNAP descriptors and their derivatives
-
-  linear_descriptors(gd, gdd, y, tmpmem, atomtype, alist,
-      pairlist, pairnum, pairnumsum, tmpint, natom, Nij);
-
-  // calculate energy and force
-
-  double energy = 0.0;
-  energy = calculate_energyforce(force, gd, gdd, coeff, &gdd[dim*natom*nd1234], natom);
-
-  return energy;
-}
-
-void MLPOD::podNeighPairs(double *xij, double *x, int *ai, int *aj,  int *ti, int *tj,
-    int *pairlist, int *pairnumsum, int *atomtype, int *alist, int inum, int dim)
-{
-  for (int ii=0; ii<inum; ii++) {  // for each atom i in the simulation box
-    int i = ii;     // atom i
-    int itype = atomtype[i];
-    int start = pairnumsum[ii];
-    int m = pairnumsum[ii+1] - start; // number of neighbors around i
-    for (int l=0; l<m ; l++) {   // loop over each atom around atom i
-      int j = pairlist[l + start];  // atom j
-      int k = start + l;
-      ai[k]    = i;
-      aj[k]    = alist[j];
-      ti[k]    = itype;
-      tj[k]    = atomtype[alist[j]];
-      for (int d=0; d<dim; d++)
-        xij[k*dim+d]   = x[j*dim+d] -  x[i*dim+d];  // xj - xi
-    }
-  }
-};
-
-void MLPOD::podradialbasis(double *rbf, double *drbf, double *xij, double *besselparams, double rin,
-    double rmax, int besseldegree, int inversedegree, int nbesselpars, int N)
-{
-  for (int n=0; n<N; n++) {
-    double xij1 = xij[0+3*n];
-    double xij2 = xij[1+3*n];
-    double xij3 = xij[2+3*n];
-
-    double dij = sqrt(xij1*xij1 + xij2*xij2 + xij3*xij3);
-    double dr1 = xij1/dij;
-    double dr2 = xij2/dij;
-    double dr3 = xij3/dij;
-
-    double r = dij - rin;
-    double y = r/rmax;
-    double y2 = y*y;
-    double y3 = 1.0 - y2*y;
-    double y4 = y3*y3 + 1e-6;
-    double y5 = sqrt(y4);
-    double y6 = exp(-1.0/y5);
-    double y7 = y4*sqrt(y4);
-    double fcut = y6/exp(-1.0);
-    double dfcut = ((3.0/(rmax*exp(-1.0)))*(y2)*y6*(y*y2 - 1.0))/y7;
-
-    for (int j=0; j<nbesselpars; j++) {
-      double alpha = besselparams[j];
-      if (fabs(alpha) <= 1.0e-6) alpha = 1e-3;
-      double x =  (1.0 - exp(-alpha*r/rmax))/(1.0-exp(-alpha));
-      double dx = (alpha/rmax)*exp(-(alpha*r/rmax))/(1.0 - exp(-alpha));
-
-      for (int i=0; i<besseldegree; i++) {
-        double a = (i+1)*MY_PI;
-        double b = (sqrt(2.0/(rmax))/(i+1));
-        int nij = n + N*i + N*besseldegree*j;
-        rbf[nij] = b*fcut*sin(a*x)/r;
-        double drbfdr = b*(dfcut*sin(a*x)/r - fcut*sin(a*x)/(r*r) + a*cos(a*x)*fcut*dx/r);
-        drbf[0 + 3*nij] = drbfdr*dr1;
-        drbf[1 + 3*nij] = drbfdr*dr2;
-        drbf[2 + 3*nij] = drbfdr*dr3;
-      }
-    }
-
-    for (int i=0; i<inversedegree; i++) {
-      int p = besseldegree*nbesselpars + i;
-      int nij = n + N*p;
-      double a = powint(dij, i+1);
-      rbf[nij] = fcut/a;
-      double drbfdr = dfcut/a - (i+1.0)*fcut/(a*dij);
-      drbf[0 + 3*nij] = drbfdr*dr1;
-      drbf[1 + 3*nij] = drbfdr*dr2;
-      drbf[2 + 3*nij] = drbfdr*dr3;
-    }
-  }
-}
-
-void MLPOD::podtally2b(double *eatom, double *fatom, double *eij, double *fij, int *ai, int *aj,
-    int *ti, int *tj, int *elemindex, int nelements, int nbf, int natom, int N)
-{
-  int nelements2 = nelements*(nelements+1)/2;
-  for (int n=0; n<N; n++) {
-    int i1 = ai[n];
-    int j1 = aj[n];
-    int typei = ti[n]-1;
-    int typej = tj[n]-1;
-    for (int m=0; m<nbf; m++) {
-      int im =  i1 + natom*((elemindex[typei + typej*nelements] - 1) + nelements2*m);
-      int jm =  j1 + natom*((elemindex[typei + typej*nelements] - 1) + nelements2*m);
-      int nm = n + N*m;
-      eatom[im] += eij[nm];
-      fatom[0 + 3*im] += fij[0 + 3*nm];
-      fatom[1 + 3*im] += fij[1 + 3*nm];
-      fatom[2 + 3*im] += fij[2 + 3*nm];
-      fatom[0 + 3*jm] -= fij[0 + 3*nm];
-      fatom[1 + 3*jm] -= fij[1 + 3*nm];
-      fatom[2 + 3*jm] -= fij[2 + 3*nm];
-    }
-  }
-}
-
-void MLPOD::pod1body(double *eatom, double *fatom, int *atomtype, int nelements, int natom)
-{
-  for (int m=1; m<=nelements; m++)
-    for (int i=0; i<natom; i++)
-      eatom[i + natom*(m-1)] = (atomtype[i] == m) ? 1.0 : 0.0;
-
-  for (int i=0; i<3*natom*nelements; i++)
-    fatom[i] = 0.0;
-}
-
-void MLPOD::pod3body(double *eatom, double *fatom, double *yij, double *e2ij, double *f2ij, double *tmpmem,
-       int *elemindex, int *pairnumsum, int *ai, int *aj, int *ti, int *tj, int nrbf, int nabf,
-       int nelements, int natom, int Nij)
-{
-  int dim = 3, nabf1 = nabf + 1;
-  int nelements2 = nelements*(nelements+1)/2;
-  int n, nijk, nijk3, typei, typej, typek, ij, ik, i, j, k;
-
-  double xij1, xij2, xij3, xik1, xik2, xik3;
-  double xdot, rijsq, riksq, rij, rik;
-  double costhe, sinthe, theta, dtheta;
-  double tm, tm1, tm2, dct1, dct2, dct3, dct4, dct5, dct6;
-  double uj, uk, rbf, drbf1, drbf2, drbf3, drbf4, drbf5, drbf6;
-  double eijk, fj1, fj2, fj3, fk1, fk2, fk3;
-
-  double *abf = &tmpmem[0];
-  double *dabf1 = &tmpmem[nabf1];
-  double *dabf2 = &tmpmem[2*nabf1];
-  double *dabf3 = &tmpmem[3*nabf1];
-  double *dabf4 = &tmpmem[4*nabf1];
-  double *dabf5 = &tmpmem[5*nabf1];
-  double *dabf6 = &tmpmem[6*nabf1];
-
-  for (int ii=0; ii<natom; ii++) {
-    int numneigh = pairnumsum[ii+1] - pairnumsum[ii];    // number of pairs (i,j) around i
-    int s = pairnumsum[ii];
-    for (int lj=0; lj<numneigh ; lj++) {   // loop over each atom j around atom i
-      ij = lj + s;
-      i = ai[ij];  // atom i
-      j = aj[ij];  // atom j
-      typei = ti[ij] - 1;
-      typej = tj[ij] - 1;
-      xij1 = yij[0+dim*ij];  // xj - xi
-      xij2 = yij[1+dim*ij];  // xj - xi
-      xij3 = yij[2+dim*ij];  // xj - xi
-      rijsq = xij1*xij1 + xij2*xij2 + xij3*xij3;
-      rij = sqrt(rijsq);
-      for (int lk=lj+1; lk<numneigh; lk++) { // loop over each atom k around atom i (k > j)
-        ik = lk + s;
-        k = aj[ik];  // atom k
-        typek = tj[ik] - 1;
-        xik1 = yij[0+dim*ik];  // xk - xi
-        xik2 = yij[1+dim*ik];  // xk - xi
-        xik3 = yij[2+dim*ik];  // xk - xi
-        riksq = xik1*xik1 + xik2*xik2 + xik3*xik3;
-        rik = sqrt(riksq);
-
-        xdot  = xij1*xik1 + xij2*xik2 + xij3*xik3;
-        costhe = xdot/(rij*rik);
-        costhe = costhe > 1.0 ? 1.0 : costhe;
-        costhe = costhe < -1.0 ? -1.0 : costhe;
-        xdot = costhe*(rij*rik);
-
-        sinthe = sqrt(1.0 - costhe*costhe);
-        sinthe = sinthe > 1e-12 ? sinthe : 1e-12;
-        theta = acos(costhe);
-        dtheta = -1.0/sinthe;
-
-        tm1 = 1.0/(rij*rijsq*rik);
-        tm2 = 1.0/(rij*riksq*rik);
-        dct1 = (xik1*rijsq - xij1*xdot)*tm1;
-        dct2 = (xik2*rijsq - xij2*xdot)*tm1;
-        dct3 = (xik3*rijsq - xij3*xdot)*tm1;
-        dct4 = (xij1*riksq - xik1*xdot)*tm2;
-        dct5 = (xij2*riksq - xik2*xdot)*tm2;
-        dct6 = (xij3*riksq - xik3*xdot)*tm2;
-
-        for (int p=0; p <nabf1; p++) {
-          abf[p] = cos(p*theta);
-          tm = -p*sin(p*theta)*dtheta;
-          dabf1[p] = tm*dct1;
-          dabf2[p] = tm*dct2;
-          dabf3[p] = tm*dct3;
-          dabf4[p] = tm*dct4;
-          dabf5[p] = tm*dct5;
-          dabf6[p] = tm*dct6;
-        }
-
-        for (int m=0; m<nrbf; m++) {
-          uj = e2ij[lj + s + Nij*m];
-          uk = e2ij[lk + s + Nij*m];
-          rbf = uj*uk;
-          drbf1 = f2ij[0 + dim*(lj + s) + dim*Nij*m]*uk;
-          drbf2 = f2ij[1 + dim*(lj + s) + dim*Nij*m]*uk;
-          drbf3 = f2ij[2 + dim*(lj + s) + dim*Nij*m]*uk;
-          drbf4 = f2ij[0 + dim*(lk + s) + dim*Nij*m]*uj;
-          drbf5 = f2ij[1 + dim*(lk + s) + dim*Nij*m]*uj;
-          drbf6 = f2ij[2 + dim*(lk + s) + dim*Nij*m]*uj;
-
-          for (int p=0; p <nabf1; p++) {
-            eijk = rbf*abf[p];
-            fj1 = drbf1*abf[p] + rbf*dabf1[p];
-            fj2 = drbf2*abf[p] + rbf*dabf2[p];
-            fj3 = drbf3*abf[p] + rbf*dabf3[p];
-            fk1 = drbf4*abf[p] + rbf*dabf4[p];
-            fk2 = drbf5*abf[p] + rbf*dabf5[p];
-            fk3 = drbf6*abf[p] + rbf*dabf6[p];
-
-            n = p + (nabf1)*m;
-            nijk = natom*((elemindex[typej + typek*nelements] - 1) + nelements2*typei + nelements2*nelements*n);
-            eatom[i + nijk] += eijk;
-
-            nijk3 = 3*i + 3*nijk;
-            fatom[0 + nijk3] += fj1 + fk1;
-            fatom[1 + nijk3] += fj2 + fk2;
-            fatom[2 + nijk3] += fj3 + fk3;
-
-            nijk3 = 3*j + 3*nijk;
-            fatom[0 + nijk3] -= fj1;
-            fatom[1 + nijk3] -= fj2;
-            fatom[2 + nijk3] -= fj3;
-
-            nijk3 = 3*k + 3*nijk;
-            fatom[0 + nijk3] -= fk1;
-            fatom[1 + nijk3] -= fk2;
-            fatom[2 + nijk3] -= fk3;
-          }
-        }
-      }
-    }
-  }
-}
-
-void MLPOD::poddesc(double *eatom1, double *fatom1, double *eatom2, double *fatom2, double *eatom3,
-      double *fatom3, double *rij, double *Phi, double *besselparams, double *tmpmem, double rin,
-      double rcut, int *pairnumsum, int *atomtype, int *ai, int *aj, int *ti, int *tj, int *elemindex,
-      int *pdegree, int nbesselpars, int nrbf2, int nrbf3, int nabf, int nelements, int Nij, int natom)
-{
-  int nrbf = MAX(nrbf2, nrbf3);
-  int ns = pdegree[0]*nbesselpars + pdegree[1];
-
-  double *e2ij = &tmpmem[0]; // Nij*nrbf
-  double *f2ij = &tmpmem[Nij*nrbf]; // dim*Nij*nrbf
-  double *e2ijt = &tmpmem[4*Nij*nrbf]; // Nij*ns
-  double *f2ijt = &tmpmem[4*Nij*nrbf+Nij*ns]; // dim*Nij*ns
-
-  // orthogonal radial basis functions
-
-  podradialbasis(e2ijt, f2ijt, rij, besselparams, rin, rcut-rin, pdegree[0], pdegree[1], nbesselpars, Nij);
-  podMatMul(e2ij, e2ijt, Phi, Nij, ns, nrbf);
-  podMatMul(f2ij, f2ijt, Phi, 3*Nij, ns, nrbf);
-
-  // one-body descriptors
-
-  pod1body(eatom1, fatom1, atomtype, nelements, natom);
-
-  // two-body descriptors
-
-  podtally2b(eatom2, fatom2, e2ij, f2ij, ai, aj, ti, tj, elemindex, nelements, nrbf2, natom, Nij);
-
-  // three-body descriptors
-
-  pod3body(eatom3, fatom3, rij, e2ij, f2ij, &tmpmem[4*Nij*nrbf], elemindex, pairnumsum,
-       ai, aj, ti, tj, nrbf3, nabf, nelements, natom, Nij);
-}
-
-void snapBuildIndexList(int *idx_max, int *idxz, int *idxz_block, int *idxb, int *idxb_block, int *idxu_block,
-    int *idxcg_block, int twojmax)
-{
-  // index list for cglist
-
-  int jdim = twojmax + 1;
-
-  int idxcg_count = 0;
-  for(int j1 = 0; j1 <= twojmax; j1++)
-  for(int j2 = 0; j2 <= j1; j2++)
-    for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
-    idxcg_block[j + j2*jdim + j1*jdim*jdim] = idxcg_count;
-    for (int m1 = 0; m1 <= j1; m1++)
-      for (int m2 = 0; m2 <= j2; m2++)
-      idxcg_count++;
-    }
-  idx_max[0] = idxcg_count;
-
-  int idxu_count = 0;
-
-  for(int j = 0; j <= twojmax; j++) {
-  idxu_block[j] = idxu_count;
-  for(int mb = 0; mb <= j; mb++)
-    for(int ma = 0; ma <= j; ma++)
-    idxu_count++;
-  }
-  idx_max[1] = idxu_count;
-
-  // index list for beta and B
-
-  int idxb_count = 0;
-  for(int j1 = 0; j1 <= twojmax; j1++)
-  for(int j2 = 0; j2 <= j1; j2++)
-    for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
-    if (j >= j1) idxb_count++;
-
-  int idxb_max = idxb_count;
-  idx_max[2] = idxb_max;
-
-  idxb_count = 0;
-  for(int j1 = 0; j1 <= twojmax; j1++)
-  for(int j2 = 0; j2 <= j1; j2++)
-    for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
-    if (j >= j1) {
-      idxb[idxb_count*3 + 0] = j1;
-      idxb[idxb_count*3 + 1] = j2;
-      idxb[idxb_count*3 + 2] = j;
-      idxb_count++;
-    }
-
-  idxb_count = 0;
-  for(int j1 = 0; j1 <= twojmax; j1++)
-  for(int j2 = 0; j2 <= j1; j2++)
-    for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
-    if (j >= j1) {
-      idxb_block[j + j2*jdim + j1*jdim*jdim] = idxb_count;
-      idxb_count++;
-    }
-    }
-
-  // index list for zlist
-
-  int idxz_count = 0;
-
-  for(int j1 = 0; j1 <= twojmax; j1++)
-  for(int j2 = 0; j2 <= j1; j2++)
-    for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
-    for (int mb = 0; 2*mb <= j; mb++)
-      for (int ma = 0; ma <= j; ma++)
-      idxz_count++;
-
-  int idxz_max = idxz_count;
-  idx_max[3] = idxz_max;
-
-  idxz_count = 0;
-  for(int j1 = 0; j1 <= twojmax; j1++)
-  for(int j2 = 0; j2 <= j1; j2++)
-    for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
-    idxz_block[j + j2*jdim + j1*jdim*jdim] = idxz_count;
-
-    for (int mb = 0; 2*mb <= j; mb++)
-      for (int ma = 0; ma <= j; ma++) {
-
-      idxz[idxz_count*10 + 0] = j1;
-      idxz[idxz_count*10 + 1] = j2;
-      idxz[idxz_count*10 + 2] = j;
-      idxz[idxz_count*10 + 3] = MAX(0, (2 * ma - j - j2 + j1) / 2);
-      idxz[idxz_count*10 + 4] = (2 * ma - j - (2 * idxz[idxz_count*10 + 3] - j1) + j2) / 2;
-      idxz[idxz_count*10 + 5] = MIN(j1, (2 * ma - j + j2 + j1) / 2) - idxz[idxz_count*10 + 3] + 1;
-      idxz[idxz_count*10 + 6] = MAX(0, (2 * mb - j - j2 + j1) / 2);
-      idxz[idxz_count*10 + 7] = (2 * mb - j - (2 * idxz[idxz_count*10 + 6] - j1) + j2) / 2;
-      idxz[idxz_count*10 + 8] = MIN(j1, (2 * mb - j + j2 + j1) / 2) - idxz[idxz_count*10 + 6] + 1;
-
-      const int jju = idxu_block[j] + (j+1)*mb + ma;
-      idxz[idxz_count*10 + 9] = jju;
-
-      idxz_count++;
-      }
-    }
-};
-
-void snapInitRootpqArray(double *rootpqarray, int twojmax)
-{
-  int jdim = twojmax + 1;
-  for (int p = 1; p <= twojmax; p++)
-  for (int q = 1; q <= twojmax; q++)
-    rootpqarray[p*jdim + q] = sqrt(((double) p)/q);
-};
-
-double snapDeltacg(double *factorial, int j1, int j2, int j)
-{
-  double sfaccg = factorial[(j1 + j2 + j) / 2 + 1];
-  return sqrt(factorial[(j1 + j2 - j) / 2] *
-        factorial[(j1 - j2 + j) / 2] *
-        factorial[(-j1 + j2 + j) / 2] / sfaccg);
-};
-
-void snapInitClebschGordan(double *cglist, double *factorial, int twojmax)
-{
-  double sum,dcg,sfaccg;
-  int m, aa2, bb2, cc2;
-  int ifac;
-
-  int idxcg_count = 0;
-  for(int j1 = 0; j1 <= twojmax; j1++)
-  for(int j2 = 0; j2 <= j1; j2++)
-    for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
-    for (int m1 = 0; m1 <= j1; m1++) {
-      aa2 = 2 * m1 - j1;
-
-      for (int m2 = 0; m2 <= j2; m2++) {
-
-      bb2 = 2 * m2 - j2;
-      m = (aa2 + bb2 + j) / 2;
-
-      if(m < 0 || m > j) {
-        cglist[idxcg_count] = 0.0;
-        idxcg_count++;
-        continue;
-      }
-
-      sum = 0.0;
-
-      for (int z = MAX(0, MAX(-(j - j2 + aa2)
-                  / 2, -(j - j1 - bb2) / 2));
-         z <= MIN((j1 + j2 - j) / 2,
-              MIN((j1 - aa2) / 2, (j2 + bb2) / 2));
-         z++) {
-        ifac = z % 2 ? -1 : 1;
-        sum += ifac /
-        (factorial[z] *
-         factorial[(j1 + j2 - j) / 2 - z] *
-         factorial[(j1 - aa2) / 2 - z] *
-         factorial[(j2 + bb2) / 2 - z] *
-         factorial[(j - j2 + aa2) / 2 + z] *
-         factorial[(j - j1 - bb2) / 2 + z]);
-      }
-
-      cc2 = 2 * m - j;
-      dcg = snapDeltacg(factorial, j1, j2, j);
-      sfaccg = sqrt(factorial[(j1 + aa2) / 2] *
-              factorial[(j1 - aa2) / 2] *
-              factorial[(j2 + bb2) / 2] *
-              factorial[(j2 - bb2) / 2] *
-              factorial[(j  + cc2) / 2] *
-              factorial[(j  - cc2) / 2] *
-              (j + 1));
-
-      cglist[idxcg_count] = sum * dcg * sfaccg;
-      idxcg_count++;
-      }
-    }
-    }
-}
-
-void snapInitSna(double *rootpqarray, double *cglist, double *factorial, int *idx_max, int *idxz,
-    int *idxz_block, int *idxb, int *idxb_block, int *idxu_block, int *idxcg_block, int twojmax)
-{
-  snapBuildIndexList(idx_max, idxz, idxz_block, idxb,
-      idxb_block, idxu_block, idxcg_block, twojmax);
-
-  snapInitRootpqArray(rootpqarray, twojmax);
-  snapInitClebschGordan(cglist, factorial, twojmax);
-}
-
-void MLPOD::snapSetup(int twojmax, int ntypes)
-{
-  sna.twojmax = twojmax;
-  sna.ntypes = ntypes;
-
-  int jdim = twojmax + 1;
-  int jdimpq = twojmax + 2;
-
-  memory->create(sna.map, ntypes+1, "pod:sna_map");
-  memory->create(sna.idxcg_block, jdim*jdim*jdim, "pod:sna_idxcg_block");
-  memory->create(sna.idxz_block, jdim*jdim*jdim, "pod:sna_idxz_block");
-  memory->create(sna.idxb_block, jdim*jdim*jdim, "pod:sna_idxb_block");
-  memory->create(sna.idxu_block, jdim, "pod:sna_idxu_block");
-  memory->create(sna.idx_max, 5, "pod:sna_idx_max");
-
-  int idxb_count = 0;
-  for(int j1 = 0; j1 <= twojmax; j1++)
-    for(int j2 = 0; j2 <= j1; j2++)
-    for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
-      if (j >= j1) idxb_count++;
-
-  int idxz_count = 0;
-  for(int j1 = 0; j1 <= twojmax; j1++)
-    for(int j2 = 0; j2 <= j1; j2++)
-    for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
-      for (int mb = 0; 2*mb <= j; mb++)
-      for (int ma = 0; ma <= j; ma++)
-        idxz_count++;
-
-  int idxcg_count = 0;
-  for(int j1 = 0; j1 <= twojmax; j1++)
-    for(int j2 = 0; j2 <= j1; j2++)
-      for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
-        for (int m1 = 0; m1 <= j1; m1++)
-          for (int m2 = 0; m2 <= j2; m2++)
-          idxcg_count++;
-      }
-
-  memory->create(sna.idxz, idxz_count*10, "pod:sna_idxz");
-  memory->create(sna.idxb, idxb_count*3, "pod:sna_idxb");
-  memory->create(sna.rcutsq, (ntypes+1)*(ntypes+1), "pod:sna_rcutsq");
-  memory->create(sna.radelem, ntypes+1, "pod:sna_radelem");
-  memory->create(sna.wjelem, ntypes+1, "pod:sna_wjelem");
-  memory->create(sna.rootpqarray, jdimpq*jdimpq, "pod:sna_rootpqarray");
-  memory->create(sna.cglist, idxcg_count, "pod:sna_cglist");
-  memory->create(sna.bzero, jdim, "pod:sna_bzero");
-  memory->create(sna.fac, 168, "pod:sna_fac");
-
-  for (int i=0; i<jdimpq*jdimpq; i++)
-    sna.rootpqarray[i] = 0;
-
-  double facn = 1.0;
-  for (int i=0; i<168; i++) {
-    sna.fac[i] = facn;
-    facn = facn*(i+1);
-  }
-
-  snapInitSna(sna.rootpqarray, sna.cglist, sna.fac, sna.idx_max, sna.idxz,
-    sna.idxz_block, sna.idxb, sna.idxb_block, sna.idxu_block, sna.idxcg_block, sna.twojmax);
-
-  sna.idxcg_max = sna.idx_max[0];
-  sna.idxu_max = sna.idx_max[1];
-  sna.idxb_max = sna.idx_max[2];
-  sna.idxz_max = sna.idx_max[3];
-}
-
-void MLPOD::InitSnap()
-{
-  double *elemradius = pod.snapelementradius;
-  double *elemweight = pod.snapelementweight;
-  double rcutfac = pod.rcut;
-  double rmin0 = 0.0;
-  double rfac0 = pod.snaprfac0;
-  int twojmax = pod.snaptwojmax;
-  int ntypes = pod.nelements;
-  int chemflag = pod.snapchemflag;
-
-  int bzeroflag = 0;
-  int switchflag = 1;
-  int wselfallflag = 0;
-  int bnormflag = chemflag;
-  double wself=1.0;
-
-  // calculate maximum cutoff for all elements
-
-  double rcutmax = 0.0;
-  for (int ielem = 0; ielem < ntypes; ielem++)
-    rcutmax = MAX(2.0*elemradius[ielem]*rcutfac,rcutmax);
-
-  snapSetup(twojmax, ntypes);
-  for (int i=0; i<ntypes; i++) {
-    sna.radelem[1+i] = elemradius[i];
-    sna.wjelem[1+i] = elemweight[i];
-  }
-  podArrayFill(&sna.map[1], (int) 0, ntypes);
-
-  for (int i=0; i<ntypes; i++) {
-    for (int j=0; j<ntypes; j++) {
-      double cut = (elemradius[i] + elemradius[j])*rcutfac;
-      sna.rcutsq[j+1 + (i+1)*(ntypes+1)] = cut*cut;
-    }
-  }
-
-  // bzeroflag is currently always 0
-#if 0
-  if (bzeroflag) {
-    double www = wself*wself*wself;
-    for (int j = 0; j <= twojmax; j++)
-      if (bnormflag)
-        sna.bzero[j] = www;
-      else
-        sna.bzero[j] = www*(j+1);
-  }
-#endif
-
-  int nelements = ntypes;
-  if (!chemflag)
-    nelements = 1;
-
-  sna.nelements = nelements;
-  sna.ndoubles = nelements*nelements;   // number of multi-element pairs
-  sna.ntriples = nelements*nelements*nelements;   // number of multi-element triplets
-  sna.bnormflag = bnormflag;
-  sna.chemflag = chemflag;
-  sna.switchflag = switchflag;
-  sna.bzeroflag = bzeroflag;
-  sna.wselfallflag = wselfallflag;
-  sna.wself = wself;
-  sna.rmin0 = rmin0;
-  sna.rfac0 = rfac0;
-  sna.rcutfac = rcutfac;
-  sna.rcutmax = rcutmax;
-  sna.ncoeff = sna.idxb_max*sna.ntriples;
-}
-
-void MLPOD::snapComputeUlist(double *Sr, double *Si, double *dSr, double *dSi, double *rootpqarray, double *rij,
-    double *wjelem, double *radelem, double rmin0, double rfac0, double rcutfac, int *idxu_block,
-    int *ti, int *tj, int twojmax, int idxu_max, int ijnum, int switch_flag)
-{
-  double *Srx = &dSr[0];
-  double *Sry = &dSr[idxu_max*ijnum];
-  double *Srz = &dSr[2*idxu_max*ijnum];
-  double *Six = &dSi[0];
-  double *Siy = &dSi[idxu_max*ijnum];
-  double *Siz = &dSi[2*idxu_max*ijnum];
-
-  for(int ij=0; ij<ijnum; ij++) {
-  double x = rij[ij*3+0];
-  double y = rij[ij*3+1];
-  double z = rij[ij*3+2];
-  double rsq = x * x + y * y + z * z;
-  double r = sqrt(rsq);
-  double rinv = 1.0 / r;
-  double ux = x * rinv;
-  double uy = y * rinv;
-  double uz = z * rinv;
-
-  double rcutij = (radelem[ti[ij]]+radelem[tj[ij]])*rcutfac;
-  double rscale0 = rfac0 * MY_PI / (rcutij - rmin0);
-  double theta0 = (r - rmin0) * rscale0;
-  double z0 = r / tan(theta0);
-  double dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq;
-
-  double sfac = 0.0, dsfac = 0.0;
-  if (switch_flag == 0) {
-    sfac = 1.0;
-    dsfac = 0.0;
-  }
-  else if (switch_flag == 1) {
-    if (r <= rmin0) {
-      sfac = 1.0;
-      dsfac = 0.0;
-    }
-    else if(r > rcutij) {
-      sfac = 0.0;
-      dsfac = 0.0;
-    }
-    else {
-      double rcutfac0 = MY_PI / (rcutij - rmin0);
-      sfac =  0.5 * (cos((r - rmin0) * rcutfac0) + 1.0);
-      dsfac = -0.5 * sin((r - rmin0) * rcutfac0) * rcutfac0;
-    }
-  }
-  sfac *= wjelem[tj[ij]];
-  dsfac *= wjelem[tj[ij]];
-
-  double r0inv, dr0invdr;
-  double a_r, a_i, b_r, b_i;
-  double da_r[3], da_i[3], db_r[3], db_i[3];
-  double dz0[3], dr0inv[3];
-  double rootpq;
-  int jdim = twojmax + 1;
-
-  r0inv = 1.0 / sqrt(r * r + z0 * z0);
-  a_r = r0inv * z0;
-  a_i = -r0inv * z;
-  b_r = r0inv * y;
-  b_i = -r0inv * x;
-
-  dr0invdr = -cube(r0inv) * (r + z0 * dz0dr);
-
-  dr0inv[0] = dr0invdr * ux;
-  dr0inv[1] = dr0invdr * uy;
-  dr0inv[2] = dr0invdr * uz;
-
-  dz0[0] = dz0dr * ux;
-  dz0[1] = dz0dr * uy;
-  dz0[2] = dz0dr * uz;
-
-  for (int k = 0; k < 3; k++) {
-    da_r[k] = dz0[k] * r0inv + z0 * dr0inv[k];
-    da_i[k] = -z * dr0inv[k];
-  }
-  da_i[2] += -r0inv;
-
-  for (int k = 0; k < 3; k++) {
-    db_r[k] = y * dr0inv[k];
-    db_i[k] = -x * dr0inv[k];
-  }
-  db_i[0] += -r0inv;
-  db_r[1] += r0inv;
-
-  Sr[ij+0*ijnum] = 1.0;
-  Si[ij+0*ijnum] = 0.0;
-  Srx[ij+0*ijnum] = 0.0;
-  Six[ij+0*ijnum] = 0.0;
-  Sry[ij+0*ijnum] = 0.0;
-  Siy[ij+0*ijnum] = 0.0;
-  Srz[ij+0*ijnum] = 0.0;
-  Siz[ij+0*ijnum] = 0.0;
-  for (int j = 1; j <= twojmax; j++) {
-    int jju = idxu_block[j];
-    int jjup = idxu_block[j-1];
-
-    // fill in left side of matrix layer from previous layer
-
-    for (int mb = 0; 2*mb <= j; mb++) {
-      Sr[ij+jju*ijnum] = 0.0;
-      Si[ij+jju*ijnum] = 0.0;
-      Srx[ij+jju*ijnum] = 0.0;
-      Six[ij+jju*ijnum] = 0.0;
-      Sry[ij+jju*ijnum] = 0.0;
-      Siy[ij+jju*ijnum] = 0.0;
-      Srz[ij+jju*ijnum] = 0.0;
-      Siz[ij+jju*ijnum] = 0.0;
-      for (int ma = 0; ma < j; ma++) {
-        rootpq = rootpqarray[(j - ma)*jdim + (j - mb)];
-        int njju = ij+jju*ijnum;
-        int njju1 = ij+(jju+1)*ijnum;
-        int njjup = ij+jjup*ijnum;
-        double u_r = Sr[njjup];
-        double u_i = Si[njjup];
-        double ux_r = Srx[njjup];
-        double ux_i = Six[njjup];
-        double uy_r = Sry[njjup];
-        double uy_i = Siy[njjup];
-        double uz_r = Srz[njjup];
-        double uz_i = Siz[njjup];
-
-        Sr[njju] += rootpq * (a_r * u_r + a_i * u_i);
-        Si[njju] += rootpq * (a_r * u_i - a_i * u_r);
-        Srx[njju] += rootpq * (da_r[0] * u_r + da_i[0] * u_i + a_r * ux_r + a_i * ux_i);
-        Six[njju] += rootpq * (da_r[0] * u_i - da_i[0] * u_r + a_r * ux_i - a_i * ux_r);
-        Sry[njju] += rootpq * (da_r[1] * u_r + da_i[1] * u_i + a_r * uy_r + a_i * uy_i);
-        Siy[njju] += rootpq * (da_r[1] * u_i - da_i[1] * u_r + a_r * uy_i - a_i * uy_r);
-        Srz[njju] += rootpq * (da_r[2] * u_r + da_i[2] * u_i + a_r * uz_r + a_i * uz_i);
-        Siz[njju] += rootpq * (da_r[2] * u_i - da_i[2] * u_r + a_r * uz_i - a_i * uz_r);
-
-        rootpq = rootpqarray[(ma + 1)*jdim + (j - mb)];
-        Sr[njju1] = -rootpq * (b_r * u_r + b_i * u_i);
-        Si[njju1] = -rootpq * (b_r * u_i - b_i * u_r);
-        Srx[njju1] = -rootpq * (db_r[0] * u_r + db_i[0] * u_i + b_r * ux_r + b_i * ux_i);
-        Six[njju1] = -rootpq * (db_r[0] * u_i - db_i[0] * u_r + b_r * ux_i - b_i * ux_r);
-        Sry[njju1] = -rootpq * (db_r[1] * u_r + db_i[1] * u_i + b_r * uy_r + b_i * uy_i);
-        Siy[njju1] = -rootpq * (db_r[1] * u_i - db_i[1] * u_r + b_r * uy_i - b_i * uy_r);
-        Srz[njju1] = -rootpq * (db_r[2] * u_r + db_i[2] * u_i + b_r * uz_r + b_i * uz_i);
-        Siz[njju1] = -rootpq * (db_r[2] * u_i - db_i[2] * u_r + b_r * uz_i - b_i * uz_r);
-        jju++;
-        jjup++;
-      }
-      jju++;
-    }
-
-    jju = idxu_block[j];
-    jjup = jju+(j+1)*(j+1)-1;
-    int mbpar = 1;
-    for (int mb = 0; 2*mb <= j; mb++) {
-      int mapar = mbpar;
-      for (int ma = 0; ma <= j; ma++) {
-        int njju = ij+jju*ijnum;
-        int njjup = ij+jjup*ijnum;
-        if (mapar == 1) {
-          Sr[njjup] = Sr[njju];
-          Si[njjup] = -Si[njju];
-          if (j%2==1 && mb==(j/2)) {
-          Srx[njjup] =  Srx[njju];
-          Six[njjup] = -Six[njju];
-          Sry[njjup] =  Sry[njju];
-          Siy[njjup] = -Siy[njju];
-          Srz[njjup] =  Srz[njju];
-          Siz[njjup] = -Siz[njju];
-          }
-        } else {
-          Sr[njjup] = -Sr[njju];
-          Si[njjup] =  Si[njju];
-          if (j%2==1 && mb==(j/2)) {
-          Srx[njjup] = -Srx[njju];
-          Six[njjup] =  Six[njju];
-          Sry[njjup] = -Sry[njju];
-          Siy[njjup] =  Siy[njju];
-          Srz[njjup] = -Srz[njju];
-          Siz[njjup] =  Siz[njju];
-          }
-        }
-        mapar = -mapar;
-        jju++;
-        jjup--;
-      }
-      mbpar = -mbpar;
-    }
-  }
-
-  for (int j = 0; j <= twojmax; j++) {
-    int jju = idxu_block[j];
-    for (int mb = 0; 2*mb <= j; mb++)
-      for (int ma = 0; ma <= j; ma++) {
-      int ijk = ij+jju*ijnum;
-      Srx[ijk] = dsfac * Sr[ijk] * ux + sfac * Srx[ijk];
-      Six[ijk] = dsfac * Si[ijk] * ux + sfac * Six[ijk];
-      Sry[ijk] = dsfac * Sr[ijk] * uy + sfac * Sry[ijk];
-      Siy[ijk] = dsfac * Si[ijk] * uy + sfac * Siy[ijk];
-      Srz[ijk] = dsfac * Sr[ijk] * uz + sfac * Srz[ijk];
-      Siz[ijk] = dsfac * Si[ijk] * uz + sfac * Siz[ijk];
-      jju++;
-      }
-  }
-
-  for (int k=0; k<idxu_max; k++) {
-    int ijk = ij + ijnum*k;
-    Sr[ijk] = sfac*Sr[ijk];
-    Si[ijk] = sfac*Si[ijk];
-  }
-  }
-};
-
-void MLPOD::snapZeroUarraytot2(double *Stotr, double *Stoti, double wself, int *idxu_block,
-                               int *type, int *map, int * /*ai*/, int wselfall_flag, int chemflag,
-                               int idxu_max, int nelements, int twojmax, int inum)
-{
-  int N1 = inum;
-  int N2 = N1*(twojmax+1);
-  int N3 = N2*nelements;
-
-  for (int idx=0; idx < N3; idx++) {
-    int l = idx%N2;  // inum*(twojmax+1)
-    int ii = l%N1;  // inum
-    int j = (l-ii)/N1; // (twojmax+1)
-    int jelem = (idx-l)/N2; // nelements
-    int ielem = (chemflag) ? map[type[ii]]: 0;
-    int jju = idxu_block[j];
-    for (int mb = 0; mb <= j; mb++) {
-      for (int ma = 0; ma <= j; ma++) {
-        int n = ii + inum*jju + inum*idxu_max*jelem;
-        Stotr[n] = 0.0;
-        Stoti[n] = 0.0;
-        if (jelem == ielem || wselfall_flag)
-          if (ma==mb)
-            Stotr[n] = wself; ///// double check this
-        jju++;
-      }
-    }
-
-  }
-};
-
- void snapKernelAddUarraytot(double *Stotr, double *Stoti, double *Sr, double *Si,
-    int *map, int *ai, int *tj, int inum, int ijnum, int N1, int N2, int chemflag)
-{
-  for (int idx=0; idx < N2; idx++) {
-    int ij = idx%ijnum;  // ijnum
-    int jju = (idx-ij)/ijnum;  // idxu_max
-    int jelem = (chemflag) ? map[tj[ij]] : 0;
-    int i = ai[ij] + inum*jju + N1*jelem;
-    Stotr[i] += Sr[idx];
-    Stoti[i] += Si[idx];
-  }
-};
- void snapKernelAddUarraytot(double *Stotr, double *Stoti, double *Sr, double *Si,
-    int *ai, int inum, int ijnum, int N2)
-{
-  for (int idx=0; idx < N2; idx++) {
-    int ij = idx%ijnum;  // ijnum
-    int jju = (idx-ij)/ijnum;  // idxu_max
-    int i = ai[ij] + inum*jju;
-    Stotr[i] += Sr[idx];
-    Stoti[i] += Si[idx];
-  }
-};
-void MLPOD::snapAddUarraytot(double *Stotr, double *Stoti, double *Sr,
-    double *Si, int *map, int *ai, int *tj, int idxu_max, int inum, int ijnum, int chemflag)
-{
-  int N1 = inum*idxu_max;
-  int N2 = ijnum*idxu_max;
-  if (chemflag==0) {
-    snapKernelAddUarraytot(Stotr, Stoti, Sr, Si, ai, inum, ijnum, N2);
-  } else
-    snapKernelAddUarraytot(Stotr, Stoti, Sr, Si, map, ai, tj, inum, ijnum, N1, N2, chemflag);
-};
-
-void MLPOD::snapComputeZi2(double *zlist_r, double *zlist_i, double *Stotr, double *Stoti,
-    double *cglist, int *idxz, int *idxu_block, int *idxcg_block, int twojmax, int idxu_max,
-    int idxz_max, int nelements, int bnorm_flag, int inum)
-{
-  int jdim = twojmax + 1;
-  int N1 = inum;
-  int N2 = N1*idxz_max;
-  int N3 = N2*nelements*nelements;
-  for (int idx=0; idx < N3; idx++) {
-    int l = idx%N2;   //  inum*idxz_max
-    int ii = l%inum;  // inum
-    int jjz = (l-ii)/inum; // idxz_max
-    int ielem = (idx-l)/N2;  // nelements*nelements
-    int elem2 = ielem%nelements; // nelements
-    int elem1 = (ielem-elem2)/nelements; // nelements
-
-    const int j1 = idxz[jjz*10+0];
-    const int j2 = idxz[jjz*10+1];
-    const int j = idxz[jjz*10+2];
-    const int ma1min = idxz[jjz*10+3];
-    const int ma2max = idxz[jjz*10+4];
-    const int na = idxz[jjz*10+5];
-    const int mb1min = idxz[jjz*10+6];
-    const int mb2max = idxz[jjz*10+7];
-    const int nb = idxz[jjz*10+8];
-    int jju1 = idxu_block[j1] + (j1 + 1) * mb1min;
-    int jju2 = idxu_block[j2] + (j2 + 1) * mb2max;
-    int icgb = mb1min * (j2 + 1) + mb2max;
-
-    const double *cgblock = cglist + idxcg_block[j + j2*jdim + j1*jdim*jdim];
-    double qr = 0.0;
-    double qi = 0.0;
-    for (int ib = 0; ib < nb; ib++) {
-      double suma1_r = 0.0;
-      double suma1_i = 0.0;
-
-      // Stotr has size inum*idxu_max*nelements
-
-      const double *u1_r = &Stotr[ii + inum*jju1 + inum*idxu_max*elem1];
-      const double *u1_i = &Stoti[ii + inum*jju1 + inum*idxu_max*elem1];
-      const double *u2_r = &Stotr[ii + inum*jju2 + inum*idxu_max*elem2];
-      const double *u2_i = &Stoti[ii + inum*jju2 + inum*idxu_max*elem2];
-
-      int ma1 = ma1min;
-      int ma2 = ma2max;
-      int icga = ma1min * (j2 + 1) + ma2max;
-
-      for (int ia = 0; ia < na; ia++) {
-        suma1_r += cgblock[icga] * (u1_r[inum*ma1] * u2_r[inum*ma2] - u1_i[inum*ma1] * u2_i[inum*ma2]);
-        suma1_i += cgblock[icga] * (u1_r[inum*ma1] * u2_i[inum*ma2] + u1_i[inum*ma1] * u2_r[inum*ma2]);
-        ma1++;
-        ma2--;
-        icga += j2;
-      } // end loop over ia
-
-      qr += cgblock[icgb] * suma1_r;
-      qi += cgblock[icgb] * suma1_i;
-
-      jju1 += j1 + 1;
-      jju2 -= j2 + 1;
-      icgb += j2;
-    } // end loop over ib
-
-    if (bnorm_flag) {
-      qr /= (j+1);
-      qi /= (j+1);
-    }
-
-    zlist_r[idx] = qr;
-    zlist_i[idx] = qi;
-  }
-};
-
-void snapKernelComputeBi1(double *blist, double *zlist_r, double *zlist_i,
-    double *Stotr, double *Stoti, int *idxb, int *idxu_block, int *idxz_block, int jdim,
-    int nelements, int nelemsq, int nz_max, int nu_max, int inum, int N2, int N3)
-{
-  for (int idx=0; idx < N3; idx++) {
-    int l = idx%N2;
-    int ii = l%inum;
-    int jjb = (l-ii)/inum;
-    int jelem = (idx-l)/N2;
-    int k = jelem%nelemsq;
-    int elem3 = k%nelements;
-    int elem2 = (k-elem3)/nelements;
-    int elem1 = (jelem-k)/nelemsq;
-    int idouble = elem2 + nelements*elem1;
-    const int j1 = idxb[jjb*3 + 0];
-    const int j2 = idxb[jjb*3 + 1];
-    const int j = idxb[jjb*3 + 2];
-
-    int jjz = idxz_block[j + j2*jdim + j1*jdim*jdim];
-    int jju = idxu_block[j];
-    int idu;
-    int idz;
-    double sumzu = 0.0;
-    for (int mb = 0; 2 * mb < j; mb++)
-      for (int ma = 0; ma <= j; ma++) {
-        idu = ii + inum*jju + nu_max*elem3;
-        idz = ii + inum*jjz + nz_max*idouble;
-        sumzu += Stotr[idu] * zlist_r[idz] + Stoti[idu] * zlist_i[idz];
-        jjz++;
-        jju++;
-      } // end loop over ma, mb
-
-    // for j even, handle middle column
-
-    if (j % 2 == 0) {
-      int mb = j / 2;
-      for (int ma = 0; ma < mb; ma++) {
-        idu = ii + inum*jju + nu_max*elem3;
-        idz = ii + inum*jjz + nz_max*idouble;
-        sumzu += Stotr[idu] * zlist_r[idz] + Stoti[idu] * zlist_i[idz];
-        jjz++;
-        jju++;
-      }
-      idu = ii + inum*jju + nu_max*elem3;
-      idz = ii + inum*jjz + nz_max*idouble;
-      sumzu += 0.5 * (Stotr[idu] * zlist_r[idz] + Stoti[idu] * zlist_i[idz]);
-    } // end if jeven
-
-    blist[idx] = 2.0 * sumzu;
-  }
-}
-void snapKernelComputeBi2(double *blist, double *bzero,int *ilist, int *type,
-     int *map, int *idxb, int nelements, int nb_max, int inum, int N2, int chemflag)
-{
-  for (int idx=0; idx < N2; idx++) {
-    int ii = idx%inum;
-    int jjb = (idx-ii)/inum;
-
-    int ielem = (chemflag) ? map[type[ilist[ii]]]: 0;
-    int itriple = (ielem*nelements+ielem)*nelements+ielem;
-
-    const int j = idxb[jjb*3 + 2];
-    blist[ii + inum*jjb + nb_max*itriple] -= bzero[j];
-  }
-}
-void snapKernelComputeBi4(double *blist, double *bzero,
-     int *idxb, int inum, int N2, int N3)
-{
-  for (int idx=0; idx < N3; idx++) {
-    int l = idx%N2;
-    int ii = l%inum;
-    int jjb = (l-ii)/inum;
-    int j = idxb[jjb*3 + 2];
-    blist[idx] -= bzero[j];
-  }
-}
-void MLPOD::snapComputeBi1(double *blist, double *zlist_r, double *zlist_i, double *Stotr, double *Stoti,
-    int *idxb, int *idxu_block, int *idxz_block, int twojmax, int idxb_max, int idxu_max,
-    int idxz_max, int nelements, int inum)
-{
-  int nelemsq = nelements*nelements;
-  int nz_max = idxz_max*inum;
-  int nu_max = idxu_max*inum;
-  int N2 = inum*idxb_max;
-  int N3 = N2*nelements*nelemsq;
-  int jdim = twojmax+1;
-
-  snapKernelComputeBi1(blist, zlist_r, zlist_i, Stotr, Stoti, idxb, idxu_block, idxz_block,
-      jdim, nelements, nelemsq, nz_max, nu_max, inum, N2, N3);
-
-};
-void snapComputeBi2(double *blist, double *zlist_r, double *zlist_i, double *Stotr, double *Stoti,
-    double *bzero, int *ilist, int *type, int *map, int *idxb, int *idxu_block, int *idxz_block,
-    int twojmax, int idxb_max, int idxu_max, int idxz_max, int nelements, int bzero_flag,
-    int wselfall_flag, int chemflag, int inum)
-{
-  int nelemsq = nelements*nelements;
-  int nz_max = idxz_max*inum;
-  int nu_max = idxu_max*inum;
-  int nb_max = idxb_max*inum;
-  int N2 = inum*idxb_max;
-  int N3 = N2*nelements*nelemsq;
-  int jdim = twojmax+1;
-
-  snapKernelComputeBi1(blist, zlist_r, zlist_i, Stotr, Stoti,
-      idxb, idxu_block, idxz_block, jdim, nelements, nelemsq, nz_max, nu_max, inum, N2, N3);
-
-  if (bzero_flag) {
-    if (!wselfall_flag) {
-      snapKernelComputeBi2(blist, bzero, ilist, type, map,
-          idxb, nelements, nb_max, inum, N2, chemflag);
-    }
-    else {
-      snapKernelComputeBi4(blist, bzero, idxb, inum, N2, N3);
-    }
-  }
-};
-
-void MLPOD::snapComputeDbidrj(double *dblist, double *zlist_r, double *zlist_i,
-    double *dulist_r, double *dulist_i, int *idxb, int *idxu_block, int *idxz_block,
-    int *map, int *ai, int *tj, int twojmax, int idxb_max, int idxu_max, int idxz_max,
-    int nelements, int bnorm_flag, int chemflag, int inum, int ijnum)
-{
-  int nz_max = idxz_max*inum;
-  int nb_max = idxb_max*ijnum;
-  int nu_max = idxu_max*ijnum;
-  int N2 = ijnum*idxb_max;
-  int jdim = twojmax+1;
-
-  for (int i=0; i<nb_max*3*nelements*nelements*nelements; i++)
-    dblist[i] = 0.0;
-
-  for (int idx=0; idx < N2; idx++) {
-    int ij = idx%ijnum;
-    int jjb = (idx-ij)/ijnum;
-    int elem3 = (chemflag) ? map[tj[ij]] : 0;
-    int i = ai[ij]; // atom i
-    const int j1 = idxb[jjb*3 + 0];
-    const int j2 = idxb[jjb*3 + 1];
-    const int j = idxb[jjb*3 + 2];
-
-    for(int elem1 = 0; elem1 < nelements; elem1++)
-      for(int elem2 = 0; elem2 < nelements; elem2++) {
-
-      int jjz = idxz_block[j + j2*jdim + j1*jdim*jdim];
-      int jju = idxu_block[j];
-      int idouble = elem1*nelements+elem2;
-      int itriple = (elem1*nelements+elem2)*nelements+elem3;
-      int nimax = nz_max*idouble;
-
-      double *dbdr = &dblist[nb_max*3*itriple];
-      double sumzdu_r[3];
-      for (int k = 0; k < 3; k++)
-        sumzdu_r[k] = 0.0;
-
-      for (int mb = 0; 2 * mb < j; mb++)
-        for (int ma = 0; ma <= j; ma++) {
-        int n1 = i + inum*jjz + nimax;
-        int n2 = ij+ ijnum*jju;
-        double z_r = zlist_r[n1];
-        double z_i = zlist_i[n1];
-        for (int k = 0; k < 3; k++)
-          sumzdu_r[k] += dulist_r[n2 + nu_max*k] * z_r + dulist_i[n2 + nu_max*k] * z_i;
-        jjz++;
-        jju++;
-        } //end loop over ma mb
-
-      // for j even, handle middle column
-
-      if (j % 2 == 0) {
-        int mb = j / 2;
-        for (int ma = 0; ma < mb; ma++) {
-        int n1 = i + inum*jjz + nimax;
-        int n2 = ij+ ijnum*jju;
-        double z_r = zlist_r[n1];
-        double z_i = zlist_i[n1];
-        for (int k = 0; k < 3; k++)
-          sumzdu_r[k] += dulist_r[n2 + nu_max*k] * z_r + dulist_i[n2 + nu_max*k] * z_i;
-        jjz++;
-        jju++;
-        }
-
-        int n1 = i + inum*jjz + nimax;
-        int n2 = ij+ ijnum*jju;
-        double z_r = zlist_r[n1];
-        double z_i = zlist_i[n1];
-        for (int k = 0; k < 3; k++)
-        sumzdu_r[k] += (dulist_r[n2 + nu_max*k] * z_r + dulist_i[n2 + nu_max*k] * z_i) * 0.5;
-      } // end if jeven
-
-      for (int k = 0; k < 3; k++)
-        dbdr[ij + ijnum*k + ijnum*3*jjb] += 2.0 * sumzdu_r[k];
-
-      // sum over Conj(dudr(j1,ma1,mb1))*z(j,j2,j1,ma1,mb1)
-
-      double j1fac = (j + 1) / (j1 + 1.0);
-      idouble = elem1*nelements+elem2;
-      itriple = (elem3*nelements+elem2)*nelements+elem1;
-      jjz = idxz_block[j1 + j2*jdim + j*jdim*jdim];
-      jju = idxu_block[j1];
-
-      for (int k = 0; k < 3; k++)
-        sumzdu_r[k] = 0.0;
-
-      for (int mb = 0; 2 * mb < j1; mb++)
-        for (int ma = 0; ma <= j1; ma++) {
-        int n1 = i + inum*jjz + nimax;
-        int n2 = ij+ ijnum*jju;
-        double z_r = zlist_r[n1];
-        double z_i = zlist_i[n1];
-        for (int k = 0; k < 3; k++)
-          sumzdu_r[k] += dulist_r[n2 + nu_max*k] * z_r + dulist_i[n2 + nu_max*k] * z_i;
-        jjz++;
-        jju++;
-        } //end loop over ma mb
-
-      // for j1 even, handle middle column
-
-      if (j1 % 2 == 0) {
-        int mb = j1 / 2;
-        for (int ma = 0; ma < mb; ma++) {
-        int n1 = i + inum*jjz + nimax;
-        int n2 = ij+ ijnum*jju;
-        double z_r = zlist_r[n1];
-        double z_i = zlist_i[n1];
-        for (int k = 0; k < 3; k++)
-          sumzdu_r[k] += dulist_r[n2 + nu_max*k] * z_r + dulist_i[n2 + nu_max*k] * z_i;
-        jjz++;
-        jju++;
-        }
-        int n1 = i + inum*jjz + nimax;
-        int n2 = ij+ ijnum*jju;
-        double z_r = zlist_r[n1];
-        double z_i = zlist_i[n1];
-        for (int k = 0; k < 3; k++)
-        sumzdu_r[k] += (dulist_r[n2 + nu_max*k] * z_r + dulist_i[n2 + nu_max*k] * z_i) * 0.5;
-      } // end if j1even
-
-      for (int k = 0; k < 3; k++)
-        if (bnorm_flag)
-        dbdr[ij + ijnum*k + ijnum*3*jjb] += 2.0 * sumzdu_r[k];
-        else
-        dbdr[ij + ijnum*k + ijnum*3*jjb] += 2.0 * sumzdu_r[k] * j1fac;
-
-      // sum over Conj(dudr(j2,ma2,mb2))*z(j,j1,j2,ma2,mb2)
-
-      double j2fac = (j + 1) / (j2 + 1.0);
-      idouble = elem2*nelements+elem1;
-      itriple = (elem1*nelements+elem3)*nelements+elem2;
-      jjz = idxz_block[j2 + j1*jdim + j*jdim*jdim];
-      jju = idxu_block[j2];
-
-      for (int k = 0; k < 3; k++)
-        sumzdu_r[k] = 0.0;
-
-      for (int mb = 0; 2 * mb < j2; mb++)
-        for (int ma = 0; ma <= j2; ma++) {
-        int n1 = i + inum*jjz + nimax;
-        int n2 = ij+ ijnum*jju;
-        double z_r = zlist_r[n1];
-        double z_i = zlist_i[n1];
-        for (int k = 0; k < 3; k++)
-          sumzdu_r[k] += dulist_r[n2 + nu_max*k] * z_r + dulist_i[n2 + nu_max*k] * z_i;
-        jjz++;
-        jju++;
-        } //end loop over ma mb
-
-      // for j2 even, handle middle column
-
-      if (j2 % 2 == 0) {
-        int mb = j2 / 2;
-        for (int ma = 0; ma < mb; ma++) {
-        int n1 = i + inum*jjz + nimax;
-        int n2 = ij+ ijnum*jju;
-        double z_r = zlist_r[n1];
-        double z_i = zlist_i[n1];
-        for (int k = 0; k < 3; k++)
-          sumzdu_r[k] += dulist_r[n2 + nu_max*k] * z_r + dulist_i[n2 + nu_max*k] * z_i;
-        jjz++;
-        jju++;
-        }
-
-        int n1 = i + inum*jjz + nimax;
-        int n2 = ij+ ijnum*jju;
-        double z_r = zlist_r[n1];
-        double z_i = zlist_i[n1];
-        for (int k = 0; k < 3; k++)
-        sumzdu_r[k] += (dulist_r[n2 + nu_max*k] * z_r + dulist_i[n2 + nu_max*k] * z_i) * 0.5;
-      } // end if j2even
-
-      for (int k = 0; k < 3; k++)
-        if (bnorm_flag)
-        dbdr[ij + ijnum*k + ijnum*3*jjb] += 2.0 * sumzdu_r[k];
-        else
-        dbdr[ij + ijnum*k + ijnum*3*jjb] += 2.0 * sumzdu_r[k] * j2fac;
-      }
-  }
-}
-
-void snapTallyBispectrumDeriv(double *db, double *dbdr,
-    int *ai, int *aj, int *ti, int inum, int ijnum, int ncoeff, int ntype)
-{
-
-  for (int i=0; i<inum*3*ncoeff*ntype; i++)
-    db[i] = 0.0;
-
-  int N2 = ijnum*ncoeff;
-  for (int idx=0; idx<N2; idx++) {
-    int ij = idx%ijnum;
-    int icoeff = (idx-ij)/ijnum;
-    int i = ai[ij]; // index of atom i
-    int j = aj[ij]; // index of atom i
-    int itype = ti[ij]; // element type of atom i
-    int n = ncoeff*(itype-1);
-    int nii = inum*3*(icoeff + n);
-    int nij = ijnum*3*icoeff;
-
-    double bix = dbdr[ij + ijnum*0 + nij];
-    double biy = dbdr[ij + ijnum*1 + nij];
-    double biz = dbdr[ij + ijnum*2 + nij];
-    db[0 + 3*i + nii] += bix;
-    db[1 + 3*i + nii] += biy;
-    db[2 + 3*i + nii] += biz;
-    db[0 + 3*j + nii] -= bix;
-    db[1 + 3*j + nii] -= biy;
-    db[2 + 3*j + nii] -= biz;
-  }
-}
-
-void MLPOD::snapdesc(double *blist, double *bd, double *rij, double *tmpmem, int *atomtype, int *ai,
-    int *aj, int *ti, int *tj, int natom, int Nij)
-{
-  int dim = 3;
-  int idxu_max = sna.idxu_max;
-  int idxb_max = sna.idxb_max;
-  int idxz_max = sna.idxz_max;
-  int twojmax = sna.twojmax;
-  int ncoeff = sna.ncoeff;
-  int ntypes = sna.ntypes;
-  int nelements = sna.nelements;
-  int ndoubles = sna.ndoubles;
-  int bnormflag = sna.bnormflag;
-  int chemflag = sna.chemflag;
-  int switchflag = sna.switchflag;
-  int wselfallflag = sna.wselfallflag;
-  int nelem = (chemflag) ? nelements : 1;
-
-  int *map = sna.map;
-  int *idxz = sna.idxz;
-  int *idxz_block = sna.idxz_block;
-  int *idxb = sna.idxb;
-  int *idxu_block = sna.idxu_block;
-  int *idxcg_block = sna.idxcg_block;
-
-  double wself = sna.wself;
-  double rmin0 = sna.rmin0;
-  double rfac0 = sna.rfac0;
-  double rcutfac = sna.rcutfac;
-  double *rootpqarray = sna.rootpqarray;
-  double *cglist = sna.cglist;
-  double *radelem = sna.radelem;
-  double *wjelem = sna.wjelem;
-
-  int ne = 0;
-  double *Ur = &tmpmem[ne];
-  double *Zr = &tmpmem[ne];
-  ne += MAX(idxu_max*Nij, idxz_max*ndoubles*natom);
-  double *Ui = &tmpmem[ne];
-  double *Zi = &tmpmem[ne];
-  ne += MAX(idxu_max*Nij, idxz_max*ndoubles*natom);
-  double *dUr = &tmpmem[ne];
-  ne += idxu_max*dim*Nij;
-  double *dUi = &tmpmem[ne];
-  ne += idxu_max*dim*Nij;
-  double *dblist = &tmpmem[ne];
-  double *Utotr = &tmpmem[ne];
-  ne += idxu_max*nelements*natom;
-  double *Utoti = &tmpmem[ne];
-
-  snapComputeUlist(Ur, Ui, dUr, dUi, rootpqarray, rij, wjelem, radelem, rmin0,
-     rfac0, rcutfac, idxu_block, ti, tj, twojmax, idxu_max, Nij, switchflag);
-
-  snapZeroUarraytot2(Utotr, Utoti, wself, idxu_block, atomtype, map, ai, wselfallflag,
-      chemflag, idxu_max, nelem, twojmax, natom);
-
-  snapAddUarraytot(Utotr, Utoti, Ur, Ui, map, ai, tj, idxu_max, natom, Nij, chemflag);
-
-  snapComputeZi2(Zr, Zi, Utotr, Utoti, cglist, idxz, idxu_block,
-      idxcg_block, twojmax, idxu_max, idxz_max, nelem, bnormflag, natom);
-
-  snapComputeBi1(blist, Zr, Zi, Utotr, Utoti, idxb, idxu_block, idxz_block, twojmax, idxb_max,
-      idxu_max, idxz_max, nelem, natom);
-
-  snapComputeDbidrj(dblist, Zr, Zi, dUr, dUi, idxb, idxu_block, idxz_block, map, ai, tj,
-      twojmax, idxb_max, idxu_max, idxz_max, nelements, bnormflag, chemflag, natom, Nij);
-
-  snapTallyBispectrumDeriv(bd, dblist, ai, aj, ti, natom, Nij, ncoeff, ntypes);
-}
-
-void MLPOD::podNeighPairs(double *rij, double *x, int *idxi, int *ai, int *aj,  int *ti, int *tj,
-    int *pairnumsum, int *atomtype, int *jlist, int *alist, int inum)
-{
-  for (int ii=0; ii<inum; ii++) {  // for each atom i in the simulation box
-    int gi = ii;     // atom i
-    int itype = atomtype[gi];
-    int start = pairnumsum[ii];
-    int m = pairnumsum[ii+1] - start;
-    for (int l=0; l<m ; l++) {   // loop over each atom around atom i
-      int k = start + l;
-      int gj = jlist[k];  // atom j
-      idxi[k]    = ii;
-      ai[k]    = alist[gi];
-      aj[k]    = alist[gj];
-      ti[k]    = itype;
-      tj[k]    = atomtype[aj[k]];
-      rij[k*3+0]   = x[gj*3+0] -  x[gi*3+0];  // xj - xi
-      rij[k*3+1]   = x[gj*3+1] -  x[gi*3+1];  // xj - xi
-      rij[k*3+2]   = x[gj*3+2] -  x[gi*3+2];  // xj - xi
-    }
-  }
-};
-
-int MLPOD::lammpsNeighPairs(double *rij, double **x, double rcutsq, int *idxi, int *ai, int *aj,  int *ti, int *tj,
-    int *pairnumsum, int *atomtype, int *numneigh, int *ilist, int **jlist, int inum)
-{
-
-  int ninside = 0;
-  for (int ii=0; ii<inum; ii++) {  // for each atom i in the simulation box
-    int gi = ilist[ii];     // atom i
-    int itype = atomtype[gi];
-    int m = numneigh[gi];
-    pairnumsum[ii+1] = 0;
-    for (int l=0; l<m ; l++) {   // loop over each atom around atom i
-      int gj = jlist[gi][l];  // atom j
-      double delx   = x[gj][0] -  x[gi][0];  // xj - xi
-      double dely   = x[gj][1] -  x[gi][1];  // xj - xi
-      double delz   = x[gj][2] -  x[gi][2];  // xj - xi
-      double rsq = delx*delx + dely*dely + delz*delz;
-      if (rsq < rcutsq && rsq > 1e-20) {
-        rij[ninside*3 + 0] = delx;
-        rij[ninside*3 + 1] = dely;
-        rij[ninside*3 + 2] = delz;
-        idxi[ninside]    = ii;
-        ai[ninside]    = gi;
-        aj[ninside]    = gj;
-        ti[ninside]    = itype;
-        tj[ninside]    = atomtype[gj];
-        ninside++;
-        pairnumsum[ii+1] += 1;
-      }
-    }
-  }
-
-  pairnumsum[0] = 0;
-  for (int ii=0; ii<inum; ii++)
-    pairnumsum[ii+1] = pairnumsum[ii+1] + pairnumsum[ii];
-
-
-  return ninside;
-};
-
-void MLPOD::podradialbasis(double *rbf, double *xij, double *besselparams, double rin,
-    double rmax, int besseldegree, int inversedegree, int nbesselpars, int N)
-{
-  for (int n=0; n<N; n++) {
-    double xij1 = xij[0+3*n];
-    double xij2 = xij[1+3*n];
-    double xij3 = xij[2+3*n];
-
-    double dij = sqrt(xij1*xij1 + xij2*xij2 + xij3*xij3);
-    double r = dij - rin;
-    double y = r/rmax;
-    double y2 = y*y;
-    double y3 = 1.0 - y2*y;
-    double y4 = y3*y3 + 1e-6;
-    double y5 = sqrt(y4);
-    double y6 = exp(-1.0/y5);
-    double fcut = y6/exp(-1.0);
-
-    for (int j=0; j<nbesselpars; j++) {
-      double x =  (1.0 - exp(-besselparams[j]*r/rmax))/(1.0-exp(-besselparams[j]));
-      for (int i=0; i<besseldegree; i++)
-        rbf[n + N*i + N*besseldegree*j] = ((sqrt(2.0/(rmax))/(i+1)))*fcut*sin((i+1)*MY_PI*x)/r;
-    }
-
-    for (int i=0; i<inversedegree; i++) {
-      int p = besseldegree*nbesselpars + i;
-      double a = powint(dij, i+1);
-      rbf[n + N*p] = fcut/a;
-    }
-  }
-}
-
-void MLPOD::podtally2b(double *eatom, double *eij, int *idxi, int *ti, int *tj, int *elemindex,
-    int nelements, int nbf, int natom, int N)
-{
-  int nelements2 = nelements*(nelements+1)/2;
-  for (int n=0; n<N; n++) {
-    int i1 = idxi[n];
-    int typei = ti[n]-1;
-    int typej = tj[n]-1;
-    for (int m=0; m<nbf; m++) {
-      int im =  i1 + natom*((elemindex[typei + typej*nelements] - 1) + nelements2*m);
-      int nm = n + N*m;
-      eatom[im] += eij[nm];
-    }
-  }
-}
-
-void MLPOD::pod1body(double *eatom, int *atomtype, int nelements, int natom)
-{
-  for (int m=1; m<=nelements; m++)
-    for (int i=0; i<natom; i++)
-      eatom[i + natom*(m-1)] = (atomtype[i] == m) ? 1.0 : 0.0;
-}
-
-void MLPOD::pod3body(double *eatom, double *yij, double *e2ij, double *tmpmem, int *elemindex,
-                     int *pairnumsum, int * /*idxi*/, int *ti, int *tj, int nrbf, int nabf,
-                     int nelements, int natom, int Nij)
-{
-  int dim = 3, nabf1 = nabf + 1;
-  int nelements2 = nelements*(nelements+1)/2;
-  int n, nijk, typei, typej, typek, ij, ik;
-
-  double xij1, xij2, xij3, xik1, xik2, xik3;
-  double xdot, rijsq, riksq, rij, rik;
-  double costhe, theta;
-  double uj, uk, rbf;
-
-  double *abf = &tmpmem[0];
-
-  double *etm = &tmpmem[nabf1];
-
-  for (int ii=0; ii<natom; ii++) {
-    int numneigh = pairnumsum[ii+1] - pairnumsum[ii];    // number of pairs (i,j) around i
-    int s = pairnumsum[ii];
-
-    for (int m=0; m<nrbf*nabf1*nelements2*nelements; m++)
-      etm[m] = 0.0;
-
-    for (int lj=0; lj<numneigh ; lj++) {   // loop over each atom j around atom i
-      ij = lj + s;
-      typei = ti[ij] - 1;
-      typej = tj[ij] - 1;
-      xij1 = yij[0+dim*ij];  // xj - xi
-      xij2 = yij[1+dim*ij];  // xj - xi
-      xij3 = yij[2+dim*ij];  // xj - xi
-      rijsq = xij1*xij1 + xij2*xij2 + xij3*xij3;
-      rij = sqrt(rijsq);
-      for (int lk=lj+1; lk<numneigh; lk++) { // loop over each atom k around atom i (k > j)
-        ik = lk + s;
-        typek = tj[ik] - 1;
-        xik1 = yij[0+dim*ik];  // xk - xi
-        xik2 = yij[1+dim*ik];  // xk - xi
-        xik3 = yij[2+dim*ik];  // xk - xi       s
-        riksq = xik1*xik1 + xik2*xik2 + xik3*xik3;
-        rik = sqrt(riksq);
-
-        xdot  = xij1*xik1 + xij2*xik2 + xij3*xik3;
-        costhe = xdot/(rij*rik);
-        costhe = costhe > 1.0 ? 1.0 : costhe;
-        costhe = costhe < -1.0 ? -1.0 : costhe;
-        theta = acos(costhe);
-
-        for (int p=0; p <nabf1; p++)
-          abf[p] = cos(p*theta);
-
-        for (int m=0; m<nrbf; m++) {
-          uj = e2ij[lj + s + Nij*m];
-          uk = e2ij[lk + s + Nij*m];
-          rbf = uj*uk;
-          for (int p=0; p <nabf1; p++) {
-            n = p + (nabf1)*m;
-            nijk = (elemindex[typej + typek*nelements] - 1) + nelements2*typei + nelements2*nelements*n;
-            etm[nijk] += rbf*abf[p];
-          }
-        }
-      }
-    }
-    for (int m=0; m<nrbf*nabf1*nelements2*nelements; m++)
-      eatom[ii + natom*m] += etm[m];
-  }
-}
-
-
-void MLPOD::poddesc_ij(double *eatom1, double *eatom2, double *eatom3, double *rij, double *Phi, double *besselparams,
-      double *tmpmem, double rin, double rcut, int *pairnumsum, int *atomtype, int *idxi, int *ti, int *tj,
-      int *elemindex, int *pdegree, int nbesselpars, int nrbf2, int nrbf3, int nabf, int nelements, int Nij, int natom)
-{
-  int nrbf = MAX(nrbf2, nrbf3);
-  int ns = pdegree[0]*nbesselpars + pdegree[1];
-
-  double *e2ij = &tmpmem[0]; // Nij*nrbf
-  double *e2ijt = &tmpmem[Nij*nrbf]; // Nij*ns
-
-  // orthogonal radial basis functions
-
-  podradialbasis(e2ijt, rij, besselparams, rin, rcut-rin, pdegree[0], pdegree[1], nbesselpars, Nij);
-  podMatMul(e2ij, e2ijt, Phi, Nij, ns, nrbf);
-
-  // one-body descriptors
-
-  pod1body(eatom1, atomtype, nelements, natom);
-
-  podtally2b(eatom2, e2ij, idxi, ti, tj, elemindex, nelements, nrbf2, natom, Nij);
-
-  // three-body descriptors
-
-  pod3body(eatom3, rij, e2ij, &tmpmem[Nij*nrbf], elemindex, pairnumsum,
-       idxi, ti, tj, nrbf3, nabf, nelements, natom, Nij);
-
-}
-
-void MLPOD::snapComputeUij(double *Sr, double *Si, double *rootpqarray, double *rij,
-    double *wjelem, double *radelem, double rmin0, double rfac0, double rcutfac, int *idxu_block,
-    int *ti, int *tj, int twojmax, int idxu_max, int ijnum, int switch_flag)
-{
-  for(int ij=0; ij<ijnum; ij++) {
-  double x = rij[ij*3+0];
-  double y = rij[ij*3+1];
-  double z = rij[ij*3+2];
-  double rsq = x * x + y * y + z * z;
-  double r = sqrt(rsq);
-
-  double rcutij = (radelem[ti[ij]]+radelem[tj[ij]])*rcutfac; //(radelem[type[ii]]+radelem[type[jj]])*rcutfac;
-  double rscale0 = rfac0 * MY_PI / (rcutij - rmin0);
-  double theta0 = (r - rmin0) * rscale0;
-  double z0 = r / tan(theta0);
-
-  double sfac = 0.0;
-  if (switch_flag == 0) {
-    sfac = 1.0;
-  }
-  else if (switch_flag == 1) {
-    if (r <= rmin0) {
-      sfac = 1.0;
-    }
-    else if(r > rcutij) {
-      sfac = 0.0;
-    }
-    else {
-      double rcutfac0 = MY_PI / (rcutij - rmin0);
-      sfac =  0.5 * (cos((r - rmin0) * rcutfac0) + 1.0);
-    }
-  }
-  sfac *= wjelem[tj[ij]];
-
-  double r0inv;
-  double a_r, a_i, b_r, b_i;
-  double rootpq;
-  int jdim = twojmax + 1;
-
-  r0inv = 1.0 / sqrt(r * r + z0 * z0);
-  a_r = r0inv * z0;
-  a_i = -r0inv * z;
-  b_r = r0inv * y;
-  b_i = -r0inv * x;
-
-  Sr[ij+0*ijnum] = 1.0;
-  Si[ij+0*ijnum] = 0.0;
-  for (int j = 1; j <= twojmax; j++) {
-    int jju = idxu_block[j];
-    int jjup = idxu_block[j-1];
-
-    // fill in left side of matrix layer from previous layer
-
-    for (int mb = 0; 2*mb <= j; mb++) {
-      Sr[ij+jju*ijnum] = 0.0;
-      Si[ij+jju*ijnum] = 0.0;
-      for (int ma = 0; ma < j; ma++) {
-        rootpq = rootpqarray[(j - ma)*jdim + (j - mb)];
-        int njju = ij+jju*ijnum;
-        int njju1 = ij+(jju+1)*ijnum;
-        int njjup = ij+jjup*ijnum;
-        double u_r = Sr[njjup];
-        double u_i = Si[njjup];
-
-        Sr[njju] += rootpq * (a_r * u_r + a_i * u_i);
-        Si[njju] += rootpq * (a_r * u_i - a_i * u_r);
-
-        rootpq = rootpqarray[(ma + 1)*jdim + (j - mb)];
-        Sr[njju1] = -rootpq * (b_r * u_r + b_i * u_i);
-        Si[njju1] = -rootpq * (b_r * u_i - b_i * u_r);
-        jju++;
-        jjup++;
-      }
-      jju++;
-    }
-
-    jju = idxu_block[j];
-    jjup = jju+(j+1)*(j+1)-1;
-    int mbpar = 1;
-    for (int mb = 0; 2*mb <= j; mb++) {
-      int mapar = mbpar;
-      for (int ma = 0; ma <= j; ma++) {
-        int njju = ij+jju*ijnum;
-        int njjup = ij+jjup*ijnum;
-        if (mapar == 1) {
-          Sr[njjup] = Sr[njju];
-          Si[njjup] = -Si[njju];
-        } else {
-          Sr[njjup] = -Sr[njju];
-          Si[njjup] =  Si[njju];
-        }
-        mapar = -mapar;
-        jju++;
-        jjup--;
-      }
-      mbpar = -mbpar;
-    }
-  }
-
-  for (int k=0; k<idxu_max; k++) {
-    int ijk = ij + ijnum*k;
-    Sr[ijk] = sfac*Sr[ijk];
-    Si[ijk] = sfac*Si[ijk];
-  }
-  }
-};
-
-void MLPOD::snapdesc_ij(double *blist, double *rij, double *tmpmem, int *atomtype, int *idxi,
-    int *ti, int *tj, int natom, int Nij)
-{
-  int idxu_max = sna.idxu_max;
-  int idxb_max = sna.idxb_max;
-  int idxz_max = sna.idxz_max;
-  int twojmax = sna.twojmax;
-  int nelements = sna.nelements;
-  int ndoubles = sna.ndoubles;
-  int bnormflag = sna.bnormflag;
-  int chemflag = sna.chemflag;
-  int switchflag = sna.switchflag;
-  int wselfallflag = sna.wselfallflag;
-  int nelem = (chemflag) ? nelements : 1;
-
-  int *map = sna.map;
-  int *idxz = sna.idxz;
-  int *idxz_block = sna.idxz_block;
-  int *idxb = sna.idxb;
-  int *idxu_block = sna.idxu_block;
-  int *idxcg_block = sna.idxcg_block;
-
-  double wself = sna.wself;
-  double rmin0 = sna.rmin0;
-  double rfac0 = sna.rfac0;
-  double rcutfac = sna.rcutfac;
-  double *rootpqarray = sna.rootpqarray;
-  double *cglist = sna.cglist;
-  double *radelem = sna.radelem;
-  double *wjelem = sna.wjelem;
-
-  int ne = 0;
-  double *Ur = &tmpmem[ne];
-  double *Zr = &tmpmem[ne];
-  ne += MAX(idxu_max*Nij, idxz_max*ndoubles*natom);
-  double *Ui = &tmpmem[ne];
-  double *Zi = &tmpmem[ne];
-  ne += MAX(idxu_max*Nij, idxz_max*ndoubles*natom);
-  double *Utotr = &tmpmem[ne];
-  ne += idxu_max*nelements*natom;
-  double *Utoti = &tmpmem[ne];
-
-  snapComputeUij(Ur, Ui, rootpqarray, rij, wjelem, radelem, rmin0,
-     rfac0, rcutfac, idxu_block, ti, tj, twojmax, idxu_max, Nij, switchflag);
-
-  snapZeroUarraytot2(Utotr, Utoti, wself, idxu_block, atomtype, map, idxi, wselfallflag,
-      chemflag, idxu_max, nelem, twojmax, natom);
-
-  snapAddUarraytot(Utotr, Utoti, Ur, Ui, map, idxi, tj, idxu_max, natom, Nij, chemflag);
-
-  snapComputeZi2(Zr, Zi, Utotr, Utoti, cglist, idxz, idxu_block,
-      idxcg_block, twojmax, idxu_max, idxz_max, nelem, bnormflag, natom);
-
-  snapComputeBi1(blist, Zr, Zi, Utotr, Utoti, idxb, idxu_block, idxz_block, twojmax, idxb_max,
-      idxu_max, idxz_max, nelem, natom);
-}
-
-void MLPOD::linear_descriptors_ij(double *gd, double *eatom, double *rij, double *tmpmem, int *pairnumsum,
-    int *atomtype, int *idxi, int *ti, int *tj, int natom, int Nij)
-{
-  int nelements = pod.nelements;
-  int nbesselpars = pod.nbesselpars;
-  int nrbf2 = pod.nbf2;
-  int nabf3 = pod.nabf3;
-  int nrbf3 = pod.nrbf3;
-  int nd1 = pod.nd1;
-  int nd2 = pod.nd2;
-  int nd3 = pod.nd3;
-  int nd4 = pod.nd4;
-  int nd1234 = nd1+nd2+nd3+nd4;
-  int *pdegree2 = pod.twobody;
-  int *elemindex = pod.elemindex;
-  double rin = pod.rin;
-  double rcut = pod.rcut;
-  double *Phi2 = pod.Phi2;
-  double *besselparams = pod.besselparams;
-
-  double *eatom1 = &eatom[0];
-  double *eatom2 = &eatom[0+natom*nd1];
-  double *eatom3 = &eatom[0+natom*(nd1+nd2)];
-  double *eatom4 = &eatom[0+natom*(nd1+nd2+nd3)];
-
-  podArraySetValue(eatom1, 0.0, natom*nd1234);
-
-  // peratom descriptors for one-body, two-body, and three-body linear potentials
-
-  poddesc_ij(eatom1, eatom2, eatom3, rij, Phi2, besselparams,
-      tmpmem, rin, rcut, pairnumsum, atomtype, idxi, ti, tj, elemindex, pdegree2,
-      nbesselpars, nrbf2, nrbf3, nabf3, nelements, Nij, natom);
-
-  // peratom snap descriptors
-
-  if (pod.snaptwojmax > 0)
-    snapdesc_ij(eatom4, rij, tmpmem, atomtype, idxi, ti, tj, natom, Nij);
-
-  // global descriptors for one-body, two-body, three-body, and four-bodt linear potentials
-
-  podArraySetValue(tmpmem, 1.0, natom);
-
-  char cht = 'T';
-  double one = 1.0;
-  int inc1 = 1;
-  DGEMV(&cht, &natom, &nd1234, &one, eatom1, &natom, tmpmem, &inc1, &one, gd, &inc1);
-}
-
-double MLPOD::calculate_energy(double *effectivecoeff, double *gd, double *coeff)
-{
-  int nd1 = pod.nd1;
-  int nd2 = pod.nd2;
-  int nd3 = pod.nd3;
-  int nd4 = pod.nd4;
-  int nd1234 = nd1+nd2+nd3+nd4;
-  int nd22 = pod.nd22;
-  int nd23 = pod.nd23;
-  int nd24 = pod.nd24;
-  int nd33 = pod.nd33;
-  int nd34 = pod.nd34;
-  int nd44 = pod.nd44;
-  int nd234 = pod.nd234;
-  int nd333 = pod.nd333;
-  int nd444 = pod.nd444;
-  int nc2 = pod.nc2;
-  int nc3 = pod.nc3;
-  int nc4 = pod.nc4;
-
-  // two-body, three-body, and four-body descriptors
-
-  double *d2 = &gd[nd1];
-  double *d3 = &gd[nd1+nd2];
-  double *d4 = &gd[nd1+nd2+nd3];
-
-  // quadratic and cubic POD coefficients
-
-  double *coeff22 = &coeff[nd1234];
-  double *coeff23 = &coeff[nd1234+nd22];
-  double *coeff24 = &coeff[nd1234+nd22+nd23];
-  double *coeff33 = &coeff[nd1234+nd22+nd23+nd24];
-  double *coeff34 = &coeff[nd1234+nd22+nd23+nd24+nd33];
-  double *coeff44 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34];
-  double *coeff234 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44];
-  double *coeff333 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234];
-  double *coeff444 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234+nd333];
-
-  // calculate energy for linear potentials
-
-  double energy = 0.0;
-  for (int i=0; i< nd1234; i++) {
-    effectivecoeff[i] = 0.0;
-    energy += coeff[i]*gd[i];
-  }
-
-  // effective POD coefficients for calculating force
-
-  double *c2 = &effectivecoeff[nd1];
-  double *c3 = &effectivecoeff[nd1+nd2];
-  double *c4 = &effectivecoeff[nd1+nd2+nd3];
-
-  // calculate energy for quadratic22 potential
-
-  if (nd22 > 0) energy += quadratic_coefficients(c2, d2, coeff22, pod.quadratic22, nc2);
-
-  // calculate energy for quadratic23 potential
-
-  if (nd23 > 0) energy += quadratic_coefficients(c2, c3, d2, d3, coeff23, pod.quadratic23, nc2, nc3);
-
-  // calculate energy for quadratic24 potential
-
-  if (nd24 > 0) energy += quadratic_coefficients(c2, c4, d2, d4, coeff24, pod.quadratic24, nc2, nc4);
-
-  // calculate energy for quadratic33 potential
-
-  if (nd33 > 0) energy += quadratic_coefficients(c3, d3, coeff33, pod.quadratic33, nc3);
-
-  // calculate energy for quadratic34 potential
-
-  if (nd34 > 0) energy += quadratic_coefficients(c3, c4, d3, d4, coeff34, pod.quadratic34, nc3, nc4);
-
-  // calculate energy for quadratic44 potential
-
-  if (nd44 > 0) energy += quadratic_coefficients(c4, d4, coeff44, pod.quadratic44, nc4);
-
-  // calculate energy for cubic234 potential
-
-  if (nd234 > 0) energy += cubic_coefficients(c2, c3, c4, d2, d3, d4, coeff234, pod.cubic234, nc2, nc3, nc4);
-
-  // calculate energy for cubic333 potential
-
-  if (nd333 > 0) energy += cubic_coefficients(c3, d3, coeff333, pod.cubic333, nc3);
-
-  // calculate energy for cubic444 potential
-
-  if (nd444 > 0) energy += cubic_coefficients(c4, d4, coeff444, pod.cubic444, nc4);
-
-  // calculate effective POD coefficients
-
-  for (int i=0; i< nd1234; i++) effectivecoeff[i] += coeff[i];
-
-  return energy;
-}
-
-double MLPOD::calculate_energy(double *energycoeff, double *forcecoeff, double *gd,
-        double *gdall, double *coeff)
-{
-  int nd1 = pod.nd1;
-  int nd2 = pod.nd2;
-  int nd3 = pod.nd3;
-  int nd4 = pod.nd4;
-  int nd1234 = nd1+nd2+nd3+nd4;
-  int nd22 = pod.nd22;
-  int nd23 = pod.nd23;
-  int nd24 = pod.nd24;
-  int nd33 = pod.nd33;
-  int nd34 = pod.nd34;
-  int nd44 = pod.nd44;
-  int nd234 = pod.nd234;
-  int nd333 = pod.nd333;
-  int nd444 = pod.nd444;
-  int nc2 = pod.nc2;
-  int nc3 = pod.nc3;
-  int nc4 = pod.nc4;
-
-  // quadratic and cubic POD coefficients
-
-  double *coeff22 = &coeff[nd1234];
-  double *coeff23 = &coeff[nd1234+nd22];
-  double *coeff24 = &coeff[nd1234+nd22+nd23];
-  double *coeff33 = &coeff[nd1234+nd22+nd23+nd24];
-  double *coeff34 = &coeff[nd1234+nd22+nd23+nd24+nd33];
-  double *coeff44 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34];
-  double *coeff234 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44];
-  double *coeff333 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234];
-  double *coeff444 = &coeff[nd1234+nd22+nd23+nd24+nd33+nd34+nd44+nd234+nd333];
-
-  // sum global descriptors over all MPI ranks
-
-  MPI_Allreduce(gd, gdall, nd1234, MPI_DOUBLE, MPI_SUM, world);
-
-  for (int i=0; i< nd1234; i++) {
-    energycoeff[i] = 0.0;
-    forcecoeff[i] = 0.0;
-  }
-
-  // effective POD coefficients for calculating force
-
-  double *c2 = &forcecoeff[nd1];
-  double *c3 = &forcecoeff[nd1+nd2];
-  double *c4 = &forcecoeff[nd1+nd2+nd3];
-
-  // effective POD coefficients for calculating energy
-
-  double *ce2 = &energycoeff[nd1];
-  double *ce3 = &energycoeff[nd1+nd2];
-  double *ce4 = &energycoeff[nd1+nd2+nd3];
-
-  // two-body, three-body, and four-body descriptors
-
-  double *d2 = &gdall[nd1];
-  double *d3 = &gdall[nd1+nd2];
-  double *d4 = &gdall[nd1+nd2+nd3];
-
-  // calculate energy for quadratic22 potential
-
-  if (nd22 > 0) quadratic_coefficients(ce2, c2, d2, coeff22, pod.quadratic22, nc2);
-
-  // calculate energy for quadratic23 potential
-
-  if (nd23 > 0) quadratic_coefficients(ce2, ce3, c2, c3, d2, d3, coeff23, pod.quadratic23, nc2, nc3);
-
-  // calculate energy for quadratic24 potential
-
-  if (nd24 > 0) quadratic_coefficients(ce2, ce4, c2, c4, d2, d4, coeff24, pod.quadratic24, nc2, nc4);
-
-  // calculate energy for quadratic33 potential
-
-  if (nd33 > 0) quadratic_coefficients(ce3, c3, d3, coeff33, pod.quadratic33, nc3);
-
-  // calculate energy for quadratic34 potential
-
-  if (nd34 > 0) quadratic_coefficients(ce3, ce4, c3, c4, d3, d4, coeff34, pod.quadratic34, nc3, nc4);
-
-  // calculate energy for quadratic44 potential
-
-  if (nd44 > 0) quadratic_coefficients(ce4, c4, d4, coeff44, pod.quadratic44, nc4);
-
-  // calculate energy for cubic234 potential
-
-  if (nd234 > 0) cubic_coefficients(ce2, ce3, ce4, c2, c3, c4, d2, d3, d4, coeff234, pod.cubic234, nc2, nc3, nc4);
-
-  // calculate energy for cubic333 potential
-
-  if (nd333 > 0) cubic_coefficients(ce3, c3, d3, coeff333, pod.cubic333, nc3);
-
-  // calculate energy for cubic444 potential
-
-  if (nd444 > 0) cubic_coefficients(ce4, c4, d4, coeff444, pod.cubic444, nc4);
-
-  // calculate effective POD coefficients
-
-  for (int i=0; i< nd1234; i++) {
-    energycoeff[i] += coeff[i];
-    forcecoeff[i] += coeff[i];
-  }
-
-  // calculate energy
-
-  double energy = 0.0;
-  for (int i=0; i< nd1234; i++)
-    energy += energycoeff[i]*gd[i];
-
-  return energy;
-}
-
-void MLPOD::pod2body_force(double *force, double *fij, double *coeff2, int *ai, int *aj, int *ti,
-                           int *tj, int *elemindex, int nelements, int nbf, int /*natom*/, int N)
-{
-  int nelements2 = nelements*(nelements+1)/2;
-  for (int n=0; n<N; n++) {
-    int i1 = ai[n];
-    int j1 = aj[n];
-    int typei = ti[n]-1;
-    int typej = tj[n]-1;
-    for (int m=0; m<nbf; m++) {
-      int im =  3*i1;
-      int jm =  3*j1;
-      int nm = n + N*m;
-      int km = (elemindex[typei + typej*nelements] - 1) + nelements2*m;
-      double ce = coeff2[km];
-      force[0 + im] += fij[0 + 3*nm]*ce;
-      force[1 + im] += fij[1 + 3*nm]*ce;
-      force[2 + im] += fij[2 + 3*nm]*ce;
-      force[0 + jm] -= fij[0 + 3*nm]*ce;
-      force[1 + jm] -= fij[1 + 3*nm]*ce;
-      force[2 + jm] -= fij[2 + 3*nm]*ce;
-    }
-  }
-}
-
-void MLPOD::pod3body_force(double *force, double *yij, double *e2ij, double *f2ij, double *coeff3, double *tmpmem,
-       int *elemindex, int *pairnumsum, int *ai, int *aj, int *ti, int *tj, int nrbf, int nabf,
-       int nelements, int natom, int Nij)
-{
-  int dim = 3, nabf1 = nabf + 1;
-  int nelements2 = nelements*(nelements+1)/2;
-  int n, c, nijk3, typei, typej, typek, ij, ik, i, j, k;
-
-  double xij1, xij2, xij3, xik1, xik2, xik3;
-  double xdot, rijsq, riksq, rij, rik;
-  double costhe, sinthe, theta, dtheta;
-  double tm, tm1, tm2, dct1, dct2, dct3, dct4, dct5, dct6;
-
-  double *abf = &tmpmem[0];
-  double *dabf1 = &tmpmem[nabf1];
-  double *dabf2 = &tmpmem[2*nabf1];
-  double *dabf3 = &tmpmem[3*nabf1];
-  double *dabf4 = &tmpmem[4*nabf1];
-  double *dabf5 = &tmpmem[5*nabf1];
-  double *dabf6 = &tmpmem[6*nabf1];
-
-  for (int ii=0; ii<natom; ii++) {
-    int numneigh = pairnumsum[ii+1] - pairnumsum[ii];    // number of pairs (i,j) around i
-    int s = pairnumsum[ii];
-    for (int lj=0; lj<numneigh ; lj++) {   // loop over each atom j around atom i
-      ij = lj + s;
-      i = ai[ij];  // atom i
-      j = aj[ij];  // atom j
-      typei = ti[ij] - 1;
-      typej = tj[ij] - 1;
-      xij1 = yij[0+dim*ij];  // xj - xi
-      xij2 = yij[1+dim*ij];  // xj - xi
-      xij3 = yij[2+dim*ij];  // xj - xi
-      rijsq = xij1*xij1 + xij2*xij2 + xij3*xij3;
-      rij = sqrt(rijsq);
-
-      double fixtmp,fiytmp,fiztmp;
-      fixtmp = fiytmp = fiztmp = 0.0;
-      double fjxtmp,fjytmp,fjztmp;
-      fjxtmp = fjytmp = fjztmp = 0.0;
-      for (int lk=lj+1; lk<numneigh; lk++) { // loop over each atom k around atom i (k > j)
-        ik = lk + s;
-        k = aj[ik];  // atom k
-        typek = tj[ik] - 1;
-        xik1 = yij[0+dim*ik];  // xk - xi
-        xik2 = yij[1+dim*ik];  // xk - xi
-        xik3 = yij[2+dim*ik];  // xk - xi       s
-        riksq = xik1*xik1 + xik2*xik2 + xik3*xik3;
-        rik = sqrt(riksq);
-
-        xdot  = xij1*xik1 + xij2*xik2 + xij3*xik3;
-        costhe = xdot/(rij*rik);
-        costhe = costhe > 1.0 ? 1.0 : costhe;
-        costhe = costhe < -1.0 ? -1.0 : costhe;
-        xdot = costhe*(rij*rik);
-
-        sinthe = sqrt(1.0 - costhe*costhe);
-        sinthe = sinthe > 1e-12 ? sinthe : 1e-12;
-        theta = acos(costhe);
-        dtheta = -1.0/sinthe;
-
-        tm1 = 1.0/(rij*rijsq*rik);
-        tm2 = 1.0/(rij*riksq*rik);
-        dct1 = (xik1*rijsq - xij1*xdot)*tm1;
-        dct2 = (xik2*rijsq - xij2*xdot)*tm1;
-        dct3 = (xik3*rijsq - xij3*xdot)*tm1;
-        dct4 = (xij1*riksq - xik1*xdot)*tm2;
-        dct5 = (xij2*riksq - xik2*xdot)*tm2;
-        dct6 = (xij3*riksq - xik3*xdot)*tm2;
-
-        for (int p=0; p <nabf1; p++) {
-          abf[p] = cos(p*theta);
-          tm = -p*sin(p*theta)*dtheta;
-          dabf1[p] = tm*dct1;
-          dabf2[p] = tm*dct2;
-          dabf3[p] = tm*dct3;
-          dabf4[p] = tm*dct4;
-          dabf5[p] = tm*dct5;
-          dabf6[p] = tm*dct6;
-        }
-
-        double fjx = 0.0, fjy = 0.0, fjz = 0.0;
-        double fkx = 0.0, fky = 0.0, fkz = 0.0;
-
-        for (int m=0; m<nrbf; m++) {
-          double uj = e2ij[lj + s + Nij*m];
-          double uk = e2ij[lk + s + Nij*m];
-          double rbf = uj*uk;
-          double drbf1 = f2ij[0 + dim*(lj + s) + dim*Nij*m]*uk;
-          double drbf2 = f2ij[1 + dim*(lj + s) + dim*Nij*m]*uk;
-          double drbf3 = f2ij[2 + dim*(lj + s) + dim*Nij*m]*uk;
-          double drbf4 = f2ij[0 + dim*(lk + s) + dim*Nij*m]*uj;
-          double drbf5 = f2ij[1 + dim*(lk + s) + dim*Nij*m]*uj;
-          double drbf6 = f2ij[2 + dim*(lk + s) + dim*Nij*m]*uj;
-
-          for (int p=0; p <nabf1; p++) {
-            tm = abf[p];
-            double fj1 = drbf1*tm + rbf*dabf1[p];
-            double fj2 = drbf2*tm + rbf*dabf2[p];
-            double fj3 = drbf3*tm + rbf*dabf3[p];
-            double fk1 = drbf4*tm + rbf*dabf4[p];
-            double fk2 = drbf5*tm + rbf*dabf5[p];
-            double fk3 = drbf6*tm + rbf*dabf6[p];
-
-            n = p + (nabf1)*m;
-            c = (elemindex[typej + typek*nelements] - 1) + nelements2*typei + nelements2*nelements*n;
-            tm = coeff3[c];
-
-            fjx += fj1*tm;
-            fjy += fj2*tm;
-            fjz += fj3*tm;
-            fkx += fk1*tm;
-            fky += fk2*tm;
-            fkz += fk3*tm;
-
-          }
-        }
-        nijk3 = 3*k;
-        force[0 + nijk3] -= fkx;
-        force[1 + nijk3] -= fky;
-        force[2 + nijk3] -= fkz;
-        fjxtmp += fjx;
-        fjytmp += fjy;
-        fjztmp += fjz;
-        fixtmp += fjx+fkx;
-        fiytmp += fjy+fky;
-        fiztmp += fjz+fkz;
-      }
-      nijk3 = 3*j;
-      force[0 + nijk3] -= fjxtmp;
-      force[1 + nijk3] -= fjytmp;
-      force[2 + nijk3] -= fjztmp;
-      nijk3 = 3*i;
-      force[0 + nijk3] += fixtmp;
-      force[1 + nijk3] += fiytmp;
-      force[2 + nijk3] += fiztmp;
-    }
-  }
-}
-
-void MLPOD::snapTallyForce(double *force, double *dbdr, double *coeff4,
-                           int *ai, int *aj, int *ti, int ijnum, int ncoeff, int /*ntype*/)
-{
-  int N2 = ijnum*ncoeff;
-  for (int idx=0; idx<N2; idx++) {
-    int ij = idx%ijnum;
-    int icoeff = (idx-ij)/ijnum;
-    int i = ai[ij]; // index of atom i
-    int j = aj[ij]; // index of atom i
-    int itype = ti[ij]; // element type of atom i
-    int n = ncoeff*(itype-1);
-    int nij = ijnum*3*icoeff;
-
-    double bix = dbdr[ij + ijnum*0 + nij];
-    double biy = dbdr[ij + ijnum*1 + nij];
-    double biz = dbdr[ij + ijnum*2 + nij];
-    double ce = coeff4[icoeff + n];
-
-    force[0 + 3*i] += bix*ce;
-    force[1 + 3*i] += biy*ce;
-    force[2 + 3*i] += biz*ce;
-    force[0 + 3*j] -= bix*ce;
-    force[1 + 3*j] -= biy*ce;
-    force[2 + 3*j] -= biz*ce;
-  }
-}
-
-void MLPOD::pod4body_force(double *force, double *rij, double *coeff4, double *tmpmem, int *atomtype,
-    int *idxi, int *ai, int *aj, int *ti, int *tj, int natom, int Nij)
-{
-  int dim = 3;
-  int idxu_max = sna.idxu_max;
-  int idxb_max = sna.idxb_max;
-  int idxz_max = sna.idxz_max;
-  int twojmax = sna.twojmax;
-  int ncoeff = sna.ncoeff;
-  int ntypes = sna.ntypes;
-  int nelements = sna.nelements;
-  int ndoubles = sna.ndoubles;
-  int bnormflag = sna.bnormflag;
-  int chemflag = sna.chemflag;
-  int switchflag = sna.switchflag;
-  int wselfallflag = sna.wselfallflag;
-  int nelem = (chemflag) ? nelements : 1;
-
-  int *map = sna.map;
-  int *idxz = sna.idxz;
-  int *idxz_block = sna.idxz_block;
-  int *idxb = sna.idxb;
-  int *idxu_block = sna.idxu_block;
-  int *idxcg_block = sna.idxcg_block;
-
-  double wself = sna.wself;
-  double rmin0 = sna.rmin0;
-  double rfac0 = sna.rfac0;
-  double rcutfac = sna.rcutfac;
-  double *rootpqarray = sna.rootpqarray;
-  double *cglist = sna.cglist;
-  double *radelem = sna.radelem;
-  double *wjelem = sna.wjelem;
-
-  int ne = 0;
-  double *Ur = &tmpmem[ne];
-  double *Zr = &tmpmem[ne];
-  ne += MAX(idxu_max*Nij, idxz_max*ndoubles*natom);
-  double *Ui = &tmpmem[ne];
-  double *Zi = &tmpmem[ne];
-  ne += MAX(idxu_max*Nij, idxz_max*ndoubles*natom);
-  double *dUr = &tmpmem[ne];
-  ne += idxu_max*dim*Nij;
-  double *dUi = &tmpmem[ne];
-  ne += idxu_max*dim*Nij;
-  double *dblist = &tmpmem[ne];
-  double *Utotr = &tmpmem[ne];
-  ne += idxu_max*nelements*natom;
-  double *Utoti = &tmpmem[ne];
-
-  snapComputeUlist(Ur, Ui, dUr, dUi, rootpqarray, rij, wjelem, radelem, rmin0,
-     rfac0, rcutfac, idxu_block, ti, tj, twojmax, idxu_max, Nij, switchflag);
-
-  snapZeroUarraytot2(Utotr, Utoti, wself, idxu_block, atomtype, map, idxi, wselfallflag,
-      chemflag, idxu_max, nelem, twojmax, natom);
-
-  snapAddUarraytot(Utotr, Utoti, Ur, Ui, map, idxi, tj, idxu_max, natom, Nij, chemflag);
-
-  snapComputeZi2(Zr, Zi, Utotr, Utoti, cglist, idxz, idxu_block,
-      idxcg_block, twojmax, idxu_max, idxz_max, nelem, bnormflag, natom);
-
-  snapComputeDbidrj(dblist, Zr, Zi, dUr, dUi, idxb, idxu_block, idxz_block, map, idxi, tj,
-      twojmax, idxb_max, idxu_max, idxz_max, nelements, bnormflag, chemflag, natom, Nij);
-
-  snapTallyForce(force, dblist, coeff4, ai, aj, ti, Nij, ncoeff, ntypes);
-}
-
-void MLPOD::calculate_force(double *force, double *effectivecoeff, double *rij, double *tmpmem, int *pairnumsum,
-    int *atomtype, int *idxi, int *ai, int *aj, int *ti, int *tj, int natom, int Nij)
-{
-  int nelements = pod.nelements;
-  int nbesselpars = pod.nbesselpars;
-  int nrbf2 = pod.nbf2;
-  int nabf3 = pod.nabf3;
-  int nrbf3 = pod.nrbf3;
-  int nd1 = pod.nd1;
-  int nd2 = pod.nd2;
-  int nd3 = pod.nd3;
-  int *pdegree = pod.twobody;
-  int *elemindex = pod.elemindex;
-  double rin = pod.rin;
-  double rcut = pod.rcut;
-  double *Phi = pod.Phi2;
-  double *besselparams = pod.besselparams;
-
-  // effective POD coefficients for calculating force
-
-  double *coeff2 = &effectivecoeff[nd1];
-  double *coeff3 = &effectivecoeff[nd1+nd2];
-  double *coeff4 = &effectivecoeff[nd1+nd2+nd3];
-
-  int nrbf = MAX(nrbf2, nrbf3);
-  int ns = pdegree[0]*nbesselpars + pdegree[1];
-  double *e2ij = &tmpmem[0]; // Nij*nrbf
-  double *f2ij = &tmpmem[Nij*nrbf]; // dim*Nij*nrbf
-  double *e2ijt = &tmpmem[4*Nij*nrbf]; // Nij*ns
-  double *f2ijt = &tmpmem[4*Nij*nrbf+Nij*ns]; // dim*Nij*ns
-
-  // orthogonal radial basis functions
-
-  podradialbasis(e2ijt, f2ijt, rij, besselparams, rin, rcut-rin, pdegree[0], pdegree[1], nbesselpars, Nij);
-  podMatMul(e2ij, e2ijt, Phi, Nij, ns, nrbf);
-  podMatMul(f2ij, f2ijt, Phi, 3*Nij, ns, nrbf);
-
-  pod2body_force(force, f2ij, coeff2, ai, aj, ti, tj, elemindex, nelements, nrbf2, natom, Nij);
-
-  pod3body_force(force, rij, e2ij, f2ij, coeff3, &tmpmem[4*Nij*nrbf], elemindex, pairnumsum, ai, aj,
-      ti, tj, nrbf3, nabf3, nelements, natom, Nij);
-
-  if (pod.snaptwojmax > 0)
-    pod4body_force(force, rij, coeff4, tmpmem, atomtype, idxi, ai, aj, ti, tj, natom, Nij);
-}
-
-double MLPOD::energyforce_calculation(double *force, double *podcoeff, double *effectivecoeff, double *gd, double *rij,
-    double *tmpmem, int *pairnumsum, int *atomtype, int *idxi, int *ai, int *aj, int *ti, int *tj, int natom, int Nij)
-{
-  int nd1234 = pod.nd1+pod.nd2+pod.nd3+pod.nd4;
-  double *eatom = &tmpmem[0];
-
-  podArraySetValue(gd, 0.0, nd1234);
-  linear_descriptors_ij(gd, eatom, rij, &tmpmem[natom*nd1234], pairnumsum, atomtype, idxi, ti, tj, natom, Nij);
-
-  // Need to do MPI_Allreduce on gd for parallel
-
-  double energy = calculate_energy(effectivecoeff, gd, podcoeff);
-
-  podArraySetValue(force, 0.0, 3*natom);
-
-  calculate_force(force, effectivecoeff, rij, tmpmem, pairnumsum, atomtype, idxi, ai, aj, ti, tj, natom, Nij);
-
-  return energy;
-}
-
-
-void MLPOD::pod2body_force(double **force, double *fij, double *coeff2, int *ai, int *aj, int *ti,
-                           int *tj, int *elemindex, int nelements, int nbf, int /*natom*/, int N)
-{
-    int nelements2 = nelements*(nelements+1)/2;
-    for (int n=0; n<N; n++) {
-        int i1 = ai[n];
-        int j1 = aj[n];
-        int typei = ti[n]-1;
-        int typej = tj[n]-1;
-        for (int m=0; m<nbf; m++) {
-            int nm = n + N*m;
-            int km = (elemindex[typei + typej*nelements] - 1) + nelements2*m;
-            double ce = coeff2[km];
-            force[i1][0] += fij[0 + 3*nm]*ce;
-            force[i1][1] += fij[1 + 3*nm]*ce;
-            force[i1][2] += fij[2 + 3*nm]*ce;
-            force[j1][0] -= fij[0 + 3*nm]*ce;
-            force[j1][1] -= fij[1 + 3*nm]*ce;
-            force[j1][2] -= fij[2 + 3*nm]*ce;
-        }
-    }
-}
-
-void MLPOD::pod3body_force(double **force, double *yij, double *e2ij, double *f2ij, double *coeff3, double *tmpmem,
-             int *elemindex, int *pairnumsum, int *ai, int *aj, int *ti, int *tj, int nrbf, int nabf,
-             int nelements, int natom, int Nij)
-{
-    int dim = 3, nabf1 = nabf + 1;
-    int nelements2 = nelements*(nelements+1)/2;
-    int n, c, nijk3, typei, typej, typek, ij, ik, i, j, k;
-
-    double xij1, xij2, xij3, xik1, xik2, xik3;
-    double xdot, rijsq, riksq, rij, rik;
-    double costhe, sinthe, theta, dtheta;
-    double tm, tm1, tm2, dct1, dct2, dct3, dct4, dct5, dct6;
-
-    double *abf = &tmpmem[0];
-    double *dabf1 = &tmpmem[nabf1];
-    double *dabf2 = &tmpmem[2*nabf1];
-    double *dabf3 = &tmpmem[3*nabf1];
-    double *dabf4 = &tmpmem[4*nabf1];
-    double *dabf5 = &tmpmem[5*nabf1];
-    double *dabf6 = &tmpmem[6*nabf1];
-
-    for (int ii=0; ii<natom; ii++) {
-        int numneigh = pairnumsum[ii+1] - pairnumsum[ii];      // number of pairs (i,j) around i
-        int s = pairnumsum[ii];
-        for (int lj=0; lj<numneigh ; lj++) {   // loop over each atom j around atom i
-            ij = lj + s;
-            i = ai[ij];  // atom i
-            j = aj[ij];  // atom j
-            typei = ti[ij] - 1;
-            typej = tj[ij] - 1;
-            xij1 = yij[0+dim*ij];  // xj - xi
-            xij2 = yij[1+dim*ij];  // xj - xi
-            xij3 = yij[2+dim*ij];  // xj - xi
-            rijsq = xij1*xij1 + xij2*xij2 + xij3*xij3;
-            rij = sqrt(rijsq);
-
-            double fixtmp,fiytmp,fiztmp;
-            fixtmp = fiytmp = fiztmp = 0.0;
-            double fjxtmp,fjytmp,fjztmp;
-            fjxtmp = fjytmp = fjztmp = 0.0;
-            for (int lk=lj+1; lk<numneigh; lk++) { // loop over each atom k around atom i (k > j)
-                ik = lk + s;
-                k = aj[ik];  // atom k
-                typek = tj[ik] - 1;
-                xik1 = yij[0+dim*ik];  // xk - xi
-                xik2 = yij[1+dim*ik];  // xk - xi
-                xik3 = yij[2+dim*ik];  // xk - xi           s
-                riksq = xik1*xik1 + xik2*xik2 + xik3*xik3;
-                rik = sqrt(riksq);
-
-                xdot  = xij1*xik1 + xij2*xik2 + xij3*xik3;
-                costhe = xdot/(rij*rik);
-                costhe = costhe > 1.0 ? 1.0 : costhe;
-                costhe = costhe < -1.0 ? -1.0 : costhe;
-                xdot = costhe*(rij*rik);
-
-                sinthe = pow(1.0 - costhe*costhe, 0.5);
-                sinthe = sinthe > 1e-12 ? sinthe : 1e-12;
-                theta = acos(costhe);
-                dtheta = -1.0/sinthe;
-
-                tm1 = 1.0/(rij*rijsq*rik);
-                tm2 = 1.0/(rij*riksq*rik);
-                dct1 = (xik1*rijsq - xij1*xdot)*tm1;
-                dct2 = (xik2*rijsq - xij2*xdot)*tm1;
-                dct3 = (xik3*rijsq - xij3*xdot)*tm1;
-                dct4 = (xij1*riksq - xik1*xdot)*tm2;
-                dct5 = (xij2*riksq - xik2*xdot)*tm2;
-                dct6 = (xij3*riksq - xik3*xdot)*tm2;
-
-                for (int p=0; p <nabf1; p++) {
-                    abf[p] = cos(p*theta);
-                    tm = -p*sin(p*theta)*dtheta;
-                    dabf1[p] = tm*dct1;
-                    dabf2[p] = tm*dct2;
-                    dabf3[p] = tm*dct3;
-                    dabf4[p] = tm*dct4;
-                    dabf5[p] = tm*dct5;
-                    dabf6[p] = tm*dct6;
-                }
-
-                double fjx = 0.0, fjy = 0.0, fjz = 0.0;
-                double fkx = 0.0, fky = 0.0, fkz = 0.0;
-
-                for (int m=0; m<nrbf; m++) {
-                    double uj = e2ij[lj + s + Nij*m];
-                    double uk = e2ij[lk + s + Nij*m];
-                    double rbf = uj*uk;
-                    double drbf1 = f2ij[0 + dim*(lj + s) + dim*Nij*m]*uk;
-                    double drbf2 = f2ij[1 + dim*(lj + s) + dim*Nij*m]*uk;
-                    double drbf3 = f2ij[2 + dim*(lj + s) + dim*Nij*m]*uk;
-                    double drbf4 = f2ij[0 + dim*(lk + s) + dim*Nij*m]*uj;
-                    double drbf5 = f2ij[1 + dim*(lk + s) + dim*Nij*m]*uj;
-                    double drbf6 = f2ij[2 + dim*(lk + s) + dim*Nij*m]*uj;
-
-                    for (int p=0; p <nabf1; p++) {
-                        tm = abf[p];
-                        double fj1 = drbf1*tm + rbf*dabf1[p];
-                        double fj2 = drbf2*tm + rbf*dabf2[p];
-                        double fj3 = drbf3*tm + rbf*dabf3[p];
-                        double fk1 = drbf4*tm + rbf*dabf4[p];
-                        double fk2 = drbf5*tm + rbf*dabf5[p];
-                        double fk3 = drbf6*tm + rbf*dabf6[p];
-
-                        n = p + (nabf1)*m;
-                        c = (elemindex[typej + typek*nelements] - 1) + nelements2*typei + nelements2*nelements*n;
-                        tm = coeff3[c];
-
-                        fjx += fj1*tm;
-                        fjy += fj2*tm;
-                        fjz += fj3*tm;
-                        fkx += fk1*tm;
-                        fky += fk2*tm;
-                        fkz += fk3*tm;
-                    }
-                }
-                nijk3 = k;
-                force[nijk3][0] -= fkx;
-                force[nijk3][1] -= fky;
-                force[nijk3][2] -= fkz;
-                fjxtmp += fjx;
-                fjytmp += fjy;
-                fjztmp += fjz;
-                fixtmp += fjx+fkx;
-                fiytmp += fjy+fky;
-                fiztmp += fjz+fkz;
-            }
-            nijk3 = j;
-            force[nijk3][0] -= fjxtmp;
-            force[nijk3][1] -= fjytmp;
-            force[nijk3][2] -= fjztmp;
-            nijk3 = i;
-            force[nijk3][0] += fixtmp;
-            force[nijk3][1] += fiytmp;
-            force[nijk3][2] += fiztmp;
-        }
-    }
-}
-
-void MLPOD::snapTallyForce(double **force, double *dbdr, double *coeff4,
-                           int *ai, int *aj, int *ti, int ijnum, int ncoeff, int /*ntype*/)
-{
-    int N2 = ijnum*ncoeff;
-    for (int idx=0; idx<N2; idx++) {
-        int ij = idx%ijnum;
-        int icoeff = (idx-ij)/ijnum;
-        int i = ai[ij]; // index of atom i
-        int j = aj[ij]; // index of atom i
-        int itype = ti[ij]; // element type of atom i
-        int n = ncoeff*(itype-1);
-        int nij = ijnum*3*icoeff;
-
-        double bix = dbdr[ij + ijnum*0 + nij];
-        double biy = dbdr[ij + ijnum*1 + nij];
-        double biz = dbdr[ij + ijnum*2 + nij];
-        double ce = coeff4[icoeff + n];
-
-        force[i][0] += bix*ce;
-        force[i][1] += biy*ce;
-        force[i][2] += biz*ce;
-        force[j][0] -= bix*ce;
-        force[j][1] -= biy*ce;
-        force[j][2] -= biz*ce;
-    }
-}
-
-void MLPOD::pod4body_force(double **force, double *rij, double *coeff4, double *tmpmem, int *atomtype,
-        int *idxi, int *ai, int *aj, int *ti, int *tj, int natom, int Nij)
-{
-    int dim = 3;
-    int idxu_max = sna.idxu_max;
-    int idxb_max = sna.idxb_max;
-    int idxz_max = sna.idxz_max;
-    int twojmax = sna.twojmax;
-    int ncoeff = sna.ncoeff;
-    int ntypes = sna.ntypes;
-    int nelements = sna.nelements;
-    int ndoubles = sna.ndoubles;
-    int bnormflag = sna.bnormflag;
-    int chemflag = sna.chemflag;
-    int switchflag = sna.switchflag;
-    int wselfallflag = sna.wselfallflag;
-    int nelem = (chemflag) ? nelements : 1;
-
-    int *map = sna.map;
-    int *idxz = sna.idxz;
-    int *idxz_block = sna.idxz_block;
-    int *idxb = sna.idxb;
-    int *idxu_block = sna.idxu_block;
-    int *idxcg_block = sna.idxcg_block;
-
-    double wself = sna.wself;
-    double rmin0 = sna.rmin0;
-    double rfac0 = sna.rfac0;
-    double rcutfac = sna.rcutfac;
-    double *rootpqarray = sna.rootpqarray;
-    double *cglist = sna.cglist;
-    double *radelem = sna.radelem;
-    double *wjelem = sna.wjelem;
-
-    int ne = 0;
-    double *Ur = &tmpmem[ne];
-    double *Zr = &tmpmem[ne];
-    ne += MAX(idxu_max*Nij, idxz_max*ndoubles*natom);
-    double *Ui = &tmpmem[ne];
-    double *Zi = &tmpmem[ne];
-    ne += MAX(idxu_max*Nij, idxz_max*ndoubles*natom);
-    double *dUr = &tmpmem[ne];
-    ne += idxu_max*dim*Nij;
-    double *dUi = &tmpmem[ne];
-    ne += idxu_max*dim*Nij;
-    double *dblist = &tmpmem[ne]; // idxb_max*ntriples*dim*Nij
-    double *Utotr = &tmpmem[ne];
-    ne += idxu_max*nelements*natom;
-    double *Utoti = &tmpmem[ne];
-
-    snapComputeUlist(Ur, Ui, dUr, dUi, rootpqarray, rij, wjelem, radelem, rmin0,
-         rfac0, rcutfac, idxu_block, ti, tj, twojmax, idxu_max, Nij, switchflag);
-
-    snapZeroUarraytot2(Utotr, Utoti, wself, idxu_block, atomtype, map, idxi, wselfallflag,
-            chemflag, idxu_max, nelem, twojmax, natom);
-
-    snapAddUarraytot(Utotr, Utoti, Ur, Ui, map, idxi, tj, idxu_max, natom, Nij, chemflag);
-
-    snapComputeZi2(Zr, Zi, Utotr, Utoti, cglist, idxz, idxu_block,
-          idxcg_block, twojmax, idxu_max, idxz_max, nelem, bnormflag, natom);
-
-    snapComputeDbidrj(dblist, Zr, Zi, dUr, dUi, idxb, idxu_block, idxz_block, map, idxi, tj,
-            twojmax, idxb_max, idxu_max, idxz_max, nelements, bnormflag, chemflag, natom, Nij);
-
-    snapTallyForce(force, dblist, coeff4, ai, aj, ti, Nij, ncoeff, ntypes);
-}
-
-void MLPOD::calculate_force(double **force, double *effectivecoeff, double *rij, double *tmpmem, int *pairnumsum,
-        int *atomtype, int *idxi, int *ai, int *aj, int *ti, int *tj, int natom, int Nij)
-{
-    int nelements = pod.nelements;
-    int nbesselpars = pod.nbesselpars;
-    int nrbf2 = pod.nbf2;
-    int nabf3 = pod.nabf3;
-    int nrbf3 = pod.nrbf3;
-    int nd1 = pod.nd1;
-    int nd2 = pod.nd2;
-    int nd3 = pod.nd3;
-    int *pdegree = pod.twobody;
-    int *elemindex = pod.elemindex;
-    double rin = pod.rin;
-    double rcut = pod.rcut;
-    double *Phi = pod.Phi2;
-    double *besselparams = pod.besselparams;
-
-    // effective POD coefficients for calculating force
-
-    double *coeff2 = &effectivecoeff[nd1];
-    double *coeff3 = &effectivecoeff[nd1+nd2];
-    double *coeff4 = &effectivecoeff[nd1+nd2+nd3];
-
-    int nrbf = MAX(nrbf2, nrbf3);
-    int ns = pdegree[0]*nbesselpars + pdegree[1];
-    double *e2ij = &tmpmem[0]; // Nij*nrbf
-    double *f2ij = &tmpmem[Nij*nrbf]; // dim*Nij*nrbf
-    double *e2ijt = &tmpmem[4*Nij*nrbf]; // Nij*ns
-    double *f2ijt = &tmpmem[4*Nij*nrbf+Nij*ns]; // dim*Nij*ns
-
-    // orthogonal radial basis functions
-
-    podradialbasis(e2ijt, f2ijt, rij, besselparams, rin, rcut-rin, pdegree[0], pdegree[1], nbesselpars, Nij);
-    podMatMul(e2ij, e2ijt, Phi, Nij, ns, nrbf);
-    podMatMul(f2ij, f2ijt, Phi, 3*Nij, ns, nrbf);
-
-    pod2body_force(force, f2ij, coeff2, ai, aj, ti, tj, elemindex, nelements, nrbf2, natom, Nij);
-
-    pod3body_force(force, rij, e2ij, f2ij, coeff3, &tmpmem[4*Nij*nrbf], elemindex, pairnumsum, ai, aj,
-            ti, tj, nrbf3, nabf3, nelements, natom, Nij);
-
-    if (pod.snaptwojmax > 0)
-        pod4body_force(force, rij, coeff4, tmpmem, atomtype, idxi, ai, aj, ti, tj, natom, Nij);
-}
diff --git a/src/ML-POD/mlpod.h b/src/ML-POD/mlpod.h
deleted file mode 100644
index 54e75988bed..00000000000
--- a/src/ML-POD/mlpod.h
+++ /dev/null
@@ -1,308 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/ Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifndef LMP_MLPOD_H
-#define LMP_MLPOD_H
-
-#include "pointers.h"
-
-#define DDOT ddot_
-#define DGEMV dgemv_
-#define DGEMM dgemm_
-#define DGETRF dgetrf_
-#define DGETRI dgetri_
-#define DSYEV dsyev_
-#define DPOSV dposv_
-
-extern "C" {
-double DDOT(int *, double *, int *, double *, int *);
-void DGEMV(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *,
-           int *);
-void DGEMM(char *, char *, int *, int *, int *, double *, double *, int *, double *, int *,
-           double *, double *, int *);
-void DGETRF(int *, int *, double *, int *, int *, int *);
-void DGETRI(int *, double *, int *, int *, double *, int *, int *);
-void DSYEV(char *, char *, int *, double *, int *, double *, double *, int *, int *);
-void DPOSV(char *, int *, int *, double *, int *, double *, int *, int *);
-}
-
-namespace LAMMPS_NS {
-
-class MLPOD : protected Pointers {
-
- private:
-  // functions for reading input files
-
-  void read_pod(const std::string &pod_file);
-  void read_coeff_file(const std::string &coeff_file);
-
-  // functions for calculating/collating POD descriptors/coefficients for energies
-
-  void podradialbasis(double *rbf, double *drbf, double *xij, double *besselparams, double rin,
-                      double rmax, int besseldegree, int inversedegree, int nbesselpars, int N);
-  void pod1body(double *eatom, double *fatom, int *atomtype, int nelements, int natom);
-  void podtally2b(double *eatom, double *fatom, double *eij, double *fij, int *ai, int *aj, int *ti,
-                  int *tj, int *elemindex, int nelements, int nbf, int natom, int N);
-  void pod3body(double *eatom, double *fatom, double *rij, double *e2ij, double *f2ij,
-                double *tmpmem, int *elemindex, int *pairnumsum, int *ai, int *aj, int *ti, int *tj,
-                int nrbf, int nabf, int nelements, int natom, int Nij);
-  void poddesc(double *eatom1, double *fatom1, double *eatom2, double *fatom2, double *eatom3,
-               double *fatom3, double *rij, double *Phi, double *besselparams, double *tmpmem,
-               double rin, double rcut, int *pairnumsum, int *atomtype, int *ai, int *aj, int *ti,
-               int *tj, int *elemindex, int *pdegree, int nbesselpars, int nrbf2, int nrbf3,
-               int nabf, int nelements, int Nij, int natom);
-  double quadratic_coefficients(double *c2, double *c3, double *d2, double *d3, double *coeff23,
-                                int *quadratic, int nc2, int nc3);
-  double quadratic_coefficients(double *c3, double *d3, double *coeff33, int *quadratic, int nc3);
-  double cubic_coefficients(double *c2, double *c3, double *c4, double *d2, double *d3, double *d4,
-                            double *coeff234, int *cubic, int nc2, int nc3, int nc4);
-  double cubic_coefficients(double *c3, double *d3, double *coeff333, int *cubic, int nc3);
-  double quadratic_coefficients(double *ce2, double *ce3, double *c2, double *c3, double *d2,
-                                double *d3, double *coeff23, int *quadratic, int nc2, int nc3);
-  double quadratic_coefficients(double *ce3, double *c3, double *d3, double *coeff33,
-                                int *quadratic, int nc3);
-  double cubic_coefficients(double *ce2, double *ce3, double *ce4, double *c2, double *c3,
-                            double *c4, double *d2, double *d3, double *d4, double *coeff234,
-                            int *cubic, int nc2, int nc3, int nc4);
-  double cubic_coefficients(double *ce3, double *c3, double *d3, double *coeff333, int *cubic,
-                            int nc3);
-
-  // functions for calculating/collating SNAP descriptors/coefficients for energies
-
-  void snapSetup(int twojmax, int ntypes);
-  void InitSnap();
-  void snapComputeUlist(double *Sr, double *Si, double *dSr, double *dSi, double *rootpqarray,
-                        double *rij, double *wjelem, double *radelem, double rmin0, double rfac0,
-                        double rcutfac, int *idxu_block, int *ti, int *tj, int twojmax,
-                        int idxu_max, int ijnum, int switch_flag);
-  void snapZeroUarraytot2(double *Stotr, double *Stoti, double wself, int *idxu_block, int *type,
-                          int *map, int *ai, int wselfall_flag, int chemflag, int idxu_max,
-                          int nelements, int twojmax, int inum);
-  void snapAddUarraytot(double *Stotr, double *Stoti, double *Sr, double *Si, int *map, int *ai,
-                        int *tj, int idxu_max, int inum, int ijnum, int chemflag);
-  void snapComputeZi2(double *zlist_r, double *zlist_i, double *Stotr, double *Stoti,
-                      double *cglist, int *idxz, int *idxu_block, int *idxcg_block, int twojmax,
-                      int idxu_max, int idxz_max, int nelements, int bnorm_flag, int inum);
-  void snapComputeBi1(double *blist, double *zlist_r, double *zlist_i, double *Stotr, double *Stoti,
-                      int *idxb, int *idxu_block, int *idxz_block, int twojmax, int idxb_max,
-                      int idxu_max, int idxz_max, int nelements, int inum);
-  void snapComputeDbidrj(double *dblist, double *zlist_r, double *zlist_i, double *dulist_r,
-                         double *dulist_i, int *idxb, int *idxu_block, int *idxz_block, int *map,
-                         int *ai, int *tj, int twojmax, int idxb_max, int idxu_max, int idxz_max,
-                         int nelements, int bnorm_flag, int chemflag, int inum, int ijnum);
-  void snapdesc(double *blist, double *bd, double *rij, double *tmpmem, int *atomtype, int *ai,
-                int *aj, int *ti, int *tj, int natom, int Nij);
-
-  // functions for calculating/collating POD descriptors/coefficients for forces
-
-  void podradialbasis(double *rbf, double *xij, double *besselparams, double rin, double rmax,
-                      int besseldegree, int inversedegree, int nbesselpars, int N);
-  void pod1body(double *eatom, int *atomtype, int nelements, int natom);
-  void podtally2b(double *eatom, double *eij, int *ai, int *ti, int *tj, int *elemindex,
-                  int nelements, int nbf, int natom, int N);
-  void pod3body(double *eatom, double *yij, double *e2ij, double *tmpmem, int *elemindex,
-                int *pairnumsum, int *ai, int *ti, int *tj, int nrbf, int nabf, int nelements,
-                int natom, int Nij);
-  void poddesc_ij(double *eatom1, double *eatom2, double *eatom3, double *rij, double *Phi,
-                  double *besselparams, double *tmpmem, double rin, double rcut, int *pairnumsum,
-                  int *atomtype, int *ai, int *ti, int *tj, int *elemindex, int *pdegree,
-                  int nbesselpars, int nrbf2, int nrbf3, int nabf, int nelements, int Nij,
-                  int natom);
-  void snapComputeUij(double *Sr, double *Si, double *rootpqarray, double *rij, double *wjelem,
-                      double *radelem, double rmin0, double rfac0, double rcutfac, int *idxu_block,
-                      int *ti, int *tj, int twojmax, int idxu_max, int ijnum, int switch_flag);
-  void snapdesc_ij(double *blist, double *rij, double *tmpmem, int *atomtype, int *ai, int *ti,
-                   int *tj, int natom, int Nij);
-  void pod2body_force(double *force, double *fij, double *coeff2, int *ai, int *aj, int *ti,
-                      int *tj, int *elemindex, int nelements, int nbf, int natom, int Nij);
-  void pod3body_force(double *force, double *yij, double *e2ij, double *f2ij, double *coeff3,
-                      double *tmpmem, int *elemindex, int *pairnumsum, int *ai, int *aj, int *ti,
-                      int *tj, int nrbf, int nabf, int nelements, int natom, int Nij);
-  void snapTallyForce(double *force, double *dbdr, double *coeff4, int *ai, int *aj, int *ti,
-                      int ijnum, int ncoeff, int ntype);
-  void pod4body_force(double *force, double *rij, double *coeff4, double *tmpmem, int *atomtype,
-                      int *idxi, int *ai, int *aj, int *ti, int *tj, int natom, int Nij);
-  void pod2body_force(double **force, double *fij, double *coeff2, int *ai, int *aj, int *ti,
-                      int *tj, int *elemindex, int nelements, int nbf, int natom, int Nij);
-  void pod3body_force(double **force, double *yij, double *e2ij, double *f2ij, double *coeff3,
-                      double *tmpmem, int *elemindex, int *pairnumsum, int *ai, int *aj, int *ti,
-                      int *tj, int nrbf, int nabf, int nelements, int natom, int Nij);
-  void snapTallyForce(double **force, double *dbdr, double *coeff4, int *ai, int *aj, int *ti,
-                      int ijnum, int ncoeff, int ntype);
-  void pod4body_force(double **force, double *rij, double *coeff4, double *tmpmem, int *atomtype,
-                      int *idxi, int *ai, int *aj, int *ti, int *tj, int natom, int Nij);
-
-  // eigenproblem functions
-
-  void podeigenvaluedecomposition(double *Phi, double *Lambda, double *besselparams, double rin,
-                                  double rcut, int besseldegree, int inversedegree, int nbesselpars,
-                                  int N);
-
- public:
-  MLPOD(LAMMPS *, const std::string &pod_file, const std::string &coeff_file);
-
-  MLPOD(LAMMPS *lmp) : Pointers(lmp){};
-  ~MLPOD() override;
-
-  struct podstruct {
-    podstruct();
-    virtual ~podstruct();
-
-    std::vector<std::string> species;
-    int twobody[3];
-    int threebody[4];
-    int fourbody[4];
-    int *pbc;
-    int *elemindex;
-
-    int quadratic22[2];
-    int quadratic23[2];
-    int quadratic24[2];
-    int quadratic33[2];
-    int quadratic34[2];
-    int quadratic44[2];
-    int cubic234[3];
-    int cubic333[3];
-    int cubic444[3];
-    int nelements;
-    int onebody;
-    int besseldegree;
-    int inversedegree;
-
-    int quadraticpod;
-
-    double rin;
-    double rcut;
-    double *besselparams;
-    double *coeff;
-    double *Phi2, *Phi3, *Phi4, *Lambda2, *Lambda3, *Lambda4;
-
-    // variables declaring number of snapshots, descriptors, and combinations
-
-    int nbesselpars = 3;
-    int ns2, ns3,
-        ns4;    // number of snapshots for radial basis functions for linear POD potentials
-    int nc2, nc3, nc4;             // number of chemical  combinations for linear POD potentials
-    int nbf1, nbf2, nbf3, nbf4;    // number of basis functions for linear POD potentials
-    int nd1, nd2, nd3, nd4;        // number of descriptors for linear POD potentials
-    int nd22, nd23, nd24, nd33, nd34, nd44;    // number of descriptors for quadratic POD potentials
-    int nd234, nd333, nd444;                   // number of descriptors for cubic POD potentials
-    int nrbf3, nabf3, nrbf4, nabf4;
-    int nd, nd1234;
-
-    int snaptwojmax;    // also used to tell if SNAP is used when allocating/deallocating
-    int snapchemflag;
-    double snaprfac0;
-    double snapelementradius[10];
-    double snapelementweight[10];
-  };
-
-  struct snastruct {
-    int twojmax;
-    int ncoeff;
-    int idxb_max;
-    int idxu_max;
-    int idxz_max;
-    int idxcg_max;
-    int ntypes;
-    int nelements;
-    int ndoubles;    // number of multi-element pairs
-    int ntriples;    // number of multi-element triplets
-    int bnormflag;
-    int chemflag;
-    int switchflag;
-    int bzeroflag;
-    int wselfallflag;
-
-    double wself;
-    double rmin0;
-    double rfac0;
-    double rcutfac;
-    double rcutmax;
-
-    int *map;    // map types to [0,nelements)
-    int *idx_max;
-    int *idxz;
-    int *idxz_block;
-    int *idxb;
-    int *idxb_block;
-    int *idxu_block;
-    int *idxcg_block;
-
-    double *rcutsq;
-    double *radelem;
-    double *wjelem;
-    double *bzero;
-    double *fac;
-    double *rootpqarray;
-    double *cglist;
-  };
-
-  podstruct pod;
-  snastruct sna;
-
-  // functions for collecting/collating arrays
-
-  void podMatMul(double *c, double *a, double *b, int r1, int c1, int c2);
-  void podArraySetValue(double *y, double a, int n);
-  void podArrayCopy(double *y, double *x, int n);
-  void podArrayFill(int *output, int start, int length);
-
-  // functions for calculating energy and force descriptors
-
-  void podNeighPairs(double *xij, double *x, int *ai, int *aj, int *ti, int *tj, int *pairlist,
-                     int *pairnumsum, int *atomtype, int *alist, int inum, int dim);
-  void linear_descriptors(double *gd, double *efatom, double *y, double *tmpmem, int *atomtype,
-                          int *alist, int *pairlist, int *pairnum, int *pairnumsum, int *tmpint,
-                          int natom, int Nij);
-  void quadratic_descriptors(double *d23, double *dd23, double *d2, double *d3, double *dd2,
-                             double *dd3, int M2, int M3, int N);
-  void quadratic_descriptors(double *d33, double *dd33, double *d3, double *dd3, int M3, int N);
-  void cubic_descriptors(double *d234, double *dd234, double *d2, double *d3, double *d4,
-                         double *dd2, double *dd3, double *dd4, int M2, int M3, int M4, int N);
-  void cubic_descriptors(double *d333, double *Dd333, double *d3, double *Dd3, int M3, int N);
-  double calculate_energyforce(double *force, double *gd, double *gdd, double *coeff, double *tmp,
-                               int natom);
-  double energyforce_calculation(double *f, double *gd, double *gdd, double *coeff, double *y,
-                                 int *atomtype, int *alist, int *pairlist, int *pairnum,
-                                 int *pairnumsum, int *tmpint, int natom, int Nij);
-
-  // functions for calculating energies and forces
-
-  void podNeighPairs(double *rij, double *x, int *idxi, int *ai, int *aj, int *ti, int *tj,
-                     int *pairnumsum, int *atomtype, int *jlist, int *alist, int inum);
-  int lammpsNeighPairs(double *rij, double **x, double rcutsq, int *idxi, int *ai, int *aj, int *ti,
-                       int *tj, int *pairnumsum, int *atomtype, int *numneigh, int *ilist,
-                       int **jlist, int inum);
-  void linear_descriptors_ij(double *gd, double *eatom, double *rij, double *tmpmem,
-                             int *pairnumsum, int *atomtype, int *ai, int *ti, int *tj, int natom,
-                             int Nij);
-  double calculate_energy(double *effectivecoeff, double *gd, double *coeff);
-  double calculate_energy(double *energycoeff, double *forcecoeff, double *gd, double *gdall,
-                          double *coeff);
-  void calculate_force(double *force, double *effectivecoeff, double *rij, double *tmpmem,
-                       int *pairnumsum, int *atomtype, int *idxi, int *ai, int *aj, int *ti,
-                       int *tj, int natom, int Nij);
-  void calculate_force(double **force, double *effectivecoeff, double *rij, double *tmpmem,
-                       int *pairnumsum, int *atomtype, int *idxi, int *ai, int *aj, int *ti,
-                       int *tj, int natom, int Nij);
-  double energyforce_calculation(double *force, double *podcoeff, double *effectivecoeff,
-                                 double *gd, double *rij, double *tmpmem, int *pairnumsum,
-                                 int *atomtype, int *idxi, int *ai, int *aj, int *ti, int *tj,
-                                 int natom, int Nij);
-
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
diff --git a/src/ML-POD/pair_pod.cpp b/src/ML-POD/pair_pod.cpp
index d106b11a18e..b17aa5d08f7 100644
--- a/src/ML-POD/pair_pod.cpp
+++ b/src/ML-POD/pair_pod.cpp
@@ -17,54 +17,138 @@
 
 #include "pair_pod.h"
 
-#include "mlpod.h"
+#include "eapod.h"
 
 #include "atom.h"
 #include "comm.h"
 #include "error.h"
 #include "force.h"
+#include "math_const.h"
+#include "math_special.h"
 #include "memory.h"
 #include "neigh_list.h"
 #include "neighbor.h"
+#include "tokenizer.h"
+
+#include <cmath>
+#include <cstring>
+#include <chrono>
 
 using namespace LAMMPS_NS;
+using MathConst::MY_PI;
+using MathSpecial::powint;
+
+#define MAXLINE 1024
 
 /* ---------------------------------------------------------------------- */
 
-PairPOD::PairPOD(LAMMPS *lmp) :
-    Pair(lmp), gd(nullptr), gdall(nullptr), podcoeff(nullptr), newpodcoeff(nullptr),
-    energycoeff(nullptr), forcecoeff(nullptr), podptr(nullptr), tmpmem(nullptr), typeai(nullptr),
-    numneighsum(nullptr), rij(nullptr), idxi(nullptr), ai(nullptr), aj(nullptr), ti(nullptr),
-    tj(nullptr)
+PairPOD::PairPOD(LAMMPS *lmp) : Pair(lmp), fastpodptr(nullptr)
 {
   single_enable = 0;
   restartinfo = 0;
   one_coeff = 1;
   manybody_flag = 1;
   centroidstressflag = CENTROID_NOTAVAIL;
-  peratom_warn = true;
+  peratom_warn = false;
 
-  dim = 3;
-  nablockmax = 0;
+  ni = 0;    // number of atoms in the atom block being processed
+  nimax = 0;    // capacity of the per-atom arrays, raised by grow_atoms()
   nij = 0;
-  nijmax = 0;
-  szd = 0;
+  nijmax = 0;    // largest pair count that storage has been sized for so far
+  atomBlockSize = 4096;    // block size passed to calculateNumberOfIntervals() in compute()
+  nAtomBlocks = 0;
+  // work arrays start as nullptr; grow_atoms(), grow_pairs() and copy_data_from_pod_class() allocate them
+  rij = nullptr;
+  fij = nullptr;
+  ei = nullptr;
+  typeai = nullptr;
+  numij =  nullptr;
+  idxi = nullptr;
+  ai = nullptr;
+  aj = nullptr;
+  ti = nullptr;
+  tj = nullptr;
+  Phi = nullptr;
+  rbf = nullptr;
+  rbfx = nullptr;
+  rbfy = nullptr;
+  rbfz = nullptr;
+  abf = nullptr;
+  abfx = nullptr;
+  abfy = nullptr;
+  abfz = nullptr;
+  sumU = nullptr;
+  Centroids = nullptr;
+  Proj = nullptr;
+  bd = nullptr;
+  bdd = nullptr;
+  pd = nullptr;
+  pdd = nullptr;
+  coefficients = nullptr;
+  pn3 = nullptr;    // NOTE(review): pq3 and abftm are also allocated in copy_data_from_pod_class() but are not reset here - confirm
+  pc3 = nullptr;
+  pa4 = nullptr;
+  pb4 = nullptr;
+  pc4 = nullptr;
+  ind23 = nullptr;
+  ind32 = nullptr;
+  ind33l = nullptr;
+  ind33r = nullptr;
+  ind34l = nullptr;
+  ind34r = nullptr;
+  ind44l = nullptr;
+  ind44r = nullptr;
+  elemindex = nullptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairPOD::~PairPOD()
 {
-  free_tempmemory();
-  memory->destroy(podcoeff);
-  memory->destroy(newpodcoeff);
-  memory->destroy(gd);
-  memory->destroy(gdall);
-  memory->destroy(energycoeff);
-  memory->destroy(forcecoeff);
-
-  delete podptr;
-
+  memory->destroy(rij);
+  memory->destroy(fij);
+  memory->destroy(ei);
+  memory->destroy(typeai);
+  memory->destroy(numij);
+  memory->destroy(idxi);
+  memory->destroy(ai);
+  memory->destroy(aj);
+  memory->destroy(ti);
+  memory->destroy(tj);
+  memory->destroy(Phi);
+  memory->destroy(rbf);
+  memory->destroy(rbfx);
+  memory->destroy(rbfy);
+  memory->destroy(rbfz);
+  memory->destroy(abf);
+  memory->destroy(abfx);
+  memory->destroy(abfy);
+  memory->destroy(abfz);
+  memory->destroy(sumU);
+  memory->destroy(Centroids);
+  memory->destroy(Proj);
+  memory->destroy(bd);
+  memory->destroy(bdd);
+  memory->destroy(pd);
+  memory->destroy(pdd);
+  memory->destroy(coefficients);
+  memory->destroy(pn3);
+  memory->destroy(pc3);
+  memory->destroy(pa4);
+  memory->destroy(pb4);
+  memory->destroy(pc4);
+  memory->destroy(ind23);
+  memory->destroy(ind32);
+  memory->destroy(ind33l);
+  memory->destroy(ind33r);
+  memory->destroy(ind34l);
+  memory->destroy(ind34r);
+  memory->destroy(ind44l);
+  memory->destroy(ind44r);
+  memory->destroy(elemindex);
+
+  delete fastpodptr;
+  
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
@@ -91,67 +175,94 @@ void PairPOD::compute(int eflag, int vflag)
   int *type = atom->type;
   int *ilist = list->ilist;
   int inum = list->inum;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
 
-  // initialize global descriptors to zero
-
-  int nd1234 = podptr->pod.nd1234;
-  podptr->podArraySetValue(gd, 0.0, nd1234);
+  double rcutsq = rcut*rcut;
+  double evdwl = 0.0;
 
+  int blockMode = 0;  
+  if (blockMode==0) {    
   for (int ii = 0; ii < inum; ii++) {
     int i = ilist[ii];
     int jnum = numneigh[i];
 
     // allocate temporary memory
-
     if (nijmax < jnum) {
       nijmax = MAX(nijmax, jnum);
-      nablockmax = 1;
-      free_tempmemory();
-      estimate_tempmemory();
-      allocate_tempmemory();
+      fastpodptr->free_temp_memory();
+      fastpodptr->allocate_temp_memory(nijmax);
     }
 
-    // get neighbor pairs for atom i
-
-    lammpsNeighPairs(x, firstneigh, type, map, numneigh, i);
-
-    // compute global POD descriptors for atom i
-
-    podptr->linear_descriptors_ij(gd, tmpmem, rij, &tmpmem[nd1234], numneighsum, typeai, idxi, ti,
-                                  tj, 1, nij);
+    double *rij1 = &fastpodptr->tmpmem[0];    
+    double *fij1 = &fastpodptr->tmpmem[3*nijmax];   
+    double *tmp = &fastpodptr->tmpmem[6*nijmax]; 
+    int *ai1 = &fastpodptr->tmpint[0];      
+    int *aj1 = &fastpodptr->tmpint[nijmax]; 
+    int *ti1 = &fastpodptr->tmpint[2*nijmax];
+    int *tj1 = &fastpodptr->tmpint[3*nijmax];    
+    lammpsNeighborList(rij1, ai1, aj1, ti1, tj1, x, firstneigh, type, map, numneigh, rcutsq, i);
+    
+    evdwl = fastpodptr->peratomenergyforce(fij1, rij1, tmp, ti1, tj1, nij);
+            
+    // tally atomic energy to global energy
+    ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0);
+
+    // tally atomic force to global force
+    tallyforce(f, fij1, ai1, aj1, nij);
+
+    // tally atomic stress
+    if (vflag) {
+      for (int jj = 0; jj < nij; jj++) {
+        int j = aj1[jj];
+        ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0,
+                    fij1[0 + 3*jj],fij1[1 + 3*jj],fij1[2 + 3*jj],
+                    -rij1[0 + 3*jj], -rij1[1 + 3*jj], -rij1[2 + 3*jj]);
+      }
+    }            
+  }  
   }
-
-  int nd22 = podptr->pod.nd22;
-  int nd23 = podptr->pod.nd23;
-  int nd24 = podptr->pod.nd24;
-  int nd33 = podptr->pod.nd33;
-  int nd34 = podptr->pod.nd34;
-  int nd44 = podptr->pod.nd44;
-  int nd = podptr->pod.nd;
-  bigint natom = atom->natoms;
-
-  for (int j = nd1234; j < (nd1234 + nd22 + nd23 + nd24 + nd33 + nd34 + nd44); j++)
-    newpodcoeff[j] = podcoeff[j] / (natom);
-
-  for (int j = (nd1234 + nd22 + nd23 + nd24 + nd33 + nd34 + nd44); j < nd; j++)
-    newpodcoeff[j] = podcoeff[j] / (natom * natom);
-
-  // compute energy and effective coefficients
-  eng_vdwl = podptr->calculate_energy(energycoeff, forcecoeff, gd, gdall, newpodcoeff);
-
-  for (int ii = 0; ii < inum; ii++) {
-    int i = ilist[ii];
-
-    // get neighbor pairs for atom i
-
-    lammpsNeighPairs(x, firstneigh, type, map, numneigh, i);
-
-    // compute atomic force for atom i
-
-    podptr->calculate_force(f, forcecoeff, rij, tmpmem, numneighsum, typeai, idxi, ai, aj, ti, tj,
-                            1, nij);
+  else if (blockMode == 1) {
+ // determine the number of atom blocks and divide atoms into blocks
+  nAtomBlocks = calculateNumberOfIntervals(inum, atomBlockSize);
+  if (nAtomBlocks > 100) nAtomBlocks = 100; 
+  divideInterval(atomBlocks, inum, nAtomBlocks);
+
+  int nmax = 0;
+  for (int block =0; block<nAtomBlocks; block++) {    
+    int n = atomBlocks[block+1] - atomBlocks[block]; 
+    if (nmax < n) nmax = n;
   }
-
+  grow_atoms(nmax); // reallocate memory only if necessary  
+  
+  for (int block =0; block<nAtomBlocks; block++) {
+    int gi1 = atomBlocks[block]-1;
+    int gi2 = atomBlocks[block+1]-1;
+    ni = gi2 - gi1; // total number of atoms in the current atom block
+        
+    NeighborCount(x, firstneigh, ilist, numneigh, rcutsq, gi1, gi2);                    
+    nij = numberOfNeighbors(); // total number of pairs (i,j) in the current atom block        
+    grow_pairs(nij); // reallocate memory only if necessary
+
+    // get neighbor list for atoms i in the current atom block
+    NeighborList(x, firstneigh, type, map, ilist, numneigh, rcutsq, gi1, gi2);       
+    
+    // compute atomic energy and force for the current atom block
+    blockatomenergyforce(ei, fij, ni, nij);
+
+    // tally atomic energy to global energy
+    tallyenergy(ei, gi1, ni);
+
+    // tally atomic force to global force
+    tallyforce(f, fij, ai, aj, nij);
+
+    // tally atomic stress
+    if (vflag) tallystress(fij, rij, ai, aj, nlocal, nij);
+        
+    //savedatafordebugging();
+  }    
+  }
+  
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
@@ -179,34 +290,30 @@ void PairPOD::coeff(int narg, char **arg)
   map = new int[np1];
   allocated = 1;
 
-  if (narg < 4) utils::missing_cmd_args(FLERR, "pair_coeff", error);
-  map_element2type(narg - 4, arg + 4);
-
+  if (narg < 7) utils::missing_cmd_args(FLERR, "pair_coeff", error);
+  
   std::string pod_file = std::string(arg[2]);      // pod input file
   std::string coeff_file = std::string(arg[3]);    // coefficient input file
-
-  delete podptr;
-  podptr = new MLPOD(lmp, pod_file, coeff_file);
-
-  if (coeff_file != "") {
-    memory->destroy(podcoeff);
-    memory->destroy(newpodcoeff);
-    memory->destroy(energycoeff);
-    memory->destroy(forcecoeff);
-    memory->destroy(gd);
-    memory->destroy(gdall);
-    memory->create(podcoeff, podptr->pod.nd, "pair:podcoeff");
-    memory->create(newpodcoeff, podptr->pod.nd, "pair:newpodcoeff");
-    memory->create(energycoeff, podptr->pod.nd1234, "pair:energycoeff");
-    memory->create(forcecoeff, podptr->pod.nd1234, "pair:forcecoeff");
-    memory->create(gd, podptr->pod.nd1234, "pair:gd");
-    memory->create(gdall, podptr->pod.nd1234, "pair:gdall");
-    podptr->podArrayCopy(podcoeff, podptr->pod.coeff, podptr->pod.nd);
-    podptr->podArrayCopy(newpodcoeff, podptr->pod.coeff, podptr->pod.nd);
+  std::string proj_file = std::string(arg[4]);     // projection matrix file
+  std::string centroid_file = std::string(arg[5]); // centroid matrix file
+  map_element2type(narg - 6, arg + 6);    
+  
+  delete fastpodptr;
+  fastpodptr = new EAPOD(lmp, pod_file, coeff_file, proj_file, centroid_file);
+
+  if (fastpodptr->nClusters > 1) {
+    if (proj_file == "") error->all(FLERR,"The projection file name can not be empty when the number of clusters is greater than 1.");    
+    if (centroid_file == "") error->all(FLERR,"The centroids file name can not be empty when the number of clusters is greater than 1.");
   }
+  
+  copy_data_from_pod_class();
+  rcut = fastpodptr->rcut;
+  
+  memory->destroy(fastpodptr->tmpmem);
+  memory->destroy(fastpodptr->tmpint);
 
   for (int ii = 0; ii < np1; ii++)
-    for (int jj = 0; jj < np1; jj++) cutsq[ii][jj] = podptr->pod.rcut * podptr->pod.rcut;
+    for (int jj = 0; jj < np1; jj++) cutsq[ii][jj] = fastpodptr->rcut * fastpodptr->rcut;
 }
 
 /* ----------------------------------------------------------------------
@@ -222,7 +329,7 @@ void PairPOD::init_style()
   neighbor->add_request(this, NeighConst::REQ_FULL);
 
   // reset flag to print warning about per-atom energies or stresses
-  peratom_warn = true;
+  peratom_warn = false;
 }
 
 /* ----------------------------------------------------------------------
@@ -232,7 +339,16 @@ void PairPOD::init_style()
 double PairPOD::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
-  return podptr->pod.rcut;
+
+  // The cutoff is the same for every type pair (i,j): it is the value held by
+  // the EAPOD object that coeff() creates.  It is returned directly, without a
+  // local variable that would hide the member named rcut.
+  return fastpodptr->rcut;
+}
+
+void PairPOD::allocate()
+{
+  allocated = 1;    // only raises the flag; this function itself allocates no arrays
 }
 
 /* ----------------------------------------------------------------------
@@ -245,86 +361,1307 @@ double PairPOD::memory_usage()
   return bytes;
 }
 
-void PairPOD::free_tempmemory()
+void PairPOD::lammpsNeighborList(double *rij1, int *ai1, int *aj1, int *ti1, int *tj1,
+                               double **x, int **firstneigh, int *atomtypes, int *map,
+                               int *numneigh, double rcutsq, int gi)
 {
-  memory->destroy(rij);
-  memory->destroy(idxi);
-  memory->destroy(ai);
-  memory->destroy(aj);
-  memory->destroy(ti);
-  memory->destroy(tj);
-  memory->destroy(numneighsum);
-  memory->destroy(typeai);
-  memory->destroy(tmpmem);
+  // Gather the neighbors j of atom gi with 1e-20 < |xj - xi|^2 < rcutsq; nij ends as the pair count.
+  const int typei = map[atomtypes[gi]] + 1;
+  const int jnum = numneigh[gi];
+  nij = 0;
+  for (int jj = 0; jj < jnum; jj++) {
+    const int gj = firstneigh[gi][jj];
+    const double dx = x[gj][0] - x[gi][0];    // xj - xi
+    const double dy = x[gj][1] - x[gi][1];
+    const double dz = x[gj][2] - x[gi][2];
+    const double distsq = dx * dx + dy * dy + dz * dz;
+    if (!(distsq < rcutsq && distsq > 1e-20)) continue;    // beyond the cutoff or coincident
+    rij1[3 * nij + 0] = dx;
+    rij1[3 * nij + 1] = dy;
+    rij1[3 * nij + 2] = dz;
+    ai1[nij] = gi;
+    aj1[nij] = gj;
+    ti1[nij] = typei;
+    tj1[nij] = map[atomtypes[gj]] + 1;
+    nij++;
+  }
+}
+
+void PairPOD::NeighborCount(double **x, int **firstneigh, int *ilist, int *numneigh, double rcutsq, int gi1, int gi2)
+{
+  // For block atom i (global index ilist[gi1 + i], i = 0..ni-1) store in numij[1 + i] the number
+  // of its neighbors with 1e-20 < r^2 < rcutsq.  gi2 is accepted but not used.
+  for (int i = 0; i < ni; i++) {
+    const int gi = ilist[gi1 + i];
+    const double *xi = x[gi];
+    const int jnum = numneigh[gi];
+    int count = 0;
+    for (int jj = 0; jj < jnum; jj++) {
+      const double *xj = x[firstneigh[gi][jj]];
+      const double dx = xj[0] - xi[0];    // xj - xi
+      const double dy = xj[1] - xi[1];
+      const double dz = xj[2] - xi[2];
+      const double distsq = dx * dx + dy * dy + dz * dz;
+      if (distsq < rcutsq && distsq > 1e-20) count++;
+    }
+    numij[1 + i] = count;
+  }
+}
+
+int PairPOD::numberOfNeighbors()
+{
+  int total = 0;    // sum of the per-atom counts stored in numij[1..ni]
+  for (int k = 0; k < ni; k++) {
+    total += numij[k + 1];
+    numij[k + 1] += numij[k];    // in-place running sum; numij[0] is read but not written
+  }
+  return total;
+}
+
+void PairPOD::NeighborList(double **x, int **firstneigh, int *atomtypes, int *map,
+                               int *ilist, int *numneigh, double rcutsq, int gi1, int gi2)
+{
+  // Fills typeai and the per-pair arrays rij, idxi, ai, aj, ti, tj for the ni atoms
+  // ilist[gi1], ilist[gi1 + 1], ...  The pairs of block atom i go to consecutive slots
+  // starting at numij[i]; the distance test is the one used in NeighborCount().
+  // gi2 is accepted but not used.
+  for (int i = 0; i < ni; i++) {
+    const int gi = ilist[gi1 + i];
+    const double *xi = x[gi];
+    const int typei = map[atomtypes[gi]] + 1;
+    typeai[i] = typei;
+    const int jnum = numneigh[gi];
+    int slot = numij[i];    // next pair slot to fill for this atom
+    for (int jj = 0; jj < jnum; jj++) {
+      const int gj = firstneigh[gi][jj];
+      const double dx = x[gj][0] - xi[0];    // xj - xi
+      const double dy = x[gj][1] - xi[1];
+      const double dz = x[gj][2] - xi[2];
+      const double distsq = dx * dx + dy * dy + dz * dz;
+      // keep only pairs that are inside the cutoff and not coincident
+      if (!(distsq < rcutsq && distsq > 1e-20)) continue;
+      rij[3 * slot + 0] = dx;
+      rij[3 * slot + 1] = dy;
+      rij[3 * slot + 2] = dz;
+      idxi[slot] = i;
+      ai[slot] = gi;
+      aj[slot] = gj;
+      ti[slot] = typei;
+      tj[slot] = map[atomtypes[gj]] + 1;
+      slot++;
+    }
+  }
+}
+
+void PairPOD::tallyforce(double **force, double *fij,  int *ai, int *aj, int N)
+{
+  for (int n = 0; n < N; n++) {
+    const double *fn = &fij[3*n];    // the three force components of pair n
+    double *fi = force[ai[n]];       // atom ai[n] receives +fn
+    double *fj = force[aj[n]];       // atom aj[n] receives -fn
+    fi[0] += fn[0];
+    fi[1] += fn[1];
+    fi[2] += fn[2];
+    fj[0] -= fn[0];
+    fj[1] -= fn[1];
+    fj[2] -= fn[2];
+  }
+}
+
+void PairPOD::tallyenergy(double *ei, int istart, int Ni)
+{
+  // NOTE(review): eatom is indexed by istart+k, not through ilist as in NeighborCount() - confirm
+  for (int k = 0; k < Ni; k++) {
+    if (eflag_global) eng_vdwl += ei[k];         // global energy accumulator
+    if (eflag_atom) eatom[istart+k] += ei[k];    // per-atom energy accumulator
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally the pair virial terms -rij*fij into the global (virial) and
+   per-atom (vatom) accumulators; no energy is tallied here
+------------------------------------------------------------------------- */
+
+void PairPOD::tallystress(double *fij, double *rij, int *ai, int *aj, int nlocal, int N)
+{
+  double v[6];
+
+  if (vflag_global) {    // every pair contributes its full term to the global virial
+    for (int k = 0; k < N; k++) {
+      int k3 = 3*k;
+      v[0] = -rij[0 + k3]*fij[0 + k3]; // xx: -delx*fx
+      v[1] = -rij[1 + k3]*fij[1 + k3]; // yy: -dely*fy
+      v[2] = -rij[2 + k3]*fij[2 + k3]; // zz: -delz*fz
+      v[3] = -rij[0 + k3]*fij[1 + k3]; // xy: -delx*fy
+      v[4] = -rij[0 + k3]*fij[2 + k3]; // xz: -delx*fz
+      v[5] = -rij[1 + k3]*fij[2 + k3]; // yz: -dely*fz
+      virial[0] += v[0];
+      virial[1] += v[1];
+      virial[2] += v[2];
+      virial[3] += v[3];
+      virial[4] += v[4];
+      virial[5] += v[5];
+    }
+  }
+
+  if (vflag_atom) {    // the same six terms, split half/half between atoms i and j
+    for (int k = 0; k < N; k++) {
+      int i = ai[k];
+      int j = aj[k];
+      int k3 = k*3;
+      v[0] = -rij[0 + k3]*fij[0 + k3]; // xx: -delx*fx
+      v[1] = -rij[1 + k3]*fij[1 + k3]; // yy: -dely*fy
+      v[2] = -rij[2 + k3]*fij[2 + k3]; // zz: -delz*fz
+      v[3] = -rij[0 + k3]*fij[1 + k3]; // xy: -delx*fy
+      v[4] = -rij[0 + k3]*fij[2 + k3]; // xz: -delx*fz
+      v[5] = -rij[1 + k3]*fij[2 + k3]; // yz: -dely*fz
+
+      if (i < nlocal) {    // atoms with index >= nlocal receive no per-atom share
+        vatom[i][0] += 0.5*v[0];
+        vatom[i][1] += 0.5*v[1];
+        vatom[i][2] += 0.5*v[2];
+        vatom[i][3] += 0.5*v[3];
+        vatom[i][4] += 0.5*v[4];
+        vatom[i][5] += 0.5*v[5];
+      }
+      if (j < nlocal) {
+        vatom[j][0] += 0.5*v[0];
+        vatom[j][1] += 0.5*v[1];
+        vatom[j][2] += 0.5*v[2];
+        vatom[j][3] += 0.5*v[3];
+        vatom[j][4] += 0.5*v[4];
+        vatom[j][5] += 0.5*v[5];
+      }
+    }
+  }
 }
 
-void PairPOD::allocate_tempmemory()
+void PairPOD::copy_data_from_pod_class()
 {
-  memory->create(rij, dim * nijmax, "pair:rij");
-  memory->create(idxi, nijmax, "pair:idxi");
-  memory->create(ai, nijmax, "pair:ai");
-  memory->create(aj, nijmax, "pair:aj");
-  memory->create(ti, nijmax, "pair:ti");
-  memory->create(tj, nijmax, "pair:tj");
-  memory->create(numneighsum, nablockmax + 1, "pair:numneighsum");
-  memory->create(typeai, nablockmax, "pair:typeai");
-  memory->create(tmpmem, szd, "pair:tmpmem");
+  // Mirror the sizes, parameters and lookup tables of the EAPOD object (fastpodptr) in this class.
+  nelements = fastpodptr->nelements; // number of elements
+  onebody = fastpodptr->onebody;   // one-body descriptors
+  besseldegree = fastpodptr->besseldegree; // degree of Bessel functions
+  inversedegree = fastpodptr->inversedegree; // degree of inverse functions
+  nbesselpars = fastpodptr->nbesselpars;  // number of Bessel parameters
+  nCoeffPerElement = fastpodptr->nCoeffPerElement; // number of coefficients per element = (nl1 + Mdesc*nClusters)
+  ns = fastpodptr->ns;      // number of snapshots for radial basis functions
+  nl1 = fastpodptr->nl1;  // number of one-body descriptors
+  nl2 = fastpodptr->nl2;  // number of two-body descriptors
+  nl3 = fastpodptr->nl3;  // number of three-body descriptors
+  nl4 = fastpodptr->nl4;  // number of four-body descriptors
+  nl23 = fastpodptr->nl23; // number of two-body x three-body descriptors
+  nl33 = fastpodptr->nl33; // number of three-body x three-body descriptors
+  nl34 = fastpodptr->nl34; // number of three-body x four-body descriptors
+  nl44 = fastpodptr->nl44; // number of four-body x four-body descriptors
+  n23 = fastpodptr->n23;
+  n32 = fastpodptr->n32;
+  nl = fastpodptr->nl;   // number of local descriptors
+  nrbf2 = fastpodptr->nrbf2;
+  nrbf3 = fastpodptr->nrbf3;
+  nrbf4 = fastpodptr->nrbf4;
+  nrbfmax = fastpodptr->nrbfmax; // number of radial basis functions
+  nabf3 = fastpodptr->nabf3;     // number of three-body angular basis functions
+  nabf4 = fastpodptr->nabf4;     // number of four-body angular basis functions
+  K3 = fastpodptr->K3;           // number of three-body monomials
+  K4 = fastpodptr->K4;           // number of four-body monomials
+  Q4 = fastpodptr->Q4;           // number of four-body monomial coefficients
+  nClusters = fastpodptr->nClusters; // number of environment clusters
+  nComponents = fastpodptr->nComponents; // number of principal components
+  Mdesc = fastpodptr->Mdesc; // number of base descriptors
+
+  rin = fastpodptr->rin;
+  rcut = fastpodptr->rcut;
+  rmax = rcut - rin;
+  besselparams[0] = fastpodptr->besselparams[0];
+  besselparams[1] = fastpodptr->besselparams[1];
+  besselparams[2] = fastpodptr->besselparams[2];
+  // free arrays left by an earlier call (all are nullptr after construction) so a repeated pair_coeff does not leak
+  memory->destroy(elemindex); memory->destroy(Phi); memory->destroy(coefficients);
+  memory->destroy(Proj); memory->destroy(Centroids);
+  memory->destroy(pn3); memory->destroy(pc3); memory->destroy(pa4); memory->destroy(pb4); memory->destroy(pc4);
+  memory->destroy(ind23); memory->destroy(ind32); memory->destroy(ind33l); memory->destroy(ind33r);
+  memory->destroy(ind34l); memory->destroy(ind34r); memory->destroy(ind44l); memory->destroy(ind44r);
+  memory->create(abftm, 4*K3, "abftm");    // NOTE(review): abftm and pq3 are not reset in the constructor, so they are not freed above - confirm
+  memory->create(elemindex, nelements*nelements, "elemindex");
+  for (int i=0; i<nelements*nelements; i++) elemindex[i] = fastpodptr->elemindex[i];
+
+  memory->create(Phi, ns * ns, "pair_pod:Phi");
+  for (int i=0; i<ns*ns; i++) Phi[i] = fastpodptr->Phi[i];
+  memory->create(coefficients, nCoeffPerElement * nelements, "pair_pod:coefficients");
+  for (int i=0; i<nCoeffPerElement * nelements; i++) coefficients[i] = fastpodptr->coeff[i];
+
+  if (nClusters > 1) {    // projection and centroid tables are only copied when clusters are in use
+    memory->create(Proj, Mdesc * nComponents * nelements, "pair_pod:Proj");
+    for (int i=0; i<Mdesc * nComponents * nelements; i++) Proj[i] = fastpodptr->Proj[i];
+    memory->create(Centroids, nClusters * nComponents * nelements, "pair_pod:Centroids");
+    for (int i=0; i<nClusters * nComponents * nelements; i++) Centroids[i] = fastpodptr->Centroids[i];
+  }
+
+  memory->create(pn3, nabf3+1, "pn3"); // array stores the number of monomials for each degree
+  memory->create(pq3, K3*2, "pq3"); // array needed for the recursive computation of the angular basis functions
+  memory->create(pc3, K3, "pc3");   // array needed for the computation of the three-body descriptors
+  memory->create(pa4, nabf4+1, "pa4"); // this array is a subset of the array {0, 1, 4, 10, 19, 29, 47, 74, 89, 119, 155, 209, 230, 275, 335, 425, 533, 561, 624, 714, 849, 949, 1129, 1345}
+  memory->create(pb4, Q4*3, "pb4"); // array stores the indices of the monomials needed for the computation of the angular basis functions
+  memory->create(pc4, Q4, "pc4");   // array of monomial coefficients needed for the computation of the four-body descriptors
+  for (int i=0; i<nabf3+1; i++) pn3[i] = fastpodptr->pn3[i];
+  for (int i=0; i<K3; i++) pc3[i] = fastpodptr->pc3[i];
+  for (int i=0; i<K3*2; i++) pq3[i] = fastpodptr->pq3[i];
+  for (int i=0; i<nabf4+1; i++) pa4[i] = fastpodptr->pa4[i];
+  for (int i=0; i<Q4*3; i++) pb4[i] = fastpodptr->pb4[i];
+  for (int i=0; i<Q4; i++) pc4[i] = fastpodptr->pc4[i];
+
+  memory->create(ind23, n23, "pair_pod:ind23");
+  memory->create(ind32, n32, "pair_pod:ind32");
+  memory->create(ind33l, nl33, "pair_pod:ind33l");
+  memory->create(ind33r, nl33, "pair_pod:ind33r");
+  memory->create(ind34l, nl34, "pair_pod:ind34l");
+  memory->create(ind34r, nl34, "pair_pod:ind34r");
+  memory->create(ind44l, nl44, "pair_pod:ind44l");
+  memory->create(ind44r, nl44, "pair_pod:ind44r");
+  for (int i=0; i<n23; i++) ind23[i] = fastpodptr->ind23[i];
+  for (int i=0; i<n32; i++) ind32[i] = fastpodptr->ind32[i];
+  for (int i=0; i<nl33; i++) ind33l[i] = fastpodptr->ind33l[i];
+  for (int i=0; i<nl33; i++) ind33r[i] = fastpodptr->ind33r[i];
+  for (int i=0; i<nl34; i++) ind34l[i] = fastpodptr->ind34l[i];
+  for (int i=0; i<nl34; i++) ind34r[i] = fastpodptr->ind34r[i];
+  for (int i=0; i<nl44; i++) ind44l[i] = fastpodptr->ind44l[i];
+  for (int i=0; i<nl44; i++) ind44r[i] = fastpodptr->ind44r[i];
 }
 
-void PairPOD::estimate_tempmemory()
+void PairPOD::grow_atoms(int Ni)
 {
-  int nrbf2 = podptr->pod.nbf2;
-  int nabf3 = podptr->pod.nabf3;
-  int nrbf3 = podptr->pod.nrbf3;
-  int ns2 = podptr->pod.ns2;
-  int ns3 = podptr->pod.ns3;
+  // Re-create the per-atom work arrays when Ni exceeds the capacity nimax; old contents are discarded.
+  if (Ni > nimax) {
+    memory->destroy(ei);
+    memory->destroy(typeai);
+    memory->destroy(numij);
+    memory->destroy(sumU);
+    memory->destroy(bd);
+    memory->destroy(pd);
+    nimax = Ni;
+    memory->create(ei, nimax, "pair_pod:ei");
+    memory->create(typeai, nimax, "pair_pod:typeai");
+    memory->create(numij, nimax+1, "pair_pod:numij");    // one extra slot: numij[0] anchors the running sum
+    int n = nimax * nelements * K3 * nrbfmax;
+    if (nClusters>1) n = (n > nimax*Mdesc) ? n : nimax*Mdesc;    // sumU must also hold nimax*Mdesc values
+    memory->create(sumU, n , "pair_pod:sumU");
+    memory->create(bd, nimax * Mdesc, "pair_pod:bd");
+    memory->create(pd, nimax * nClusters, "pair_pod:pd");
+    for (int i=0; i<=nimax; i++) numij[i] = 0;    // in particular numij[0] = 0, read by numberOfNeighbors()
+  }
+}
 
-  szd = dim * nijmax + (1 + dim) * nijmax * MAX(nrbf2 + ns2, nrbf3 + ns3) + (nabf3 + 1) * 7;
-  int szsnap = 0;
-  if (podptr->sna.twojmax > 0) {
-    szsnap += nijmax * dim;
-    szsnap += MAX(2 * podptr->sna.idxu_max * nijmax,
-                  2 * podptr->sna.idxz_max * podptr->sna.ndoubles *
-                      nablockmax);                        // (Ur, Ui) and (Zr, Zi)
-    szsnap += 2 * podptr->sna.idxu_max * dim * nijmax;    // dUr, dUi
-    szsnap += MAX(podptr->sna.idxb_max * podptr->sna.ntriples * dim * nijmax,
-                  2 * podptr->sna.idxu_max * podptr->sna.nelements *
-                      nablockmax);    // dblist and (Utotr, Utoti)
+void PairPOD::grow_pairs(int Nij)
+{
+  if (Nij > nijmax) {    // re-create the per-pair arrays only when more pair slots are needed; old contents are discarded
+    memory->destroy(rij);
+    memory->destroy(fij);
+    memory->destroy(idxi);
+    memory->destroy(ai);
+    memory->destroy(aj);
+    memory->destroy(ti);
+    memory->destroy(tj);
+    memory->destroy(rbf);
+    memory->destroy(rbfx);
+    memory->destroy(rbfy);
+    memory->destroy(rbfz);
+    memory->destroy(abf);
+    memory->destroy(abfx);
+    memory->destroy(abfy);
+    memory->destroy(abfz);
+    memory->destroy(bdd);
+    memory->destroy(pdd);
+    nijmax = Nij;    // new capacity, in pairs
+    memory->create(rij, 3 * nijmax,  "pair_pod:r_ij");    // three components per pair
+    memory->create(fij, 3 * nijmax,  "pair_pod:f_ij");    // three components per pair
+    memory->create(idxi, nijmax, "pair_pod:idxi");
+    memory->create(ai, nijmax, "pair_pod:ai");
+    memory->create(aj, nijmax, "pair_pod:aj");
+    memory->create(ti, nijmax, "pair_pod:ti");
+    memory->create(tj, nijmax, "pair_pod:tj");
+    memory->create(rbf, nijmax * nrbfmax, "pair_pod:rbf");
+    memory->create(rbfx, nijmax * nrbfmax, "pair_pod:rbfx");
+    memory->create(rbfy, nijmax * nrbfmax, "pair_pod:rbfy");
+    memory->create(rbfz, nijmax * nrbfmax, "pair_pod:rbfz");
+    int kmax = (K3 > ns) ? K3 : ns;    // the abf* arrays are sized for max(K3, ns) values per pair
+    memory->create(abf, nijmax * kmax, "pair_pod:abf");
+    memory->create(abfx, nijmax * kmax, "pair_pod:abfx");
+    memory->create(abfy, nijmax * kmax, "pair_pod:abfy");
+    memory->create(abfz, nijmax * kmax, "pair_pod:abfz");
+    memory->create(bdd, 3 * nijmax  * Mdesc, "pair_pod:bdd");
+    memory->create(pdd, 3 * nijmax * nClusters, "pair_pod:pdd");
   }
+}
 
-  szd = MAX(szsnap, szd);
-  szd = nablockmax * (podptr->pod.nd1234) + szd;
+void PairPOD::divideInterval(int *intervals, int N, int M)
+{
+  // Split the 1-based range [1, N] into M consecutive intervals; entries intervals[0..M] are written.
+  intervals[0] = 1;    // start of the first interval
+  if (M <= 0) return;    // nothing to split; also keeps N / M below from dividing by zero
+  const int intervalSize = N / M;    // basic size of each interval
+  int remainder = N % M;    // the first `remainder` intervals get one extra element
+  for (int i = 1; i <= M; i++) {
+    intervals[i] = intervals[i - 1] + intervalSize + (remainder > 0 ? 1 : 0);
+    if (remainder > 0) remainder--;
+  }
 }
 
-void PairPOD::lammpsNeighPairs(double **x, int **firstneigh, int *atomtypes, int *map,
-                               int *numneigh, int gi)
+int PairPOD::calculateNumberOfIntervals(int N, int intervalSize) // number of intervals of at most intervalSize items needed to cover N items
 {
+  if (intervalSize <= 0) { // invalid size: report on stdout and signal the failure with -1
+    printf("Interval size must be a positive integer.\n");
+    return -1;
+  }
 
-  double rcutsq = podptr->pod.rcut * podptr->pod.rcut;
+  int M = N / intervalSize; // number of completely filled intervals
+  if (N % intervalSize != 0) {
+    M++; // Add an additional interval to cover the remainder
+  }
 
-  nij = 0;
-  int itype = map[atomtypes[gi]] + 1;
-  int m = numneigh[gi];
-  typeai[0] = itype;
-  for (int l = 0; l < m; l++) {           // loop over each atom around atom i
-    int gj = firstneigh[gi][l];           // atom j
-    double delx = x[gj][0] - x[gi][0];    // xj - xi
-    double dely = x[gj][1] - x[gi][1];    // xj - xi
-    double delz = x[gj][2] - x[gi][2];    // xj - xi
-    double rsq = delx * delx + dely * dely + delz * delz;
-    if (rsq < rcutsq && rsq > 1e-20) {
-      rij[nij * 3 + 0] = delx;
-      rij[nij * 3 + 1] = dely;
-      rij[nij * 3 + 2] = delz;
-      idxi[nij] = 0;
-      ai[nij] = gi;
-      aj[nij] = gj;
-      ti[nij] = itype;
-      tj[nij] = map[atomtypes[gj]] + 1;
-      nij++;
+  return M; // note: this is 0 when N is 0
+}
+
+void PairPOD::radialbasis(double *rbft, double *rbftx, double *rbfty, double *rbftz, double *rij, int Nij)
+{ // for each of the Nij pairs in rij: radial function values go to rbft, their x/y/z derivatives to rbftx/rbfty/rbftz (stride ns per pair)
+  // Loop over all pairs
+  for (int n=0; n<Nij; n++) {
+    double xij1 = rij[0+3*n]; // rij stores x, y, z interleaved per pair
+    double xij2 = rij[1+3*n];
+    double xij3 = rij[2+3*n];
+    // pair distance; dr1..dr3 are the components of the unit vector, i.e. d(dij)/dx, d(dij)/dy, d(dij)/dz
+    double dij = sqrt(xij1*xij1 + xij2*xij2 + xij3*xij3);
+    double dr1 = xij1/dij;
+    double dr2 = xij2/dij;
+    double dr3 = xij3/dij;
+    // distance measured from rin, and its ratio y to rmax
+    double r = dij - rin;
+    double y = r/rmax;
+    double y2 = y*y;
+    // building blocks of the cutoff: y4 = (1 - y^3)^2 + 1e-6, y6 = exp(-1/sqrt(y4)), y7 = y4^(3/2)
+    double y3 = 1.0 - y2*y;
+    double y4 = y3*y3 + 1e-6;
+    double y5 = sqrt(y4);
+    double y6 = exp(-1.0/y5);
+    double y7 = y4*sqrt(y4);
+
+    // Calculate the final cutoff function as y6/exp(-1)
+    double fcut = y6/exp(-1.0);
+
+    // Calculate the derivative of the final cutoff function
+    double dfcut = ((3.0/(rmax*exp(-1.0)))*(y2)*y6*(y*y2 - 1.0))/y7;
+
+    // Calculate fcut/r, fcut/r^2, and dfcut/r
+    double f1 = fcut/r;
+    double f2 = f1/r;
+    double df1 = dfcut/r;
+    // scaled coordinate x0 = (1 - exp(-alpha*r/rmax))/(1 - exp(-alpha)) and dx0 = d(x0)/dr for besselparams[0]
+    double alpha = besselparams[0];
+    double t1 = (1.0-exp(-alpha));
+    double t2 = exp(-alpha*r/rmax);
+    double x0 =  (1.0 - t2)/t1;
+    double dx0 = (alpha/rmax)*t2/t1;
+    // same mapping for besselparams[1]
+    alpha = besselparams[1];
+    t1 = (1.0-exp(-alpha));
+    t2 = exp(-alpha*r/rmax);
+    double x1 =  (1.0 - t2)/t1;
+    double dx1 = (alpha/rmax)*t2/t1;
+    // same mapping for besselparams[2]; NOTE(review): three parameters are hard-coded here while the offset below uses nbesselpars
+    alpha = besselparams[2];
+    t1 = (1.0-exp(-alpha));
+    t2 = exp(-alpha*r/rmax);
+    double x2 =  (1.0 - t2)/t1;
+    double dx2 = (alpha/rmax)*t2/t1;
+    for (int i=0; i<besseldegree; i++) { // sine functions b*(fcut/r)*sin(a*x) for each of the three scaled coordinates
+      double a = (i+1)*MY_PI;
+      double b = (sqrt(2.0/(rmax))/(i+1));
+      double af1 = a*f1;
+
+      double sinax = sin(a*x0);
+      //int idxni = n + Nij*i;
+      int idxni = i + ns*n; // first family occupies entries 0 .. besseldegree-1 of pair n
+
+      rbft[idxni] = b*f1*sinax;
+      double drbftdr = b*(df1*sinax - f2*sinax + af1*cos(a*x0)*dx0); // derivative with respect to the distance
+      rbftx[idxni] = drbftdr*dr1; // chain rule through the unit vector
+      rbfty[idxni] = drbftdr*dr2;
+      rbftz[idxni] = drbftdr*dr3;
+
+      sinax = sin(a*x1);
+      //idxni = n + Nij*i + Nij*besseldegree*1;
+      idxni = i + besseldegree + ns*n; // second family follows the first
+
+      rbft[idxni] = b*f1*sinax;
+      drbftdr = b*(df1*sinax - f2*sinax + af1*cos(a*x1)*dx1);
+      rbftx[idxni] = drbftdr*dr1;
+      rbfty[idxni] = drbftdr*dr2;
+      rbftz[idxni] = drbftdr*dr3;
+
+      sinax = sin(a*x2);
+      //idxni = n + Nij*i + Nij*besseldegree*2;
+      idxni = i + besseldegree*2 + ns*n; // third family follows the second
+      rbft[idxni] = b*f1*sinax;
+      drbftdr = b*(df1*sinax - f2*sinax + af1*cos(a*x2)*dx2);
+      rbftx[idxni] = drbftdr*dr1;
+      rbfty[idxni] = drbftdr*dr2;
+      rbftz[idxni] = drbftdr*dr3;
+    }
+  
+    // Calculate fcut/dij (f1 is reused with a new meaning from here on)
+    f1 = fcut/dij;
+    for (int i=0; i<inversedegree; i++) { // inverse-power functions fcut/dij^(i+1)
+      int p = besseldegree*nbesselpars + i; // stored after the sine functions
+      //int idxni = n + Nij*p;      
+      int idxni = p + ns*n;
+      double a = powint(dij, i+1);
+
+      rbft[idxni] = fcut/a;
+
+      double drbftdr = (dfcut - (i+1.0)*f1)/a; // d/d(dij) of fcut/dij^(i+1)
+      rbftx[idxni] = drbftdr*dr1;
+      rbfty[idxni] = drbftdr*dr2;
+      rbftz[idxni] = drbftdr*dr3;
+    }
+  }
+}
+
+void matrixMultiply(double *Phi, double *rbft, double *rbf, int nrbfmax, int ns, int Nij)
+{
+  // rbf(i,j) = sum_k Phi(k,i) * rbft(k,j) for every pair j and column i (flat arrays, first index fastest)
+  for (int pair = 0; pair < Nij; pair++) {
+    const double *raw = rbft + ns*pair;    // the ns input values of this pair
+    for (int col = 0; col < nrbfmax; col++) {
+      double acc = 0.0;
+      for (int k = 0; k < ns; k++) acc += raw[k] * Phi[k + ns*col];
+      rbf[col + nrbfmax*pair] = acc;
+    }
+  }
+}
+
+void PairPOD::orthogonalradialbasis(int Nij)
+{
+  // Evaluate the raw radial functions into abf*, then project the values and x/y/z derivatives with Phi.
+  radialbasis(abf, abfx, abfy, abfz, rij, Nij);
+  double *raw[4] = {abf, abfx, abfy, abfz};
+  double *projected[4] = {rbf, rbfx, rbfy, rbfz};
+  for (int c = 0; c < 4; c++) matrixMultiply(Phi, raw[c], projected[c], nrbfmax, ns, Nij);
+}
+
+void PairPOD::angularbasis(double *tm, double *tmu, double *tmv, double *tmw, int N)
+{ // fills the members abf/abfx/abfy/abfz with K3 angular functions and their x/y/z derivatives for each of the N pairs in rij
+  // Initialize first angular basis function and its derivatives (only element 0 of the four arguments is written)
+  tm[0] = 1.0;
+  tmu[0] = 0.0;
+  tmv[0] = 0.0;
+  tmw[0] = 0.0;
+
+  // Loop over all pairs
+  for (int j=0; j<N; j++) {
+    // Relative position stored for pair j
+    double x = rij[0+3*j];
+    double y = rij[1+3*j];
+    double z = rij[2+3*j];
+
+    // Calculate various terms for derivatives
+    double xx = x*x;
+    double yy = y*y;
+    double zz = z*z;
+    double xy = x*y;
+    double xz = x*z;
+    double yz = y*z;
+
+    // Calculate the pair distance and the unit vector (u, v, w)
+    double dij = sqrt(xx + yy + zz);
+    double u = x/dij;
+    double v = y/dij;
+    double w = z/dij;
+
+    // Calculate derivatives of the unit vector components with respect to x, y, z
+    double dij3 = dij*dij*dij;
+    double dudx = (yy+zz)/dij3;
+    double dudy = -xy/dij3;
+    double dudz = -xz/dij3;
+
+    double dvdx = -xy/dij3;
+    double dvdy = (xx+zz)/dij3;
+    double dvdz = -yz/dij3;
+
+    double dwdx = -xz/dij3;
+    double dwdy = -yz/dij3;
+    double dwdz = (xx+yy)/dij3;
+
+    int idxa = 0 + K3*j; // function 0 of pair j is the constant 1 with zero derivatives
+    abf[idxa] = 1.0;
+    abfx[idxa] = 0.0;
+    abfy[idxa] = 0.0;
+    abfz[idxa] = 0.0;
+
+    // Loop over all angular basis functions; in this pass abfx/abfy/abfz hold derivatives with respect to u, v, w
+    for (int n=1; n<K3; n++) {
+      // pq3[n] is the 1-based index of the function that n is built from, pq3[n + K3] the direction (1=u, 2=v, 3=w)
+      int m = pq3[n]-1;
+      int d = pq3[n + K3];
+      int mj = m + K3*j;
+      double tmm = abf[mj]; 
+      double tmum = abfx[mj];
+      double tmvm = abfy[mj];
+      double tmwm = abfz[mj];
+
+      double tmn, tmun, tmvn, tmwn; // NOTE(review): these stay uninitialized if d is not 1, 2 or 3
+      // Calculate angular basis function and its derivatives using recursion relation
+      if (d==1) {
+        tmn = tmm*u;
+        tmun = tmum*u + tmm;
+        tmvn = tmvm*u;
+        tmwn = tmwm*u;
+      }
+      else if (d==2) {
+        tmn = tmm*v;
+        tmun = tmum*v;
+        tmvn = tmvm*v + tmm;
+        tmwn = tmwm*v;
+      }
+      else if (d==3) {
+        tmn = tmm*w;
+        tmun = tmum*w;
+        tmvn = tmvm*w;
+        tmwn = tmwm*w + tmm;
+      }
+      idxa = n + K3*j;
+      abf[idxa] = tmn;
+      abfx[idxa] = tmun;
+      abfy[idxa] = tmvn;
+      abfz[idxa] = tmwn;
+    }
+    // second pass: chain rule converts the (u, v, w) derivatives into (x, y, z) derivatives in place
+    for (int n=1; n<K3; n++) {
+      double tmun, tmvn, tmwn;    
+      idxa = n + K3*j;
+      tmun = abfx[idxa];
+      tmvn = abfy[idxa];
+      tmwn = abfz[idxa];
+      abfx[idxa] = tmun*dudx + tmvn*dvdx + tmwn*dwdx;
+      abfy[idxa] = tmun*dudy + tmvn*dvdy + tmwn*dwdy;
+      abfz[idxa] = tmun*dudz + tmvn*dvdz + tmwn*dwdz;      
+    }
+  }
+}
+
+void PairPOD::radialangularsum(int Ni, int Nij)
+{
+  // sumU(e,k,m,i) accumulates rbf(m,n) * abf(k,n) over the pairs n whose block atom is
+  // i = idxi[n] and whose neighbor element is e = tj[n] - 1 (tj is 1-based).
+  std::fill(sumU, sumU + Ni * nelements * K3 * nrbf3, 0.0);
+
+  const int strideK = nelements;                 // step between angular indices k
+  const int strideM = nelements * K3;            // step between radial indices m
+  const int strideI = nelements * K3 * nrbf3;    // step between atoms
+
+  for (int pair = 0; pair < Nij; pair++) {
+    double *dst = sumU + (tj[pair] - 1) + strideI * idxi[pair];
+    const double *radial = rbf + nrbfmax * pair;
+    const double *angular = abf + K3 * pair;
+    for (int m = 0; m < nrbf3; m++) {
+      for (int k = 0; k < K3; k++) dst[strideK * k + strideM * m] += radial[m] * angular[k];
+    }
+  }
+}
+
+void PairPOD::radialangularsum2(int Ni, int Nij)
+{
+  // Atom-major variant: sumU(e,k,m,i) = sum of rbf(m,n) * abf(k,n) over the pairs n of atom i
+  // whose neighbor element is e. Pairs of atom i are n = numij[i] .. numij[i+1]-1; Nij is unused.
+  // sumU is zeroed up front so every pair can accumulate straight into it; this replaces a
+  // per-iteration scratch array that was a non-standard VLA and was never initialized.
+  std::fill(sumU, sumU + Ni * nelements * K3 * nrbf3, 0.0);
+  int totalIterations = nrbf3 * K3 * Ni;
+  for (int idx = 0; idx < totalIterations; idx++) {
+    int k = idx % K3;     // angular basis index
+    int temp = idx / K3;
+    int m = temp % nrbf3; // radial basis index
+    int i = temp / nrbf3; // atom index within the block
+    int kmi = nelements*k + nelements*K3*m + nelements*K3*nrbf3*i;
+
+    int start = numij[i];
+    int nj = numij[i+1]-start;
+    for (int j=0; j<nj; j++) {
+      int n = start + j;
+      int ia = k + K3 * n;
+      int ib = m + nrbfmax * n;
+      int tn = tj[n] - 1; // offset the atom type by 1, since atomtype is 1-based
+      sumU[tn + kmi] += rbf[ib] * abf[ia];
+    }
+  }
+}
+
+void PairPOD::twobodydescderiv(double *d2, double *dd2, int Ni, int Nij)
+{
+  // Per pair and radial function m < nrbf2: add the value to d2 of atom idxi[pair], copy the gradient to dd2.
+  for (int pair = 0; pair < Nij; pair++) {
+    const int elem = tj[pair] - 1;               // 0-based neighbor element (tj is 1-based)
+    for (int m = 0; m < nrbf2; m++) {
+      const int src = m + nrbfmax * pair;        // radial function m of this pair
+      const int slot = m + nrbf2 * elem;         // descriptor index for (m, elem)
+      double *grad = dd2 + 3 * (pair + Nij * slot);
+      d2[idxi[pair] + Ni * slot] += rbf[src];
+      grad[0] = rbfx[src];
+      grad[1] = rbfy[src];
+      grad[2] = rbfz[src];
+    }
+  }
+}
+
+void PairPOD::threebodydesc(double *d3, int Ni)
+{
+  // d3(i,p,m,k) += pc3(a) * sumU(e1,a,m,i) * sumU(e2,a,m,i), summed over the angular functions
+  // a in [pn3[p], pn3[p+1]); k counts the element pairs e1 <= e2 in loop order.
+  // d3 is only accumulated into, never reset here.
+  for (int i = 0; i < Ni; i++) {
+    for (int m = 0; m < nrbf3; m++) {
+      const double *U = sumU + nelements * K3 * m + nelements * K3 * nrbf3 * i;
+      for (int p = 0; p < nabf3; p++) {
+        double *out = d3 + i + Ni * (p + nabf3 * m);
+        for (int a = pn3[p]; a < pn3[p + 1]; a++) {
+          const double *Ua = U + nelements * a;
+          int k = 0;
+          for (int e1 = 0; e1 < nelements; e1++) {
+            const double t1 = pc3[a] * Ua[e1];
+            for (int e2 = e1; e2 < nelements; e2++, k++)
+              out[Ni * nabf3 * nrbf3 * k] += t1 * Ua[e2];
+          }
+        }
+      }
+    }
+  }
+}
+
+void PairPOD::threebodydescderiv(double *dd3, int Ni, int Nij)
+{ // per-pair x/y/z gradients of the three-body descriptors, written to dd3 (3 entries per pair and descriptor); Ni is not used
+  int totalIterations = nrbf3 * Nij;
+  if (nelements==1) { // single element: each dd3 entry is computed in locals and assigned once
+    for (int idx = 0; idx < totalIterations; ++idx) {
+      int j = idx / nrbf3;       // pair index
+      int m = idx % nrbf3;       // radial basis index
+      int idxR = m + nrbfmax * j;  // Pre-compute the index for rbf
+      double rbfBase = rbf[idxR];
+      double rbfxBase = rbfx[idxR];
+      double rbfyBase = rbfy[idxR];
+      double rbfzBase = rbfz[idxR];
+
+      for (int p = 0; p < nabf3; p++) {
+        int n1 = pn3[p]; // angular functions n1 .. n2-1 contribute to descriptor p
+        int n2 = pn3[p + 1];
+        int nn = n2 - n1;
+        int baseIdx = 3 * j + 3 * Nij * (p + nabf3 * m);  // Pre-compute the base index for dd3
+        int idxU = K3 * m + K3*nrbf3*idxi[j]; // sumU block of the pair's atom idxi[j] and radial index m
+        double tmp1 = 0;
+        double tmp2 = 0;
+        double tmp3 = 0;
+        for (int q = 0; q < nn; q++) {                  
+          int idxNQ = n1 + q;  // Combine n1 and q into a single index for pc3 and sumU
+          double t1 = pc3[idxNQ] * sumU[idxNQ + idxU];
+          double f = 2.0 * t1; // factor 2: the descriptor is quadratic in the same sumU entry
+          int idxA = idxNQ + K3 * j;  // Pre-compute the index for abf          
+          double abfA = abf[idxA];  
+
+          // Product rule on rbf * abf, accumulated per Cartesian component
+          tmp1 += f * (abfx[idxA] * rbfBase + rbfxBase * abfA);
+          tmp2 += f * (abfy[idxA] * rbfBase + rbfyBase * abfA);
+          tmp3 += f * (abfz[idxA] * rbfBase + rbfzBase * abfA);          
+        }
+        dd3[baseIdx]     = tmp1;
+        dd3[baseIdx + 1] = tmp2;
+        dd3[baseIdx + 2] = tmp3;                  
+      }
+    }
+  }
+  else { // several elements: contributions are added with +=, so dd3 must already be zero on entry
+    int N3 = 3 * Nij *  nabf3 * nrbf3; // size of the dd3 slab belonging to one element-pair index k
+    for (int idx = 0; idx < totalIterations; ++idx) {
+      int j = idx / nrbf3;  // pair index
+      int m = idx % nrbf3;  // radial basis index
+      int idxR = m + nrbfmax * j;  // Pre-compute the index for rbf
+      double rbfBase = rbf[idxR];
+      double rbfxBase = rbfx[idxR];
+      double rbfyBase = rbfy[idxR];
+      double rbfzBase = rbfz[idxR];
+      for (int p = 0; p < nabf3; p++) {
+        int n1 = pn3[p];
+        int n2 = pn3[p + 1];
+        int nn = n2 - n1;
+        int jmp = 3 * j + 3 * Nij * (p + nabf3 * m); // offset of (pair j, descriptor p, radial m) inside a slab
+        for (int q = 0; q < nn; q++) {
+          int idxNQ = n1 + q;  // Combine n1 and q into a single index
+          int idxU = nelements * idxNQ + nelements * K3 * m + nelements*K3*nrbf3*idxi[j];
+          int idxA = idxNQ + K3 * j;  // Pre-compute the index for abf          
+          double abfA = abf[idxA];   
+          double abfxA = abfx[idxA];
+          double abfyA = abfy[idxA];
+          double abfzA = abfz[idxA];
+          for (int i1 = 0; i1 < nelements; i1++) {
+            double t1 = pc3[idxNQ] * sumU[i1 + idxU];
+            int i2 = tj[j] - 1; // 0-based element of the neighbor in pair j
+            int k = elemindex[i2 + nelements * i1]; // element-pair index looked up for (i2, i1)
+            double f = (i1 == i2) ? 2.0 * t1 : t1; // doubled when both factors belong to the same element
+            int ii = jmp + N3 * k;                     
+
+            // Update dd3
+            dd3[0 + ii] += f * (abfxA * rbfBase + rbfxBase * abfA);
+            dd3[1 + ii] += f * (abfyA * rbfBase + rbfyBase * abfA);
+            dd3[2 + ii] += f * (abfzA * rbfBase + rbfzBase * abfA);          
+          }
+        }
+      }
+    }
+  }
+}
+
+void PairPOD::fourbodydesc(double *d4, int Ni)
+{
+  // d4(i,p,m,k) += pc4(t) * sumU(e1,j1,m,i) * sumU(e2,j2,m,i) * sumU(e3,j3,m,i), summed over
+  // the terms t in [pa4[p], pa4[p+1]). The angular indices (j1,j2,j3) of a term are read from
+  // pb4 at offsets 0, Q4 and 2*Q4; k counts the element triples e1 <= e2 <= e3 in loop order.
+  const int kStride = Ni * nabf4 * nrbf4;    // step in d4 between consecutive element triples
+
+  for (int i = 0; i < Ni; i++) {
+    for (int m = 0; m < nrbf4; m++) {
+      // sumU block of atom i and radial function m (the atom stride is based on nrbf3)
+      const double *U = sumU + nelements * K3 * m + nelements * K3 * nrbf3 * i;
+
+      for (int p = 0; p < nabf4; p++) {
+        double *out = d4 + i + Ni * (p + nabf4 * m);
+
+        for (int t = pa4[p]; t < pa4[p + 1]; t++) {
+          const int c = pc4[t];
+          const double *U1 = U + nelements * pb4[t];
+          const double *U2 = U + nelements * pb4[t + Q4];
+          const double *U3 = U + nelements * pb4[t + 2 * Q4];
+
+          int k = 0;
+          for (int e1 = 0; e1 < nelements; e1++) {
+            const double c1 = U1[e1];
+            for (int e2 = e1; e2 < nelements; e2++) {
+              const double t12 = c * c1 * U2[e2];
+              for (int e3 = e2; e3 < nelements; e3++, k++) out[kStride * k] += t12 * U3[e3];
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+void PairPOD::fourbodydescderiv(double *dd4, int Ni, int Nij)
+{ // per-pair x/y/z gradients of the four-body descriptors, written to dd4 (3 entries per pair and descriptor); Ni is not used
+  if (nelements==1) { // single element: each dd4 entry is computed in locals and assigned once
+    for (int idx = 0; idx < Nij * nrbf4; ++idx) {
+      int j = idx / nrbf4;  // pair index
+      int m = idx % nrbf4;  // radial basis index
+      int idxU = K3 * m + K3*nrbf3*idxi[j]; // sumU block of the pair's atom idxi[j] and radial index m
+      int baseIdxJ = m + nrbfmax * j;  // Pre-compute the index for rbf
+      double rbfBase = rbf[baseIdxJ];
+      double rbfxBase = rbfx[baseIdxJ];
+      double rbfyBase = rbfy[baseIdxJ];
+      double rbfzBase = rbfz[baseIdxJ];
+
+      for (int p = 0; p < nabf4; p++) {
+        int n1 = pa4[p]; // terms n1 .. n2-1 contribute to descriptor p
+        int n2 = pa4[p + 1];
+        int nn = n2 - n1;
+        int kk = p + nabf4 * m;
+        int ii = 3 * Nij * kk;
+        int baseIdx = 3 * j + ii;
+        double tmp1 = 0;
+        double tmp2 = 0;
+        double tmp3 = 0;
+        for (int q = 0; q < nn; q++) {
+          int idxNQ = n1 + q;  // Combine n1 and q into a single index
+          int c = pc4[idxNQ]; // coefficient of this term
+          int j1 = pb4[idxNQ]; // the three angular indices of the term, stored Q4 apart in pb4
+          int j2 = pb4[idxNQ + Q4];
+          int j3 = pb4[idxNQ + 2 * Q4];
+          double c1 = sumU[idxU + j1];
+          double c2 = sumU[idxU + j2];
+          double c3 = sumU[idxU + j3];          
+          double t12 = c * c1 * c2; // product of the two factors that are not differentiated
+          double t13 = c * c1 * c3;
+          double t23 = c * c2 * c3;
+          
+          // Pre-calculate commonly used indices          
+          int baseIdxJ3 = j3 + K3 * j; // Common index for j3 terms
+          int baseIdxJ2 = j2 + K3 * j; // Common index for j2 terms
+          int baseIdxJ1 = j1 + K3 * j; // Common index for j1 terms
+
+          // Temporary variables to store repeated calculations
+          double abfBaseJ1 = abf[baseIdxJ1];
+          double abfBaseJ2 = abf[baseIdxJ2];
+          double abfBaseJ3 = abf[baseIdxJ3];
+
+          // Product rule: differentiate each of the three factors in turn (rbf * abf for this pair)
+          tmp1 += t12 * (abfx[baseIdxJ3] * rbfBase + rbfxBase * abfBaseJ3)
+                            + t13 * (abfx[baseIdxJ2] * rbfBase + rbfxBase * abfBaseJ2)
+                            + t23 * (abfx[baseIdxJ1] * rbfBase + rbfxBase * abfBaseJ1);
+          tmp2 += t12 * (abfy[baseIdxJ3] * rbfBase + rbfyBase * abfBaseJ3)
+                            + t13 * (abfy[baseIdxJ2] * rbfBase + rbfyBase * abfBaseJ2)
+                            + t23 * (abfy[baseIdxJ1] * rbfBase + rbfyBase * abfBaseJ1);
+          tmp3 += t12 * (abfz[baseIdxJ3] * rbfBase + rbfzBase * abfBaseJ3)
+                            + t13 * (abfz[baseIdxJ2] * rbfBase + rbfzBase * abfBaseJ2)
+                            + t23 * (abfz[baseIdxJ1] * rbfBase + rbfzBase * abfBaseJ1);
+        }
+        dd4[baseIdx]     = tmp1;
+        dd4[baseIdx + 1] = tmp2;
+        dd4[baseIdx + 2] = tmp3;                          
+      }
+    }
+  }
+  else { // several elements: contributions are added with +=, so dd4 must already be zero on entry
+    int N3 = 3*Nij * nabf4 * nrbf4; // size of the dd4 slab belonging to one element-triple index k
+    int totalIterations = nrbf4 * Nij;
+    for (int idx = 0; idx < totalIterations; idx++) {
+      int j = idx / nrbf4;  // pair index
+      int m = idx % nrbf4;  // radial basis index
+    
+      int idxM = m + nrbfmax * j;
+      // Temporary variables to store frequently used products
+      double rbfM = rbf[idxM];
+      double rbfxM = rbfx[idxM];
+      double rbfyM = rbfy[idxM];
+      double rbfzM = rbfz[idxM];
+      int typej = tj[j] - 1; // 0-based element of the neighbor in pair j
+
+      for (int p = 0; p < nabf4; p++)  {
+        int n1 = pa4[p];
+        int n2 = pa4[p + 1];
+        int nn = n2 - n1;
+        int jpm = 3 * j + 3 * Nij * (p + nabf4 * m); // offset of (pair j, descriptor p, radial m) inside a slab
+
+        for (int q = 0; q < nn; q++) {
+          int c = pc4[n1 + q];
+          int j1 = pb4[n1 + q];
+          int j2 = pb4[n1 + q + Q4];
+          int j3 = pb4[n1 + q + 2 * Q4];
+          // Pre-calculate commonly used indices for j3, j2, j1, and m
+          int idxJ3 = j3 + K3 * j;
+          int idxJ2 = j2 + K3 * j;
+          int idxJ1 = j1 + K3 * j;          
+          int idx1 = nelements * j1 + nelements * K3 * m + nelements * K3 * nrbf3 * idxi[j];
+          int idx2 = nelements * j2 + nelements * K3 * m + nelements * K3 * nrbf3 * idxi[j];
+          int idx3 = nelements * j3 + nelements * K3 * m + nelements * K3 * nrbf3 * idxi[j];
+
+          // Temporary variables to store repeated calculations
+          double abfJ1 = abf[idxJ1];
+          double abfJ2 = abf[idxJ2];
+          double abfJ3 = abf[idxJ3];
+          double abfxJ1 = abfx[idxJ1];
+          double abfxJ2 = abfx[idxJ2];
+          double abfxJ3 = abfx[idxJ3];
+          double abfyJ1 = abfy[idxJ1];
+          double abfyJ2 = abfy[idxJ2];
+          double abfyJ3 = abfy[idxJ3];
+          double abfzJ1 = abfz[idxJ1];
+          double abfzJ2 = abfz[idxJ2];
+          double abfzJ3 = abfz[idxJ3];
+
+          int k = 0; // running index of the element triple i1 <= i2 <= i3
+          for (int i1 = 0; i1 < nelements; i1++) {            
+            double c1 = sumU[idx1 + i1];
+            for (int i2 = i1; i2 < nelements; i2++) {
+              double c2 = sumU[idx2 + i2];
+              double t12 = c*(c1 * c2);  
+              for (int i3 = i2; i3 < nelements; i3++) {                                                
+                double c3 = sumU[idx3 + i3];                
+                double t13 = c*(c1 * c3);
+                double t23 = c*(c2 * c3);
+                int baseIdx = jpm + N3 * k;
+                
+                // A factor depends on this pair only if the neighbor's element matches that factor's element
+                if (typej == i3) {
+                    dd4[0 + baseIdx] += t12 * (abfxJ3 * rbfM + rbfxM * abfJ3);
+                    dd4[1 + baseIdx] += t12 * (abfyJ3 * rbfM + rbfyM * abfJ3);
+                    dd4[2 + baseIdx] += t12 * (abfzJ3 * rbfM + rbfzM * abfJ3);
+                }
+                if (typej == i2) {
+                    dd4[0 + baseIdx] += t13 * (abfxJ2 * rbfM + rbfxM * abfJ2);
+                    dd4[1 + baseIdx] += t13 * (abfyJ2 * rbfM + rbfyM * abfJ2);
+                    dd4[2 + baseIdx] += t13 * (abfzJ2 * rbfM + rbfzM * abfJ2);
+                }
+                if (typej == i1) {
+                    dd4[0 + baseIdx] += t23 * (abfxJ1 * rbfM + rbfxM * abfJ1);
+                    dd4[1 + baseIdx] += t23 * (abfyJ1 * rbfM + rbfyM * abfJ1);
+                    dd4[2 + baseIdx] += t23 * (abfzJ1 * rbfM + rbfzM * abfJ1);
+                }
+                k += 1;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+void PairPOD::fourbodydesc23(double *d23, double *d2, double *d3, int Ni)
+{
+  // Products of selected two-body and three-body descriptors:
+  // d23(n,i,j) = d2(n, ind23[i]) * d3(n, ind32[j]) for every atom n of the block,
+  // with i < n23, j < n32 and the atom index running fastest in all three arrays.
+
+  for (int j = 0; j < n32; j++) {
+    for (int i = 0; i < n23; i++) {
+      const double *src2 = d2 + Ni * ind23[i];
+      const double *src3 = d3 + Ni * ind32[j];
+      double *dst = d23 + Ni * (i + n23 * j);
+      for (int n = 0; n < Ni; n++) dst[n] = src2[n] * src3[n];
+    }
+  }
+}
+
+void PairPOD::fourbodydescderiv23(double* dd23, double *d2, double *d3, double *dd2, double *dd3, int *idxi, int Ni, int N)
+{
+  // Product rule for d23(i,j) = d2(ind23[i]) * d3(ind32[j]). For each of the N pairs n, with the
+  // descriptor values taken at the pair's atom idxi[n]: dd23 = d2 * dd3 + dd2 * d3 (x, y, z per pair).
+  for (int idx = 0; idx < n32 * n23 * N; idx++) {
+    const int n = idx % N;           // pair index
+    const int i = (idx / N) % n23;   // position in ind23 (two-body descriptor)
+    const int j = (idx / N) / n23;   // position in ind32 (three-body descriptor)
+
+    const int k = 3 * (n + N * i + N * n23 * j);
+    const int k1 = 3 * n + 3 * N * ind23[i];
+    const int k2 = 3 * n + 3 * N * ind32[j];    // ind32 has n32 entries, so it is indexed by j
+    const int m1 = idxi[n] + Ni * ind23[i];
+    const int m2 = idxi[n] + Ni * ind32[j];     // same three-body descriptor as in fourbodydesc23()
+    dd23[0 + k] = d2[m1] * dd3[0 + k2] + dd2[0 + k1] * d3[m2];
+    dd23[1 + k] = d2[m1] * dd3[1 + k2] + dd2[1 + k1] * d3[m2];
+    dd23[2 + k] = d2[m1] * dd3[2 + k2] + dd2[2 + k1] * d3[m2];
+  }
+}
+
+void PairPOD::crossdesc(double *d12, double *d1, double *d2, int *ind1, int *ind2, int n12, int Ni)
+{
+  // Atom-fastest layout: d12(n,i) = d1(n, ind1[i]) * d2(n, ind2[i]) for i < n12 and n < Ni
+  for (int i = 0; i < n12; i++) {
+    const double *left = d1 + Ni * ind1[i];
+    const double *right = d2 + Ni * ind2[i];
+    double *out = d12 + Ni * i;
+    for (int n = 0; n < Ni; n++) out[n] = left[n] * right[n];
+  }
+}
+
+void PairPOD::crossdescderiv(double *dd12, double *d1, double *d2, double *dd1, double *dd2,
+        int *ind1, int *ind2, int *idxi, int n12, int Ni, int Nij)
+{
+  // Product rule for d12(i) = d1(ind1[i]) * d2(ind2[i]) in the atom-fastest layout: descriptor
+  // values are read at the pair's atom idxi[n], gradients at the pair n itself (x, y, z per pair).
+  for (int i = 0; i < n12; i++) {
+    const int a = ind1[i];
+    const int b = ind2[i];
+
+    for (int n = 0; n < Nij; n++) {
+      const double v1 = d1[idxi[n] + Ni * a];
+      const double v2 = d2[idxi[n] + Ni * b];
+      const double *g1 = dd1 + 3 * n + 3 * Nij * a;
+      const double *g2 = dd2 + 3 * n + 3 * Nij * b;
+      double *out = dd12 + 3 * n + 3 * Nij * i;
+      for (int c = 0; c < 3; c++) out[c] = v1 * g2[c] + g1[c] * v2;
+    }
+  }
+}
+
+void PairPOD::crossdesc(double *d12, double *d1, double *d2, int *ind1, int *ind2, 
+        int n12, int nd1, int nd2, int Ni)
+{
+  // Descriptor-fastest layout: d12(i,n) = d1(ind1[i], n) * d2(ind2[i], n); rows hold n12 / nd1 / nd2 values
+  for (int n = 0; n < Ni; n++) {
+    const double *left = d1 + nd1*n;
+    const double *right = d2 + nd2*n;
+    double *out = d12 + n12*n;
+    for (int i = 0; i < n12; i++) out[i] = left[ind1[i]] * right[ind2[i]];
+  }
+}
+
+void PairPOD::crossdescderiv(double *dd12, double *d1, double *d2, double *dd1, double *dd2,
+        int *ind1, int *ind2, int *idxi, int n12, int nd1, int nd2, int Ni, int Nij)
+{
+  // Product rule for d12(i) = d1(ind1[i]) * d2(ind2[i]) in the descriptor-fastest layout:
+  // values come from the row of atom idxi[n], gradients from the row of pair n.
+  for (int n = 0; n < Nij; n++) {
+    const double *v1 = d1 + nd1*idxi[n];
+    const double *v2 = d2 + nd2*idxi[n];
+    const double *g1 = dd1 + 3 * nd1 * n;
+    const double *g2 = dd2 + 3 * nd2 * n;
+    double *out = dd12 + 3 * n12 * n;
+
+    for (int i = 0; i < n12; i++) {
+      const int a = ind1[i];
+      const int b = ind2[i];
+      for (int c = 0; c < 3; c++) out[3 * i + c] = v1[a] * g2[3 * b + c] + g1[3 * a + c] * v2[b];
+    }
+  }
+}
+
+void PairPOD::blockatombase_descriptors(double *bd1, double *bdd1, int Ni, int Nij)
+{ // fills bd1 (Ni x Mdesc descriptor values) and bdd1 (3 x Nij x Mdesc gradients) for a block of Ni atoms with Nij pairs
+  for (int i=0; i<Ni*Mdesc; i++) bd1[i] = 0.0; // both outputs start from zero; several kernels below accumulate with +=
+  for (int i=0; i<3*Nij*Mdesc; i++) bdd1[i] = 0.0;
+  // descriptor families are stored back to back in bd1, each taking Ni times its count
+  double *d2 =  &bd1[0]; // nl2
+  double *d3 =  &bd1[Ni*nl2]; // nl3
+  double *d4 =  &bd1[Ni*(nl2 + nl3)]; // nl4
+  double *d23 =  &bd1[Ni*(nl2 + nl3 + nl4)]; // nl23
+  double *d33 =  &bd1[Ni*(nl2 + nl3 + nl4 + nl23)]; // nl33
+  double *d34 =  &bd1[Ni*(nl2 + nl3 + nl4 + nl23 + nl33)]; // nl34
+  double *d44 =  &bd1[Ni*(nl2 + nl3 + nl4 + nl23 + nl33 + nl34)]; // nl44
+  // matching slices of bdd1, each taking 3*Nij times the family's count
+  double *dd2 = &bdd1[0]; // 3*Nij*nl2
+  double *dd3 = &bdd1[3*Nij*nl2]; // 3*Nij*nl3
+  double *dd4 = &bdd1[3*Nij*(nl2+nl3)]; // 3*Nij*nl4
+  double *dd23 = &bdd1[3*Nij*(nl2+nl3+nl4)]; // 3*Nij*nl23
+  double *dd33 = &bdd1[3*Nij*(nl2+nl3+nl4+nl23)]; // 3*Nij*nl33
+  double *dd34 = &bdd1[3*Nij*(nl2+nl3+nl4+nl23+nl33)]; // 3*Nij*nl34
+  double *dd44 = &bdd1[3*Nij*(nl2+nl3+nl4+nl23+nl33+nl34)]; // 3*Nij*nl44
+
+  orthogonalradialbasis(Nij); // fills rbf, rbfx, rbfy, rbfz for all pairs
+
+  if ((nl2>0) && (Nij>0)) {
+    twobodydescderiv(d2, dd2, Ni, Nij);
+  }
+
+  if ((nl3 > 0) && (Nij>1)) { // everything that involves angular functions is skipped for fewer than two pairs
+    angularbasis(abftm, &abftm[K3], &abftm[2*K3], &abftm[3*K3], Nij); // overwrites abf, abfx, abfy, abfz
+    radialangularsum2(Ni, Nij); // fills sumU
+
+    threebodydesc(d3, Ni);
+    threebodydescderiv(dd3, Ni, Nij);
+
+    if ((nl23>0) && (Nij>2)) {
+      fourbodydesc23(d23, d2, d3, Ni);
+      fourbodydescderiv23(dd23, d2, d3, dd2, dd3, idxi, Ni, Nij);
+    }
+
+    if ((nl33>0) && (Nij>3)) {
+      crossdesc(d33, d3, d3, ind33l, ind33r, nl33, Ni);
+      crossdescderiv(dd33, d3, d3, dd3, dd3, ind33l, ind33r, idxi, nl33, Ni, Nij);
+    }
+    
+    if ((nl4 > 0) && (Nij>2)) {
+      if (K4 < K3) { // NOTE(review): when K4 >= K3, d4 and dd4 stay zero but the cross terms below still use them
+        fourbodydesc(d4, Ni);
+        fourbodydescderiv(dd4, Ni, Nij);        
+      }
+
+      if ((nl34>0) && (Nij>4)) {
+        crossdesc(d34, d3, d4, ind34l, ind34r, nl34, Ni);
+        crossdescderiv(dd34, d3, d4, dd3, dd4, ind34l, ind34r, idxi, nl34, Ni, Nij);
+      }
+
+      if ((nl44>0) && (Nij>5)) {
+        crossdesc(d44, d4, d4, ind44l, ind44r, nl44, Ni);
+        crossdescderiv(dd44, d4, d4, dd4, dd4, ind44l, ind44r, idxi, nl44, Ni, Nij);
+      }
     }
   }
+}
 
-  numneighsum[0] = 0;
-  numneighsum[1] = nij;
+void PairPOD::environment_descriptors(double *ei, double *cb, double *B, int Ni)
+{ // from descriptors B (Ni x Mdesc): writes per-atom energies to ei and per-atom descriptor weights to cb (Ni x Mdesc)
+  double *P = &abf[0]; // Ni*nClusters; the five scratch arrays below alias pair work arrays and overwrite their contents
+  double *cp = &abfx[0]; // Ni*nClusters
+  double *pca = &abfy[0]; // Ni*nComponents 
+  double *D = &abfz[0];   // Ni*nClusters
+  double *sumD = &rbf[0]; // Ni
+    
+  double *proj = &Proj[0];
+  double *cent = &Centroids[0];
+  double *cefs = &coefficients[0];  
+  int *tyai = &typeai[0]; // 1-based element type of each atom in the block
+  
+  int nCom = nComponents;
+  int nCls = nClusters;
+  int nDes = Mdesc;
+  int nCoeff = nCoeffPerElement;
+  // Step 1: pca(i,k) = sum_m proj(k,m,type_i) * B(i,m)
+  for (int idx=0; idx<Ni*nCom; idx++) {
+    int k = idx % nCom;
+    int i = idx / nCom;     
+    double sum = 0.0;
+    int typei = tyai[i]-1;
+    for (int m = 0; m < nDes; m++) {
+      sum += proj[k + nCom*m + nCom*nDes*typei] * B[i + Ni*m];
+    }
+    pca[i + Ni*k] = sum;    
+  }
+  // Step 2: D(i,j) = 1 / (1e-20 + squared distance between pca(i,:) and centroid j of the atom's type)
+  for (int idx=0; idx<Ni*nCls; idx++) {
+    int j = idx % nCls;
+    int i = idx / nCls;     
+    int typei = tyai[i]-1;
+    double sum = 1e-20; // keeps the reciprocal finite when the point coincides with the centroid
+    for (int k = 0; k < nCom; k++) {
+      double c = cent[k + j * nCom + nCls*nCom*typei];
+      double p = pca[i + Ni*k];
+      sum += (p - c) * (p - c);
+    }
+    D[i + Ni*j] = 1.0 / sum;
+  }
+  // Step 3: P(i,j) = D(i,j) / sum_j D(i,j)
+  for (int i = 0; i< Ni; i++) {    
+    double sum = 0; 
+    for (int j = 0; j < nCls; j++) sum += D[i + Ni*j];    
+    sumD[i] = sum;
+    for (int j = 0; j < nCls; j++) P[i + Ni*j] = D[i + Ni*j]/sum;    
+  }
+  // Step 4: ei(n) = c0 + sum_k sum_m c(m,k) * B(n,m) * P(n,k), with the coefficients of the atom's type
+  for (int n=0; n<Ni; n++) {
+    int nc = nCoeff*(tyai[n]-1);
+    ei[n] = cefs[0 + nc];
+    for (int k = 0; k<nCls; k++)
+      for (int m=0; m<nDes; m++)     
+        ei[n] += cefs[1 + m + nDes*k + nc]*B[n + Ni*m]*P[n + Ni*k];
+  }    
+  // Step 5: cp(n,k) = sum_m c(m,k) * B(n,m)
+  for (int idx=0; idx<Ni*nCls; idx++) {
+    int n = idx % Ni;
+    int k = idx / Ni;
+    int nc = nCoeff*(tyai[n]-1);            
+    double sum = 0;
+    for (int m = 0; m<nDes; m++)     
+      sum += cefs[1 + m + k*nDes + nc]*B[n + Ni*m];
+    cp[n + Ni*k] = sum;
+  }            
+  // Step 6: first part of cb: cb(n,m) = sum_k c(m,k) * P(n,k)
+  for (int idx=0; idx<Ni*nDes; idx++) {
+    int n = idx % Ni;
+    int m = idx / Ni;
+    int nc = nCoeff*(tyai[n]-1);            
+    double sum = 0.0;    
+    for (int k = 0; k<nCls; k++)    
+      sum += cefs[1 + m + k*nDes + nc]*P[n + Ni*k];      
+    cb[n + Ni*m] = sum;
+  }              
+  // Step 7: add sum_j cp(i,j) * dP(i,j)/dB(i,m), chained through D and pca
+  for (int idx=0; idx<Ni*nDes; idx++) {
+    int i = idx % Ni;
+    int m = idx / Ni;
+    int typei = tyai[i]-1;
+    double S1 = 1/sumD[i];
+    double S2 = sumD[i]*sumD[i];   
+    double sum = 0.0;
+    for (int j=0; j<nCls; j++) {
+      double dP_dB = 0.0;
+      for (int k = 0; k < nCls; k++) {
+        double dP_dD = -D[i + Ni*j] / S2; // dP_j/dD_k = delta_jk/sumD - D_j/sumD^2
+        if (k==j) dP_dD += S1;
+        double dD_dB = 0.0;
+        double D2 = 2 * D[i + Ni*k] * D[i + Ni*k];
+        for (int n = 0; n < nCom; n++) {
+          double dD_dpca = D2 * (cent[n + k * nCom + nCls*nCom*typei] - pca[i + Ni*n]); // dD_k/dpca_n = 2*D_k^2*(centroid - pca)
+          dD_dB += dD_dpca * proj[n + m * nCom + nCom*nDes*typei]; // dpca_n/dB_m = proj(n,m)
+        }                
+        dP_dB += dP_dD * dD_dB;
+      }      
+      sum += cp[i + Ni*j]*dP_dB;      
+    }
+    cb[i + Ni*m] += sum;
+  }  
 }
+
+void PairPOD::blockatomenergyforce(double *ei, double *fij, int Ni, int Nij)
+{
+  // Build the descriptors bd and their gradients bdd for this block of Ni atoms / Nij pairs.
+  blockatombase_descriptors(bd, bdd, Ni, Nij);
+
+  const int gradStride = 3*Nij;    // offset in bdd between consecutive descriptors
+
+  if (nClusters <= 1) {
+    // Linear model: the energy of an atom is the constant term for its type plus the
+    // coefficient-weighted sum of its descriptors.
+    for (int atom = 0; atom < Ni; atom++) {
+      const int base = nCoeffPerElement*(typeai[atom]-1);
+      ei[atom] = coefficients[0 + base];
+      for (int m = 0; m < Mdesc; m++)
+        ei[atom] += coefficients[1 + m + base]*bd[atom + Ni*m];
+    }
+
+    // Pair force: the coefficients selected by ti[pair], applied to the descriptor
+    // gradients stored for that pair.
+    for (int pair = 0; pair < Nij; pair++) {
+      const int base = nCoeffPerElement*(ti[pair]-1);
+      double *f = fij + 3*pair;
+      f[0] = f[1] = f[2] = 0.0;
+      for (int m = 0; m < Mdesc; m++) {
+        const double *g = bdd + 3*pair + gradStride*m;
+        for (int c = 0; c < 3; c++) f[c] += coefficients[1 + m + base]*g[c];
+      }
+    }
+    return;
+  }
+
+  // Cluster model: environment_descriptors() fills the energies ei and the per-atom
+  // descriptor weights cb; sumU is reused as the storage for cb.
+  double *cb = &sumU[0];
+  environment_descriptors(ei, cb, bd, Ni);
+
+  for (int pair = 0; pair < Nij; pair++) {
+    const int center = idxi[pair];
+    double acc[3] = {0.0, 0.0, 0.0};
+    for (int m = 0; m < Mdesc; m++) {
+      const double w = cb[center + Ni*m];
+      const double *g = bdd + 3*pair + gradStride*m;
+      for (int c = 0; c < 3; c++) acc[c] += w*g[c];
+    }
+    fij[0 + 3*pair] = acc[0];
+    fij[1 + 3*pair] = acc[1];
+    fij[2 + 3*pair] = acc[2];
+  }
+}
+
+void PairPOD::savematrix2binfile(std::string filename, double *A, int nrows, int ncols)
+{
+  // Debug dump: a header {nrows, ncols} stored as two doubles, then the nrows*ncols values of A.
+  FILE *fp = fopen(filename.c_str(), "wb");
+  if (fp == nullptr) error->one(FLERR, "Cannot open file " + filename + " for writing"); // never hand a null stream to fwrite/fclose
+  const double sz[2] = {(double) nrows, (double) ncols};
+  fwrite(sz, sizeof(double), 2, fp);
+  fwrite(A, sizeof(double), (size_t) nrows * (size_t) ncols, fp);  // element count formed in size_t, not int
+  fclose(fp);
+}
+
+void PairPOD::saveintmatrix2binfile(std::string filename, int *A, int nrows, int ncols)
+{
+  // Debug dump: a header {nrows, ncols} stored as two ints, then the nrows*ncols values of A.
+  FILE *fp = fopen(filename.c_str(), "wb");
+  if (fp == nullptr) error->one(FLERR, "Cannot open file " + filename + " for writing"); // never hand a null stream to fwrite/fclose
+  const int sz[2] = {nrows, ncols};
+  fwrite(sz, sizeof(int), 2, fp);
+  fwrite(A, sizeof(int), (size_t) nrows * (size_t) ncols, fp);  // element count formed in size_t, not int
+  fclose(fp);
+}
+
+void PairPOD::savedatafordebugging() // dumps the current block's work arrays to pod*.bin files in the working directory, then stops the run
+{
+  saveintmatrix2binfile("podtypeai.bin", typeai, ni, 1); // per-atom arrays: ni entries
+  saveintmatrix2binfile("podnumij.bin", numij, ni+1, 1); // ni+1 entries
+  saveintmatrix2binfile("podai.bin", ai, nij, 1); // per-pair arrays: nij entries
+  saveintmatrix2binfile("podaj.bin", aj, nij, 1);  
+  saveintmatrix2binfile("podti.bin", ti, nij, 1);  
+  saveintmatrix2binfile("podtj.bin", tj, nij, 1);  
+  saveintmatrix2binfile("podidxi.bin", idxi, nij, 1);     
+  savematrix2binfile("podrbf.bin", rbf, nrbfmax, nij); // nrbfmax values per pair
+  savematrix2binfile("podrbfx.bin", rbfx, nrbfmax, nij);
+  savematrix2binfile("podrbfy.bin", rbfy, nrbfmax, nij);
+  savematrix2binfile("podrbfz.bin", rbfz, nrbfmax, nij);      
+  int kmax = (K3 > ns) ? K3 : ns; // same per-pair size that grow_pairs() uses for abf*
+  savematrix2binfile("podabf.bin", abf,   kmax, nij);
+  savematrix2binfile("podabfx.bin", abfx, kmax, nij);
+  savematrix2binfile("podabfy.bin", abfy, kmax, nij);
+  savematrix2binfile("podabfz.bin", abfz, kmax, nij);            
+  savematrix2binfile("podbdd.bin", bdd, 3*nij, Mdesc);      
+  savematrix2binfile("podbd.bin", bd, ni, Mdesc);      
+  savematrix2binfile("podsumU.bin", sumU, nelements * K3 * nrbfmax, ni); // NOTE(review): the kernels index sumU with nrbf3, not nrbfmax - confirm the allocation covers this size
+  savematrix2binfile("podrij.bin", rij, 3, nij);
+  savematrix2binfile("podfij.bin", fij, 3, nij);
+  savematrix2binfile("podei.bin", ei, ni, 1);           
+  error->all(FLERR, "Save data and stop the run for debugging"); // always raises this error after the files are written
+}
+
diff --git a/src/ML-POD/pair_pod.h b/src/ML-POD/pair_pod.h
index 62b6e99f3bd..90600e7e136 100644
--- a/src/ML-POD/pair_pod.h
+++ b/src/ML-POD/pair_pod.h
@@ -25,7 +25,7 @@ PairStyle(pod,PairPOD);
 namespace LAMMPS_NS {
 
 class PairPOD : public Pair {
- public:
+public:
   PairPOD(class LAMMPS *);
   ~PairPOD() override;
   void compute(int, int) override;
@@ -36,42 +36,122 @@ class PairPOD : public Pair {
   double init_one(int, int) override;
   double memory_usage() override;
 
-  int dim;    // typically 3
-
-  double *gd;             // global linear descriptors
-  double *gdall;          // global linear descriptors summed over all MPI ranks
-  double *podcoeff;       // POD coefficients
-  double *newpodcoeff;    // normalized POD coefficients
-  double *energycoeff;    // energy coefficients
-  double *forcecoeff;     // force coefficients
-
-  void estimate_tempmemory();
-  void free_tempmemory();
-  void allocate_tempmemory();
-
-  void lammpsNeighPairs(double **x, int **firstneigh, int *atomtype, int *map, int *numneigh,
-                        int i);
-
- protected:
-  int nablockmax;    // maximum number of atoms per computation block
-  int nij;           //  number of atom pairs
-  int nijmax;        // maximum number of atom pairs
-  int szd;           // size of tmpmem
-
-  class MLPOD *podptr;
-
-  // temporary arrays for computation blocks
-
-  double *tmpmem;      // temporary memory
-  int *typeai;         // types of atoms I only
-  int *numneighsum;    // cumulative sum for an array of numbers of neighbors
+  void lammpsNeighborList(double *rij1, int *ai1, int *aj1, int *ti1, int *tj1, double **x, int **firstneigh, int *atomtype, int *map, int *numneigh,
+                        double rcutsq, int i);
+  void NeighborCount(double **x, int **firstneigh, int *ilist, int *numneigh, double rcutsq, int i1, int i2);
+  void NeighborList(double **x, int **firstneigh, int *atomtype, int *map, int *ilist, int *numneigh,
+                        double rcutsq, int i1, int i2);
+  void tallyenergy(double *ei, int istart, int Ni);          
+  void tallystress(double *fij, double *rij, int *ai, int *aj, int nlocal, int N);              
+  void tallyforce(double **force, double *fij,  int *ai, int *aj, int N);
+  void divideInterval(int *intervals, int N, int M);
+  int calculateNumberOfIntervals(int N, int intervalSize); 
+  int numberOfNeighbors();
+  
+  void copy_data_from_pod_class();
+  void radialbasis(double *rbft, double *rbftx, double *rbfty, double *rbftz, double *rij, int Nij);
+  void orthogonalradialbasis(int Nij);
+  void angularbasis(double *tm, double *tmu, double *tmv, double *tmw, int N);
+  void radialangularsum(int Ni, int Nij);
+  void radialangularsum2(int Ni, int Nij);    // NOTE(review): same signature as radialangularsum; the difference is not visible in this header
+  void twobodydescderiv(double *d2, double *dd2, int Ni, int Nij);
+  void threebodydesc(double *d3, int Ni);
+  void threebodydescderiv(double *dd3, int Ni, int Nij);
+  void extractsumU(int Ni);
+  void fourbodydesc(double *d4, int Ni);
+  void fourbodydescderiv(double *dd4, int Ni, int Nij);
+  void fourbodydesc23(double *d23, double *d2, double *d3, int Ni);
+  void fourbodydescderiv23(double* dd23, double *d2, double *d3, double *dd2, double *dd3, int *idxi, int Ni, int N);
+  void crossdesc(double *d12, double *d1, double *d2, int *ind1, int *ind2, int n12, int Ni);
+  void crossdescderiv(double *dd12, double *d1, double *d2, double *dd1, double *dd2,
+        int *ind1, int *ind2, int *idxi, int n12, int Ni, int Nij);
+  void crossdesc(double *d12, double *d1, double *d2, int *ind1, int *ind2, 
+        int n12, int nd1, int nd2, int Ni);    // overload with additional nd1, nd2 arguments
+  void crossdescderiv(double *dd12, double *d1, double *d2, double *dd1, double *dd2,
+        int *ind1, int *ind2, int *idxi, int n12, int nd1, int nd2, int Ni, int Nij);    // overload with additional nd1, nd2 arguments
+  void blockatombase_descriptors(double *bd1, double *bdd1, int Ni, int Nij);
+  void environment_descriptors(double *ei, double *cb, double *B, int Ni);
+  void blockatomenergyforce(double *ei, double *fij, int Ni, int Nij);
+
+  void savematrix2binfile(std::string filename, double *A, int nrows, int ncols);    // debug: binary dump of a 2-entry size header plus the nrows*ncols doubles of A
+  void saveintmatrix2binfile(std::string filename, int *A, int nrows, int ncols);    // debug: binary dump of {nrows, ncols} followed by the nrows*ncols ints of A
+  void savedatafordebugging();    // debug: dumps internal arrays to pod*.bin files, then stops the run through error->all()
+
+protected:
+  class EAPOD *fastpodptr;  // pointer to an EAPOD object (EAPOD is only forward-declared here)
+  virtual void allocate();
+  void grow_atoms(int Ni);
+  void grow_pairs(int Nij);
+
+  int atomBlockSize;        // size of each atom block
+  int nAtomBlocks;          // number of atom blocks
+  int atomBlocks[101];      // atom blocks (fixed-size array of 101 ints)
+  
+  int ni;            // total number of atoms i
+  int nij;           // total number of pairs (i,j)
+  int nimax;         // maximum number of atoms i
+  int nijmax;        // maximum number of pairs (i,j)
+
+  int nelements; // number of elements
+  int onebody;   // one-body descriptors
+  int besseldegree; // degree of Bessel functions
+  int inversedegree; // degree of inverse functions
+  int nbesselpars;  // number of Bessel parameters
+  int nCoeffPerElement; // number of coefficients per element = (nl1 + Mdesc*nClusters)
+  int ns;      // number of snapshots for radial basis functions
+  int nl1, nl2, nl3, nl4, nl23, nl33, nl34, nl44, n23, n32, nl;   // number of local descriptors
+  int nrbf2, nrbf3, nrbf4, nrbfmax;            // number of radial basis functions
+  int nabf3, nabf4;                            // number of angular basis functions
+  int K3, K4, Q4;                                  // number of monomials
+    
+  // variables for the environment descriptors
+  int nClusters; // number of environment clusters
+  int nComponents; // number of principal components
+  int Mdesc; // number of base descriptors
+
+  double rin;  // inner cut-off radius
+  double rcut; // outer cut-off radius
+  double rmax; // rcut - rin
+  
   double *rij;         // (xj - xi) for all pairs (I, J)
-  int *idxi;           // storing linear indices for all pairs (I, J)
+  double *fij;         // force for all pairs (I, J); dumped as 3 x nij by savedatafordebugging()
+  double *ei;          // energy for each atom I
+  int *typeai;         // types of atoms I only
+  int *numij;          // number of pairs (I, J) for each atom I; dumped with ni+1 entries, presumably cumulative -- TODO confirm
+  int *idxi;           // linear indices of atom I for all pairs (I, J)
   int *ai;             // IDs of atoms I for all pairs (I, J)
   int *aj;             // IDs of atoms J for all pairs (I, J)
   int *ti;             // types of atoms I for all pairs (I, J)
-  int *tj;             // types of atoms J  for all pairs (I, J)
-
+  int *tj;             // types of atoms J for all pairs (I, J)
+
+  double besselparams[3];  // Bessel parameters (capacity 3; presumably nbesselpars of them are used -- TODO confirm)
+  double *Phi ;  // eigenvector matrix ns x ns
+  double *rbf;  // radial basis functions nij x nrbfmax
+  double *rbfx; // x-derivatives of radial basis functions nij x nrbfmax
+  double *rbfy; // y-derivatives of radial basis functions nij x nrbfmax
+  double *rbfz; // z-derivatives of radial basis functions nij x nrbfmax
+  double *abf;  // angular basis functions nij x K3 (NOTE(review): savedatafordebugging() dumps abf* with max(K3, ns) rows -- confirm size)
+  double *abfx; // x-derivatives of angular basis functions nij x K3
+  double *abfy; // y-derivatives of angular basis functions nij x K3
+  double *abfz; // z-derivatives of angular basis functions nij x K3
+  double *abftm ; // angular basis functions 4 x K3
+  double *sumU; // sum of radial basis functions ni x K3 x nrbfmax x nelements
+  double *Proj; // PCA projection matrix
+  double *Centroids; // centroids of the clusters
+  double *bd;   // base descriptors ni x Mdesc
+  double *bdd;  // base descriptor derivatives 3 x nij x Mdesc
+  double *pd;   // environment probability descriptors ni x nClusters
+  double *pdd;  // environment probability descriptor derivatives 3 x nij x nClusters
+  double *coefficients; // coefficients nCoeffPerElement x nelements
+  int *pq3, *pn3, *pc3;       // arrays to compute 3-body angular basis functions
+  int *pa4, *pb4, *pc4; // arrays to compute 4-body angular basis functions
+  int *ind23; // n23
+  int *ind32; // n32
+  int *ind33l, *ind33r; // nl33
+  int *ind34l, *ind34r; // nl34
+  int *ind44l, *ind44r; // nl44
+  int *elemindex;  // NOTE(review): undocumented; presumably a per-element index map -- TODO confirm
+     
   bool peratom_warn;    // print warning about missing per-atom energies or stresses
 };