diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 5d5b2c4e6..134967511 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -105,7 +105,7 @@ jobs: shell: bash working-directory: ${{github.workspace}}/build/compiler run: | - CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,1 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2 + CPPFLAGS="-I$EIGEN3_INCLUDE_DIR" CXXFLAGS="-std=c++11 -Wno-enum-compare" ${{github.workspace}}/configure --with-max-am=2,1 --with-eri-max-am=2,2 --with-eri3-max-am=3,2 --with-eri2-max-am=3,2 --enable-eri=1 --enable-eri3=1 --enable-eri2=0 --enable-1body=1 --disable-1body-property-derivs --with-multipole-max-order=2 make -j3 make check cd src/bin/test_eri && ./stdtests.pl && cd ../../.. diff --git a/configure.ac b/configure.ac index df08efb8b..aded39d1f 100644 --- a/configure.ac +++ b/configure.ac @@ -341,6 +341,7 @@ esac ) LIBINT_HARD_MAX_AM=12 +AC_DEFINE_UNQUOTED(LIBINT_HARD_MAX_AM,$LIBINT_HARD_MAX_AM) LIBINT_MAX_AM=4 AC_ARG_WITH(max-am, AS_HELP_STRING([--with-max-am=N],[Support Gaussians of angular momentum up to N. Can specify values for each derivative levels as a list N0,N1,N2...]), @@ -528,7 +529,7 @@ AS_HELP_STRING([--with-eri3-max-am=N],[Support 3-center ERIs for Gaussians of an else ERI3_MAX_AM=$withval fi - if test $ERI3_MAX_AM -ge $LIBINT_HARD_MAX_AM; then + if test $ERI3_MAX_AM -gt $LIBINT_HARD_MAX_AM; then AC_MSG_ERROR([Value for --with-eri3-max-am too high ($withval). Are you sure you know what you are doing?]) fi AC_SUBST(ERI3_MAX_AM) @@ -599,7 +600,7 @@ AS_HELP_STRING([--with-eri2-max-am=N],[Support 2-center ERIs for Gaussians of an else ERI2_MAX_AM=$withval fi - if test $ERI2_MAX_AM -ge $LIBINT_HARD_MAX_AM; then + if test $ERI2_MAX_AM -gt $LIBINT_HARD_MAX_AM; then AC_MSG_ERROR([Value for --with-eri2-max-am too high ($withval). Are you sure you know what you are doing?]) fi AC_SUBST(ERI2_MAX_AM) diff --git a/export/cmake/CMakeLists.txt.export b/export/cmake/CMakeLists.txt.export index 8c2b78a43..e27a9c821 100644 --- a/export/cmake/CMakeLists.txt.export +++ b/export/cmake/CMakeLists.txt.export @@ -429,6 +429,7 @@ if (LIBINT_HAS_CXX_API) tests/unit/test-c-api.cc tests/unit/test-core.cc tests/unit/test-core-ints.cc + tests/unit/test-df-autogen.cc tests/unit/test-permute.cc tests/unit/test-precision.cc tests/unit/test-shell-order.cc diff --git a/include/libint2/config.h.in b/include/libint2/config.h.in index bd42b26e2..8c807f5f1 100644 --- a/include/libint2/config.h.in +++ b/include/libint2/config.h.in @@ -47,6 +47,9 @@ /* Prefix for all names in API */ #undef LIBINT_API_PREFIX +/* Max AM supported by Libint *in principle* */ +#undef LIBINT_HARD_MAX_AM + /* Max AM (same for all derivatives; if not defined see LIBINT_MAX_AM_LIST) */ #undef LIBINT_MAX_AM diff --git a/include/libint2/dfbs_generator.h b/include/libint2/dfbs_generator.h new file mode 100644 index 000000000..750f54b69 --- /dev/null +++ b/include/libint2/dfbs_generator.h @@ -0,0 +1,321 @@ +/* + * Copyright (C) 2004-2024 Edward F. Valeev + * + * This file is part of Libint. + * + * Libint is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Libint. If not, see . + * + */ + +#ifndef _libint2_src_lib_libint_dfbs_generator_h_ +#define _libint2_src_lib_libint_dfbs_generator_h_ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace libint2 { + +namespace detail { + +/// @brief Uncontracts a set of shells +/// @param[in] shells a vector of shells to be uncontracted +/// @return a vector of uncontracted shells +inline std::vector uncontract(const std::vector &shells) { + std::vector primitive_shells; + for (const auto &contracted_shell : shells) { + for (size_t p = 0; p < contracted_shell.nprim(); p++) { + const auto prim_shell = contracted_shell.extract_primitive(p, true); + // if dealing with generally contracted basis (e.g., cc-pvxz) represented + // as a segmented basis need to remove duplicates + if (std::find(primitive_shells.begin(), primitive_shells.end(), + prim_shell) == primitive_shells.end()) + primitive_shells.emplace_back(std::move(prim_shell)); + } + } + return primitive_shells; +} + +/// @brief returns \f$ \Gamma(x) \f$ +inline double gamma_function(const double x) { return std::tgamma(x); } + +/// @brief Computes an effective exponent for a product of two primitive +/// gaussians as as described in J. Chem. Theory Comput. 2021, 17, 6886−6900. +/// \f$ \alpha_{eff} = \left[ \frac{\Gamma(L+2)\Gamma(l_\mu +l_\nu + +/// \frac{3}{2})}{\Gamma(L+ \frac{3}{2})\Gamma(l_\mu +l_\nu + 2)} \right]^2 +/// (\alpha_\mu + \alpha_\nu) \f$ +/// @param shell1 first primitive shell +/// @param shell2 second primitive shell +/// @param L total angular momentum of product function +/// @return effective exponent of product function +inline double alpha_eff(const Shell &shell1, const Shell &shell2, const int L) { + const auto alpha1 = shell1.alpha[0]; + const auto alpha2 = shell2.alpha[0]; + const auto l1 = shell1.contr[0].l; + const auto l2 = shell2.contr[0].l; + const auto prefactor = + std::pow((gamma_function(L + 2.) * gamma_function(l1 + l2 + 1.5)) / + (gamma_function(l1 + l2 + 2.) * gamma_function(L + 1.5)), + 2.); + return prefactor * (alpha1 + alpha2); +} + +/// @brief Creates a set of product functions from a set of primitive shells. +/// Each pair of primitive shells produces a set of product functions with an +/// effective exponent (\f$ \alpha_{eff} \f$ as described above) and angular +/// momentum ranging from \f$ |l1-l2| \leq L \leq l1+l2 \f$ as described in J. +/// Chem. Theory Comput. 2021, 17, 6886−6900. +/// @param primitive_shells set of primitive shells +/// @return set of product functions +inline std::vector product_functions( + const std::vector &primitive_shells) { + std::vector product_functions; + for (size_t i = 0; i < primitive_shells.size(); ++i) { + for (size_t j = 0; j <= i; ++j) { + const auto li = primitive_shells[i].contr[0].l; + const auto lj = primitive_shells[j].contr[0].l; + for (auto L = std::abs(li - lj); L <= li + lj; L++) { + const auto alpha = libint2::svector( + {alpha_eff(primitive_shells[i], primitive_shells[j], L)}); + libint2::svector contr_; + Shell::Contraction contr1; + contr1.l = L; + contr1.pure = true; // libint2 needs solid harmonics for 2c2b integrals + contr1.coeff = {1.0}; + contr_.push_back(contr1); + assert(primitive_shells[i].O == primitive_shells[j].O); + const auto shell = Shell(alpha, contr_, primitive_shells[i].O); + if (std::find(product_functions.begin(), product_functions.end(), + shell) == product_functions.end()) + product_functions.emplace_back(shell); + } + } + } + return product_functions; +} + +/// @brief returns a hash map of shell indices to basis function indices +inline std::vector map_shell_to_basis_function( + const std::vector &shells) { + std::vector result; + result.reserve(shells.size()); + + size_t n = 0; + for (auto &&shell : shells) { + result.push_back(n); + n += shell.size(); + } + + return result; +} + +/// @brief Computes the Coulomb matrix \f$ (\mu|rij^{-1}|\nu) \f$ for a set of +/// shells +/// @param shells set of shells +/// @return Coulomb matrix +inline Eigen::MatrixXd compute_coulomb_matrix( + const std::vector &shells) { + const auto n = nbf(shells); + Eigen::MatrixXd result = Eigen::MatrixXd::Zero(n, n); + using libint2::Engine; + const auto oper = libint2::Operator::coulomb; + const auto braket = libint2::BraKet::xs_xs; + Engine engine(oper, max_nprim(shells), max_l(shells), 0, + std::numeric_limits::epsilon(), + libint2::default_params(oper), braket); + engine.set(ScreeningMethod::Conservative); + const auto shell2bf = map_shell_to_basis_function(shells); + const auto &buf = engine.results(); + for (size_t s1 = 0; s1 != shells.size(); ++s1) { + auto bf1 = shell2bf[s1]; + auto n1 = shells[s1].size(); + for (size_t s2 = 0; s2 <= s1; ++s2) { + auto bf2 = shell2bf[s2]; + auto n2 = shells[s2].size(); + engine.compute(shells[s1], shells[s2]); + Eigen::Map buf_mat(buf[0], n1, n2); + result.block(bf1, bf2, n1, n2) = buf_mat; + if (s1 != s2) result.block(bf2, bf1, n2, n1) = buf_mat.transpose(); + } + } + return result; +} + +/// @brief Splits a set of shells by angular momentum +/// @param shells set of shells +/// @return vector of vectors of shells split by angular momentum L. The i-th +/// vector contains all shells with angular momentum i +inline std::vector> split_by_L( + const std::vector &shells) { + int lmax = max_l(shells); + std::vector> sorted_shells; + sorted_shells.resize(lmax + 1); + for (auto &&shell : shells) { + auto l = shell.contr[0].l; + sorted_shells[l].push_back(shell); + } + return sorted_shells; +} + +/// @brief Computes the reduced set of product functions via pivoted Cholesky +/// decomposition as described in J. Chem. Theory Comput. 2021, 17, 6886−6900. +/// Cholesky decomposition is performed on the Coulomb matrix of the product +/// functions and the pivot indices are sorted in ascending order of column sums +/// of the Coulomb matrix. The reduced set of product functions is then +/// constructed by selecting the product functions corresponding to the pivot +/// indices. +/// @param shells set of shells +/// @param cholesky_threshold threshold for choosing a product function via +/// pivoted Cholesky decomposition +/// @return reduced set of product functions +inline std::vector pivoted_cholesky_in_L( + const std::vector &shells, const double cholesky_threshold) { + const auto n = shells.size(); // number of shells + std::vector + shell_indices; // hash map of basis function indices to shell indices + const auto L = shells[0].contr[0].l; // all shells must have same L + const auto nbf = libint2::nbf( + shells); // total number of basis functions in vector of shells + for (size_t i = 0; i < n; ++i) { + for (size_t j = 0; j < 2 * L + 1; + ++j) // 2L+1 since libint2 strictly uses solid harmonics for 2c2b + // integrals + shell_indices.push_back(i); + } + assert(shell_indices.size() == nbf); + const auto C = compute_coulomb_matrix(shells); + std::vector pivot(nbf); + for (auto i = 0; i < nbf; ++i) { + pivot[i] = i; + } + // set pivot indices in ascending order of off diagonal elements of Coulomb + // matrix see Phys. Rev. A 101, 032504 (Accurate reproduction of strongly + // repulsive interatomic potentials) + Eigen::MatrixXd C_off_diag = C; + auto col_sum = C_off_diag.colwise().sum(); + // sort pivot indices in ascending order of column sums + std::sort(pivot.begin(), pivot.end(), [&col_sum](size_t i1, size_t i2) { + return col_sum[i1] < col_sum[i2]; + }); + // compute Cholesky decomposition + const auto reduced_pivots = pivoted_cholesky(C, cholesky_threshold, pivot); + + std::vector reduced_shells; + for (size_t i = 0; i < reduced_pivots.size(); ++i) { + // check if the reduced shell is already in reduced shells + if (std::find(reduced_shells.begin(), reduced_shells.end(), + shells[shell_indices[reduced_pivots[i]]]) == + reduced_shells.end()) + reduced_shells.push_back(shells[shell_indices[reduced_pivots[i]]]); + } + return reduced_shells; +} +} // namespace detail + +/// @brief This class produces density fitting basis sets for an atom from +/// products of AO basis functions and eliminates linearly dependent functions +/// via pivoted Cholesky decomposition see: J. Chem. Theory Comput. 2021, 17, +/// 6886−6900 (Straightforward and Accurate Automatic Auxiliary Basis Set +/// Generation for Molecular Calculations with Atomic Orbital Basis Sets) +class DFBasisSetGenerator { + public: + /// @brief constructor for DFBasisSetGenerator class, generates density + /// fitting basis set from products of AO basis functions of and atom see: J. + /// Chem. Theory Comput. 2021, 17, 6886−6900 (Straightforward and Accurate + /// Automatic Auxiliary Basis Set Generation for Molecular Calculations with + /// Atomic Orbital Basis Sets) + /// @param obs_name name of basis set for AO functions + /// @param atoms vector of atoms + /// @param cholesky_threshold threshold for threshold for pivoted Cholesky + /// decomposition to be performed on the Coulomb matrix of the product + /// functions + DFBasisSetGenerator(std::string obs_name, const Atom &atom, + const double cholesky_threshold = 1e-7) { + // get AO basis shells for each atom + const auto atom_bs = BasisSet(obs_name, {atom}); + const auto obs_shells = atom_bs.shells(); + // get primitive shells from AO functions + const auto primitive_shells = detail::uncontract(obs_shells); + // compute candidate shells + candidate_shells_ = detail::product_functions(primitive_shells); + cholesky_threshold_ = cholesky_threshold; + } + + /// @brief constructor for DFBasisSetGenerator class, generates density + /// fitting basis set from products of AO shells provided by user + /// @param shells vector of vector of shells for each atom + /// @param cholesky_threshold threshold for pivoted Cholesky decomposition to + /// be performed on the Coulomb matrix of the product functions + DFBasisSetGenerator(const std::vector &shells, + const double cholesky_threshold = 1e-7) { + const auto primitive_shells = detail::uncontract(shells); + candidate_shells_ = detail::product_functions(primitive_shells); + cholesky_threshold_ = cholesky_threshold; + } + + DFBasisSetGenerator() = default; + + ~DFBasisSetGenerator() = default; + + /// @brief returns the candidate shells (full set of product functions) + std::vector candidate_shells() { return candidate_shells_; } + + /// @brief returns a set of shells reduced via pivoted Cholesky + /// decomposition of the Coulomb matrix of the product functions for each + /// angular momentum L as described in J. Chem. Theory Comput. 2021, 17, + /// 6886−6900. + std::vector reduced_shells() { + if (reduced_shells_computed_) + return reduced_shells_; + else { + const auto candidate_splitted_in_L = + detail::split_by_L(candidate_shells_); + for (size_t i = 0; i < candidate_splitted_in_L.size(); ++i) { + std::vector reduced_shells_L; + if (candidate_splitted_in_L[i].size() > 1) + reduced_shells_L = detail::pivoted_cholesky_in_L( + candidate_splitted_in_L[i], cholesky_threshold_); + else + reduced_shells_L = candidate_splitted_in_L[i]; + reduced_shells_.insert(reduced_shells_.end(), reduced_shells_L.begin(), + reduced_shells_L.end()); + } + reduced_shells_computed_ = true; + } + return reduced_shells_; + } + + /// @brief returns a BasisSet of shells reduced via pivoted Cholesky + /// decomposition of the Coulomb matrix of the product functions for each + /// angular momentum L as described in J. Chem. Theory Comput. 2021, 17, + /// 6886−6900. + const BasisSet reduced_basis() { return BasisSet(reduced_shells()); } + + private: + double cholesky_threshold_; + std::vector candidate_shells_; // full set of product functions + std::vector reduced_shells_; // reduced set of product functions + bool reduced_shells_computed_ = false; +}; + +} // namespace libint2 + +#endif /* _libint2_src_lib_libint_dfbs_generator_h_ */ diff --git a/include/libint2/pivoted_cholesky.h b/include/libint2/pivoted_cholesky.h new file mode 100644 index 000000000..db88d60f9 --- /dev/null +++ b/include/libint2/pivoted_cholesky.h @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2004-2021 Edward F. Valeev + * + * This file is part of Libint. + * + * Libint is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Libint. If not, see . + * + */ + +#ifndef _libint2_src_lib_libint_pivoted_cholesky_h_ +#define _libint2_src_lib_libint_pivoted_cholesky_h_ + +#include +#include +#include + +namespace libint2 { + +/// @brief computes the pivoted Cholesky decomposition of a symmetric positive +/// definite matrix +/// @param A symmetric positive definite matrix +/// @param tolerance tolerance for the error +/// @param pivot initial pivot indices +/// @return pivoted Cholesky decomposition of A +inline std::vector pivoted_cholesky(const Eigen::MatrixXd &A, + const double tolerance, + const std::vector &pivot) { + // number of elements in A + const auto n = A.rows(); + // diagonal elements of A + std::vector d(n); + // initial error + auto error = A.diagonal()[0]; + for (size_t i = 0; i < n; ++i) { + d[i] = A.diagonal()[i]; + error = std::max(d[i], error); + } + + // Return matrix + Eigen::MatrixXd L(n, n); + + // loop index + size_t m = 0; + + // copy input pivot indices + std::vector piv; + piv.reserve(n); + for (size_t i = 0; i < n; ++i) { + piv.push_back(pivot[i]); + } + + while (error > tolerance && m < n) { + // update pivot indices + // Errors in pivoted order + std::vector err(d.size()); + for (size_t i = 0; i < d.size(); ++i) { + err[i] = d[piv[i]]; + } + // error vector after mth element + std::vector err2(err.begin() + m, err.end()); + std::vector idx(err2.size()); + for (size_t i = 0; i < idx.size(); ++i) { + idx[i] = i; + } + // sort indices + std::sort(idx.begin(), idx.end(), + [&err2](size_t i1, size_t i2) { return err2[i1] > err2[i2]; }); + // subvector of piv + std::vector piv_subvec(piv.size() - m); + for (size_t i = 0; i < piv_subvec.size(); ++i) { + piv_subvec[i] = piv[i + m]; + } + // sort piv + for (size_t i = 0; i < idx.size(); ++i) { + piv[i + m] = piv_subvec[idx[i]]; + } + + // TODO: find a better way to update pivot indices + + // current pivot index + size_t pim = piv[m]; + // compute diagonal element + L(m, pim) = std::sqrt(d[pim]); + + // off-diagonal elements + for (size_t i = m + 1; i < n; ++i) { + auto pii = piv[i]; + // compute element + L(m, pii) = + (m > 0) ? (A(pim, pii) - L.col(pim).head(m).dot(L.col(pii).head(m))) / + L(m, pim) + : (A(pim, pii)) / L(m, pim); + // update d + d[pii] -= L(m, pii) * L(m, pii); + } + // update error + if (m + 1 < n) { + error = d[piv[m + 1]]; + for (size_t i = m + 1; i < n; ++i) { + error = std::max(d[piv[i]], error); + } + } + // increase m + m++; + } + // Transpose to get Cholesky vectors as columns + L.transposeInPlace(); + // Drop unnecessary columns + L.conservativeResize(n, m); + + // return reduced pivot indices + std::vector reduced_piv; + reduced_piv.reserve(m); + for (size_t i = 0; i < m; ++i) { + reduced_piv.push_back(piv[i]); + } + + return reduced_piv; +} + +} // namespace libint2 + +#endif //_libint2_src_lib_libint_pivoted_cholesky_h_ diff --git a/include/libint2/shell.h b/include/libint2/shell.h index 44c705b82..33422a143 100644 --- a/include/libint2/shell.h +++ b/include/libint2/shell.h @@ -33,12 +33,14 @@ #include #include #include +#include #include namespace libint2 { namespace math { -/// fac[k] = k! + +/// @brief fac[k] = k!, `0<=k<=20` static constexpr std::array fac = {{1LL, 1LL, 2LL, @@ -60,8 +62,224 @@ static constexpr std::array fac = {{1LL, 6402373705728000LL, 121645100408832000LL, 2432902008176640000LL}}; -/// df_Kminus1[k] = (k-1)!! -static constexpr std::array df_Kminus1 = {{1LL, + +/// @brief fac_ld[k] = static_cast(k!), `0<=k<=170` +/// @note This is an inexact representation for most values, so should only be +/// used when `fac` does not +/// suffice, i.e. for `k>=21`. +/// @note This should be sufficient for computing with `double` reals +static constexpr std::array fac_ld = { + {1.l, + 1.l, + 2.l, + 6.l, + 24.l, + 120.l, + 720.l, + 5040.l, + 40320.l, + 362880.l, + 3.6288e6l, + 3.99168e7l, + 4.790016e8l, + 6.2270208e9l, + 8.71782912e10l, + 1.307674368e12l, + 2.0922789888e13l, + 3.55687428096e14l, + 6.402373705728e15l, + 1.21645100408832e17l, + 2.43290200817664e18l, + 5.109094217170944e19l, + 1.12400072777760768e21l, + 2.585201673888497664e22l, + 6.2044840173323943936e23l, + 1.5511210043330985984e25l, + 4.03291461126605635584e26l, + 1.0888869450418352160768e28l, + 3.04888344611713860501504e29l, + 8.841761993739701954543616e30l, + 2.6525285981219105863630848e32l, + 8.22283865417792281772556288e33l, + 2.6313083693369353016721801216e35l, + 8.68331761881188649551819440128e36l, + 2.9523279903960414084761860964352e38l, + 1.03331479663861449296666513375232e40l, + 3.719933267899012174679994481508352e41l, + 1.37637530912263450463159795815809024e43l, + 5.23022617466601111760007224100074291e44l, + 2.03978820811974433586402817399028974e46l, + 8.15915283247897734345611269596115894e47l, + 3.34525266131638071081700620534407517e49l, + 1.40500611775287989854314260624451157e51l, + 6.04152630633738356373551320685139975e52l, + 2.65827157478844876804362581101461589e54l, + 1.19622220865480194561963161495657715e56l, + 5.50262215981208894985030542880025489e57l, + 2.5862324151116818064296435515361198e59l, + 1.2413915592536072670862289047373375e61l, + 6.08281864034267560872252163321295377e62l, + 3.04140932017133780436126081660647688e64l, + 1.55111875328738228022424301646930321e66l, + 8.0658175170943878571660636856403767e67l, + 4.27488328406002556429801375338939965e69l, + 2.30843697339241380472092742683027581e71l, + 1.2696403353658275925965100847566517e73l, + 7.1099858780486345185404564746372495e74l, + 4.05269195048772167556806019054323221e76l, + 2.35056133128287857182947491051507468e78l, + 1.38683118545689835737939019720389406e80l, + 8.32098711274139014427634118322336438e81l, + 5.07580213877224798800856812176625227e83l, + 3.14699732603879375256531223549507641e85l, + 1.98260831540444006411614670836189814e87l, + 1.26886932185884164103433389335161481e89l, + 8.24765059208247066672317030678549625e90l, + 5.44344939077443064003729240247842753e92l, + 3.64711109181886852882498590966054644e94l, + 2.48003554243683059960099041856917158e96l, + 1.71122452428141311372468338881272839e98l, + 1.19785716699698917960727837216890987e100l, + 8.5047858856786231752116764423992601e101l, + 6.12344583768860868615240703852746727e103l, + 4.47011546151268434089125713812505111e105l, + 3.30788544151938641225953028221253782e107l, + 2.48091408113953980919464771165940337e109l, + 1.88549470166605025498793226086114656e111l, + 1.45183092028285869634070784086308285e113l, + 1.13242811782062978314575211587320462e115l, + 8.94618213078297528685144171539831652e116l, + 7.15694570462638022948115337231865322e118l, + 5.79712602074736798587973423157810911e120l, + 4.75364333701284174842138206989404947e122l, + 3.94552396972065865118974711801206106e124l, + 3.31424013456535326699938757913013129e126l, + 2.81710411438055027694947944226061159e128l, + 2.42270953836727323817655232034412597e130l, + 2.1077572983795277172136005186993896e132l, + 1.85482642257398439114796845645546284e134l, + 1.65079551609084610812169192624536193e136l, + 1.48571596448176149730952273362082574e138l, + 1.35200152767840296255166568759495142e140l, + 1.24384140546413072554753243258735531e142l, + 1.15677250708164157475920516230624044e144l, + 1.08736615665674308027365285256786601e146l, + 1.03299784882390592625997020993947271e148l, + 9.91677934870949689209571401541893801e149l, + 9.61927596824821198533284259495636987e151l, + 9.42689044888324774562618574305724247e153l, + 9.33262154439441526816992388562667005e155l, + 9.33262154439441526816992388562667005e157l, + 9.42594775983835942085162312448293675e159l, + 9.61446671503512660926865558697259548e161l, + 9.90290071648618040754671525458177335e163l, + 1.02990167451456276238485838647650443e166l, + 1.08139675824029090050410130580032965e168l, + 1.14628056373470835453434738414834943e170l, + 1.22652020319613793935175170103873389e172l, + 1.3246418194518289744998918371218326e174l, + 1.44385958320249358220488210246279753e176l, + 1.58824554152274294042537031270907729e178l, + 1.76295255109024466387216104710707579e180l, + 1.97450685722107402353682037275992488e182l, + 2.23119274865981364659660702121871512e184l, + 2.54355973347218755712013200418933523e186l, + 2.92509369349301569068815180481773552e188l, + 3.3931086844518982011982560935885732e190l, + 3.96993716080872089540195962949863065e192l, + 4.68452584975429065657431236280838416e194l, + 5.57458576120760588132343171174197716e196l, + 6.68950291344912705758811805409037259e198l, + 8.09429852527344373968162284544935083e200l, + 9.87504420083360136241157987144820801e202l, + 1.21463043670253296757662432418812959e205l, + 1.50614174151114087979501416199328069e207l, + 1.88267717688892609974376770249160086e209l, + 2.37217324288004688567714730513941708e211l, + 3.01266001845765954480997707752705969e213l, + 3.85620482362580421735677065923463641e215l, + 4.97450422247728744039023415041268096e217l, + 6.46685548922047367250730439553648525e219l, + 8.47158069087882051098456875815279568e221l, + 1.11824865119600430744996307607616903e224l, + 1.48727070609068572890845089118130481e226l, + 1.99294274616151887673732419418294845e228l, + 2.6904727073180504835953876621469804e230l, + 3.65904288195254865768972722051989335e232l, + 5.01288874827499166103492629211225388e234l, + 6.91778647261948849222819828311491036e236l, + 9.6157231969410890041971956135297254e238l, + 1.34620124757175246058760738589416156e241l, + 1.89814375907617096942852641411076779e243l, + 2.69536413788816277658850750803729027e245l, + 3.85437071718007277052156573649332508e247l, + 5.55029383273930478955105466055038812e249l, + 8.04792605747199194484902925779806277e251l, + 1.17499720439091082394795827163851716e254l, + 1.72724589045463891120349865930862023e256l, + 2.55632391787286558858117801577675794e258l, + 3.80892263763056972698595524350736934e260l, + 5.713383956445854590478932865261054e262l, + 8.62720977423324043162318862654419154e264l, + 1.31133588568345254560672467123471711e267l, + 2.00634390509568239477828874698911719e269l, + 3.08976961384735088795856467036324047e271l, + 4.78914290146339387633577523906302272e273l, + 7.47106292628289444708380937293831545e275l, + 1.17295687942641442819215807155131553e278l, + 1.85327186949373479654360975305107853e280l, + 2.94670227249503832650433950735121486e282l, + 4.71472363599206132240694321176194378e284l, + 7.59070505394721872907517857093672949e286l, + 1.22969421873944943411017892849175018e289l, + 2.00440157654530257759959165344155279e291l, + 3.28721858553429622726333031164414657e293l, + 5.42391066613158877498449501421284184e295l, + 9.00369170577843736647426172359331746e297l, + 1.50361651486499904020120170784008402e300l, + 2.52607574497319838753801886917134115e302l, + 4.26906800900470527493925188889956654e304l, + 7.25741561530799896739672821112926311e306l}}; + +/// @brief computes factorial as an integer + +/// @param k non-negative integer +/// @pre `k::value>::type> +constexpr int64_t fac_int(Int k) { +#if LIBINT2_CPLUSPLUS_STD >= 2014 + assert(!std::is_signed::value || k >= 0); + assert(k < fac.size()); +#endif + return fac[k]; +} + +/// @brief computes factorial as a real number + +/// @param k non-negative integer +/// @pre `k::value>::type> +constexpr Real fac_real(Int k) { +#if LIBINT2_CPLUSPLUS_STD >= 2014 + assert(!std::is_signed::value || k >= 0); + assert(k < fac.size() || k < fac_ld.size()); + // for extended-precision types should do better ... + assert(k < fac.size() || (std::is_same::value || + std::is_same::value)); +#endif + + // use exact representation when available + return (k < fac.size()) ? static_cast(fac[k]) + : static_cast(fac_ld[k]); +} + +/// `df_Kminus1[k] = (k-1)!!`, `0<=k<=34` +static constexpr std::array df_Kminus1 = {{1LL, 1LL, 1LL, 2LL, @@ -91,15 +309,391 @@ static constexpr std::array df_Kminus1 = {{1LL, 51011754393600LL, 213458046676875LL, 1428329123020800LL, - 6190283353629375LL}}; -/// bc(i,j) = binomial coefficient, i! / (j! (i-j)!) -template -int64_t bc(Int i, Int j) { - assert(i < Int(fac.size())); - assert(j < Int(fac.size())); + 6190283353629375LL, + 42849873690624000LL, + 191898783962510625LL, + 1371195958099968000LL, + 6332659870762850625LL}}; + +/// @brief fac_ld[k] = static_cast(k!), `0<=k<=170` +/// @note This is an inexact representation for most values, so should only be +/// used when `fac` does not +/// suffice, i.e. for `k>=21`. +/// @note This should be sufficient for computing with `double` reals +static constexpr std::array df_Kminus1_ld = { + {1.l, + 1.l, + 1.l, + 2.l, + 3.l, + 8.l, + 15.l, + 48.l, + 105.l, + 384.l, + 945.l, + 3840.l, + 10395.l, + 46080.l, + 135135.l, + 645120.l, + 2.027025e6l, + 1.032192e7l, + 3.4459425e7l, + 1.8579456e8l, + 6.54729075e8l, + 3.7158912e9l, + 1.3749310575e10l, + 8.17496064e10l, + 3.16234143225e11l, + 1.9619905536e12l, + 7.905853580625e12l, + 5.10117543936e13l, + 2.13458046676875e14l, + 1.4283291230208e15l, + 6.190283353629375e15l, + 4.2849873690624e16l, + 1.91898783962510625e17l, + 1.371195958099968e18l, + 6.332659870762850625e18l, + 4.6620662575398912e19l, + 2.21643095476699771875e20l, + 1.678343852714360832e21l, + 8.200794532637891559375e21l, + 6.3777066403145711616e22l, + 3.19830986772877770815625e23l, + 2.55108265612582846464e24l, + 1.3113070457687988603440625e25l, + 1.0714547155728479551488e26l, + 5.63862029680583509947946875e26l, + 4.71440074852053100265472e27l, + 2.5373791335626257947657609375e28l, + 2.1686243443194442612211712e29l, + 1.192568192774434123539907640625e30l, + 1.040939685273333245386162176e31l, + 5.8435841445947272053455474390625e31l, + 5.20469842636666622693081088e32l, + 2.980227913743310874726229193921875e33l, + 2.7064431817106664380040216576e34l, + 1.57952079428395476360490147277859375e35l, + 1.461479318123759876522171695104e36l, + 8.68736436856175119982695810028226562e36l, + 8.1842841814930553085241614925824e37l, + 4.95179769008019818390136611716089141e38l, + 4.746884825265972078944013665697792e39l, + 2.92156063714731692850180600912492593e40l, + 2.8481308951595832473664081994186752e41l, + 1.78215198865986332638610166556620482e42l, + 1.76584115499894161336717308363957862e43l, + 1.12275575285571389562324404930670903e44l, + 1.13013833919932263255499077352933032e45l, + 7.29791239356214032155108632049360873e45l, + 7.45891303871552937486293910529358011e46l, + 4.88960130368663401543922783473071785e47l, + 5.07206086632655997490679859159963447e48l, + 3.37382489954377747065306720596419531e49l, + 3.55044260642859198243475901411974413e50l, + 2.39541567867608200416367771623457867e51l, + 2.55631867662858622735302649016621577e52l, + 1.74865344543353986303948473285124243e53l, + 1.89167582070515380824123960272299967e54l, + 1.31149008407515489727961354963843182e55l, + 1.43767362373591689426334209806947975e56l, + 1.0098473647378692709053024332215925e57l, + 1.12138542651401517752540683649419421e58l, + 7.97779418142916724015188922245058078e58l, + 8.97108341211212142020325469195355365e59l, + 6.46201328695762546452303027018497043e60l, + 7.35628839793193956456666884740191399e61l, + 5.36347102817482913555411512425352546e62l, + 6.17928225426282923423600183181760775e63l, + 4.55895037394860476522099785561549664e64l, + 5.31418273866603314144296157536314267e65l, + 3.96628682533528614574226813438548208e66l, + 4.67648081002610916446980618631956555e67l, + 3.52999527454840466971061863960307905e68l, + 4.20883272902349824802282556768760899e69l, + 3.21229569983904824943666296203880193e70l, + 3.87212611070161838818099952227260027e71l, + 2.9874350008503148719760965546960858e72l, + 3.63979854405952128489013955093624426e73l, + 2.83806325080779912837729172696128151e74l, + 3.49420660229714043349453396889879449e75l, + 2.75292135328356515452597297515244306e76l, + 3.4243224702511976248246432895208186e77l, + 2.72539213975072950298071324540091863e78l, + 3.4243224702511976248246432895208186e79l, + 2.75264606114823679801052037785492782e80l, + 3.49280891965622157732113615531123497e81l, + 2.83522544298268390195083598919057565e82l, + 3.63252127644247044041398160152368437e83l, + 2.97698671513181809704837778865010444e84l, + 3.85047255302901866683882049761510543e85l, + 3.18537578519104536384176423385561175e86l, + 4.15851035727134016018592613742431386e87l, + 3.4720596058582394465875230149026168e88l, + 4.57436139299847417620451875116674525e89l, + 3.85398616250264578571215054654190465e90l, + 5.12328476015829107734906100130675468e91l, + 4.35500436362798973785473011759235226e92l, + 5.84054462658045182817792954148970034e93l, + 5.0082550181721881985329396352312051e94l, + 6.77503176683332412068639826812805239e95l, + 5.85965837126146019228353937322050996e96l, + 7.99453748486332246240994995639110182e97l, + 6.97299346180113762881741185413240686e98l, + 9.59344498183598695489193994766932219e99l, + 8.4373220887793765308690683435002123e100l, + 1.17040028778399040849681667361565731e102l, + 1.03779061691986331329689540625052611e103l, + 1.45129635685214810653605267528341506e104l, + 1.29723827114982914162111925781315764e105l, + 1.82863340963370661423542637085710298e106l, + 1.6474926043602830098588214574227102e107l, + 2.34065076433114446622134575469709181e108l, + 2.12526545962476508271787968007529616e109l, + 3.04284599363048780608774948110621935e110l, + 2.78409775210844225836042238089863797e111l, + 4.01655671159224390403582931506020954e112l, + 3.7028500103042282036193617665951885e113l, + 5.38218599353360683140801128218068079e114l, + 4.99884751391070807488613838490350448e115l, + 7.31977295120570529071489534376572587e116l, + 6.84842109405767006259400958731780114e117l, + 1.01012866726638733011865555743967017e119l, + 9.51930532074016138700567332637174358e119l, + 1.41418013417294226216611778041553824e121l, + 1.34222205022436275556779993901841585e122l, + 2.0081357905255780122758872481900643e123l, + 1.91937753182083874046195391279633466e124l, + 2.89171553835683233767727763739369259e125l, + 2.78309742114021617366983317355468525e126l, + 4.22190468600097521300882535059479118e127l, + 4.09115320907611777529465476512538732e128l, + 6.24841893528144331525306151888029095e129l, + 6.09581828152341548518903560003682711e130l, + 9.37262840292216497287959227832043642e131l, + 9.20468560510035738263544375605560894e132l, + 1.42463951724416907587769802630470634e134l, + 1.40831689758035467954322289467650817e135l, + 2.19394485655602037685165496050924776e136l, + 2.18289119124954975329199548674858766e137l, + 3.4225539762273917878885817383944265e138l, + 3.42713917026179311266843291419528263e139l, + 5.40763528243927902486395914666319387e140l, + 5.44915128071625104914280833357049938e141l, + 8.6522164519028464397823346346611102e142l, + 8.773133561953164189119921417048504e143l, + 1.40165906520826112324473821081509985e145l, + 1.43002077059836576282654719097890615e146l, + 2.29872086694154824212137066573676376e147l, + 2.35953427148730350866380286511519515e148l, + 3.81587663912297008192147530512302784e149l, + 3.9404222333837968594685507847423759e150l, + 6.41067275372658973762807851260668677e151l, + 6.65931357441861669250185082621461527e152l, + 1.08981436813352025539677334714313675e154l, + 1.13874262122558345441781649128269921e155l, + 1.87448071318965483928245015708619521e156l, + 1.97002473472025937614282252991906964e157l, + 3.26159644094999942035146327332997967e158l, + 3.44754328576045390824993942735837186e159l, + 5.74040973607199897981857536106076421e160l, + 6.1021516157960034176023927864243182e161l, + 1.02179293302081581840770641426881603e163l, + 1.09228513922748461175082830876995296e164l, + 1.83922727943746847313387154568386885e165l, + 1.97703610200174714726899923887361485e166l, + 3.34739364857619262110364621314464131e167l, + 3.61797606666319727950226860713871518e168l, + 6.15920431338019442283070903218614002e169l, + 6.69325572332691496707919692320662308e170l, + 1.14561200228871616264651187998662204e172l, + 1.25163882026213309884380982463963852e173l, + 2.15375056430278638577544233437484944e174l, + 2.3655973702954315568148005685689168e175l, + 4.09212607217529413297334043531221394e176l, + 4.51829097726427427351626908596663108e177l, + 7.85688205857656473530881363579945076e178l, + 8.72030158612004934788639933591559799e179l, + 1.52423511936385355864990984534509345e181l, + 1.70045880929340962283784787050354161e182l, + 2.98750083395315297495382329687638316e183l, + 3.34990385430801695699056030489197697e184l, + 5.91525165122724289040857012781523865e185l, + 6.66630867007295374441121500673503416e186l, + 1.18305033024544857808171402556304773e188l, + 1.33992804268466370262665421635374187e189l, + 2.38976166709580612772506233163735642e190l, + 2.72005392664986731633210805919809599e191l, + 4.87511380087544450055912715654020709e192l, + 5.57611054963222799848082152135609678e193l, + 1.00427344298034156711518019424728266e195l, + 1.15425488377387119568553005492071203e196l, + 2.08888876139911045959957480403434793e197l, + 2.41239270708739079898275781478428815e198l, + 4.38666639893813196515910708847213066e199l, + 5.090148611954394585853618989194848e200l, + 9.299732765748839766137307027560917e201l, + 1.08420165434628604678682084469850262e203l, + 1.99014281187025170995338370389803624e204l, + 2.33103355684451500059166481610178064e205l, + 4.29870847363974369349930880041975827e206l, + 5.05834281835259755128391265094086399e207l, + 9.37118447253464125182849318491507304e208l, + 1.10777707721921886373117687055604921e210l, + 2.06166058395762107540226850068131607e211l, + 2.44818734065447368884590088392886876e212l, + 4.57688649638591878739303607151252167e213l, + 5.45945776965947632612635897116137734e214l, + 1.02522257519044580837604008001880485e216l, + 1.2283779981733821733784307685113099e217l, + 2.31700301993040752692985058084249897e218l, + 2.78841805585357753356903784452067348e219l, + 5.28276688544132916140005932432089765e220l, + 6.38547734790469255187309666395234226e221l, + 1.21503638365150570712201364459380646e223l, + 1.47504526736598397948268532937299106e224l, + 2.81888441007149324052307165545763099e225l, + 3.43685547296274267219465681743906917e226l, + 6.59618951956729418282398767377085651e227l, + 8.07661036146244527965744352098181256e228l, + 1.55670072661788142714646109100992214e230l, + 1.91415665566659953127881411447268958e231l, + 3.70494772935055779660857739660361469e232l, + 4.57483440704317287975636573358972809e233l, + 8.89187455044133871186058575184867524e234l, + 1.10253509209740466402128414179512447e236l, + 2.15183364120680396827026175194737941e237l, + 2.67916027379669333357172046456215246e238l, + 5.25047408454460168257943867475160576e239l, + 6.56394267080189866725071513817727353e240l, + 1.29161662479797201391454191398889502e242l, + 1.62129383968806897081092663912978656e243l, + 3.20320922949897059450806394669245964e244l, + 4.03702166082329173731920733143316854e245l, + 8.0080230737474264862701598667311491e246l, + 1.0132924368666462260671210401897253e248l, + 2.01802181458435147454008028641624957e249l, + 2.56362986527261495194981623168000502e250l, + 5.12577540904425274533180392749727392e251l, + 6.53725615644516812747203139078401279e252l, + 1.31219850471532870280494180543930212e254l, + 1.68007483220640820876031206743149129e255l, + 3.38547214216554805323674985803339948e256l, + 4.35139381541459726068920825464756243e257l, + 8.80222756963042493841554963088683864e258l, + 1.1357137858232098850398833544630138e260l, + 2.30618362324317133386487400329235172e261l, + 2.98692725671504199765489322223772628e262l, + 6.08832476536197232140326736869180855e263l, + 7.91535723029486129378546703892997465e264l, + 1.61949438758628463749326912007202107e266l, + 2.11340038048872796544071969939430323e267l, + 4.34024495873124282848196124179301648e268l, + 5.68504702351467822703553599137067569e269l, + 1.17186613885743556369012953528411445e271l, + 1.54064774337247779952663025366145311e272l, + 3.1874758976922247332371523359727913e273l, + 4.205968339406864392707700592495767e274l, + 8.73368395967669576906979740056544817e275l, + 1.15664129333688770799461766293633592e277l, + 2.41049677287076803226326408255606369e278l, + 3.20389638254317895114509092633365051e279l, + 6.70118102858073512969187414950585707e280l, + 8.93887090729546927369480368447088492e281l, + 1.87633068800260583631372476186163998e283l, + 2.51182272495002686590823983533631866e284l, + 5.29125254016734845840470382844982474e285l, + 7.10845831160857603052031873400178182e286l, + 1.50271572140752696218693588727975023e288l, + 2.02591061880844416869829083919050782e289l, + 4.29776696322552711185463663762008565e290l, + 5.81436347598023476416409470847675744e291l, + 1.23775688540895180821413535163458467e293l, + 1.6803510445582878468434233707497829e294l, + 3.58949496768596024382099251974029553e295l, + 4.88982153966461763431436200888186824e296l, + 1.0481325305643003911957298157641663e298l, + 1.43271771112173296685410806860238739e299l, + 3.08150963985904315011544565834664891e300l, + 4.22651724780911225221961880237704281e301l, + 9.12126853398276772434171914870608078e302l, + 1.25527562259930633890922678430598171e304l, + 2.71813802312686478185383230631441207e305l, + 3.75327411157192595333858808507488533e306l, + 8.15441406938059434556149691894323621e307l}}; + +/// @brief computes double factorial as an integer + +/// @param k non-negative integer +/// @pre `k::value>::type> +constexpr int64_t df_Kminus1_int(Int k) { +#if LIBINT2_CPLUSPLUS_STD >= 2014 + assert(!std::is_signed::value || k >= 0); + assert(k < df_Kminus1.size()); +#endif + return df_Kminus1[k]; +} + +/// @brief computes double factorial as a real number + +/// @param k non-negative integer +/// @pre `k::value>::type> +constexpr Real df_Kminus1_real(Int k) { +#if LIBINT2_CPLUSPLUS_STD >= 2014 + assert(!std::is_signed::value || k >= 0); + assert(k < df_Kminus1.size() || k < df_Kminus1_ld.size()); + // for extended-precision types should do better ... + assert(k < df_Kminus1.size() || (std::is_same::value || + std::is_same::value)); +#endif + // use exact representation when available + return (k < df_Kminus1.size()) ? static_cast(df_Kminus1[k]) + : static_cast(df_Kminus1_ld[k]); +} + +/// @brief computes binomial coefficient as an integer + +/// @param i non-negative integer +/// @param j non-negative integer +/// @pre `i>=j` +/// return `i! / (j! * (i-j)!)` +template ::value>::type> +constexpr int64_t bc_int(Int i, Int j) { +#if LIBINT2_CPLUSPLUS_STD >= 2014 assert(i >= j); - return fac[i] / (fac[j] * fac[i - j]); +#endif + return fac_int(i) / (fac_int(j) * fac_int(i - j)); } + +/// @brief computes binomial coefficient as a real number + +/// @param i non-negative integer +/// @param j non-negative integer +/// @pre `i>=j` +/// @return binomial coefficient, (\p i , \p j), uses exact representation when +/// possible +template < + typename Real, typename Int, + typename = typename std::enable_if::value>::type> +constexpr Real bc_real(Int i, Int j) { +#if LIBINT2_CPLUSPLUS_STD >= 2014 + assert(!std::is_signed::value || (i >= 0 && j >= 0)); + assert(i >= j); +#endif + return fac_real(i) / (fac_real(j) * fac_real(i - j)); +} + } // namespace math /// generally-contracted Solid-Harmonic/Cartesion Gaussian Shell @@ -319,15 +913,14 @@ struct Shell { const auto alpha = this->alpha.at(p); assert(alpha >= 0.0); const auto l = contr.at(c).l; - assert(l <= 15); // due to df_Kminus1[] a 64-bit integer type; kinda - // ridiculous restriction anyway - using libint2::math::df_Kminus1; + using libint2::math::df_Kminus1_real; const auto sqrt_Pi_cubed = real_t{5.56832799683170784528481798212}; const auto two_alpha = 2 * alpha; const auto two_alpha_to_am32 = pow(two_alpha, l + 1) * sqrt(two_alpha); - const auto one_over_N = sqrt((sqrt_Pi_cubed * df_Kminus1[2 * l]) / - (pow(2, l) * two_alpha_to_am32)); + const auto one_over_N = + sqrt((sqrt_Pi_cubed * df_Kminus1_real(2 * l)) / + (pow(2, l) * two_alpha_to_am32)); return contr.at(c).coeff[p] * one_over_N; } @@ -360,13 +953,11 @@ struct Shell { /// before computing integrals. \warning Must be done only once. \note this is /// now private void renorm() { - using libint2::math::df_Kminus1; + using libint2::math::df_Kminus1_real; using std::pow; const auto sqrt_Pi_cubed = real_t{5.56832799683170784528481798212}; const auto np = nprim(); for (auto& c : contr) { - assert(c.l <= 15); // due to df_Kminus1[] a 64-bit integer type; kinda - // ridiculous restriction anyway for (auto p = 0ul; p != np; ++p) { assert(alpha[p] >= 0); if (alpha[p] != 0) { @@ -375,7 +966,7 @@ struct Shell { pow(two_alpha, c.l + 1) * sqrt(two_alpha); const auto normalization_factor = sqrt(pow(2, c.l) * two_alpha_to_am32 / - (sqrt_Pi_cubed * df_Kminus1[2 * c.l])); + (sqrt_Pi_cubed * df_Kminus1_real(2 * c.l))); c.coeff[p] *= normalization_factor; } @@ -389,8 +980,8 @@ struct Shell { for (auto p = 0ul; p != np; ++p) { for (decltype(p) q = 0ul; q <= p; ++q) { auto gamma = alpha[p] + alpha[q]; - norm += (p == q ? 1 : 2) * df_Kminus1[2 * c.l] * sqrt_Pi_cubed * - c.coeff[p] * c.coeff[q] / + norm += (p == q ? 1 : 2) * df_Kminus1_real(2 * c.l) * + sqrt_Pi_cubed * c.coeff[p] * c.coeff[q] / (pow(2, c.l) * pow(gamma, c.l + 1) * sqrt(gamma)); } } @@ -621,7 +1212,7 @@ struct ShellPair { std::abs(P[2] - B[2])), max_l2); const auto fac_l1_fac_l2_oogamma_to_l = - math::fac[max_l1] * math::fac[max_l2] * + math::fac_real(max_l1) * math::fac_real(max_l2) * std::pow(oogamma, max_l1 + max_l2); nonspherical_scr_factor = std::max(maxabs_PA_i_to_l1 * maxabs_PB_i_to_l2, diff --git a/include/libint2/soad_fock.h b/include/libint2/soad_fock.h new file mode 100644 index 000000000..f3796e539 --- /dev/null +++ b/include/libint2/soad_fock.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2004-2024 Edward F. Valeev + * + * This file is part of Libint. + * + * Libint is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Libint. If not, see . + * + */ + +#ifndef _libint2_src_lib_libint_soad_fock_h_ +#define _libint2_src_lib_libint_soad_fock_h_ + +#include +#include +#include + +#include + +namespace libint2 { + +/// @brief computes Superposition-Of-Atomic-Densities guess for the molecular +/// 1-RDM + +/// The atomic densities are represented by the STO-3G AOs; +/// occupies subshells by smearing electrons (of neutral ground-state atoms) +/// evenly over the subshell components +/// @param[in] atoms list of atoms +/// @return the 1-RDM in the STO-3G AO basis +/// @return the 1-RDM, normalized to the # of electrons/2 +Eigen::MatrixXd compute_soad(const std::vector &atoms) { + // compute number of atomic orbitals + size_t nao = 0; + for (const auto &atom : atoms) { + const auto Z = atom.atomic_number; + nao += libint2::sto3g_num_ao(Z); + } + + // compute the minimal basis density + Eigen::MatrixXd D = Eigen::MatrixXd::Zero(nao, nao); + size_t ao_offset = 0; // first AO of this atom + for (const auto &atom : atoms) { + const auto Z = atom.atomic_number; + const auto &occvec = libint2::sto3g_ao_occupation_vector(Z); + for (const auto &occ : occvec) { + D(ao_offset, ao_offset) = occ; + ++ao_offset; + } + } + + return D * 0.5; // we use densities normalized to # of electrons/2 +} + +Eigen::MatrixXd compute_2body_fock_general(const BasisSet &obs, + const Eigen::MatrixXd &D, + const BasisSet &D_bs, + bool D_is_shelldiagonal, + double precision) { + const auto n = obs.nbf(); + const auto nshells = obs.size(); + const auto n_D = D_bs.nbf(); + assert(D.cols() == D.rows() && D.cols() == n_D); + + Eigen::MatrixXd G = Eigen::MatrixXd ::Zero(n, n); + + // construct the 2-electron repulsion integrals engine + libint2::Engine engine(libint2::Operator::coulomb, + std::max(obs.max_nprim(), D_bs.max_nprim()), + std::max(obs.max_l(), D_bs.max_l()), 0); + engine.set_precision(precision); + auto shell2bf = obs.shell2bf(); + auto shell2bf_D = D_bs.shell2bf(); + + const auto &buf = engine.results(); + + // loop over permutationally-unique set of shells + for (auto s1 = 0l, s1234 = 0l; s1 != nshells; ++s1) { + auto bf1_first = shell2bf[s1]; // first basis function in this shell + auto n1 = obs[s1].size(); // number of basis functions in this shell + + for (auto s2 = 0; s2 <= s1; ++s2) { + auto bf2_first = shell2bf[s2]; + auto n2 = obs[s2].size(); + + for (auto s3 = 0; s3 < D_bs.size(); ++s3) { + auto bf3_first = shell2bf_D[s3]; + auto n3 = D_bs[s3].size(); + + auto s4_begin = D_is_shelldiagonal ? s3 : 0; + auto s4_fence = D_is_shelldiagonal ? s3 + 1 : D_bs.size(); + + for (auto s4 = s4_begin; s4 != s4_fence; ++s4, ++s1234) { + auto bf4_first = shell2bf_D[s4]; + auto n4 = D_bs[s4].size(); + + // compute the permutational degeneracy (i.e. # of equivalents) of + // the given shell set + auto s12_deg = (s1 == s2) ? 1.0 : 2.0; + + if (s3 >= s4) { + auto s34_deg = (s3 == s4) ? 1.0 : 2.0; + auto s1234_deg = s12_deg * s34_deg; + // auto s1234_deg = s12_deg; + engine.compute2( + obs[s1], obs[s2], D_bs[s3], D_bs[s4]); + const auto *buf_1234 = buf[0]; + if (buf_1234 != nullptr) { + for (auto f1 = 0, f1234 = 0; f1 != n1; ++f1) { + const auto bf1 = f1 + bf1_first; + for (auto f2 = 0; f2 != n2; ++f2) { + const auto bf2 = f2 + bf2_first; + for (auto f3 = 0; f3 != n3; ++f3) { + const auto bf3 = f3 + bf3_first; + for (auto f4 = 0; f4 != n4; ++f4, ++f1234) { + const auto bf4 = f4 + bf4_first; + + const auto value = buf_1234[f1234]; + const auto value_scal_by_deg = value * s1234_deg; + G(bf1, bf2) += 2.0 * D(bf3, bf4) * value_scal_by_deg; + } + } + } + } + } + } + + engine.compute2( + obs[s1], D_bs[s3], obs[s2], D_bs[s4]); + const auto *buf_1324 = buf[0]; + if (buf_1324 == nullptr) + continue; // if all integrals screened out, skip to next quartet + + for (auto f1 = 0, f1324 = 0; f1 != n1; ++f1) { + const auto bf1 = f1 + bf1_first; + for (auto f3 = 0; f3 != n3; ++f3) { + const auto bf3 = f3 + bf3_first; + for (auto f2 = 0; f2 != n2; ++f2) { + const auto bf2 = f2 + bf2_first; + for (auto f4 = 0; f4 != n4; ++f4, ++f1324) { + const auto bf4 = f4 + bf4_first; + + const auto value = buf_1324[f1324]; + const auto value_scal_by_deg = value * s12_deg; + G(bf1, bf2) -= D(bf3, bf4) * value_scal_by_deg; + } + } + } + } + } + } + } + } + + // symmetrize the result and return + return 0.5 * (G + G.transpose()); +} + +Eigen::MatrixXd compute_soad_fock(const BasisSet &obs, const BasisSet &minbs, + const std::vector &atoms) { + // use SOAD as guess for the density matrix + auto D = compute_soad(atoms); + // compute fock with guess density + auto F = compute_2body_fock_general( + obs, D, minbs, true /* SOAD_D_is_shelldiagonal */, + std::numeric_limits::epsilon() // this is cheap, no reason + // to be cheaper + ); + return F; +} + +} // namespace libint2 + +#endif //_libint2_src_lib_libint_soad_fock_h_ diff --git a/include/libint2/solidharmonics.h b/include/libint2/solidharmonics.h index f97dc7632..ddf06443e 100644 --- a/include/libint2/solidharmonics.h +++ b/include/libint2/solidharmonics.h @@ -83,21 +83,25 @@ class SolidHarmonicsCoefficients { static const SolidHarmonicsCoefficients& instance(unsigned int l) { static std::vector shg_coefs( SolidHarmonicsCoefficients::CtorHelperIter(0), - SolidHarmonicsCoefficients::CtorHelperIter(11)); - assert(l <= 10); // see coeff() for explanation of the upper limit on l + SolidHarmonicsCoefficients::CtorHelperIter(LIBINT_HARD_MAX_AM + 1)); + assert(l < shg_coefs.size()); return shg_coefs[l]; } + static SolidHarmonicsCoefficients make_instance(unsigned int l) { + return SolidHarmonicsCoefficients(l); + } + /// returns ptr to row values const Real* row_values(size_t r) const { return &values_[0] + row_offset_[r]; } /// returns ptr to row indices - const unsigned char* row_idx(size_t r) const { + const unsigned int* row_idx(size_t r) const { return &colidx_[0] + row_offset_[r]; } /// number of nonzero elements in row \c r - unsigned char nnz(size_t r) const { + unsigned int nnz(size_t r) const { return row_offset_[r + 1] - row_offset_[r]; } @@ -108,9 +112,9 @@ class SolidHarmonicsCoefficients { harmonic Ylm is requested. ---------------------------------------------------------------------------------------------*/ static Real coeff(int l, int m, int lx, int ly, int lz) { - using libint2::math::bc; - using libint2::math::df_Kminus1; - using libint2::math::fac; + using libint2::math::bc_real; + using libint2::math::df_Kminus1_real; + using libint2::math::fac_real; auto abs_m = std::abs(m); if ((lx + ly - abs_m) % 2) return 0.0; @@ -127,13 +131,17 @@ class SolidHarmonicsCoefficients { auto i = abs_m - lx; if (comp != parity(abs(i))) return 0.0; - assert(l <= 10); // libint2::math::fac[] is only defined up to 20 - Real pfac = - sqrt(((Real(fac[2 * lx]) * Real(fac[2 * ly]) * Real(fac[2 * lz])) / - fac[2 * l]) * - ((Real(fac[l - abs_m])) / (fac[l])) * (Real(1) / fac[l + abs_m]) * - (Real(1) / (fac[lx] * fac[ly] * fac[lz]))); - /* pfac = sqrt(fac[l-abs_m]/(fac[l]*fac[l]*fac[l+abs_m]));*/ + Real pfac; + pfac = sqrt(((fac_real(2 * lx) * fac_real(2 * ly) * + fac_real(2 * lz)) / + fac_real(2 * l)) * + ((fac_real(l - abs_m)) / (fac_real(l))) * + (Real(1) / fac_real(l + abs_m)) * + (Real(1) / (fac_real(lx) * fac_real(ly) * + fac_real(lz)))); + + /* pfac = + * sqrt(fac_real(l-abs_m)/(fac_real(l)*fac_real(l)*fac[l+abs_m]));*/ pfac /= (1L << l); if (m < 0) pfac *= parity((i - 1) / 2); @@ -144,19 +152,22 @@ class SolidHarmonicsCoefficients { auto i_max = (l - abs_m) / 2; Real sum = 0; for (auto i = i_min; i <= i_max; i++) { - Real pfac1 = bc(l, i) * bc(i, j); - pfac1 *= (Real(parity(i) * fac[2 * (l - i)]) / fac[l - abs_m - 2 * i]); + Real pfac1 = bc_real(l, i) * bc_real(i, j); + pfac1 *= (Real(parity(i) * fac_real(2 * (l - i))) / + fac_real(l - abs_m - 2 * i)); Real sum1 = 0.0; const int k_min = std::max((lx - abs_m) / 2, 0); const int k_max = std::min(j, lx / 2); for (int k = k_min; k <= k_max; k++) { if (lx - 2 * k <= abs_m) - sum1 += bc(j, k) * bc(abs_m, lx - 2 * k) * parity(k); + sum1 += bc_real(j, k) * bc_real(abs_m, lx - 2 * k) * + parity(k); } sum += pfac1 * sum1; } - sum *= sqrt(Real(df_Kminus1[2 * l]) / - (df_Kminus1[2 * lx] * df_Kminus1[2 * ly] * df_Kminus1[2 * lz])); + sum *= sqrt(df_Kminus1_real(2 * l) / + (df_Kminus1_real(2 * lx) * df_Kminus1_real(2 * ly) * + df_Kminus1_real(2 * lz))); Real result = (m == 0) ? pfac * sum : M_SQRT2 * pfac * sum; return result; @@ -164,25 +175,24 @@ class SolidHarmonicsCoefficients { private: std::vector values_; // elements - std::vector - row_offset_; // "pointer" to the beginning of each row - std::vector colidx_; // column indices - signed char l_; // the angular momentum quantum number + std::vector + row_offset_; // "pointer" to the beginning of each row + std::vector colidx_; // column indices + int l_; // the angular momentum quantum number void init() { - const unsigned short npure = 2 * l_ + 1; - const unsigned short ncart = (l_ + 1) * (l_ + 2) / 2; + const unsigned int npure = 2 * l_ + 1; + const unsigned int ncart = (l_ + 1) * (l_ + 2) / 2; std::vector full_coeff(npure * ncart); std::vector shg_indices; if (libint2::solid_harmonics_ordering() == libint2::SHGShellOrdering_Standard) { - for (signed char pure_idx = 0, m = -l_; pure_idx != npure; - ++pure_idx, ++m) + for (signed int pure_idx = 0, m = -l_; pure_idx != npure; ++pure_idx, ++m) shg_indices.push_back(m); } else if (libint2::solid_harmonics_ordering() == libint2::SHGShellOrdering_Gaussian) { - for (signed char pure_idx = 0, m = 0; pure_idx != npure; + for (signed int pure_idx = 0, m = 0; pure_idx != npure; ++pure_idx, m = (m > 0 ? -m : 1 - m)) shg_indices.push_back(m); } else { @@ -190,10 +200,10 @@ class SolidHarmonicsCoefficients { "libint2::solid_harmonics_ordering() value not recognized.")); } - for (signed char pure_idx = 0; pure_idx != npure; ++pure_idx) { + for (signed int pure_idx = 0; pure_idx != npure; ++pure_idx) { int m = shg_indices[pure_idx]; - signed char cart_idx = 0; - signed char lx, ly, lz; + int cart_idx = 0; + int lx, ly, lz; FOR_CART(lx, ly, lz, l_) full_coeff[pure_idx * ncart + cart_idx] = coeff(l_, m, lx, ly, lz); // std::cout << "Solid(" << (int)l_ << "," << (int)m << ") += Cartesian(" @@ -214,11 +224,11 @@ class SolidHarmonicsCoefficients { row_offset_.resize(npure + 1); // 3) copy { - unsigned short pc = 0; - unsigned short cnt = 0; - for (unsigned short p = 0; p != npure; ++p) { + unsigned int pc = 0; + unsigned int cnt = 0; + for (unsigned int p = 0; p != npure; ++p) { row_offset_[p] = cnt; - for (unsigned short c = 0; c != ncart; ++c, ++pc) { + for (unsigned int c = 0; c != ncart; ++c, ++pc) { if (full_coeff[pc] != 0.0) { values_[cnt] = full_coeff[pc]; colidx_[cnt] = c; diff --git a/include/libint2/util/intrinsic_types.h b/include/libint2/util/intrinsic_types.h index fc874d394..8f333f1d8 100644 --- a/include/libint2/util/intrinsic_types.h +++ b/include/libint2/util/intrinsic_types.h @@ -22,12 +22,20 @@ #define _libint2_include_libint2intrinsictypes_h_ #include +#ifdef __cplusplus +#include +#else #include +#endif /* determine default LIBINT2 64-bit integer */ #ifdef HAVE_STDINT_H +#ifdef __cplusplus +#include +#else #include +#endif /* because mpz_class does not mesh with long long types, only use those when * absolutely necessary */ #if UINT_LEAST64_MAX != ULONG_MAX diff --git a/tests/hartree-fock/hartree-fock++.cc b/tests/hartree-fock/hartree-fock++.cc index d2881183b..8d208898d 100644 --- a/tests/hartree-fock/hartree-fock++.cc +++ b/tests/hartree-fock/hartree-fock++.cc @@ -44,12 +44,14 @@ #endif // LIBINT2_HAVE_BTAS // Libint Gaussian integrals library -#include +#include #include #include +#include #include #include + #if !LIBINT2_CONSTEXPR_STATICS #include #endif @@ -76,6 +78,10 @@ typedef Eigen::Matrix typedef Eigen::DiagonalMatrix DiagonalMatrix; +#ifdef LIBINT2_HAVE_BTAS +typedef btas::Tensor tensor; +#endif + using libint2::Atom; using libint2::BasisSet; using libint2::BraKet; @@ -166,10 +172,8 @@ struct DFFockEngine { const BasisSet& dfbs; DFFockEngine(const BasisSet& _obs, const BasisSet& _dfbs) : obs(_obs), dfbs(_dfbs) {} - - typedef btas::RangeNd> Range3d; - typedef btas::Tensor Tensor3d; - Tensor3d xyK; + typedef btas::Tensor Tensor; + Tensor xyK; // a DF-based builder, using coefficients of occupied MOs Matrix compute_2body_fock_dfC(const Matrix& Cocc); @@ -217,9 +221,18 @@ int main(int argc, char* argv[]) { const auto filename = (argc > 1) ? argv[1] : "h2o.xyz"; const auto basisname = (argc > 2) ? argv[2] : "aug-cc-pVDZ"; bool do_density_fitting = false; + double cholesky_threshold = 1e-4; #ifdef HAVE_DENSITY_FITTING do_density_fitting = (argc > 3); - const auto dfbasisname = do_density_fitting ? argv[3] : ""; + const std::string dfbasisname = do_density_fitting ? argv[3] : ""; + + // if autoDF use e.g. -> autodf(1e-6) as command line argument + if (dfbasisname.rfind("autodf", 0) == 0) { + cholesky_threshold = + std::stod(dfbasisname.substr(7, dfbasisname.length() - 8)); + std::cout << "New Cholesky threshold for generating df basis set = " + << cholesky_threshold << std::endl; + } #endif std::vector atoms = read_geometry(filename); @@ -275,10 +288,24 @@ int main(int argc, char* argv[]) { BasisSet obs(basisname, atoms); cout << "orbital basis set rank = " << obs.nbf() << endl; + // initializes the Libint integrals library ... now ready to compute + libint2::initialize(); + #ifdef HAVE_DENSITY_FITTING BasisSet dfbs; if (do_density_fitting) { - dfbs = BasisSet(dfbasisname, atoms); + if (dfbasisname.rfind("autodf", 0) == 0) { + std::vector dfbs_shells; + for (auto&& atom : atoms) { + libint2::DFBasisSetGenerator dfbs_generator(basisname, atom, + cholesky_threshold); + auto reduced_shells = dfbs_generator.reduced_shells(); + dfbs_shells.insert(dfbs_shells.end(), reduced_shells.begin(), + reduced_shells.end()); + } + dfbs = BasisSet(std::move(dfbs_shells)); + } else + dfbs = BasisSet(dfbasisname, atoms); cout << "density-fitting basis set rank = " << dfbs.nbf() << endl; } #endif // HAVE_DENSITY_FITTING @@ -287,13 +314,6 @@ int main(int argc, char* argv[]) { /*** compute 1-e integrals ***/ /*** =========================== ***/ - // initializes the Libint integrals library ... now ready to compute - libint2::set_solid_harmonics_ordering(libint2::SHGShellOrdering_Gaussian); - libint2::initialize(); - printf("Configuration G: sho=%d components=%s\n", - libint2::solid_harmonics_ordering(), - libint2::configuration_accessor().c_str()); - // compute OBS non-negligible shell-pair list { const auto tstart = std::chrono::high_resolution_clock::now(); @@ -343,7 +363,8 @@ int main(int argc, char* argv[]) { { // use SOAD as the guess density const auto tstart = std::chrono::high_resolution_clock::now(); - auto D_minbs = compute_soad(atoms); // compute guess in minimal basis + auto D_minbs = + libint2::compute_soad(atoms); // compute guess in minimal basis BasisSet minbs("STO-3G", atoms); if (minbs == obs) D = D_minbs; @@ -689,7 +710,9 @@ int main(int argc, char* argv[]) { } { // compute hessian - LIBINT_MAYBE_UNUSED const auto ncoords = 3 * atoms.size(); + const auto ncoords = 3 * atoms.size(); + // # of elems in upper triangle + const auto nelem = ncoords * (ncoords + 1) / 2; #if LIBINT2_DERIV_ONEBODY_ORDER > 1 // compute 1-e hessian Matrix H1 = Matrix::Zero(ncoords, ncoords); @@ -721,7 +744,7 @@ int main(int argc, char* argv[]) { } std::cout << "** 1-body hessian = "; - for (auto row = 0; row != ncoords; ++row) { + for (auto row = 0, i = 0; row != ncoords; ++row) { for (auto col = row; col != ncoords; ++col) { std::cout << H1(row, col) << " "; } @@ -729,7 +752,7 @@ int main(int argc, char* argv[]) { std::cout << std::endl; std::cout << "** Pulay hessian = "; - for (auto row = 0; row != ncoords; ++row) { + for (auto row = 0, i = 0; row != ncoords; ++row) { for (auto col = row; col != ncoords; ++col) { std::cout << H_Pulay(row, col) << " "; } @@ -754,7 +777,7 @@ int main(int argc, char* argv[]) { } std::cout << "** 2-body hessian = "; - for (auto row = 0; row != ncoords; ++row) { + for (auto row = 0, i = 0; row != ncoords; ++row) { for (auto col = row; col != ncoords; ++col) { std::cout << H2(row, col) << " "; } @@ -817,7 +840,7 @@ int main(int argc, char* argv[]) { } std::cout << "** nuclear repulsion hessian = "; - for (auto row = 0; row != ncoords; ++row) { + for (auto row = 0, i = 0; row != ncoords; ++row) { for (auto col = row; col != ncoords; ++col) { std::cout << HN(row, col) << " "; } @@ -826,7 +849,7 @@ int main(int argc, char* argv[]) { auto H = H1 + H_Pulay + H2 + HN; std::cout << "** Hartree-Fock hessian = "; - for (auto row = 0; row != ncoords; ++row) { + for (auto row = 0, i = 0; row != ncoords; ++row) { for (auto col = row; col != ncoords; ++col) { std::cout << H(row, col) << " "; } @@ -965,7 +988,7 @@ std::array::nopers> compute_1body_ints( // loop over unique shell pairs, {s1,s2} such that s1 >= s2 // this is due to the permutational symmetry of the real integrals over // Hermitian operators: (1|2) = (2|1) - for (auto s1 = 0l; s1 != nshells; ++s1) { + for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) { auto bf1 = shell2bf[s1]; // first basis function in this shell auto n1 = obs[s1].size(); @@ -977,6 +1000,8 @@ std::array::nopers> compute_1body_ints( auto bf2 = shell2bf[s2]; auto n2 = obs[s2].size(); + auto n12 = n1 * n2; + // compute shell pair; return is the pointer to the buffer engines[thread_id].compute(obs[s1], obs[s2]); @@ -1046,7 +1071,7 @@ std::vector compute_1body_ints_deriv(unsigned deriv_order, // loop over unique shell pairs, {s1,s2} such that s1 >= s2 // this is due to the permutational symmetry of the real integrals over // Hermitian operators: (1|2) = (2|1) - for (auto s1 = 0l; s1 != nshells; ++s1) { + for (auto s1 = 0l, s12 = 0l; s1 != nshells; ++s1) { auto bf1 = shell2bf[s1]; // first basis function in this shell auto n1 = obs[s1].size(); auto atom1 = shell2atom[s1]; @@ -1061,6 +1086,8 @@ std::vector compute_1body_ints_deriv(unsigned deriv_order, auto n2 = obs[s2].size(); auto atom2 = shell2atom[s2]; + auto n12 = n1 * n2; + // compute shell pair; return is the pointer to the buffer engines[thread_id].compute(obs[s1], obs[s2]); @@ -1185,7 +1212,7 @@ std::vector compute_1body_ints_deriv(unsigned deriv_order, ShellSetDerivIterator shellset_diter(deriv_order, nderivcenters_shset); while (shellset_diter) { - LIBINT_MAYBE_UNUSED const auto& deriv = *shellset_diter; + const auto& deriv = *shellset_diter; } } } // copy shell block switch @@ -1505,7 +1532,6 @@ Matrix compute_2body_2index_ints(const BasisSet& bs) { } auto shell2bf = bs.shell2bf(); - auto unitshell = Shell::unit(); auto compute = [&](int thread_id) { auto& engine = engines[thread_id]; @@ -1559,6 +1585,7 @@ Matrix compute_2body_fock(const BasisSet& obs, const Matrix& D, auto fock_precision = precision; // standard approach is to omit *contributions* to the Fock matrix smaller // than fock_precision ... this relies on massive amount of error cancellation + auto max_nprim = obs.max_nprim(); auto needed_engine_precision = (fock_precision / D_shblk_norm.maxCoeff()); assert(needed_engine_precision > max_engine_precision && "using precomputed shell pair data limits the max engine precision" @@ -1767,6 +1794,7 @@ std::vector compute_2body_fock_deriv(const BasisSet& obs, auto fock_precision = precision; // standard approach is to omit *contributions* to the Fock matrix smaller // than fock_precision ... this relies on massive amount of error cancellation + auto max_nprim = obs.max_nprim(); auto needed_engine_precision = (fock_precision / D_shblk_norm.maxCoeff()); assert(needed_engine_precision > max_engine_precision && "using precomputed shell pair data limits the max engine precision" @@ -1939,6 +1967,8 @@ std::vector compute_2body_fock_deriv(const BasisSet& obs, const int xyz = d % 3; auto coord = shell_atoms[a] * 3 + xyz; + auto& g = G[thread_id * nderiv + coord]; + int coord1 = 0, coord2 = 0; add_shellset_to_dest(thread_id * nderiv + coord, d, coord1, @@ -2029,7 +2059,7 @@ Matrix compute_2body_fock_general(const BasisSet& obs, const Matrix& D, double precision) { const auto n = obs.nbf(); const auto nshells = obs.size(); - LIBINT_MAYBE_UNUSED const auto n_D = D_bs.nbf(); + const auto n_D = D_bs.nbf(); assert(D.cols() == D.rows() && D.cols() == n_D); using libint2::nthreads; @@ -2158,11 +2188,6 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) { std::vector> timers(nthreads); for (auto& timer : timers) timer.set_now_overhead(25); - typedef btas::RangeNd> Range1d; - typedef btas::RangeNd> Range2d; - typedef btas::Tensor Tensor1d; - typedef btas::Tensor Tensor2d; - // using first time? compute 3-center ints and transform to inv sqrt // representation if (xyK.size() == 0) { @@ -2187,7 +2212,7 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) { auto shell2bf = obs.shell2bf(); auto shell2bf_df = dfbs.shell2bf(); - Tensor3d Zxy{ndf, n, n}; + Tensor Zxy{ndf, n, n}; auto lambda = [&](int thread_id) { auto& engine = engines[thread_id]; @@ -2264,12 +2289,12 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) { // std::cout << "||V^-1 - L^-1^t L^-1|| = " << (V.inverse() - Linv_t * // Linv_t.transpose()).norm() << std::endl; - Tensor2d K{ndf, ndf}; + Tensor K{ndf, ndf}; std::copy(Linv_t.data(), Linv_t.data() + ndf * ndf, K.begin()); - xyK = Tensor3d{n, n, ndf}; + xyK = Tensor{n, n, ndf}; btas::contract(1.0, Zxy, {1, 2, 3}, K, {1, 4}, 0.0, xyK, {2, 3, 4}); - Zxy = Tensor3d{0, 0, 0}; // release memory + Zxy = Tensor{0, 0, 0}; // release memory timers[0].stop(2); std::cout << "time for integrals metric tform = " << timers[0].read(2) @@ -2280,12 +2305,12 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) { timers[0].start(3); const auto nocc = Cocc.cols(); - Tensor2d Co{n, nocc}; + Tensor Co{n, nocc}; std::copy(Cocc.data(), Cocc.data() + n * nocc, Co.begin()); - Tensor3d xiK{n, nocc, ndf}; + Tensor xiK{n, nocc, ndf}; btas::contract(1.0, xyK, {1, 2, 3}, Co, {2, 4}, 0.0, xiK, {1, 4, 3}); - Tensor2d G{n, n}; + Tensor G{n, n}; btas::contract(1.0, xiK, {1, 2, 3}, xiK, {4, 2, 3}, 0.0, G, {1, 4}); timers[0].stop(3); @@ -2294,9 +2319,9 @@ Matrix DFFockEngine::compute_2body_fock_dfC(const Matrix& Cocc) { // compute Coulomb timers[0].start(4); - Tensor1d Jtmp{ndf}; + Tensor Jtmp{ndf}; btas::contract(1.0, xiK, {1, 2, 3}, Co, {1, 2}, 0.0, Jtmp, {3}); - xiK = Tensor3d{0, 0, 0}; + xiK = Tensor{0, 0, 0}; btas::contract(2.0, xyK, {1, 2, 3}, Jtmp, {3}, -1.0, G, {1, 2}); timers[0].stop(4); @@ -2399,11 +2424,13 @@ void api_basic_compile_test(const BasisSet& obs, eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit()); eri2_engine.compute(obs[s1], obs[s2]); + auto bf1 = shell2bf[s1]; // first basis function in first shell auto n1 = obs[s1].size(); // number of basis functions in first shell + auto bf2 = shell2bf[s2]; // first basis function in second shell auto n2 = obs[s2].size(); // number of basis functions in second shell - LIBINT_MAYBE_UNUSED const auto* buf4 = results4[0]; - LIBINT_MAYBE_UNUSED const auto* buf2 = results2[0]; + const auto* buf4 = results4[0]; + const auto* buf2 = results2[0]; // this iterates over integrals in the order they are packed in array // ints_shellset @@ -2427,13 +2454,16 @@ void api_basic_compile_test(const BasisSet& obs, eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], obs[s3]); eri3_engine.compute(obs[s1], obs[s2], obs[s3]); + auto bf1 = shell2bf[s1]; // first basis function in first shell auto n1 = obs[s1].size(); // number of basis functions in first shell + auto bf2 = shell2bf[s2]; // first basis function in second shell auto n2 = - obs[s2].size(); // number of basis functions in second shell + obs[s2].size(); // number of basis functions in second shell + auto bf3 = shell2bf[s3]; // first basis function in third shell auto n3 = obs[s3].size(); // number of basis functions in third shell - LIBINT_MAYBE_UNUSED const auto* buf4 = results4[0]; - LIBINT_MAYBE_UNUSED const auto* buf3 = results3[0]; + const auto* buf4 = results4[0]; + const auto* buf3 = results3[0]; // this iterates over integrals in the order they are packed in array // ints_shellset @@ -2460,13 +2490,15 @@ void api_basic_compile_test(const BasisSet& obs, eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit()); eri2_engine.compute(obs[s1], obs[s2]); + auto bf1 = shell2bf[s1]; // first basis function in first shell auto n1 = obs[s1].size(); // number of basis functions in first shell + auto bf2 = shell2bf[s2]; // first basis function in second shell auto n2 = obs[s2].size(); // number of basis functions in second shell // loop over derivative shell sets for (auto d = 0; d != 6; ++d) { - LIBINT_MAYBE_UNUSED const auto* buf4 = results4[d < 3 ? d : d + 3]; - LIBINT_MAYBE_UNUSED const auto* buf2 = results2[d]; + const auto* buf4 = results4[d < 3 ? d : d + 3]; + const auto* buf2 = results2[d]; // this iterates over integrals in the order they are packed in array // ints_shellset @@ -2493,7 +2525,9 @@ void api_basic_compile_test(const BasisSet& obs, eri4_engine.compute(obs[s1], Shell::unit(), obs[s2], Shell::unit()); eri2_engine.compute(obs[s1], obs[s2]); + auto bf1 = shell2bf[s1]; // first basis function in first shell auto n1 = obs[s1].size(); // number of basis functions in first shell + auto bf2 = shell2bf[s2]; // first basis function in second shell auto n2 = obs[s2].size(); // number of basis functions in second shell // loop over derivative shell sets @@ -2502,8 +2536,8 @@ void api_basic_compile_test(const BasisSet& obs, for (auto d2 = d1; d2 != 6; ++d2, ++d12) { const auto dd2 = d2 < 3 ? d2 : d2 + 3; const auto dd12 = dd1 * (24 - dd1 - 1) / 2 + dd2; - LIBINT_MAYBE_UNUSED const auto* buf4 = results4[dd12]; - LIBINT_MAYBE_UNUSED const auto* buf2 = results2[d12]; + const auto* buf4 = results4[dd12]; + const auto* buf2 = results2[d12]; // this iterates over integrals in the order they are packed in // array diff --git a/tests/unit/Makefile b/tests/unit/Makefile index bd0257772..6b8fcc213 100644 --- a/tests/unit/Makefile +++ b/tests/unit/Makefile @@ -26,7 +26,8 @@ CXXDEPENDFLAGS = -M TEST1 = test CXXTEST1SRC = test.cc test-core.cc test-permute.cc \ test-1body.cc test-core-ints.cc test-c-api.cc test-shell-order.cc \ - test-util.cc c-api-util.cc test-2body.cc test-precision.cc test-basis.cc + test-util.cc c-api-util.cc test-2body.cc test-precision.cc test-basis.cc\ + test-df-autogen.cc CXXTEST1OBJ = $(CXXTEST1SRC:%.cc=%.$(OBJSUF)) CXXTEST1DEP = $(CXXTEST1SRC:%.cc=%.$(DEPSUF)) CTEST1SRC = c-api.c diff --git a/tests/unit/test-core.cc b/tests/unit/test-core.cc index dd7e0d6f0..5b60fc7c2 100644 --- a/tests/unit/test-core.cc +++ b/tests/unit/test-core.cc @@ -18,9 +18,24 @@ * */ +#include + #include "catch.hpp" #include "fixture.h" +TEST_CASE("SolidHarmonicsCoefficients ctor", "[shell]") { + using libint2::solidharmonics::SolidHarmonicsCoefficients; + REQUIRE_NOTHROW(SolidHarmonicsCoefficients::instance(12)); + auto sh12 = SolidHarmonicsCoefficients::instance(12); + CHECK_NOTHROW(sh12.coeff(12, 0, 0, 0, 12)); + CHECK(sh12.coeff(12, 0, 0, 0, 12) == Approx(1)); + CHECK_NOTHROW( + SolidHarmonicsCoefficients::make_instance(25).coeff(25, 0, 0, + 0, 25)); + CHECK(SolidHarmonicsCoefficients::make_instance(25).coeff( + 25, 0, 0, 0, 25) == Approx(1)); +} + TEST_CASE("Shell ctor", "[shell]") { REQUIRE_NOTHROW(Shell{}); auto s0 = Shell{}; diff --git a/tests/unit/test-df-autogen.cc b/tests/unit/test-df-autogen.cc new file mode 100644 index 000000000..d6021cb17 --- /dev/null +++ b/tests/unit/test-df-autogen.cc @@ -0,0 +1,188 @@ +#include +#include +#include +#include + +#include +#include +#include + +#include "catch.hpp" +#include "fixture.h" + +Eigen::Tensor compute_eri3(const BasisSet &obs, + const BasisSet &dfbs) { + long nshells = obs.size(); + long nshells_df = dfbs.size(); + const auto &unitshell = libint2::Shell::unit(); + + // construct the 2-electron 3-center repulsion integrals engine + // since the code assumes (xx|xs) braket, and Engine/libint only produces + // (xs|xx), use 4-center engine + libint2::Engine engine(libint2::Operator::coulomb, + std::max(obs.max_nprim(), dfbs.max_nprim()), + std::max(obs.max_l(), dfbs.max_l()), 0); + engine.set(BraKet::xs_xx); + const auto shell2bf = obs.shell2bf(); + const auto shell2bf_df = dfbs.shell2bf(); + long nbf = obs.nbf(); + long ndf = dfbs.nbf(); + + Eigen::Tensor result(ndf, nbf, nbf); + const auto &buf = engine.results(); + for (long s1 = 0; s1 != nshells_df; ++s1) { + long bf1 = shell2bf_df[s1]; + long n1 = dfbs[s1].size(); + for (long s2 = 0; s2 != nshells; ++s2) { + long bf2 = shell2bf[s2]; + long n2 = obs[s2].size(); + for (long s3 = 0; s3 != nshells; + ++s3) { // only loop over unique ao shells + long bf3 = shell2bf[s3]; + long n3 = obs[s3].size(); + engine.compute2( + dfbs[s1], unitshell, obs[s2], obs[s3]); + long fij = 0; + for (long f = 0; f != n1; ++f) { + for (long i = 0; i != n2; ++i) { + for (long j = 0; j != n3; ++j) { + result(bf1 + f, bf2 + i, bf3 + j) = buf[0][fij]; + ++fij; + } + } + } + } + } + } + + return result; +} + +Eigen::MatrixXd compute_eri2(const BasisSet &dfbs) { + long ndf = dfbs.nbf(); + Eigen::MatrixXd result(ndf, ndf); + result.fill(0.0); + libint2::Engine engine(libint2::Operator::coulomb, dfbs.max_nprim(), + dfbs.max_l(), 0); + engine.set(BraKet::xs_xs); + engine.set(libint2::ScreeningMethod::Conservative); + const auto shell2bf_df = dfbs.shell2bf(); + const auto &buf = engine.results(); + for (long s1 = 0; s1 != dfbs.size(); ++s1) { + long bf1 = shell2bf_df[s1]; + long n1 = dfbs[s1].size(); + for (long s2 = 0; s2 != dfbs.size(); ++s2) { + long bf2 = shell2bf_df[s2]; + long n2 = dfbs[s2].size(); + engine.compute(dfbs[s1], dfbs[s2]); + // "map" buffer to a const Eigen Matrix, and copy it to the corresponding + // blocks of the result + Eigen::Map buf_mat(buf[0], n1, n2); + result.block(bf1, bf2, n1, n2) = buf_mat; + } + } + return result; +} + +Eigen::Tensor compute_eri4(const BasisSet &obs) { + long nshells = obs.size(); + libint2::Engine engine(libint2::Operator::coulomb, + std::max(obs.max_nprim(), obs.max_nprim()), + std::max(obs.max_l(), obs.max_l()), 0); + engine.set(BraKet::xx_xx); + const auto shell2bf = obs.shell2bf(); + long nbf = obs.nbf(); + + Eigen::Tensor result(nbf, nbf, nbf, nbf); + const auto &buf = engine.results(); + for (long s1 = 0; s1 != nshells; ++s1) { + long bf1 = shell2bf[s1]; + long n1 = obs[s1].size(); + // TODO: loop over all unique quartets: s1 >= s2 >= s3 >= s4 + for (long s2 = 0; s2 != nshells; ++s2) { + long bf2 = shell2bf[s2]; + long n2 = obs[s2].size(); + for (long s3 = 0; s3 != nshells; ++s3) { + long bf3 = shell2bf[s3]; + long n3 = obs[s3].size(); + for (long s4 = 0; s4 != nshells; ++s4) { + long bf4 = shell2bf[s4]; + long n4 = obs[s4].size(); + engine.compute2( + obs[s1], obs[s2], obs[s3], obs[s4]); + long ijkl = 0; + for (long i = 0; i != n1; ++i) { + for (long j = 0; j != n2; ++j) { + for (long k = 0; k != n3; ++k) { + for (long l = 0; l != n4; ++l) { + result(bf1 + i, bf2 + j, bf3 + k, bf4 + l) = buf[0][ijkl]; + ++ijkl; + } + } + } + } + } + } + } + } + + return result; +} + +TEST_CASE("DFBS-Generator", "[dfbs-generator]") { +#if defined(LIBINT2_SUPPORT_ERI) && defined(LIBINT2_SUPPORT_ERI3) && \ + defined(LIBINT2_SUPPORT_ERI2) + // Will use Neon as a test case + libint2::Atom atom; + atom.atomic_number = 2; + atom.x = 0.0; + atom.y = 0.0; + atom.z = 0.0; + + // Using STO-3G basis set for Neon + libint2::BasisSet bs("STO-3G", {atom}); + long nao = bs.nbf(); + + // Use automatic DF basis set generator with tight cholesky threshold + libint2::DFBasisSetGenerator dfbs_generator(bs.shells(), 1e-4); + const auto dfbs = dfbs_generator.reduced_basis(); + long ndf = dfbs.nbf(); + + // Compute 2-center integrals + const auto eri2 = compute_eri2(dfbs); + + // Compute 3-center integrals + const auto eri3 = compute_eri3(bs, dfbs); + + // Compute 4-center integrals + const auto eri4 = compute_eri4(bs); + + // Compute L_inv + Eigen::LLT V_LLt(eri2); + Eigen::MatrixXd I = Eigen::MatrixXd::Identity(ndf, ndf); + auto L = V_LLt.matrixL(); + Eigen::MatrixXd Linv_t = L.solve(I).transpose(); + auto eri2_inv = Linv_t * Linv_t.transpose(); + + // reconstruct 4-center integrals and compare with original + double norm = 0.0; + Eigen::Tensor eri4_df(nao, nao, nao, nao); + for (long i = 0; i < nao; i++) { + for (long j = 0; j < nao; j++) { + for (long k = 0; k < nao; k++) { + for (long l = 0; l < nao; l++) { + for (long X = 0; X < ndf; X++) { + for (long Y = 0; Y < ndf; Y++) { + eri4_df(i, j, k, l) += + eri3(X, i, j) * eri2_inv(X, Y) * eri3(Y, k, l); + } + } + norm += std::pow(eri4_df(i, j, k, l) - eri4(i, j, k, l), 2); + } + } + } + } + REQUIRE_NOTHROW(norm < 1e-10); +#endif // LIBINT2_SUPPORT_ERI && LIBINT2_SUPPORT_ERI3 && LIBINT2_SUPPORT_ERI2 +} +//